1# Copyright 2013 The Distro Tracker Developers 

2# See the COPYRIGHT file at the top-level directory of this distribution and 

3# at https://deb.li/DTAuthors 

4# 

5# This file is part of Distro Tracker. It is subject to the license terms 

6# in the LICENSE file found in the top-level directory of this 

7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker, 

8# including this file, may be copied, modified, propagated, or distributed 

9# except according to the terms contained in the LICENSE file. 

10""" 

Implements the Distro Tracker tasks necessary for extracting interesting
package source files.

13""" 

14import logging 

15import os 

16from pathlib import Path 

17 

18from django.conf import settings 

19from django.core.files import File 

20 

21from distro_tracker.core.models import ExtractedSourceFile 

22from distro_tracker.core.tasks import BaseTask 

23from distro_tracker.core.tasks.mixins import ProcessSourcePackage 

24from distro_tracker.core.tasks.schedulers import IntervalScheduler 

25from distro_tracker.core.utils.packages import AptCache 

26 

# Module-level logger; the name is fixed explicitly rather than derived from
# ``__name__``.
logger = logging.getLogger('distro_tracker.core.tasks')

28 

29 

class ExtractSourcePackageFiles(BaseTask, ProcessSourcePackage):
    """
    A task which extracts some files from a new source package version.
    The extracted files are:

    - debian/changelog
    - debian/copyright
    - debian/rules
    - debian/control
    - debian/watch
    """

    class Scheduler(IntervalScheduler):
        # Scheduling interval (presumably in seconds, i.e. hourly -- confirm
        # against IntervalScheduler).
        interval = 3600

    # Names, relative to the ``debian/`` directory, of the files to extract.
    ALL_FILES_TO_EXTRACT = (
        'changelog',
        'copyright',
        'rules',
        'control',
        'watch',
    )

    def items_extend_queryset(self, queryset):
        """
        Return ``queryset`` with the ``extracted_source_files`` relation
        prefetched.

        :meth:`execute_main` reads that relation for every item it handles.
        NOTE(review): presumably a hook called by the ProcessSourcePackage
        mixin when building the queryset of items -- confirm.
        """
        return queryset.prefetch_related('extracted_source_files')

    def extract_files(self, source_package, files_to_extract=None):
        """
        Extract files for just the given source package.

        The ``debian`` directory of the package is retrieved through an
        :class:`AptCache <distro_tracker.core.utils.packages.AptCache>`
        (created on first use and kept on the task instance). Each requested
        file that exists there is stored as a new
        :class:`ExtractedSourceFile
        <distro_tracker.core.models.ExtractedSourceFile>`; files that are
        not present are skipped.

        :type source_package: :class:`SourcePackage
            <distro_tracker.core.models.SourcePackage>`
        :type files_to_extract: An iterable of file names which should be
            extracted. When ``None``, all of :attr:`ALL_FILES_TO_EXTRACT`
            are extracted.
        """
        if not hasattr(self, 'cache'):
            self.cache = AptCache()

        source_directory = self.cache.retrieve_source(
            source_package.source_package_name.name,
            source_package.version,
            debian_directory_only=True)
        debian_directory = os.path.join(source_directory, 'debian')

        if files_to_extract is None:
            files_to_extract = self.ALL_FILES_TO_EXTRACT

        for file_name in files_to_extract:
            file_path = os.path.join(debian_directory, file_name)
            if not os.path.exists(file_path):
                # Not every package ships every file.
                continue
            # The model instance is created while the file is still open so
            # that its content can be read when the instance is saved.
            with open(file_path, 'rb') as f:
                ExtractedSourceFile.objects.create(
                    source_package=source_package,
                    extracted_file=File(f),
                    name=file_name)

    def execute_main(self):
        """
        Run the task.

        1. Delete the extracted files whose name is no longer listed in
           :attr:`ALL_FILES_TO_EXTRACT`.
        2. For each pending source package, extract the files which have not
           been extracted yet and mark the package as processed. A package
           whose extraction raised an exception is logged and left
           unmarked.
        3. Delete the files found on disk which are managed by this task but
           are no longer referenced in the database.
        """
        # First remove all source files which are no longer to be included.
        ExtractedSourceFile.objects.exclude(
            name__in=self.ALL_FILES_TO_EXTRACT).delete()

        # Process pending items
        for srcpkg in self.items_to_process():
            # Save what has been processed when it takes long enough that we
            # had to extend the lock
            if self.extend_lock():
                self.save_data()

            already_extracted = {
                extracted_file.name
                for extracted_file in srcpkg.extracted_source_files.all()
            }
            files_to_extract = [
                file_name
                for file_name in self.ALL_FILES_TO_EXTRACT
                if file_name not in already_extracted
            ]
            if not files_to_extract:
                # Nothing is missing for this package.
                self.item_mark_processed(srcpkg)
                continue

            try:
                self.extract_files(srcpkg, files_to_extract)
                self.item_mark_processed(srcpkg)
            except Exception:
                # Keep going with the other packages; this one is not marked
                # as processed.
                logger.exception(
                    'Problem extracting source files for %s version %s',
                    srcpkg, srcpkg.version)

        # Remove extracted files associated to vanished source packages
        self._remove_unreferenced_files()

    def _remove_unreferenced_files(self):
        """
        Delete the files below ``MEDIA_ROOT/packages`` which are managed by
        this task but which no :class:`ExtractedSourceFile
        <distro_tracker.core.models.ExtractedSourceFile>` refers to.
        """
        referenced_files = {
            esf.extracted_file.name
            for esf in ExtractedSourceFile.objects.only('extracted_file')
        }
        # Only files whose name starts with "<managed name>-" are candidates
        # for removal (presumably the naming scheme used when the extracted
        # files are stored -- confirm against the model's upload path).
        managed_prefixes = tuple(
            "%s-" % name for name in self.ALL_FILES_TO_EXTRACT)

        media_root = Path(settings.MEDIA_ROOT)
        for root, _dirs, files in os.walk(media_root / 'packages'):
            root_path = Path(root)
            relative_root = root_path.relative_to(media_root)

            for filename in files:
                full_path = root_path / filename

                # Stored names are compared against the path relative to
                # MEDIA_ROOT.
                if str(relative_root / filename) in referenced_files:
                    # This file is referenced in the database, keep it
                    continue

                if not filename.startswith(managed_prefixes):
                    # Ignore this file, it's not managed by this task
                    logger.debug("Ignore unrelated file %s", full_path)
                    continue

                logger.info("Removing outdated file %s", full_path)
                full_path.unlink()