# Copyright 2013 The Distro Tracker Developers
# See the COPYRIGHT file at the top-level directory of this distribution and
# at https://deb.li/DTAuthors
#
# This file is part of Distro Tracker. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
# including this file, may be copied, modified, propagated, or distributed
# except according to the terms contained in the LICENSE file.
"""
Implements the Distro Tracker tasks necessary for extracting interesting
package source files.
"""
import logging
import os
from pathlib import Path

from django.conf import settings
from django.core.files import File

from distro_tracker.core.models import ExtractedSourceFile
from distro_tracker.core.tasks import BaseTask
from distro_tracker.core.tasks.mixins import ProcessSourcePackage
from distro_tracker.core.tasks.schedulers import IntervalScheduler
from distro_tracker.core.utils.packages import AptCache

# Module-level logger registered under the 'distro_tracker.core.tasks' name.
logger = logging.getLogger('distro_tracker.core.tasks')
class ExtractSourcePackageFiles(BaseTask, ProcessSourcePackage):
    """
    A task which extracts some files from a new source package version.
    The extracted files are:

    - debian/changelog
    - debian/copyright
    - debian/rules
    - debian/control
    - debian/watch
    """

    class Scheduler(IntervalScheduler):
        # Interval handed to IntervalScheduler; presumably expressed in
        # seconds (i.e. one hour) -- confirm against IntervalScheduler.
        interval = 3600

    # Names, relative to the ``debian/`` directory of a source package, of
    # the files managed by this task.
    ALL_FILES_TO_EXTRACT = (
        'changelog',
        'copyright',
        'rules',
        'control',
        'watch',
    )

    def items_extend_queryset(self, queryset):
        """
        Extend the queryset of items to process so that the related
        ``extracted_source_files`` are prefetched; :meth:`execute_main`
        reads them for every item.

        :param queryset: The queryset to extend.
        :returns: The queryset with the prefetch applied.
        """
        return queryset.prefetch_related('extracted_source_files')

    def extract_files(self, source_package, files_to_extract=None):
        """
        Extract files for just the given source package.

        Each requested file that exists in the ``debian/`` directory of the
        retrieved source is stored as a new
        :class:`ExtractedSourceFile
        <distro_tracker.core.models.ExtractedSourceFile>`. Missing files
        are silently skipped.

        :param source_package: The source package whose files are extracted.
        :type source_package: :class:`SourcePackage
            <distro_tracker.core.models.SourcePackage>`
        :param files_to_extract: Names of the files (relative to the
            ``debian/`` directory) which should be extracted. When ``None``,
            every name in :attr:`ALL_FILES_TO_EXTRACT` is used.
        :type files_to_extract: An iterable of file names
        """
        # Create the APT cache on first use and keep it on the instance so
        # that later calls reuse it.
        if not hasattr(self, 'cache'):
            self.cache = AptCache()

        source_directory = self.cache.retrieve_source(
            source_package.source_package_name.name,
            source_package.version,
            debian_directory_only=True)
        debian_directory = os.path.join(source_directory, 'debian')

        if files_to_extract is None:
            files_to_extract = self.ALL_FILES_TO_EXTRACT

        for file_name in files_to_extract:
            file_path = os.path.join(debian_directory, file_name)
            if not os.path.exists(file_path):
                # The package does not ship this file, nothing to store.
                continue
            # The file must stay open while the model instance is created,
            # as its content is read from the wrapped file object.
            with open(file_path, 'rb') as f:
                ExtractedSourceFile.objects.create(
                    source_package=source_package,
                    extracted_file=File(f),
                    name=file_name)

    def execute_main(self):
        """
        Run the task.

        The work is done in three steps:

        1. delete the database entries whose name is no longer listed in
           :attr:`ALL_FILES_TO_EXTRACT`;
        2. extract the missing files of every pending source package;
        3. delete the files below ``MEDIA_ROOT/packages`` which look like
           they were created by this task but are no longer referenced by
           any database entry.
        """
        # First remove all source files which are no longer to be included.
        ExtractedSourceFile.objects.exclude(
            name__in=self.ALL_FILES_TO_EXTRACT).delete()

        # Process pending items
        for srcpkg in self.items_to_process():
            # Save what has been processed when it takes long enough that we
            # had to extend the lock
            if self.extend_lock():
                self.save_data()

            already_extracted = {
                extracted_file.name
                for extracted_file in srcpkg.extracted_source_files.all()
            }
            files_to_extract = [
                file_name
                for file_name in self.ALL_FILES_TO_EXTRACT
                if file_name not in already_extracted
            ]
            if files_to_extract:
                try:
                    self.extract_files(srcpkg, files_to_extract)
                    self.item_mark_processed(srcpkg)
                except Exception:
                    # Log and move on to the next package: the item is left
                    # unmarked when the failure happened before it could be
                    # marked as processed.
                    logger.exception(
                        'Problem extracting source files for %s version %s',
                        srcpkg, srcpkg.version)
            else:
                self.item_mark_processed(srcpkg)

        # Remove extracted files associated to vanished source packages
        referenced_paths = {
            esf.extracted_file.name
            for esf in ExtractedSourceFile.objects.only('extracted_file')
        }
        # Only files whose name starts with "<managed name>-" are candidates
        # for removal; str.startswith() accepts a tuple of prefixes.
        managed_prefixes = tuple(
            "%s-" % name for name in self.ALL_FILES_TO_EXTRACT)

        media_root = Path(settings.MEDIA_ROOT)
        for root, _dirs, files in os.walk(media_root / 'packages'):
            root_path = Path(root)
            relative_root = root_path.relative_to(media_root)

            for filename in files:
                full_path = root_path / filename

                # Stored names are compared relative to MEDIA_ROOT.
                if str(relative_root / filename) in referenced_paths:
                    # This file is referenced in the database, keep it
                    continue

                if not filename.startswith(managed_prefixes):
                    # Ignore this file, it's not managed by this task
                    logger.debug("Ignore unrelated file %s", full_path)
                    continue

                logger.info("Removing outdated file %s", full_path)
                full_path.unlink()