Coverage for distro_tracker/core/retrieve_data.py: 85%
419 statements
« prev ^ index » next coverage.py v6.5.0, created at 2025-01-12 09:15 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2025-01-12 09:15 +0000
1# Copyright 2013-2018 The Distro Tracker Developers
2# See the COPYRIGHT file at the top-level directory of this distribution and
3# at https://deb.li/DTAuthors
4#
5# This file is part of Distro Tracker. It is subject to the license terms
6# in the LICENSE file found in the top-level directory of this
7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker,
8# including this file, may be copied, modified, propagated, or distributed
9# except according to the terms contained in the LICENSE file.
10"""Implements core data retrieval from various external resources."""
11import itertools
12import logging
13import re
15from debian import deb822
17from django.conf import settings
18from django.core.exceptions import ValidationError
19from django.db import models, transaction
21import requests
23from distro_tracker import vendor
24from distro_tracker.accounts.models import UserEmail
25from distro_tracker.core.models import (
26 Architecture,
27 BinaryPackage,
28 BinaryPackageName,
29 BinaryPackageRepositoryEntry,
30 ContributorName,
31 PackageData,
32 PackageName,
33 PseudoPackageName,
34 Repository,
35 SourcePackage,
36 SourcePackageDeps,
37 SourcePackageName,
38 SourcePackageRepositoryEntry,
39 Team
40)
41from distro_tracker.core.tasks import BaseTask
42from distro_tracker.core.tasks.mixins import (
43 PackageTagging,
44 ProcessMainRepoEntry,
45 ProcessSrcRepoEntry,
46 ProcessSrcRepoEntryInDefaultRepository,
47)
48from distro_tracker.core.tasks.schedulers import IntervalScheduler
49from distro_tracker.core.utils import get_or_none
50from distro_tracker.core.utils.packages import (
51 AptCache,
52 extract_information_from_packages_entry,
53 extract_information_from_sources_entry
54)
56logger = logging.getLogger('distro_tracker.tasks')
57logger_input = logging.getLogger('distro_tracker.input')
class InvalidRepositoryException(Exception):
    """
    Raised when a ``sources.list`` entry is malformed or when the ``Release``
    file of the repository it describes cannot be retrieved or used.
    """
def update_pseudo_package_list():
    """
    Synchronizes the stored pseudo packages with the vendor-provided list.

    The list of currently available pseudo packages is obtained through the
    vendor-provided function
    :func:`get_pseudo_package_list
    <distro_tracker.vendor.skeleton.rules.get_pseudo_package_list>`.

    Stored pseudo packages which are missing from that list lose their pseudo
    package flag and names which are not stored yet are created. Nothing is
    changed when the vendor function is not implemented, returns ``None`` or
    raises :class:`RuntimeError`.
    """
    try:
        vendor_names, implemented = vendor.call('get_pseudo_package_list')
    except RuntimeError:
        # The pseudo package resource could not be accessed: keep the stored
        # list as it is.
        return

    if not implemented or vendor_names is None:
        return

    # Names which still have to be created. A set gives cheap membership
    # tests and removals.
    names_to_create = set(vendor_names)
    for stored_package in PseudoPackageName.objects.all():
        if stored_package.name in names_to_create:
            # Still a pseudo package: it already exists, so there is nothing
            # left to do for this name.
            names_to_create.remove(stored_package.name)
            continue
        # No longer listed by the vendor: demote the package by dropping its
        # pseudo package flag.
        stored_package.pseudo = False
        stored_package.save()

    # Whatever is left in the set does not exist yet.
    for new_name in names_to_create:
        PseudoPackageName.objects.create(name=new_name)
def retrieve_repository_info(sources_list_entry, timeout=60):
    """
    A function which accesses a ``Release`` file for the given repository and
    returns a dict representing the parsed information.

    :param sources_list_entry: A ``sources.list`` style line made of at least
        three whitespace separated fields: the repository type (``deb`` or
        ``deb-src``), the repository URL and the distribution.
    :type sources_list_entry: string
    :param timeout: Passed to :func:`requests.get` as its ``timeout`` so that
        an unresponsive server cannot block the caller forever. ``None``
        disables the limit.
    :returns: A dict with the keys ``uri``, ``architectures``, ``components``,
        ``binary``, ``source``, ``suite`` and ``codename``.
    :rtype: dict
    :raises InvalidRepositoryException: If the entry has fewer than three
        fields, the ``Release`` file cannot be retrieved (connection error,
        timeout or a status code other than 200), no data can be extracted
        from it, or a required property is missing from it.
    """
    entry_split = sources_list_entry.split(None, 3)
    if len(entry_split) < 3:
        raise InvalidRepositoryException("Invalid sources.list entry")

    repository_type, url, distribution = entry_split[:3]
    # Use the configured CA bundle when there is one, otherwise fall back to
    # the default certificate verification.
    tls_verify = settings.DISTRO_TRACKER_CA_BUNDLE or True

    # Access the Release file
    try:
        response = requests.get(
            Repository.release_file_url(url, distribution),
            verify=tls_verify,
            allow_redirects=True,
            timeout=timeout)
    except requests.exceptions.RequestException as original:
        # Timeouts are RequestException subclasses too, so they end up here.
        raise InvalidRepositoryException(
            "Could not connect to {url}".format(url=url)) from original
    if response.status_code != 200:
        raise InvalidRepositoryException(
            "No Release file found at the URL: {url}\n"
            "Response status code {status_code}".format(
                url=url, status_code=response.status_code))

    # Parse the retrieved information
    release = deb822.Release(response.text)
    if not release:
        raise InvalidRepositoryException(
            "No data could be extracted from the Release file at {url}".format(
                url=url))

    REQUIRED_KEYS = (
        'architectures',
        'components',
    )
    # A mapping of optional keys to their default values, if any
    OPTIONAL_KEYS = {
        'suite': distribution,
        'codename': None,
    }

    # Make sure all necessary keys were found in the file
    for key in REQUIRED_KEYS:
        if key not in release:
            raise InvalidRepositoryException(
                "Property {key} not found in the Release file at {url}".format(
                    key=key,
                    url=url))

    # Finally build the return dictionary with the information about the
    # repository.
    repository_information = {
        'uri': url,
        'architectures': release['architectures'].split(),
        'components': release['components'].split(),
        'binary': repository_type == 'deb',
        'source': repository_type == 'deb-src',
    }
    # Add in optional info
    for key, default in OPTIONAL_KEYS.items():
        repository_information[key] = release.get(key, default)

    return repository_information
class TagPackagesWithBugs(BaseTask, PackageTagging):
    """
    Keeps the 'bugs' tag of packages up to date.
    """

    class Scheduler(IntervalScheduler):
        # Presumably expressed in seconds (i.e. one hour) -- confirm against
        # IntervalScheduler.
        interval = 3600

    # Settings describing the tag handled by this task.
    TAG_NAME = 'tag:bugs'
    TAG_DISPLAY_NAME = 'bugs'
    TAG_COLOR_TYPE = 'warning'
    TAG_DESCRIPTION = 'The package has bugs'
    TAG_TABLE_TITLE = 'Packages with bugs'

    def packages_to_tag(self):
        """
        :returns: The packages whose bug statistics are not null.
        :rtype: :class:`QuerySet <django.db.models.query.QuerySet>`
        """
        packages_with_stats = PackageName.objects.filter(
            bug_stats__stats__isnull=False)
        return packages_with_stats
class UpdateRepositoriesTask(BaseTask):
    """
    Performs an update of repository information.

    New (source and binary) packages are created if necessary and the ones
    which are no longer found in any repository are deleted. Afterwards the
    source-to-source package dependencies of the default repository are
    rebuilt.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    # Relation fields read from ``Sources`` stanzas.
    SOURCE_DEPENDENCY_TYPES = ('Build-Depends', 'Build-Depends-Indep')
    # Relation fields read from ``Packages`` stanzas.
    BINARY_DEPENDENCY_TYPES = ('Depends', 'Recommends', 'Suggests')

    def initialize(self, **kwargs):
        super().initialize(**kwargs)
        self._all_packages = []
        # Ids of the repository entries found while processing the files of
        # the repository which is currently being updated.
        self._all_repository_entries = []

    def _clear_processed_repository_entries(self):
        """Forgets all repository entries recorded so far."""
        self._all_repository_entries = []

    def _add_processed_repository_entry(self, repository_entry):
        """Records that the given repository entry must not be deleted."""
        self._all_repository_entries.append(repository_entry.id)

    def _extract_information_from_sources_entry(self, src_pkg, stanza):
        """
        Extracts the data of a ``Sources`` stanza and converts the parsed
        values into the corresponding model instances.

        :param src_pkg: The source package described by the stanza; only used
            in log messages.
        :param stanza: The ``Sources`` file entry
        :returns: The extracted data with ``architectures``,
            ``binary_packages``, ``maintainer`` and ``uploaders`` replaced by
            model instances. ``maintainer`` is removed when its email is
            invalid.
        :rtype: dict
        """
        entry = extract_information_from_sources_entry(stanza)

        # Convert the parsed data into corresponding model instances
        if 'architectures' in entry:
            # Map the list of architecture names to their objects
            # Discards any unknown architectures.
            entry['architectures'] = Architecture.objects.filter(
                name__in=entry['architectures'])

        if 'binary_packages' in entry:
            # Map the list of binary package names to list of existing
            # binary package names.
            binary_package_names = entry['binary_packages']
            binaries = list(BinaryPackageName.objects.filter(
                name__in=binary_package_names))
            # Built once so that each membership test below is O(1).
            existing_binaries_names = {binary.name for binary in binaries}
            for binary_name in binary_package_names:
                if binary_name not in existing_binaries_names:
                    # The name may already exist as another kind of package:
                    # flag it as binary instead of creating a duplicate.
                    binary_package_name, _ = PackageName.objects.get_or_create(
                        name=binary_name)
                    binary_package_name.binary = True
                    binary_package_name.save()
                    binary_package_name = BinaryPackageName.objects.get(
                        name=binary_name)
                    binaries.append(binary_package_name)
            entry['binary_packages'] = binaries

        if 'maintainer' in entry:
            try:
                maintainer_email, _ = UserEmail.objects.get_or_create(
                    email=entry['maintainer']['email'])
                maintainer = ContributorName.objects.get_or_create(
                    contributor_email=maintainer_email,
                    name=entry['maintainer'].get('name', ''))[0]
                entry['maintainer'] = maintainer
            except ValidationError:
                email = entry['maintainer']['email']
                logger_input.warning(
                    'Invalid email in maintainer field of %s: %s',
                    src_pkg, email)
                del entry['maintainer']

        if 'uploaders' in entry:
            self._process_uploaders(entry, src_pkg)

        return entry

    def _process_uploaders(self, entry, src_pkg):
        """
        Replaces ``entry['uploaders']`` with the matching
        :class:`ContributorName` instances, creating them when necessary.
        Uploaders with an invalid email are logged and skipped.

        :param entry: The data extracted from a ``Sources`` stanza; modified
            in place.
        :param src_pkg: The source package the uploaders belong to; only used
            in log messages.
        """
        uploader_emails = [
            uploader['email']
            for uploader in entry['uploaders']
        ]
        uploader_names = [
            uploader.get('name', '')
            for uploader in entry['uploaders']
        ]
        # Retrieve all the already known emails with a single query.
        existing_contributor_emails_qs = UserEmail.objects.filter(
            email__in=uploader_emails)
        existing_contributor_emails = {
            contributor.email: contributor
            for contributor in existing_contributor_emails_qs
        }
        uploaders = []
        for email, name in zip(uploader_emails, uploader_names):
            if email not in existing_contributor_emails:
                try:
                    contributor_email, _ = UserEmail.objects.get_or_create(
                        email=email)
                    existing_contributor_emails[email] = contributor_email
                except ValidationError:
                    contributor_email = None
                    logger_input.warning(
                        'Bad email in uploaders in %s for %s: %s',
                        src_pkg, name, email)
            else:
                contributor_email = existing_contributor_emails[email]
            if contributor_email:
                uploaders.append(ContributorName.objects.get_or_create(
                    contributor_email=contributor_email,
                    name=name)[0]
                )

        entry['uploaders'] = uploaders

    def _extract_information_from_packages_entry(self, bin_pkg, stanza):
        """
        :param bin_pkg: The binary package described by the stanza (unused)
        :param stanza: The ``Packages`` file entry
        :returns: The data extracted from the stanza
        """
        entry = extract_information_from_packages_entry(stanza)

        return entry

    def _update_sources_file(self, repository, component, sources_file):
        """
        Creates or updates the source packages found in the given ``Sources``
        file and records their repository entries as processed.

        :param repository: The repository the file belongs to
        :param component: The repository component the file belongs to
        :param sources_file: An open ``Sources`` file
        """
        for stanza in deb822.Sources.iter_paragraphs(sources_file):
            allow, implemented = vendor.call('allow_package', stanza)
            if allow is not None and implemented and not allow:
                # The vendor-provided function indicates that the package
                # should not be included
                continue

            src_pkg_name, _ = SourcePackageName.objects.get_or_create(
                name=stanza['package']
            )

            src_pkg, created_new_version = SourcePackage.objects.get_or_create(
                source_package_name=src_pkg_name,
                version=stanza['version']
            )
            if created_new_version or self.force_update:
                # Extract package data from Sources
                entry = self._extract_information_from_sources_entry(
                    src_pkg, stanza)
                # Update the source package information based on the newly
                # extracted data.
                src_pkg.update(**entry)
                src_pkg.save()

            if not repository.has_source_package(src_pkg):
                # Add it to the repository
                entry = repository.add_source_package(
                    src_pkg, component=component)
            else:
                # We get the entry to mark that the package version is still in
                # the repository.
                entry = SourcePackageRepositoryEntry.objects.get(
                    repository=repository,
                    source_package=src_pkg
                )

            self._add_processed_repository_entry(entry)

    def get_source_for_binary(self, stanza):
        """
        :param stanza: a ``Packages`` file entry
        :returns: A ``(source_name, source_version)`` pair for the binary
            package described by the entry
        """
        # Without a Source field the source package is named like the binary
        # package.
        source_name = (
            stanza['source']
            if 'source' in stanza else
            stanza['package'])
        # Extract the source version, if given in the Source field
        match = re.match(r'(.+) \((.+)\)', source_name)
        if match:
            source_name, source_version = match.group(1), match.group(2)
        else:
            source_version = stanza['version']

        return source_name, source_version

    def _update_packages_file(self, repository, packages_file):
        """
        Creates the binary packages found in the given ``Packages`` file and
        records their repository entries as processed.

        :param repository: The repository the file belongs to
        :param packages_file: An open ``Packages`` file
        """
        for stanza in deb822.Packages.iter_paragraphs(packages_file):
            bin_pkg_name, _ = BinaryPackageName.objects.get_or_create(
                name=stanza['package']
            )
            # Find the matching SourcePackage for the binary package
            source_name, source_version = self.get_source_for_binary(stanza)
            src_pkg, _ = SourcePackage.objects.get_or_create(
                source_package_name=SourcePackageName.objects.get_or_create(
                    name=source_name)[0],
                version=source_version)

            bin_pkg, created_new_version = BinaryPackage.objects.get_or_create(
                binary_package_name=bin_pkg_name,
                version=stanza['version'],
                source_package=src_pkg
            )
            if created_new_version:
                # Since it's a new version, extract package data from Packages
                entry = self._extract_information_from_packages_entry(
                    bin_pkg, stanza)
                # Update the binary package information based on the newly
                # extracted data.
                bin_pkg.update(**entry)
                bin_pkg.save()

            if not repository.has_binary_package(bin_pkg):
                # Add it to the repository
                architecture, _ = Architecture.objects.get_or_create(
                    name=stanza['architecture'])
                kwargs = {
                    'priority': stanza.get('priority', ''),
                    'section': stanza.get('section', ''),
                    'architecture': architecture,
                }
                entry = repository.add_binary_package(bin_pkg, **kwargs)
            else:
                # We get the entry to mark that the package version is still in
                # the repository.
                entry = BinaryPackageRepositoryEntry.objects.get(
                    repository=repository,
                    binary_package=bin_pkg)

            self._add_processed_repository_entry(entry)

    def _remove_query_set_if_count_zero(self, qs, count_field):
        """
        Removes elements from the given query set if their count of the given
        ``count_field`` is ``0``.

        :param qs: Instances which should be deleted in case their count of the
            field ``count_field`` is 0.
        :type qs: :class:`QuerySet <django.db.models.query.QuerySet>`

        :param count_field: Each instance in ``qs`` that has a 0 count for the
            field with this name is deleted.
        :type count_field: string
        """
        qs = qs.annotate(count=models.Count(count_field))
        qs = qs.filter(count=0)
        qs.delete()

    def _remove_obsolete_packages(self):
        """
        Deletes the source package versions, source package names and binary
        package names which are no longer referenced.
        """
        self.log("Removing obsolete source packages")
        # Clean up package versions which no longer exist in any repository.
        self._remove_query_set_if_count_zero(SourcePackage.objects.all(),
                                             'repository')
        # Clean up names which no longer exist.
        self._remove_query_set_if_count_zero(SourcePackageName.objects.all(),
                                             'source_package_versions')
        # Clean up binary package names which are no longer used by any source
        # package.
        self._remove_query_set_if_count_zero(BinaryPackageName.objects.all(),
                                             'sourcepackage')

    def _update_repository_entries(self, all_entries_qs):
        """
        Removes all repository entries which are no longer found in the
        repository after the last update, i.e. the ones which have not been
        recorded as processed. The record of processed entries is cleared
        afterwards.

        :param all_entries_qs: All currently existing entries which should be
            filtered to only contain the ones still found after the update.
        :type all_entries_qs:
            :class:`QuerySet <django.db.models.query.QuerySet>`
        """
        # Out of all entries in this repository, only those found in
        # the last update need to stay, so exclude them from the delete
        all_entries_qs = all_entries_qs.exclude(
            id__in=self._all_repository_entries)
        all_entries_qs.delete()

        self._clear_processed_repository_entries()

    def extract_package_versions(self, file_name):
        """
        :param file_name: The name of the file from which package versions
            should be extracted.
        :type file_name: string
        :returns: A dict mapping package names to a list of versions found in
            Deb822 formatted file.
        """
        with open(file_name, 'r') as packages_file:
            packages = {}
            for stanza in deb822.Deb822.iter_paragraphs(packages_file):
                package_name, version = stanza['package'], stanza['version']
                packages.setdefault(package_name, []).append(version)

            return packages

    def _mark_file_not_processed(self, repository, file_name, entry_manager):
        """
        The given ``Sources`` or ``Packages`` file has not been changed in the
        last update. This method marks all package versions found in it as
        still existing in order to avoid deleting them.

        :param repository: The repository to which the file is associated
        :type repository:
            :class:`Repository <distro_tracker.core.models.Repository>`
        :param file_name: The name of the file whose packages should be saved
        :param entry_manager: The manager instance which handles the package
            entries.
        :type entry_manager: :class:`Manager <django.db.models.Manager>`
        """
        # Extract all package versions from the file
        packages = self.extract_package_versions(file_name)

        # Only issue one DB query to retrieve the entries for packages with
        # the given names
        repository_entries = \
            entry_manager.filter_by_package_name(packages.keys())
        repository_entries = repository_entries.filter(
            repository=repository)
        repository_entries = repository_entries.select_related()
        # For each of those entries, make sure to keep only the ones
        # corresponding to the version found in the sources file
        for entry in repository_entries:
            if entry.version in packages[entry.name]:
                self._add_processed_repository_entry(entry)

    def group_files_by_repository(self, cached_files):
        """
        :param cached_files: A list of ``(repository, component, file_name)``
            triples
        :returns: A dict mapping each repository to the list of
            ``(file_name, component)`` pairs found for that repository.
        :rtype: dict
        """
        repository_files = {}

        for repository, component, file_name in cached_files:
            repository_files.setdefault(repository, []).append(
                (file_name, component))

        return repository_files

    def sources_file_in_sources_files_data(
            self, sources_file, sources_files_data):
        """
        Performs a search for the sources file in the sources_files_data list.

        :param sources_file: The file to search for
        :param sources_files_data: list of (`sources_file`, `component`) to
            search the sources_file.
        :return: True or false depending on whether the sources_file was found
            in the sources_files_data list.
        """
        return any(
            sources_f == sources_file
            for sources_f, _ in sources_files_data)

    def update_sources_files(self, updated_sources):
        """
        Performs an update of tracked packages based on the updated Sources
        files.

        :param updated_sources: A list of ``(repository, component,
            sources_file_name)`` giving the Sources files which were updated and
            should be used to update the Distro Tracker tracked information too.
        """
        # Group all files by repository to which they belong
        repository_files = self.group_files_by_repository(updated_sources)

        for repository, sources_files_data in repository_files.items():
            self.extend_lock()
            with transaction.atomic():
                self.log("Processing Sources files of %s repository",
                         repository.shorthand)
                # First update package information based on updated files
                for sources_file, component in sources_files_data:
                    with open(sources_file) as sources_fd:
                        self._update_sources_file(
                            repository, component, sources_fd)

                # Mark package versions found in un-updated files as still
                # existing
                all_sources = \
                    self.apt_cache.get_sources_files_for_repository(repository)
                for sources_file in all_sources:
                    if not self.sources_file_in_sources_files_data(
                            sources_file, sources_files_data):
                        self._mark_file_not_processed(
                            repository,
                            sources_file,
                            SourcePackageRepositoryEntry.objects)

                # When all the files for the repository are handled, update
                # which packages are still found in it.
                self._update_repository_entries(
                    SourcePackageRepositoryEntry.objects.filter(
                        repository=repository)
                )

        with transaction.atomic():
            # When all repositories are handled, update which packages are
            # still found in at least one repository.
            self._remove_obsolete_packages()

    def update_packages_files(self, updated_packages):
        """
        Performs an update of tracked packages based on the updated Packages
        files.

        :param updated_packages: A list of ``(repository, component,
            packages_file_name)`` giving the Packages files which were updated
            and should be used to update the Distro Tracker tracked information
            too.
        """
        # Group all files by repository to which they belong
        repository_files = self.group_files_by_repository(updated_packages)

        for repository, packages_files_data in repository_files.items():
            # This operation is really slow, ensure we have one hour safety
            self.extend_lock(expire_delay=3600, delay=3600)
            with transaction.atomic():
                self.log("Processing Packages files of %s repository",
                         repository.shorthand)
                # First update package information based on updated files
                for packages_file, component in packages_files_data:
                    with open(packages_file) as packages_fd:
                        self._update_packages_file(repository, packages_fd)

                # Mark package versions found in un-updated files as still
                # existing
                all_packages_files = \
                    self.apt_cache.get_packages_files_for_repository(repository)
                for packages_file in all_packages_files:
                    if not self.sources_file_in_sources_files_data(
                            packages_file, packages_files_data):
                        self._mark_file_not_processed(
                            repository, packages_file,
                            BinaryPackageRepositoryEntry.objects)

                # When all the files for the repository are handled, update
                # which packages are still found in it.
                self._update_repository_entries(
                    BinaryPackageRepositoryEntry.objects.filter(
                        repository=repository))

    def _update_dependencies_for_source(self, stanza, dependency_types):
        """
        Collects the binary packages referenced by the relation fields of the
        given ``Packages`` or ``Sources`` stanza.

        :param stanza: The ``Packages`` or ``Sources`` entry
        :param dependency_types: A list of dependency types which should be
            considered (e.g. Build-Depends, Recommends, etc.)
        :returns: A list of dicts, one per referenced binary package, with the
            keys ``dependency_type`` and ``binary``.
        :rtype: list
        """
        binary_dependencies = []
        for dependency_type in dependency_types:
            # The Deb822 instance is case sensitive when it comes to relations
            dependencies = stanza.relations.get(dependency_type.lower(), ())

            # Each relation is a list of alternatives: flatten them all.
            for dependency in itertools.chain.from_iterable(dependencies):
                binary_dependencies.append({
                    'dependency_type': dependency_type,
                    'binary': dependency['name'],
                })

        return binary_dependencies

    def _process_source_to_binary_deps(self, source_to_binary_deps, all_sources,
                                       bin_to_src, default_repository):
        """
        Converts source-to-binary dependencies into unsaved
        :class:`SourcePackageDeps` instances linking source packages.

        :param source_to_binary_deps: Maps source package names to a list of
            dicts each describing a dependency on a binary package. The
            ``dependency_type`` key is popped from each of those dicts.
        :param all_sources: Maps source package names to their
            :class:`SourcePackageName` instance. Names missing from it are
            ignored.
        :param bin_to_src: Maps binary package names to the set of names of
            the source packages associated with them.
        :param default_repository: The repository set on created instances
        :returns: The list of created (unsaved) instances
        :rtype: list
        """
        dependency_instances = []
        for source_name, dependencies in source_to_binary_deps.items():
            if source_name not in all_sources:
                continue

            # All dependencies for the current source package.
            all_dependencies = {}
            for dependency in dependencies:
                binary_name = dependency['binary']
                dependency_type = dependency.pop('dependency_type')
                if binary_name not in bin_to_src:
                    continue

                for source_dependency in bin_to_src[binary_name]:
                    # A source package does not depend on itself.
                    if source_name == source_dependency:
                        continue

                    source_dependencies = \
                        all_dependencies.setdefault(source_dependency, {})
                    source_dependencies.setdefault(dependency_type, [])
                    if dependency not in source_dependencies[dependency_type]:
                        source_dependencies[dependency_type].append(dependency)

            # Create the dependency instances for the current source package.
            for dependency_name, details in all_dependencies.items():
                if dependency_name in all_sources:
                    build_dep = any(dependency_type in details
                                    for dependency_type
                                    in self.SOURCE_DEPENDENCY_TYPES)
                    binary_dep = any(dependency_type in details
                                     for dependency_type
                                     in self.BINARY_DEPENDENCY_TYPES)
                    dependency_instances.append(
                        SourcePackageDeps(
                            source=all_sources[source_name],
                            dependency=all_sources[dependency_name],
                            build_dep=build_dep,
                            binary_dep=binary_dep,
                            repository=default_repository,
                            details=details))

        return dependency_instances

    def update_dependencies(self):
        """
        Updates source-to-source package dependencies stemming from
        build dependencies and their binary packages' dependencies.

        Only the files of the default repository are considered. Nothing is
        done (besides logging a warning) when there is no default repository.
        """
        self.extend_lock()

        # Build the dependency mapping
        try:
            default_repository = Repository.objects.get(default=True)
        except Repository.DoesNotExist:
            self.log("No default repository, no dependencies created.",
                     level=logging.WARNING)
            return

        self.log("Parsing files to discover dependencies")
        sources_files = self.apt_cache.get_sources_files_for_repository(
            default_repository)
        packages_files = self.apt_cache.get_packages_files_for_repository(
            default_repository)

        bin_to_src = {}
        source_to_binary_deps = {}

        # First builds a list of binary dependencies of all source packages
        # based on the Sources file.
        for sources_file in sources_files:
            with open(sources_file) as sources_fd:
                for stanza in deb822.Sources.iter_paragraphs(sources_fd):
                    source_name = stanza['package']

                    for binary in itertools.chain(*stanza.relations['binary']):
                        sources_set = bin_to_src.setdefault(binary['name'],
                                                            set())
                        sources_set.add(source_name)

                    dependencies = source_to_binary_deps.setdefault(source_name,
                                                                    [])
                    dependencies.extend(self._update_dependencies_for_source(
                        stanza,
                        self.SOURCE_DEPENDENCY_TYPES))

        # Then a list of binary dependencies based on the Packages file.
        for packages_file in packages_files:
            with open(packages_file) as packages_fd:
                for stanza in deb822.Packages.iter_paragraphs(packages_fd):
                    binary_name = stanza['package']
                    source_name, source_version = \
                        self.get_source_for_binary(stanza)

                    sources_set = bin_to_src.setdefault(binary_name, set())
                    sources_set.add(source_name)

                    new_dependencies = self._update_dependencies_for_source(
                        stanza,
                        self.BINARY_DEPENDENCY_TYPES)
                    for dependency in new_dependencies:
                        dependency['source_binary'] = binary_name
                    dependencies = source_to_binary_deps.setdefault(source_name,
                                                                    [])
                    dependencies.extend(new_dependencies)

        # The binary packages are matched with their source packages and each
        # source to source dependency created.
        all_sources = {
            source.name: source
            for source in SourcePackageName.objects.all()
        }

        self.log("Creating in-memory SourcePackageDeps")
        # Keeps a list of SourcePackageDeps instances which are to be bulk
        # created in the end.
        dependency_instances = \
            self._process_source_to_binary_deps(source_to_binary_deps,
                                                all_sources, bin_to_src,
                                                default_repository)

        # Replace all the model instances in one transaction, so that a
        # failure while creating the new ones does not leave the table empty.
        self.log("Committing SourcePackagesDeps to database")
        with transaction.atomic():
            SourcePackageDeps.objects.all().delete()
            SourcePackageDeps.objects.bulk_create(dependency_instances)

    def execute_main(self):
        """
        Refreshes apt's cache, then updates the source packages, the binary
        packages and finally the dependencies.
        """
        self.log("Updating apt's cache")
        self.apt_cache = AptCache()
        updated_sources, updated_packages = (
            self.apt_cache.update_repositories(self.force_update)
        )

        self.log("Updating data from Sources files")
        self.update_sources_files(updated_sources)
        self.log("Updating data from Packages files")
        self.update_packages_files(updated_packages)
        self.log("Updating dependencies")
        self.update_dependencies()
class UpdatePackageGeneralInformation(BaseTask, ProcessMainRepoEntry):
    """
    Stores the general information about packages in their ``general``
    :class:`PackageData` entry.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def _get_info_from_entry(self, entry):
        """
        :param entry: The repository entry of a source package
        :returns: A dict with the keys ``name``, ``component``, ``version``,
            ``maintainer``, ``uploaders``, ``architectures``,
            ``standards_version`` and ``vcs`` describing the source package
            of the entry.
        :rtype: dict
        """
        source_package = entry.source_package
        if not source_package.maintainer:
            # No maintainer is known: fall back to a placeholder.
            maintainer_info = {'name': 'Unknown Maintainer', 'email': ''}
        else:
            maintainer_info = source_package.maintainer.to_dict()

        return {
            'name': source_package.name,
            'component': entry.component,
            'version': entry.source_package.version,
            'maintainer': maintainer_info,
            'uploaders': [
                contributor.to_dict()
                for contributor in source_package.uploaders.all()
            ],
            'architectures': [
                str(architecture)
                for architecture in source_package.architectures.order_by(
                    'name')
            ],
            'standards_version': source_package.standards_version,
            'vcs': source_package.vcs,
        }

    @transaction.atomic
    def execute_main(self):
        """
        Refreshes the ``general`` data of the package of every entry to
        process and marks the entry as processed.
        """
        for repository_entry in self.items_to_process():
            package_data, _ = PackageData.objects.get_or_create(
                key='general',
                package=repository_entry.source_package.source_package_name
            )
            package_data.value = self._get_info_from_entry(repository_entry)
            package_data.save()
            self.item_mark_processed(repository_entry)
class UpdateVersionInformation(BaseTask, ProcessSrcRepoEntry):
    """
    Stores the version information extracted for packages in their
    ``versions`` :class:`PackageData` entry.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def _extract_versions_for_package(self, package_name):
        """
        :param package_name: The source package name to describe
        :returns: A dict with two keys. ``version_list`` is a list with one
            dict per non-hidden repository of the package, each having the
            keys ``repository`` (itself a dict with the keys ``name``,
            ``shorthand``, ``codename``, ``suite`` and ``id``) and
            ``version``. ``default_pool_url`` is the directory URL of the
            main entry of the package, or ``None`` when there is no main
            entry.
        :rtype: dict
        """
        version_list = []
        for repository in package_name.repositories:
            hidden = repository.get_flags()['hidden']
            if not hidden:
                entry = repository.get_source_package_entry(package_name)
                repository_info = {
                    'name': entry.repository.name,
                    'shorthand': entry.repository.shorthand,
                    'codename': entry.repository.codename,
                    'suite': entry.repository.suite,
                    'id': entry.repository.id,
                }
                version_list.append({
                    'repository': repository_info,
                    'version': entry.source_package.version,
                })

        default_pool_url = (
            package_name.main_entry.directory_url
            if package_name.main_entry else
            None)

        return {
            'version_list': version_list,
            'default_pool_url': default_pool_url,
        }

    def process_package(self, package):
        """
        Saves the freshly extracted version information of the given package.
        """
        package_data, _ = PackageData.objects.get_or_create(
            key='versions', package=package)
        package_data.value = self._extract_versions_for_package(package)
        package_data.save()

    @transaction.atomic
    def execute_main(self):
        """
        Updates the version information of each package at most once: first
        for the entries to process, then for the items to clean up.
        """
        # Names of the packages which have already been handled.
        handled_names = set()
        for entry in self.items_to_process():
            name = entry.source_package.name
            skip = (
                entry.repository.get_flags()['hidden'] or
                name in handled_names)
            if not skip:
                self.process_package(entry.source_package.source_package_name)
                handled_names.add(name)
            self.item_mark_processed(entry)

        for _key, data in self.items_to_cleanup():
            if data['name'] in handled_names:
                continue
            package = get_or_none(SourcePackageName, name=data['name'])
            if not package:
                # No source package with that name: nothing to update.
                continue

            self.process_package(package)
            handled_names.add(data['name'])
class UpdateSourceToBinariesInformation(BaseTask, ProcessMainRepoEntry):
    """
    Updates the extracted source-binary mapping of packages, stored in
    their ``binaries`` :class:`PackageData` entry.

    These are the binary packages which appear in the binary panel on each
    source package's Web page.
    """

    class Scheduler(IntervalScheduler):
        interval = 4 * 3600

    def _get_all_binaries(self, entry):
        """
        Describes the binary packages linked to a repository entry.

        :param entry: The repository entry whose source package's binary
            packages are listed.
        :returns: A list with one dictionary per binary package, holding
            its ``name`` and a ``repository`` description (``name``,
            ``shorthand``, ``suite``, ``codename``, ``id``) of the entry's
            repository.
        """
        repo = entry.repository
        binaries = []
        for binary in entry.source_package.binary_packages.all():
            # A fresh repository dictionary is built for every element so
            # that the elements do not share a mutable object.
            binaries.append({
                'name': binary.name,
                'repository': {
                    'name': repo.name,
                    'shorthand': repo.shorthand,
                    'suite': repo.suite,
                    'codename': repo.codename,
                    'id': repo.id,
                },
            })
        return binaries

    @transaction.atomic
    def execute_main(self):
        """
        Rewrites the ``binaries`` data of every entry returned by
        ``items_to_process`` and marks each entry as processed.
        """
        for entry in self.items_to_process():
            package_name = entry.source_package.source_package_name
            record, _created = PackageData.objects.get_or_create(
                key='binaries', package=package_name)
            record.value = self._get_all_binaries(entry)
            record.save()

            self.item_mark_processed(entry)
class UpdateTeamPackagesTask(BaseTask, ProcessSrcRepoEntryInDefaultRepository):
    """
    Based on new source packages detected during a repository update, the task
    updates teams to include new packages which are associated with its
    maintainer email.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def add_package_to_maintainer_teams(self, package, maintainer):
        """
        Adds the given package to all the teams where the given maintainer is
        set as the maintainer email.

        When the email is of the form ``team+<slug>@<DISTRO_TRACKER_FQDN>``,
        the package is also added to the team with that slug, if it exists.

        Nothing is done when ``maintainer`` is ``None``, which happens for
        source packages that have no maintainer set.

        :param package: The package to add to the maintainers teams.
        :type package: :class:`SourcePackageName
            <distro_tracker.core.models.SourcePackageName>`
        :param maintainer: The maintainer to whose teams the package should be
            added, or ``None``.
        :type maintainer:
            :class:`ContributorName <distro_tracker.core.models.ContributorName>`
        """
        if maintainer is None:
            # No contributor means no email to match teams against.
            return

        email = maintainer.email
        for team in Team.objects.filter(maintainer_email__email=email):
            team.packages.add(package)

        if not email.endswith("@" + settings.DISTRO_TRACKER_FQDN):
            return
        localpart, _ = email.split('@', 1)
        if not localpart.startswith("team+"):
            return
        # Everything after the first "+" is the slug of the team.
        _, slug = localpart.split('+', 1)
        team = get_or_none(Team, slug=slug)
        if team:
            team.packages.add(package)

    @transaction.atomic
    def execute_main(self):
        """
        Adds the package of every entry returned by ``items_to_process`` to
        the teams of its maintainer and of its uploaders, then marks the
        entry as processed.
        """
        for entry in self.items_to_process():
            # Add the package to the maintainer's teams packages
            package = entry.source_package.source_package_name
            # The maintainer may be None; the helper ignores it in that case.
            maintainer = entry.source_package.maintainer
            self.add_package_to_maintainer_teams(package, maintainer)

            # Add the package to all the uploaders' teams packages
            for uploader in entry.source_package.uploaders.all():
                self.add_package_to_maintainer_teams(package, uploader)

            self.item_mark_processed(entry)