Coverage for distro_tracker/core/retrieve_data.py: 85%

419 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2025-01-12 09:15 +0000

1# Copyright 2013-2018 The Distro Tracker Developers 

2# See the COPYRIGHT file at the top-level directory of this distribution and 

3# at https://deb.li/DTAuthors 

4# 

5# This file is part of Distro Tracker. It is subject to the license terms 

6# in the LICENSE file found in the top-level directory of this 

7# distribution and at https://deb.li/DTLicense. No part of Distro Tracker, 

8# including this file, may be copied, modified, propagated, or distributed 

9# except according to the terms contained in the LICENSE file. 

10"""Implements core data retrieval from various external resources.""" 

11import itertools 

12import logging 

13import re 

14 

15from debian import deb822 

16 

17from django.conf import settings 

18from django.core.exceptions import ValidationError 

19from django.db import models, transaction 

20 

21import requests 

22 

23from distro_tracker import vendor 

24from distro_tracker.accounts.models import UserEmail 

25from distro_tracker.core.models import ( 

26 Architecture, 

27 BinaryPackage, 

28 BinaryPackageName, 

29 BinaryPackageRepositoryEntry, 

30 ContributorName, 

31 PackageData, 

32 PackageName, 

33 PseudoPackageName, 

34 Repository, 

35 SourcePackage, 

36 SourcePackageDeps, 

37 SourcePackageName, 

38 SourcePackageRepositoryEntry, 

39 Team 

40) 

41from distro_tracker.core.tasks import BaseTask 

42from distro_tracker.core.tasks.mixins import ( 

43 PackageTagging, 

44 ProcessMainRepoEntry, 

45 ProcessSrcRepoEntry, 

46 ProcessSrcRepoEntryInDefaultRepository, 

47) 

48from distro_tracker.core.tasks.schedulers import IntervalScheduler 

49from distro_tracker.core.utils import get_or_none 

50from distro_tracker.core.utils.packages import ( 

51 AptCache, 

52 extract_information_from_packages_entry, 

53 extract_information_from_sources_entry 

54) 

55 

# Module-level logger bound to the 'distro_tracker.tasks' channel.
logger = logging.getLogger('distro_tracker.tasks')
# Used below to report invalid data found in repository files (for example
# a malformed maintainer or uploader email address).
logger_input = logging.getLogger('distro_tracker.input')

58 

59 

class InvalidRepositoryException(Exception):
    """Raised when the information about a repository cannot be retrieved."""

62 

63 

def update_pseudo_package_list():
    """
    Synchronizes the stored pseudo packages with the vendor-provided list.

    The list is obtained from the vendor-provided function
    :func:`get_pseudo_package_list
    <distro_tracker.vendor.skeleton.rules.get_pseudo_package_list>`.
    Nothing is changed when that function is not implemented, returns
    ``None`` or raises a :class:`RuntimeError`.
    """
    try:
        names, implemented = vendor.call('get_pseudo_package_list')
    except RuntimeError:
        # The pseudo package resource is not reachable: keep the stored list.
        return

    if names is None or not implemented:
        return

    # Names still to be created; a set keeps the lookups below cheap.
    missing = set(names)
    for known in PseudoPackageName.objects.all():
        if known.name in missing:
            # Already stored as a pseudo package, nothing to do for it.
            missing.remove(known.name)
            continue
        # No longer listed by the vendor: drop the pseudo package flag.
        known.pseudo = False
        known.save()

    # Whatever is left was never seen among the stored pseudo packages.
    for name in missing:
        PseudoPackageName.objects.create(name=name)

99 

100 

def retrieve_repository_info(sources_list_entry):
    """
    A function which accesses a ``Release`` file for the given repository and
    returns a dict representing the parsed information.

    :param sources_list_entry: A line made of at least three
        whitespace-separated fields: repository type (``deb`` or
        ``deb-src``), URL and distribution.
    :type sources_list_entry: string

    :returns: A dict with the keys ``uri``, ``architectures``,
        ``components``, ``binary``, ``source``, ``suite`` and ``codename``.
    :rtype: dict

    :raises InvalidRepositoryException: If the entry has fewer than three
        fields, the ``Release`` file cannot be fetched (connection error,
        timeout, non-200 status), it is empty, or it lacks a required key.
    """
    # Seconds to wait for the connection and for each chunk of the response.
    # Without a timeout an unresponsive server would block the caller forever.
    request_timeout = 60

    entry_split = sources_list_entry.split(None, 3)
    if len(entry_split) < 3:
        raise InvalidRepositoryException("Invalid sources.list entry")

    repository_type, url, distribution = entry_split[:3]
    # Use the configured CA bundle when there is one, otherwise fall back to
    # the default certificate verification.
    tls_verify = settings.DISTRO_TRACKER_CA_BUNDLE or True

    # Access the Release file
    try:
        response = requests.get(Repository.release_file_url(url, distribution),
                                verify=tls_verify,
                                allow_redirects=True,
                                timeout=request_timeout)
    except requests.exceptions.RequestException as original:
        # Timeouts are RequestException subclasses, so they end up here too.
        raise InvalidRepositoryException(
            "Could not connect to {url}".format(url=url)) from original
    if response.status_code != 200:
        raise InvalidRepositoryException(
            "No Release file found at the URL: {url}\n"
            "Response status code {status_code}".format(
                url=url, status_code=response.status_code))

    # Parse the retrieved information
    release = deb822.Release(response.text)
    if not release:
        raise InvalidRepositoryException(
            "No data could be extracted from the Release file at {url}".format(
                url=url))
    REQUIRED_KEYS = (
        'architectures',
        'components',
    )
    # A mapping of optional keys to their default values, if any
    OPTIONAL_KEYS = {
        'suite': distribution,
        'codename': None,
    }
    # Make sure all necessary keys were found in the file
    for key in REQUIRED_KEYS:
        if key not in release:
            raise InvalidRepositoryException(
                "Property {key} not found in the Release file at {url}".format(
                    key=key,
                    url=url))
    # Finally build the return dictionary with the information about the
    # repository.
    repository_information = {
        'uri': url,
        'architectures': release['architectures'].split(),
        'components': release['components'].split(),
        'binary': repository_type == 'deb',
        'source': repository_type == 'deb-src',
    }
    # Add in optional info
    for key, default in OPTIONAL_KEYS.items():
        repository_information[key] = release.get(key, default)

    return repository_information

165 

166 

class TagPackagesWithBugs(BaseTask, PackageTagging):
    """
    Performs an update of 'bugs' tag for packages.
    """

    class Scheduler(IntervalScheduler):
        # NOTE(review): presumably seconds (i.e. hourly) -- confirm against
        # IntervalScheduler.
        interval = 3600

    # Tag attributes; presumably consumed by the PackageTagging mixin.
    TAG_NAME = 'tag:bugs'
    TAG_DISPLAY_NAME = 'bugs'
    TAG_COLOR_TYPE = 'warning'
    TAG_DESCRIPTION = 'The package has bugs'
    TAG_TABLE_TITLE = 'Packages with bugs'

    def packages_to_tag(self):
        """
        Returns the packages having a related ``bug_stats`` whose ``stats``
        value is not null.
        """
        return PackageName.objects.filter(bug_stats__stats__isnull=False)

183 

184 

class UpdateRepositoriesTask(BaseTask):
    """
    Performs an update of repository information.

    New (source and binary) packages are created if necessary and old ones are
    deleted. An event is emitted for each situation, allowing other tasks to
    perform updates based on updated package information.
    """

    class Scheduler(IntervalScheduler):
        # NOTE(review): presumably seconds (i.e. every four hours) -- confirm
        # against IntervalScheduler.
        interval = 3600 * 4

    # Relation fields read from ``Sources`` stanzas when building the
    # source-to-source dependencies.
    SOURCE_DEPENDENCY_TYPES = ('Build-Depends', 'Build-Depends-Indep')
    # Relation fields read from ``Packages`` stanzas for the same purpose.
    BINARY_DEPENDENCY_TYPES = ('Depends', 'Recommends', 'Suggests')

199 

def initialize(self, **kwargs):
    """
    Runs the parent initialization and creates the empty bookkeeping lists
    used while the repositories are being updated.
    """
    super().initialize(**kwargs)
    self._all_packages = []
    # Ids of the repository entries seen so far; filled by
    # _add_processed_repository_entry().
    self._all_repository_entries = []

204 

def _clear_processed_repository_entries(self):
    """Forgets all repository entry ids recorded so far."""
    self._all_repository_entries = []

207 

def _add_processed_repository_entry(self, repository_entry):
    """
    Records the id of the given repository entry as seen during the update.

    :param repository_entry: An object exposing an ``id`` attribute.
    """
    self._all_repository_entries.append(repository_entry.id)

210 

def _extract_information_from_sources_entry(self, src_pkg, stanza):
    """
    Parses a ``Sources`` stanza and replaces the plain values of the parsed
    entry with the matching model instances.

    :param src_pkg: The source package the stanza describes; only used in
        log messages.
    :param stanza: The ``Sources`` file entry to parse.

    :returns: The dict returned by
        :func:`extract_information_from_sources_entry` in which
        ``architectures``, ``binary_packages``, ``maintainer`` and
        ``uploaders`` (when present) refer to model instances. The
        ``maintainer`` key is removed when its email is invalid.
    :rtype: dict
    """
    entry = extract_information_from_sources_entry(stanza)

    # Convert the parsed data into corresponding model instances
    if 'architectures' in entry:
        # Map the list of architecture names to their objects
        # Discards any unknown architectures.
        entry['architectures'] = Architecture.objects.filter(
            name__in=entry['architectures'])

    if 'binary_packages' in entry:
        # Map the list of binary package names to list of existing
        # binary package names.
        binary_package_names = entry['binary_packages']
        binaries = list(BinaryPackageName.objects.filter(
            name__in=binary_package_names))
        # A set makes each membership test below constant time.
        known_names = {binary.name for binary in binaries}
        for binary_name in binary_package_names:
            if binary_name in known_names:
                continue
            binary_package_name, _ = PackageName.objects.get_or_create(
                name=binary_name)
            binary_package_name.binary = True
            binary_package_name.save()
            binary_package_name = BinaryPackageName.objects.get(
                name=binary_name)
            binaries.append(binary_package_name)
            # Remember the new name so that a name listed twice in the
            # stanza is neither re-created nor appended a second time.
            known_names.add(binary_name)
        entry['binary_packages'] = binaries

    if 'maintainer' in entry:
        try:
            maintainer_email, _ = UserEmail.objects.get_or_create(
                email=entry['maintainer']['email'])
            maintainer = ContributorName.objects.get_or_create(
                contributor_email=maintainer_email,
                name=entry['maintainer'].get('name', ''))[0]
            entry['maintainer'] = maintainer
        except ValidationError:
            email = entry['maintainer']['email']
            logger_input.warning(
                'Invalid email in maintainer field of %s: %s',
                src_pkg, email)
            # Drop the unusable value instead of passing it further on.
            del entry['maintainer']

    if 'uploaders' in entry:
        self._process_uploaders(entry, src_pkg)

    return entry

262 

def _process_uploaders(self, entry, src_pkg):
    """
    Replaces ``entry['uploaders']`` with a list of contributor instances.

    Uploaders whose email address is rejected with a ``ValidationError``
    are logged and left out of the resulting list.

    :param entry: The parsed ``Sources`` entry; modified in place.
    :param src_pkg: The source package, only used in log messages.
    """
    raw_uploaders = entry['uploaders']
    addresses = [person['email'] for person in raw_uploaders]
    display_names = [person.get('name', '') for person in raw_uploaders]

    # One query for all the addresses that are already stored.
    known_emails = {
        user_email.email: user_email
        for user_email in UserEmail.objects.filter(email__in=addresses)
    }

    resolved = []
    for address, display_name in zip(addresses, display_names):
        if address in known_emails:
            user_email = known_emails[address]
        else:
            try:
                user_email, _ = UserEmail.objects.get_or_create(
                    email=address)
            except ValidationError:
                user_email = None
                logger_input.warning(
                    'Bad email in uploaders in %s for %s: %s',
                    src_pkg, display_name, address)
            else:
                known_emails[address] = user_email

        if not user_email:
            continue

        contributor, _ = ContributorName.objects.get_or_create(
            contributor_email=user_email,
            name=display_name)
        resolved.append(contributor)

    entry['uploaders'] = resolved

299 

def _extract_information_from_packages_entry(self, bin_pkg, stanza):
    """
    Parses a ``Packages`` stanza.

    :param bin_pkg: Not used by this method.
    :param stanza: The ``Packages`` file entry to parse.
    :returns: The value returned by
        :func:`extract_information_from_packages_entry` for the stanza.
    """
    entry = extract_information_from_packages_entry(stanza)

    return entry

304 

def _update_sources_file(self, repository, component, sources_file):
    """
    Processes every stanza of an open ``Sources`` file.

    For each stanza accepted by the vendor-provided ``allow_package``
    function, the source package name and version are created when missing,
    the package data is refreshed for new versions (or always when
    ``force_update`` is set) and the matching repository entry is recorded
    as processed.

    :param repository: The repository the file belongs to.
    :param component: The component used when adding a package to the
        repository.
    :param sources_file: The open ``Sources`` file.
    """
    for paragraph in deb822.Sources.iter_paragraphs(sources_file):
        allowed, implemented = vendor.call('allow_package', paragraph)
        rejected = allowed is not None and implemented and not allowed
        if rejected:
            # The vendor explicitly asked for this package to be left out.
            continue

        name_obj, _ = SourcePackageName.objects.get_or_create(
            name=paragraph['package'])
        source_package, is_new = SourcePackage.objects.get_or_create(
            source_package_name=name_obj,
            version=paragraph['version'])

        if is_new or self.force_update:
            # Refresh the stored data from what the stanza says.
            details = self._extract_information_from_sources_entry(
                source_package, paragraph)
            source_package.update(**details)
            source_package.save()

        if repository.has_source_package(source_package):
            # Fetch the existing entry so that it is marked as still present.
            repo_entry = SourcePackageRepositoryEntry.objects.get(
                repository=repository,
                source_package=source_package)
        else:
            repo_entry = repository.add_source_package(
                source_package, component=component)

        self._add_processed_repository_entry(repo_entry)

343 

def get_source_for_binary(self, stanza):
    """
    Finds out which source package built the given binary package.

    :param stanza: a ``Packages`` file entry
    :returns: A ``(source_name, source_version)`` pair. The name comes from
        the ``source`` field, or from ``package`` when there is none. The
        version is the one given in parentheses in the ``source`` field, or
        the binary ``version`` otherwise.
    """
    if 'source' in stanza:
        source_field = stanza['source']
    else:
        source_field = stanza['package']

    # "name (version)" means the source version differs from the binary one.
    versioned = re.match(r'(.+) \((.+)\)', source_field)
    if versioned is None:
        return source_field, stanza['version']

    name, version = versioned.groups()
    return name, version

362 

def _update_packages_file(self, repository, packages_file):
    """
    Processes every stanza of an open ``Packages`` file.

    The binary package name, its source package and the binary package
    version are created when missing, the data of new binary versions is
    filled in from the stanza and the matching repository entry is recorded
    as processed.

    :param repository: The repository the file belongs to.
    :param packages_file: The open ``Packages`` file.
    """
    for paragraph in deb822.Packages.iter_paragraphs(packages_file):
        binary_name, _ = BinaryPackageName.objects.get_or_create(
            name=paragraph['package'])

        # Find (or create) the source package which built this binary.
        source_name, source_version = self.get_source_for_binary(paragraph)
        source_name_obj, _ = SourcePackageName.objects.get_or_create(
            name=source_name)
        source_package, _ = SourcePackage.objects.get_or_create(
            source_package_name=source_name_obj,
            version=source_version)

        binary_package, is_new = BinaryPackage.objects.get_or_create(
            binary_package_name=binary_name,
            version=paragraph['version'],
            source_package=source_package)
        if is_new:
            # Only new versions get their data extracted from the stanza.
            details = self._extract_information_from_packages_entry(
                binary_package, paragraph)
            binary_package.update(**details)
            binary_package.save()

        if repository.has_binary_package(binary_package):
            # Fetch the existing entry so that it is marked as still present.
            repo_entry = BinaryPackageRepositoryEntry.objects.get(
                repository=repository,
                binary_package=binary_package)
        else:
            architecture, _ = Architecture.objects.get_or_create(
                name=paragraph['architecture'])
            repo_entry = repository.add_binary_package(
                binary_package,
                priority=paragraph.get('priority', ''),
                section=paragraph.get('section', ''),
                architecture=architecture)

        self._add_processed_repository_entry(repo_entry)

407 

def _remove_query_set_if_count_zero(self, qs, count_field):
    """
    Deletes the instances of ``qs`` that have no related ``count_field``.

    :param qs: The instances to consider for deletion.
    :type qs: :class:`QuerySet <django.db.models.query.QuerySet>`

    :param count_field: Name of the field to count; instances for which the
        count is ``0`` are deleted.
    :type count_field: string
    """
    (qs.annotate(count=models.Count(count_field))
       .filter(count=0)
       .delete())

424 

def _remove_obsolete_packages(self):
    """
    Deletes the source package versions, source package names and binary
    package names which are not referenced any more.
    """
    self.log("Removing obsolete source packages")
    # The order matters: removing versions first is what leaves names
    # without any version behind.
    cleanup_steps = (
        # Package versions which no longer exist in any repository.
        (SourcePackage, 'repository'),
        # Source names left without any version.
        (SourcePackageName, 'source_package_versions'),
        # Binary names no longer used by any source package.
        (BinaryPackageName, 'sourcepackage'),
    )
    for model, relation in cleanup_steps:
        self._remove_query_set_if_count_zero(model.objects.all(), relation)

437 

def _update_repository_entries(self, all_entries_qs):
    """
    Removes all repository entries which are no longer found in the
    repository after the last update.

    Entries whose id was recorded through
    :meth:`_add_processed_repository_entry` are kept, every other entry of
    ``all_entries_qs`` is deleted. The recorded ids are cleared afterwards.

    :param all_entries_qs: All currently existing entries which should be
        filtered to only contain the ones still found after the update.
    :type all_entries_qs:
        :class:`QuerySet <django.db.models.query.QuerySet>`
    """
    # Out of all entries in this repository, only those found in
    # the last update need to stay, so exclude them from the delete
    all_entries_qs = all_entries_qs.exclude(
        id__in=self._all_repository_entries)
    # Delete the entries which were not seen during the update
    all_entries_qs.delete()

    self._clear_processed_repository_entries()

462 

def extract_package_versions(self, file_name):
    """
    Collects the versions of every package listed in a Deb822 file.

    :param file_name: The name of the file from which package versions
        should be extracted.
    :type file_name: string
    :returns: A dict mapping each package name to the list of its versions,
        in the order they appear in the file.
    """
    with open(file_name, 'r') as deb822_file:
        versions_by_name = {}
        for paragraph in deb822.Deb822.iter_paragraphs(deb822_file):
            name, version = paragraph['package'], paragraph['version']
            versions_by_name.setdefault(name, []).append(version)

    return versions_by_name

479 

480 def _mark_file_not_processed(self, repository, file_name, entry_manager): 

481 """ 

482 The given ``Sources`` or ``Packages`` file has not been changed in the 

483 last update. This method marks all package versions found in it as 

484 still existing in order to avoid deleting them. 

485 

486 :param repository: The repository to which the file is associated 

487 :type repository: 

488 :class:`Repository <distro_tracker.core.models.Repository>` 

489 :param file_name: The name of the file whose packages should be saved 

490 :param entry_manager: The manager instance which handles the package 

491 entries. 

492 :type entry_manager: :class:`Manager <django.db.models.Manager>` 

493 """ 

494 # Extract all package versions from the file 

495 packages = self.extract_package_versions(file_name) 

496 

497 # Only issue one DB query to retrieve the entries for packages with 

498 # the given names 

499 repository_entries = \ 

500 entry_manager.filter_by_package_name(packages.keys()) 

501 repository_entries = repository_entries.filter( 

502 repository=repository) 

503 repository_entries = repository_entries.select_related() 

504 # For each of those entries, make sure to keep only the ones 

505 # corresponding to the version found in the sources file 

506 for entry in repository_entries: 

507 if entry.version in packages[entry.name]: 507 ↛ 506line 507 didn't jump to line 506, because the condition on line 507 was never false

508 self._add_processed_repository_entry(entry) 

509 

def group_files_by_repository(self, cached_files):
    """
    Groups cached files by the repository they belong to.

    :param cached_files: An iterable of ``(repository, component,
        file_name)`` triples.
    :returns: A dict mapping each repository to the list of
        ``(file_name, component)`` pairs found for it, in input order.
    :rtype: dict
    """
    grouped = {}
    for repository, component, file_name in cached_files:
        grouped.setdefault(repository, []).append((file_name, component))
    return grouped

526 

def sources_file_in_sources_files_data(
        self, sources_file, sources_files_data):
    """
    Tells whether a file is listed in ``sources_files_data``.

    :param sources_file: The file to search for
    :param sources_files_data: list of (`sources_file`, `component`) pairs
        to search in.
    :return: ``True`` when one of the pairs refers to ``sources_file``,
        ``False`` otherwise.
    """
    return any(
        candidate == sources_file
        for candidate, _ in sources_files_data)

542 

def update_sources_files(self, updated_sources):
    """
    Performs an update of tracked packages based on the updated Sources
    files.

    Each repository is processed inside its own transaction; obsolete
    packages are removed in a final, separate transaction.

    :param updated_sources: A list of ``(repository, component,
        sources_file_name)`` giving the Sources files which were updated and
        should be used to update the Distro Tracker tracked information too.
    """
    # Group all files by repository to which they belong
    repository_files = self.group_files_by_repository(updated_sources)

    for repository, sources_files_data in repository_files.items():
        # NOTE(review): presumably keeps the task lock alive while the
        # repository is processed -- confirm against BaseTask.
        self.extend_lock()
        with transaction.atomic():
            self.log("Processing Sources files of %s repository",
                     repository.shorthand)
            # First update package information based on updated files
            for sources_file, component in sources_files_data:
                with open(sources_file) as sources_fd:
                    self._update_sources_file(
                        repository, component, sources_fd)

            # Mark package versions found in un-updated files as still
            # existing
            all_sources = \
                self.apt_cache.get_sources_files_for_repository(repository)
            for sources_file in all_sources:
                if not self.sources_file_in_sources_files_data(
                        sources_file, sources_files_data):
                    self._mark_file_not_processed(
                        repository,
                        sources_file,
                        SourcePackageRepositoryEntry.objects)

            # When all the files for the repository are handled, update
            # which packages are still found in it.
            self._update_repository_entries(
                SourcePackageRepositoryEntry.objects.filter(
                    repository=repository)
            )

    with transaction.atomic():
        # When all repositories are handled, update which packages are
        # still found in at least one repository.
        self._remove_obsolete_packages()

589 

def update_packages_files(self, updated_packages):
    """
    Performs an update of tracked packages based on the updated Packages
    files.

    Each repository is processed inside its own transaction.

    :param updated_packages: A list of ``(repository, component,
        packages_file_name)`` triples giving the Packages files which were
        updated and should be used to update the Distro Tracker tracked
        information too. The component is not used by this method.
    """
    # Group all files by repository to which they belong
    repository_files = self.group_files_by_repository(updated_packages)

    for repository, packages_files_data in repository_files.items():
        # This operation is really slow, ensure we have one hour safety
        self.extend_lock(expire_delay=3600, delay=3600)
        with transaction.atomic():
            self.log("Processing Packages files of %s repository",
                     repository.shorthand)
            # First update package information based on updated files
            for packages_file, component in packages_files_data:
                with open(packages_file) as packages_fd:
                    self._update_packages_file(repository, packages_fd)

            # Mark package versions found in un-updated files as still
            # existing
            # (despite its name, all_sources holds Packages files here)
            all_sources = \
                self.apt_cache.get_packages_files_for_repository(repository)
            for packages_file in all_sources:
                if not self.sources_file_in_sources_files_data(
                        packages_file, packages_files_data):
                    self._mark_file_not_processed(
                        repository, packages_file,
                        BinaryPackageRepositoryEntry.objects)

            # When all the files for the repository are handled, update
            # which packages are still found in it.
            self._update_repository_entries(
                BinaryPackageRepositoryEntry.objects.filter(
                    repository=repository))

629 

630 def _update_dependencies_for_source(self, stanza, dependency_types): 

631 """ 

632 Updates the dependencies for a source package based on the ones found 

633 in the given ``Packages`` or ``Sources`` stanza. 

634 

635 :param source_name: The name of the source package for which the 

636 dependencies are updated. 

637 :param stanza: The ``Packages`` or ``Sources`` entry 

638 :param dependency_type: A list of dependency types which should be 

639 considered (e.g. Build-Depends, Recommends, etc.) 

640 :param source_to_binary_deps: The dictionary which should be updated 

641 with the new dependencies. Maps source names to a list of dicts 

642 each describing a dependency. 

643 """ 

644 binary_dependencies = [] 

645 for dependency_type in dependency_types: 

646 # The Deb822 instance is case sensitive when it comes to relations 

647 dependencies = stanza.relations.get(dependency_type.lower(), ()) 

648 

649 for dependency in itertools.chain(*dependencies): 

650 binary_name = dependency['name'] 

651 binary_dependencies.append({ 

652 'dependency_type': dependency_type, 

653 'binary': binary_name, 

654 }) 

655 

656 return binary_dependencies 

657 

def _process_source_to_binary_deps(self, source_to_binary_deps, all_sources,
                                   bin_to_src, default_repository):
    """
    Turns the binary dependencies of each source package into unsaved
    :class:`SourcePackageDeps` instances.

    :param source_to_binary_deps: Maps a source package name to a list of
        dicts having at least the keys ``binary`` and ``dependency_type``.
        The ``dependency_type`` key is removed from each dict that is
        processed.
    :param all_sources: Maps source package names to the objects used as
        ``source`` and ``dependency`` of the created instances. Source
        names missing from it are ignored.
    :param bin_to_src: Maps a binary package name to the names of the
        source packages associated with it.
    :param default_repository: The repository set on every instance.
    :returns: The list of created (not saved) instances.
    """
    dependency_instances = []
    for source_name, dependencies in source_to_binary_deps.items():
        if source_name not in all_sources:
            continue

        # All dependencies for the current source package.
        all_dependencies = {}
        for dependency in dependencies:
            binary_name = dependency['binary']
            # The type becomes a key of the details below, so it is taken
            # out of the dict stored as a value.
            dependency_type = dependency.pop('dependency_type')
            if binary_name not in bin_to_src:
                continue

            for source_dependency in bin_to_src[binary_name]:
                # A source package does not depend on itself.
                if source_name == source_dependency:
                    continue

                source_dependencies = \
                    all_dependencies.setdefault(source_dependency, {})
                source_dependencies.setdefault(dependency_type, [])
                if dependency not in source_dependencies[dependency_type]:
                    source_dependencies[dependency_type].append(dependency)

        # Create the dependency instances for the current source package.
        for dependency_name, details in all_dependencies.items():
            if dependency_name in all_sources:
                build_dep = any(dependency_type in details
                                for dependency_type
                                in self.SOURCE_DEPENDENCY_TYPES)
                binary_dep = any(dependency_type in details
                                 for dependency_type
                                 in self.BINARY_DEPENDENCY_TYPES)
                dependency_instances.append(
                    SourcePackageDeps(
                        source=all_sources[source_name],
                        dependency=all_sources[dependency_name],
                        build_dep=build_dep,
                        binary_dep=binary_dep,
                        repository=default_repository,
                        details=details))

    return dependency_instances

702 

def update_dependencies(self):
    """
    Updates source-to-source package dependencies stemming from
    build dependencies and their binary packages' dependencies.

    The dependencies are computed from the ``Sources`` and ``Packages``
    files of the default repository. Nothing is done (apart from logging a
    warning) when there is no default repository. The stored dependencies
    are replaced atomically: either the whole new set is committed or the
    previous one is kept.
    """
    self.extend_lock()

    # Build the dependency mapping
    try:
        default_repository = Repository.objects.get(default=True)
    except Repository.DoesNotExist:
        self.log("No default repository, no dependencies created.",
                 level=logging.WARNING)
        return

    self.log("Parsing files to discover dependencies")
    sources_files = self.apt_cache.get_sources_files_for_repository(
        default_repository)
    packages_files = self.apt_cache.get_packages_files_for_repository(
        default_repository)

    # Binary package name -> set of names of the sources providing it
    bin_to_src = {}
    # Source package name -> list of dicts describing a binary dependency
    source_to_binary_deps = {}

    # First builds a list of binary dependencies of all source packages
    # based on the Sources file.
    for sources_file in sources_files:
        with open(sources_file) as sources_fd:
            for stanza in deb822.Sources.iter_paragraphs(sources_fd):
                source_name = stanza['package']

                for binary in itertools.chain(*stanza.relations['binary']):
                    sources_set = bin_to_src.setdefault(binary['name'],
                                                        set())
                    sources_set.add(source_name)

                dependencies = source_to_binary_deps.setdefault(
                    source_name, [])
                dependencies.extend(self._update_dependencies_for_source(
                    stanza,
                    self.SOURCE_DEPENDENCY_TYPES))

    # Then a list of binary dependencies based on the Packages file.
    for packages_file in packages_files:
        with open(packages_file) as packages_fd:
            for stanza in deb822.Packages.iter_paragraphs(packages_fd):
                binary_name = stanza['package']
                source_name, source_version = \
                    self.get_source_for_binary(stanza)

                sources_set = bin_to_src.setdefault(binary_name, set())
                sources_set.add(source_name)

                new_dependencies = self._update_dependencies_for_source(
                    stanza,
                    self.BINARY_DEPENDENCY_TYPES)
                # Remember which binary package the dependency comes from.
                for dependency in new_dependencies:
                    dependency['source_binary'] = binary_name
                dependencies = source_to_binary_deps.setdefault(
                    source_name, [])
                dependencies.extend(new_dependencies)

    # The binary packages are matched with their source packages and each
    # source to source dependency created.
    all_sources = {
        source.name: source
        for source in SourcePackageName.objects.all()
    }

    self.log("Creating in-memory SourcePackageDeps")
    # Keeps a list of SourcePackageDeps instances which are to be bulk
    # created in the end.
    dependency_instances = \
        self._process_source_to_binary_deps(source_to_binary_deps,
                                            all_sources, bin_to_src,
                                            default_repository)

    # Replace all the model instances in one transaction so that a failure
    # while inserting does not leave the table empty.
    self.log("Committing SourcePackagesDeps to database")
    with transaction.atomic():
        SourcePackageDeps.objects.all().delete()
        SourcePackageDeps.objects.bulk_create(dependency_instances)

784 

def execute_main(self):
    """
    Refreshes the apt cache, then updates in turn the data coming from the
    ``Sources`` files, from the ``Packages`` files and the dependencies.
    """
    self.log("Updating apt's cache")
    # Stored on the instance because the update_* methods below read it.
    self.apt_cache = AptCache()
    updated_sources, updated_packages = (
        self.apt_cache.update_repositories(self.force_update)
    )

    self.log("Updating data from Sources files")
    self.update_sources_files(updated_sources)
    self.log("Updating data from Packages files")
    self.update_packages_files(updated_packages)
    self.log("Updating dependencies")
    self.update_dependencies()

798 

799 

class UpdatePackageGeneralInformation(BaseTask, ProcessMainRepoEntry):
    """
    Stores the general information of every processed repository entry in
    the ``general`` :class:`PackageData` record of its source package name.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def _get_info_from_entry(self, entry):
        """
        Builds the general information dictionary for a repository entry.

        :param entry: The repository entry to describe. Its
            ``source_package`` and ``component`` attributes are read.
        :returns: A dict with the keys ``name``, ``component``, ``version``,
            ``maintainer``, ``uploaders``, ``architectures``,
            ``standards_version`` and ``vcs``.
        """
        source = entry.source_package

        if not source.maintainer:
            # No maintainer recorded: fall back to a placeholder so that the
            # stored structure always has the same shape.
            maintainer_info = {'name': 'Unknown Maintainer', 'email': ''}
        else:
            maintainer_info = source.maintainer.to_dict()

        uploaders_info = [
            person.to_dict() for person in source.uploaders.all()
        ]
        architecture_names = [
            str(architecture)
            for architecture in source.architectures.order_by('name')
        ]

        return {
            'name': source.name,
            'component': entry.component,
            'version': source.version,
            'maintainer': maintainer_info,
            'uploaders': uploaders_info,
            'architectures': architecture_names,
            'standards_version': source.standards_version,
            'vcs': source.vcs,
        }

    @transaction.atomic
    def execute_main(self):
        """
        Creates or refreshes the ``general`` data of each entry returned by
        ``items_to_process()`` and marks the entry as processed.
        """
        for entry in self.items_to_process():
            package_data, _ = PackageData.objects.get_or_create(
                key='general',
                package=entry.source_package.source_package_name,
            )
            package_data.value = self._get_info_from_entry(entry)
            package_data.save()
            self.item_mark_processed(entry)

841 

842 

class UpdateVersionInformation(BaseTask, ProcessSrcRepoEntry):
    """
    Keeps the ``versions`` :class:`PackageData` record of source packages
    up to date.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def _extract_versions_for_package(self, package_name):
        """
        Collects the version of the package in each non-hidden repository.

        :param package_name: The source package name to inspect; its
            ``repositories`` and ``main_entry`` attributes are read.
        :returns: A dict with two keys. ``version_list`` is a list of dicts,
            each holding a ``repository`` description (``name``,
            ``shorthand``, ``codename``, ``suite``, ``id``) and the
            ``version`` found there. ``default_pool_url`` is the
            ``directory_url`` of the package's main entry, or ``None`` when
            there is no main entry.
        """
        found_versions = []
        for repository in package_name.repositories:
            if not repository.get_flags()['hidden']:
                entry = repository.get_source_package_entry(package_name)
                entry_repository = {
                    'name': entry.repository.name,
                    'shorthand': entry.repository.shorthand,
                    'codename': entry.repository.codename,
                    'suite': entry.repository.suite,
                    'id': entry.repository.id,
                }
                found_versions.append({
                    'repository': entry_repository,
                    'version': entry.source_package.version,
                })

        pool_url = (
            package_name.main_entry.directory_url
            if package_name.main_entry else None
        )

        return {
            'version_list': found_versions,
            'default_pool_url': pool_url,
        }

    def process_package(self, package):
        """
        Recomputes and saves the ``versions`` data of the given package,
        creating the :class:`PackageData` record when it does not exist.
        """
        package_data, _ = PackageData.objects.get_or_create(
            key='versions', package=package)
        package_data.value = self._extract_versions_for_package(package)
        package_data.save()

    @transaction.atomic
    def execute_main(self):
        """
        Refreshes the version data once per package name, first for the
        entries to process and then for the items to clean up.
        """
        # Names whose version data was already refreshed during this run.
        handled_names = set()

        for entry in self.items_to_process():
            name = entry.source_package.name
            hidden = entry.repository.get_flags()['hidden']
            # Entries of hidden repositories and repeated names are only
            # marked as processed; their data is not recomputed.
            if not hidden and name not in handled_names:
                self.process_package(
                    entry.source_package.source_package_name)
                handled_names.add(name)
            self.item_mark_processed(entry)

        for _key, data in self.items_to_cleanup():
            name = data['name']
            if name in handled_names:
                continue
            package = get_or_none(SourcePackageName, name=name)
            if package:
                self.process_package(package)
                handled_names.add(name)

911 

912 

class UpdateSourceToBinariesInformation(BaseTask, ProcessMainRepoEntry):
    """
    Updates extracted source-binary mapping for packages.
    These are the binary packages which appear in the binary panel on each
    source package's Web page.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def _get_all_binaries(self, entry):
        """
        Describes the binary packages of the entry's source package.

        :param entry: The repository entry whose ``repository`` and
            ``source_package.binary_packages`` are read.
        :returns: A list with one dict per binary package, holding its
            ``name`` and a ``repository`` description (``name``,
            ``shorthand``, ``suite``, ``codename``, ``id``) of the entry's
            repository.
        """
        repo = entry.repository
        described = []
        for binary in entry.source_package.binary_packages.all():
            described.append({
                'name': binary.name,
                'repository': {
                    'name': repo.name,
                    'shorthand': repo.shorthand,
                    'suite': repo.suite,
                    'codename': repo.codename,
                    'id': repo.id,
                },
            })
        return described

    @transaction.atomic
    def execute_main(self):
        """
        Creates or refreshes the ``binaries`` data of each entry returned by
        ``items_to_process()`` and marks the entry as processed.
        """
        for entry in self.items_to_process():
            source_name = entry.source_package.source_package_name
            package_data, _ = PackageData.objects.get_or_create(
                key='binaries', package=source_name)
            package_data.value = self._get_all_binaries(entry)
            package_data.save()

            self.item_mark_processed(entry)

953 

954 

class UpdateTeamPackagesTask(BaseTask, ProcessSrcRepoEntryInDefaultRepository):
    """
    Based on new source packages detected during a repository update, the task
    updates teams to include new packages which are associated with its
    maintainer email.
    """

    class Scheduler(IntervalScheduler):
        interval = 3600 * 4

    def add_package_to_maintainer_teams(self, package, maintainer):
        """
        Adds the given package to all the teams where the given maintainer is
        set as the maintainer email.

        When the maintainer's email is a ``team+<slug>`` address on the
        ``settings.DISTRO_TRACKER_FQDN`` domain, the package is also added to
        the team with that slug, if such a team exists.

        Nothing is done when ``maintainer`` is ``None``.

        :param package: The package to add to the maintainers teams.
        :type package: :class:`SourcePackageName
            <distro_tracker.core.models.SourcePackageName>`
        :param maintainer: The maintainer to whose teams the package should be
            added, or ``None`` when the package has no such contributor.
        :type maintainer:
            :class:`ContributorName
            <distro_tracker.core.models.ContributorName>`
        """
        if maintainer is None:
            # A source package may have no maintainer recorded; there is no
            # email to match teams against in that case.
            return

        teams = Team.objects.filter(maintainer_email__email=maintainer.email)
        for team in teams:
            team.packages.add(package)

        if not maintainer.email.endswith("@" + settings.DISTRO_TRACKER_FQDN):
            return
        localpart, _ = maintainer.email.split('@', 1)
        if not localpart.startswith("team+"):
            return
        # Only the part after the first "+" identifies the team.
        _, slug = localpart.split('+', 1)
        team = get_or_none(Team, slug=slug)
        if team:
            team.packages.add(package)

    @transaction.atomic
    def execute_main(self):
        """
        Adds the source package of each entry returned by
        ``items_to_process()`` to the teams of its maintainer and of all its
        uploaders, then marks the entry as processed.
        """
        for entry in self.items_to_process():
            # Add the package to the maintainer's teams packages
            package = entry.source_package.source_package_name
            maintainer = entry.source_package.maintainer
            self.add_package_to_maintainer_teams(package, maintainer)

            # Add the package to all the uploaders' teams packages
            for uploader in entry.source_package.uploaders.all():
                self.add_package_to_maintainer_teams(package, uploader)

            self.item_mark_processed(entry)