public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462)
@ 2020-08-17  3:26 Zac Medico
  2020-08-17 15:24 ` Brian Dolbec
  0 siblings, 1 reply; 2+ messages in thread
From: Zac Medico @ 2020-08-17  3:26 UTC (permalink / raw)
  To: gentoo-portage-dev; +Cc: Zac Medico

Add --jobs and --load-average options which allow dependency checks
for multiple profiles to run in parallel. The increase in performance
is non-linear for the number of jobs, but it can be worthwhile
(I measured a 35% decrease in time when running 'repoman -j8 full'
on sys-apps/portage). For the -j1 case (default), all dependency
checks run in the main process as usual, so there is no significant
performance penalty for the default case.

Bug: https://bugs.gentoo.org/448462
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
 repoman/lib/repoman/argparser.py              |   9 ++
 .../repoman/modules/scan/depend/profile.py    | 117 +++++++++++++++---
 repoman/man/repoman.1                         |   9 +-
 3 files changed, 116 insertions(+), 19 deletions(-)

diff --git a/repoman/lib/repoman/argparser.py b/repoman/lib/repoman/argparser.py
index 670a0e91d..6d545ccca 100644
--- a/repoman/lib/repoman/argparser.py
+++ b/repoman/lib/repoman/argparser.py
@@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
 		'--output-style', dest='output_style', choices=output_keys,
 		help='select output type', default='default')
 
+	parser.add_argument(
+		'-j', '--jobs', dest='jobs', action='store', type=int, default=1,
+		help='Specifies the number of jobs (processes) to run simultaneously.')
+
+	parser.add_argument(
+		'-l', '--load-average', dest='load_average', action='store', type=float, default=None,
+		help='Specifies that no new jobs (processes) should be started if there are other '
+			'jobs running and the load average is at least load (a floating-point number).')
+
 	parser.add_argument(
 		'--mode', dest='mode', choices=mode_keys,
 		help='specify which mode repoman will run in (default=full)')
diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py b/repoman/lib/repoman/modules/scan/depend/profile.py
index 39d8b550c..1eb69422a 100644
--- a/repoman/lib/repoman/modules/scan/depend/profile.py
+++ b/repoman/lib/repoman/modules/scan/depend/profile.py
@@ -2,7 +2,9 @@
 
 
 import copy
+import functools
 import os
+import types
 from pprint import pformat
 
 from _emerge.Package import Package
@@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches import _gen_arches
 from portage.dep import Atom
 from portage.package.ebuild.profile_iuse import iter_iuse_vars
 from portage.util import getconfig
+from portage.util.futures import asyncio
+from portage.util.futures.compat_coroutine import coroutine, coroutine_return
+from portage.util.futures.executor.fork import ForkExecutor
+from portage.util.futures.iter_completed import async_iter_completed
 
 
 def sort_key(item):
@@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
 	def check(self, **kwargs):
 		'''Perform profile dependant dependency checks
 
-		@param arches:
 		@param pkg: Package in which we check (object).
 		@param ebuild: Ebuild which we check (object).
-		@param baddepsyntax: boolean
-		@param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
 		@returns: dictionary
 		'''
 		ebuild = kwargs.get('ebuild').get()
 		pkg = kwargs.get('pkg').get()
-		unknown_pkgs, baddepsyntax = _depend_checks(
+
+		ebuild.unknown_pkgs, ebuild.baddepsyntax = _depend_checks(
 			ebuild, pkg, self.portdb, self.qatracker, self.repo_metadata,
 			self.repo_settings.qadata)
 
@@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
 				relevant_profiles.append((keyword, groups, prof))
 
 		relevant_profiles.sort(key=sort_key)
+		ebuild.relevant_profiles = relevant_profiles
+
+		if self.options.jobs <= 1:
+			for task in self._iter_tasks(None, None, ebuild, pkg):
+				task, results = task
+				for result in results:
+					self._check_result(task, result)
+
+		loop = asyncio._wrap_loop()
+		loop.run_until_complete(self._async_check(loop=loop, **kwargs))
+
+		return False
+
+	@coroutine
+	def _async_check(self, loop=None, **kwargs):
+		'''Perform async profile dependant dependency checks
+
+		@param arches:
+		@param pkg: Package in which we check (object).
+		@param ebuild: Ebuild which we check (object).
+		@param baddepsyntax: boolean
+		@param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
+		@returns: dictionary
+		'''
+		loop = asyncio._wrap_loop(loop)
+		ebuild = kwargs.get('ebuild').get()
+		pkg = kwargs.get('pkg').get()
+		unknown_pkgs = ebuild.unknown_pkgs
+		baddepsyntax = ebuild.baddepsyntax
+
+		# Use max_workers=True to ensure immediate fork, since _iter_tasks
+		# needs the fork to create a snapshot of current state.
+		executor = ForkExecutor(max_workers=self.options.jobs)
+
+		if self.options.jobs > 1:
+			for future_done_set in async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
+				max_jobs=self.options.jobs, max_load=self.options.load_average, loop=loop):
+				for task in (yield future_done_set):
+					task, results = task.result()
+					for result in results:
+						self._check_result(task, result)
+
+		if not baddepsyntax and unknown_pkgs:
+			type_map = {}
+			for mytype, atom in unknown_pkgs:
+				type_map.setdefault(mytype, set()).add(atom)
+			for mytype, atoms in type_map.items():
+				self.qatracker.add_error(
+					"dependency.unknown", "%s: %s: %s"
+					% (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
 
-		for keyword, groups, prof in relevant_profiles:
+	@coroutine
+	def _task(self, task):
+		yield task.future
+		coroutine_return((task, task.future.result()))
+
+	def _iter_tasks(self, loop, executor, ebuild, pkg):
+		for keyword, groups, prof in ebuild.relevant_profiles:
 
 			is_stable_profile = prof.status == "stable"
 			is_dev_profile = prof.status == "dev" and \
@@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
 			dep_settings.usemask = dep_settings._use_manager.getUseMask(
 				pkg, stable=dep_settings._parent_stable)
 
+			task = types.SimpleNamespace(ebuild=ebuild, prof=prof, keyword=keyword)
+
+			target = functools.partial(self._task_subprocess, task, pkg, dep_settings)
+
+			if self.options.jobs <= 1:
+				yield (task, target())
+			else:
+				task.future = asyncio.ensure_future(loop.run_in_executor(executor, target), loop=loop)
+				yield self._task(task)
+
+
+	def _task_subprocess(self, task, pkg, dep_settings):
+			ebuild = task.ebuild
+			baddepsyntax = ebuild.baddepsyntax
+			results = []
+			prof = task.prof
 			if not baddepsyntax:
 				ismasked = not ebuild.archs or \
 					pkg.cpv not in self.portdb.xmatch("match-visible",
@@ -163,7 +239,7 @@ class ProfileDependsChecks(ScanBase):
 						self.have['pmasked'] = bool(dep_settings._getMaskAtom(
 							pkg.cpv, ebuild.metadata))
 					if self.options.ignore_masked:
-						continue
+						return results
 					# we are testing deps for a masked package; give it some lee-way
 					suffix = "masked"
 					matchmode = "minimum-all-ignore-profile"
@@ -191,6 +267,22 @@ class ProfileDependsChecks(ScanBase):
 						myvalue, self.portdb, dep_settings,
 						use="all", mode=matchmode, trees=self.repo_settings.trees)
 
+					results.append(types.SimpleNamespace(atoms=atoms, success=success, mykey=mykey, mytype=mytype))
+
+			return results
+
+
+	def _check_result(self, task, result):
+					prof = task.prof
+					keyword = task.keyword
+					ebuild = task.ebuild
+					unknown_pkgs = ebuild.unknown_pkgs
+
+					success = result.success
+					atoms = result.atoms
+					mykey = result.mykey
+					mytype = result.mytype
+
 					if success:
 						if atoms:
 
@@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
 
 							# if we emptied out our list, continue:
 							if not all_atoms:
-								continue
+								return
 
 							# Filter out duplicates.  We do this by hand (rather
 							# than use a set) so the order is stable and better
@@ -255,17 +347,6 @@ class ProfileDependsChecks(ScanBase):
 								% (ebuild.relative_path, mytype, keyword,
 									prof, pformat(atoms, indent=6)))
 
-		if not baddepsyntax and unknown_pkgs:
-			type_map = {}
-			for mytype, atom in unknown_pkgs:
-				type_map.setdefault(mytype, set()).add(atom)
-			for mytype, atoms in type_map.items():
-				self.qatracker.add_error(
-					"dependency.unknown", "%s: %s: %s"
-					% (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
-
-		return False
-
 	@property
 	def runInEbuilds(self):
 		'''Ebuild level scans'''
diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
index a6a9937e5..6f9a24544 100644
--- a/repoman/man/repoman.1
+++ b/repoman/man/repoman.1
@@ -1,4 +1,4 @@
-.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
+.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
 .SH NAME
 repoman \- Gentoo's program to enforce a minimal level of quality assurance in
 packages added to the ebuild repository
@@ -83,6 +83,13 @@ Be less verbose about extraneous info
 \fB-p\fR, \fB--pretend\fR
 Don't commit or fix anything; just show what would be done
 .TP
+\fB\-j\fR, \fB\-\-jobs\fR
+Specifies the number of jobs (processes) to run simultaneously.
+.TP
+\fB\-l\fR, \fB\-\-load-average\fR
+Specifies that no new jobs (processes) should be started if there are other
+jobs running and the load average is at least load (a floating\-point number).
+.TP
 \fB-x\fR, \fB--xmlparse\fR
 Forces the metadata.xml parse check to be carried out
 .TP
-- 
2.25.3



^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-08-17 15:24 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-17  3:26 [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462) Zac Medico
2020-08-17 15:24 ` Brian Dolbec

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox