* [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462)
@ 2020-08-17 3:26 Zac Medico
2020-08-17 15:24 ` Brian Dolbec
0 siblings, 1 reply; 2+ messages in thread
From: Zac Medico @ 2020-08-17 3:26 UTC (permalink / raw)
To: gentoo-portage-dev; +Cc: Zac Medico
Add --jobs and --load-average options which allow dependency checks
for multiple profiles to run in parallel. The increase in performance
is non-linear for the number of jobs, but it can be worthwhile
(I measured a 35% decrease in time when running 'repoman -j8 full'
on sys-apps/portage). For the -j1 case (default), all dependency
checks run in the main process as usual, so there is no significant
performance penalty for the default case.
Bug: https://bugs.gentoo.org/448462
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
repoman/lib/repoman/argparser.py | 9 ++
.../repoman/modules/scan/depend/profile.py | 117 +++++++++++++++---
repoman/man/repoman.1 | 9 +-
3 files changed, 116 insertions(+), 19 deletions(-)
diff --git a/repoman/lib/repoman/argparser.py b/repoman/lib/repoman/argparser.py
index 670a0e91d..6d545ccca 100644
--- a/repoman/lib/repoman/argparser.py
+++ b/repoman/lib/repoman/argparser.py
@@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
'--output-style', dest='output_style', choices=output_keys,
help='select output type', default='default')
+ parser.add_argument(
+ '-j', '--jobs', dest='jobs', action='store', type=int, default=1,
+ help='Specifies the number of jobs (processes) to run simultaneously.')
+
+ parser.add_argument(
+ '-l', '--load-average', dest='load_average', action='store', type=float, default=None,
+ help='Specifies that no new jobs (processes) should be started if there are others '
+ 'jobs running and the load average is at least load (a floating-point number).')
+
parser.add_argument(
'--mode', dest='mode', choices=mode_keys,
help='specify which mode repoman will run in (default=full)')
diff --git a/repoman/lib/repoman/modules/scan/depend/profile.py b/repoman/lib/repoman/modules/scan/depend/profile.py
index 39d8b550c..1eb69422a 100644
--- a/repoman/lib/repoman/modules/scan/depend/profile.py
+++ b/repoman/lib/repoman/modules/scan/depend/profile.py
@@ -2,7 +2,9 @@
import copy
+import functools
import os
+import types
from pprint import pformat
from _emerge.Package import Package
@@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches import _gen_arches
from portage.dep import Atom
from portage.package.ebuild.profile_iuse import iter_iuse_vars
from portage.util import getconfig
+from portage.util.futures import asyncio
+from portage.util.futures.compat_coroutine import coroutine, coroutine_return
+from portage.util.futures.executor.fork import ForkExecutor
+from portage.util.futures.iter_completed import async_iter_completed
def sort_key(item):
@@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
def check(self, **kwargs):
'''Perform profile dependant dependency checks
- @param arches:
@param pkg: Package in which we check (object).
@param ebuild: Ebuild which we check (object).
- @param baddepsyntax: boolean
- @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
@returns: dictionary
'''
ebuild = kwargs.get('ebuild').get()
pkg = kwargs.get('pkg').get()
- unknown_pkgs, baddepsyntax = _depend_checks(
+
+ ebuild.unknown_pkgs, ebuild.baddepsyntax = _depend_checks(
ebuild, pkg, self.portdb, self.qatracker, self.repo_metadata,
self.repo_settings.qadata)
@@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
relevant_profiles.append((keyword, groups, prof))
relevant_profiles.sort(key=sort_key)
+ ebuild.relevant_profiles = relevant_profiles
+
+ if self.options.jobs <= 1:
+ for task in self._iter_tasks(None, None, ebuild, pkg):
+ task, results = task
+ for result in results:
+ self._check_result(task, result)
+
+ loop = asyncio._wrap_loop()
+ loop.run_until_complete(self._async_check(loop=loop, **kwargs))
+
+ return False
+
+ @coroutine
+ def _async_check(self, loop=None, **kwargs):
+ '''Perform async profile dependant dependency checks
+
+ @param arches:
+ @param pkg: Package in which we check (object).
+ @param ebuild: Ebuild which we check (object).
+ @param baddepsyntax: boolean
+ @param unknown_pkgs: set of tuples (type, atom.unevaluated_atom)
+ @returns: dictionary
+ '''
+ loop = asyncio._wrap_loop(loop)
+ ebuild = kwargs.get('ebuild').get()
+ pkg = kwargs.get('pkg').get()
+ unknown_pkgs = ebuild.unknown_pkgs
+ baddepsyntax = ebuild.baddepsyntax
+
+ # Use max_workers=True to ensure immediate fork, since _iter_tasks
+ # needs the fork to create a snapshot of current state.
+ executor = ForkExecutor(max_workers=self.options.jobs)
+
+ if self.options.jobs > 1:
+ for future_done_set in async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
+ max_jobs=self.options.jobs, max_load=self.options.load_average, loop=loop):
+ for task in (yield future_done_set):
+ task, results = task.result()
+ for result in results:
+ self._check_result(task, result)
+
+ if not baddepsyntax and unknown_pkgs:
+ type_map = {}
+ for mytype, atom in unknown_pkgs:
+ type_map.setdefault(mytype, set()).add(atom)
+ for mytype, atoms in type_map.items():
+ self.qatracker.add_error(
+ "dependency.unknown", "%s: %s: %s"
+ % (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
- for keyword, groups, prof in relevant_profiles:
+ @coroutine
+ def _task(self, task):
+ yield task.future
+ coroutine_return((task, task.future.result()))
+
+ def _iter_tasks(self, loop, executor, ebuild, pkg):
+ for keyword, groups, prof in ebuild.relevant_profiles:
is_stable_profile = prof.status == "stable"
is_dev_profile = prof.status == "dev" and \
@@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
dep_settings.usemask = dep_settings._use_manager.getUseMask(
pkg, stable=dep_settings._parent_stable)
+ task = types.SimpleNamespace(ebuild=ebuild, prof=prof, keyword=keyword)
+
+ target = functools.partial(self._task_subprocess, task, pkg, dep_settings)
+
+ if self.options.jobs <= 1:
+ yield (task, target())
+ else:
+ task.future = asyncio.ensure_future(loop.run_in_executor(executor, target), loop=loop)
+ yield self._task(task)
+
+
+ def _task_subprocess(self, task, pkg, dep_settings):
+ ebuild = task.ebuild
+ baddepsyntax = ebuild.baddepsyntax
+ results = []
+ prof = task.prof
if not baddepsyntax:
ismasked = not ebuild.archs or \
pkg.cpv not in self.portdb.xmatch("match-visible",
@@ -163,7 +239,7 @@ class ProfileDependsChecks(ScanBase):
self.have['pmasked'] = bool(dep_settings._getMaskAtom(
pkg.cpv, ebuild.metadata))
if self.options.ignore_masked:
- continue
+ return results
# we are testing deps for a masked package; give it some lee-way
suffix = "masked"
matchmode = "minimum-all-ignore-profile"
@@ -191,6 +267,22 @@ class ProfileDependsChecks(ScanBase):
myvalue, self.portdb, dep_settings,
use="all", mode=matchmode, trees=self.repo_settings.trees)
+ results.append(types.SimpleNamespace(atoms=atoms, success=success, mykey=mykey, mytype=mytype))
+
+ return results
+
+
+ def _check_result(self, task, result):
+ prof = task.prof
+ keyword = task.keyword
+ ebuild = task.ebuild
+ unknown_pkgs = ebuild.unknown_pkgs
+
+ success = result.success
+ atoms = result.atoms
+ mykey = result.mykey
+ mytype = result.mytype
+
if success:
if atoms:
@@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
# if we emptied out our list, continue:
if not all_atoms:
- continue
+ return
# Filter out duplicates. We do this by hand (rather
# than use a set) so the order is stable and better
@@ -255,17 +347,6 @@ class ProfileDependsChecks(ScanBase):
% (ebuild.relative_path, mytype, keyword,
prof, pformat(atoms, indent=6)))
- if not baddepsyntax and unknown_pkgs:
- type_map = {}
- for mytype, atom in unknown_pkgs:
- type_map.setdefault(mytype, set()).add(atom)
- for mytype, atoms in type_map.items():
- self.qatracker.add_error(
- "dependency.unknown", "%s: %s: %s"
- % (ebuild.relative_path, mytype, ", ".join(sorted(atoms))))
-
- return False
-
@property
def runInEbuilds(self):
'''Ebuild level scans'''
diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
index a6a9937e5..6f9a24544 100644
--- a/repoman/man/repoman.1
+++ b/repoman/man/repoman.1
@@ -1,4 +1,4 @@
-.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
+.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
.SH NAME
repoman \- Gentoo's program to enforce a minimal level of quality assurance in
packages added to the ebuild repository
@@ -83,6 +83,13 @@ Be less verbose about extraneous info
\fB-p\fR, \fB--pretend\fR
Don't commit or fix anything; just show what would be done
.TP
+\fB\-j\fR, \fB\-\-jobs\fR
+Specifies the number of jobs (processes) to run simultaneously.
+.TP
+\fB\-l\fR, \fB\-\-load-average\fR
+Specifies that no new jobs (processes) should be started if there are other
+jobs running and the load average is at least load (a floating\-point number).
+.TP
\fB-x\fR, \fB--xmlparse\fR
Forces the metadata.xml parse check to be carried out
.TP
--
2.25.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462)
2020-08-17 3:26 [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462) Zac Medico
@ 2020-08-17 15:24 ` Brian Dolbec
0 siblings, 0 replies; 2+ messages in thread
From: Brian Dolbec @ 2020-08-17 15:24 UTC (permalink / raw)
To: gentoo-portage-dev
On Sun, 16 Aug 2020 20:26:56 -0700
Zac Medico <zmedico@gentoo.org> wrote:
> Add --jobs and --load-average options which allow dependency checks
> for multiple profiles to run in parallel. The increase in performance
> is non-linear for the number of jobs, but it can be worthwhile
> (I measured a 35% decrease in time when running 'repoman -j8 full'
> on sys-apps/portage). For the -j1 case (default), all dependency
> checks run in the main process as usual, so there is no significant
> performance penalty for the default case.
>
> Bug: https://bugs.gentoo.org/448462
> Signed-off-by: Zac Medico <zmedico@gentoo.org>
> ---
> repoman/lib/repoman/argparser.py | 9 ++
> .../repoman/modules/scan/depend/profile.py | 117
> +++++++++++++++--- repoman/man/repoman.1 |
> 9 +- 3 files changed, 116 insertions(+), 19 deletions(-)
>
> diff --git a/repoman/lib/repoman/argparser.py
> b/repoman/lib/repoman/argparser.py index 670a0e91d..6d545ccca 100644
> --- a/repoman/lib/repoman/argparser.py
> +++ b/repoman/lib/repoman/argparser.py
> @@ -199,6 +199,15 @@ def parse_args(argv, repoman_default_opts):
> '--output-style', dest='output_style',
> choices=output_keys, help='select output type', default='default')
>
> + parser.add_argument(
> + '-j', '--jobs', dest='jobs', action='store',
> type=int, default=1,
> + help='Specifies the number of jobs (processes) to
> run simultaneously.') +
> + parser.add_argument(
> + '-l', '--load-average', dest='load_average',
> action='store', type=float, default=None,
> + help='Specifies that no new jobs (processes) should
> be started if there are others '
> + 'jobs running and the load average is at
> least load (a floating-point number).') +
> parser.add_argument(
> '--mode', dest='mode', choices=mode_keys,
> help='specify which mode repoman will run in
> (default=full)') diff --git
> a/repoman/lib/repoman/modules/scan/depend/profile.py
> b/repoman/lib/repoman/modules/scan/depend/profile.py index
> 39d8b550c..1eb69422a 100644 ---
> a/repoman/lib/repoman/modules/scan/depend/profile.py +++
> b/repoman/lib/repoman/modules/scan/depend/profile.py @@ -2,7 +2,9 @@
>
> import copy
> +import functools
> import os
> +import types
> from pprint import pformat
>
> from _emerge.Package import Package
> @@ -15,6 +17,10 @@ from repoman.modules.scan.depend._gen_arches
> import _gen_arches from portage.dep import Atom
> from portage.package.ebuild.profile_iuse import iter_iuse_vars
> from portage.util import getconfig
> +from portage.util.futures import asyncio
> +from portage.util.futures.compat_coroutine import coroutine,
> coroutine_return +from portage.util.futures.executor.fork import
> ForkExecutor +from portage.util.futures.iter_completed import
> async_iter_completed
>
> def sort_key(item):
> @@ -58,16 +64,14 @@ class ProfileDependsChecks(ScanBase):
> def check(self, **kwargs):
> '''Perform profile dependant dependency checks
>
> - @param arches:
> @param pkg: Package in which we check (object).
> @param ebuild: Ebuild which we check (object).
> - @param baddepsyntax: boolean
> - @param unknown_pkgs: set of tuples (type,
> atom.unevaluated_atom) @returns: dictionary
> '''
> ebuild = kwargs.get('ebuild').get()
> pkg = kwargs.get('pkg').get()
> - unknown_pkgs, baddepsyntax = _depend_checks(
> +
> + ebuild.unknown_pkgs, ebuild.baddepsyntax =
> _depend_checks( ebuild, pkg, self.portdb, self.qatracker,
> self.repo_metadata, self.repo_settings.qadata)
>
> @@ -90,8 +94,64 @@ class ProfileDependsChecks(ScanBase):
> relevant_profiles.append((keyword,
> groups, prof))
> relevant_profiles.sort(key=sort_key)
> + ebuild.relevant_profiles = relevant_profiles
> +
> + if self.options.jobs <= 1:
> + for task in self._iter_tasks(None, None,
> ebuild, pkg):
> + task, results = task
> + for result in results:
> + self._check_result(task,
> result) +
> + loop = asyncio._wrap_loop()
> + loop.run_until_complete(self._async_check(loop=loop,
> **kwargs)) +
> + return False
> +
> + @coroutine
> + def _async_check(self, loop=None, **kwargs):
> + '''Perform async profile dependant dependency checks
> +
> + @param arches:
> + @param pkg: Package in which we check (object).
> + @param ebuild: Ebuild which we check (object).
> + @param baddepsyntax: boolean
> + @param unknown_pkgs: set of tuples (type,
> atom.unevaluated_atom)
> + @returns: dictionary
> + '''
> + loop = asyncio._wrap_loop(loop)
> + ebuild = kwargs.get('ebuild').get()
> + pkg = kwargs.get('pkg').get()
> + unknown_pkgs = ebuild.unknown_pkgs
> + baddepsyntax = ebuild.baddepsyntax
> +
> + # Use max_workers=True to ensure immediate fork,
> since _iter_tasks
> + # needs the fork to create a snapshot of current
> state.
> + executor =
> ForkExecutor(max_workers=self.options.jobs) +
> + if self.options.jobs > 1:
> + for future_done_set in
> async_iter_completed(self._iter_tasks(loop, executor, ebuild, pkg),
> + max_jobs=self.options.jobs,
> max_load=self.options.load_average, loop=loop):
> + for task in (yield future_done_set):
> + task, results = task.result()
> + for result in results:
> +
> self._check_result(task, result) +
> + if not baddepsyntax and unknown_pkgs:
> + type_map = {}
> + for mytype, atom in unknown_pkgs:
> + type_map.setdefault(mytype,
> set()).add(atom)
> + for mytype, atoms in type_map.items():
> + self.qatracker.add_error(
> + "dependency.unknown", "%s:
> %s: %s"
> + % (ebuild.relative_path,
> mytype, ", ".join(sorted(atoms))))
> - for keyword, groups, prof in relevant_profiles:
> + @coroutine
> + def _task(self, task):
> + yield task.future
> + coroutine_return((task, task.future.result()))
> +
> + def _iter_tasks(self, loop, executor, ebuild, pkg):
> + for keyword, groups, prof in
> ebuild.relevant_profiles:
> is_stable_profile = prof.status == "stable"
> is_dev_profile = prof.status == "dev" and \
> @@ -154,6 +214,22 @@ class ProfileDependsChecks(ScanBase):
> dep_settings.usemask =
> dep_settings._use_manager.getUseMask( pkg,
> stable=dep_settings._parent_stable)
> + task = types.SimpleNamespace(ebuild=ebuild,
> prof=prof, keyword=keyword) +
> + target =
> functools.partial(self._task_subprocess, task, pkg, dep_settings) +
> + if self.options.jobs <= 1:
> + yield (task, target())
> + else:
> + task.future =
> asyncio.ensure_future(loop.run_in_executor(executor, target),
> loop=loop)
> + yield self._task(task)
> +
> +
> + def _task_subprocess(self, task, pkg, dep_settings):
> + ebuild = task.ebuild
> + baddepsyntax = ebuild.baddepsyntax
> + results = []
> + prof = task.prof
> if not baddepsyntax:
> ismasked = not ebuild.archs or \
> pkg.cpv not in
> self.portdb.xmatch("match-visible", @@ -163,7 +239,7 @@ class
> ProfileDependsChecks(ScanBase): self.have['pmasked'] =
> bool(dep_settings._getMaskAtom( pkg.cpv, ebuild.metadata))
> if
> self.options.ignore_masked:
> - continue
> + return results
> # we are testing deps for a
> masked package; give it some lee-way suffix = "masked"
> matchmode =
> "minimum-all-ignore-profile" @@ -191,6 +267,22 @@ class
> ProfileDependsChecks(ScanBase): myvalue, self.portdb, dep_settings,
> use="all",
> mode=matchmode, trees=self.repo_settings.trees)
> +
> results.append(types.SimpleNamespace(atoms=atoms, success=success,
> mykey=mykey, mytype=mytype)) +
> + return results
> +
> +
> + def _check_result(self, task, result):
> + prof = task.prof
> + keyword = task.keyword
> + ebuild = task.ebuild
> + unknown_pkgs =
> ebuild.unknown_pkgs +
> + success = result.success
> + atoms = result.atoms
> + mykey = result.mykey
> + mytype = result.mytype
> +
> if success:
> if atoms:
>
> @@ -223,7 +315,7 @@ class ProfileDependsChecks(ScanBase):
>
> # if we
> emptied out our list, continue: if not all_atoms:
> -
> continue
> +
> return
> # Filter out
> duplicates. We do this by hand (rather # than use a set) so the
> order is stable and better @@ -255,17 +347,6 @@ class
> ProfileDependsChecks(ScanBase): % (ebuild.relative_path, mytype,
> keyword, prof, pformat(atoms, indent=6)))
>
> - if not baddepsyntax and unknown_pkgs:
> - type_map = {}
> - for mytype, atom in unknown_pkgs:
> - type_map.setdefault(mytype,
> set()).add(atom)
> - for mytype, atoms in type_map.items():
> - self.qatracker.add_error(
> - "dependency.unknown", "%s:
> %s: %s"
> - % (ebuild.relative_path,
> mytype, ", ".join(sorted(atoms)))) -
> - return False
> -
> @property
> def runInEbuilds(self):
> '''Ebuild level scans'''
> diff --git a/repoman/man/repoman.1 b/repoman/man/repoman.1
> index a6a9937e5..6f9a24544 100644
> --- a/repoman/man/repoman.1
> +++ b/repoman/man/repoman.1
> @@ -1,4 +1,4 @@
> -.TH "REPOMAN" "1" "Mar 2018" "Repoman VERSION" "Repoman"
> +.TH "REPOMAN" "1" "Aug 2020" "Repoman VERSION" "Repoman"
> .SH NAME
> repoman \- Gentoo's program to enforce a minimal level of quality
> assurance in packages added to the ebuild repository
> @@ -83,6 +83,13 @@ Be less verbose about extraneous info
> \fB-p\fR, \fB--pretend\fR
> Don't commit or fix anything; just show what would be done
> .TP
> +\fB\-j\fR, \fB\-\-jobs\fR
> +Specifies the number of jobs (processes) to run simultaneously.
> +.TP
> +\fB\-l\fR, \fB\-\-load-average\fR
> +Specifies that no new jobs (processes) should be started if there
> are others +jobs running and the load average is at least load (a
> floating\-point number). +.TP
> \fB-x\fR, \fB--xmlparse\fR
> Forces the metadata.xml parse check to be carried out
> .TP
code looks good to me
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-08-17 15:24 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-17 3:26 [gentoo-portage-dev] [PATCH] repoman: Add --jobs and --load-average options (bug 448462) Zac Medico
2020-08-17 15:24 ` Brian Dolbec
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox