* [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
@ 2012-05-11 16:39 Mike Frysinger
2012-05-11 17:32 ` Zac Medico
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: Mike Frysinger @ 2012-05-11 16:39 UTC (permalink / raw
To: gentoo-portage-dev
Stealing some ideas from ferringb, add a new API for doing parallel
processing in bash, and then deploy this with the stripping and
compressing stages.
For stripping coreutils which has about 100 ELFs, this brings time
to strip down from ~7 seconds to ~0.7 seconds on my system.
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
note: i'm not terribly happy with the name "helper-functions.sh", so any
better suggestions would be good. i didn't want to use "ebuild-helpers.sh"
as that messes up tab completion ;).
bin/ebuild-helpers/ecompressdir | 30 ++++++++++++++++---
bin/ebuild-helpers/prepstrip | 20 ++++++++++---
bin/helper-functions.sh | 62 +++++++++++++++++++++++++++++++++++++++
3 files changed, 104 insertions(+), 8 deletions(-)
create mode 100644 bin/helper-functions.sh
diff --git a/bin/ebuild-helpers/ecompressdir b/bin/ebuild-helpers/ecompressdir
index 17ecd80..a2c9e52 100755
--- a/bin/ebuild-helpers/ecompressdir
+++ b/bin/ebuild-helpers/ecompressdir
@@ -2,7 +2,7 @@
# Copyright 1999-2011 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
-source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
+source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh
if [[ -z $1 ]] ; then
helpers_die "${0##*/}: at least one argument needed"
@@ -116,6 +116,16 @@ ret=0
rm -rf "${T}"/ecompress-skip
+decompressors=(
+ ".Z" "gunzip -f"
+ ".gz" "gunzip -f"
+ ".bz2" "bunzip2 -f"
+ ".xz" "unxz -f"
+ ".lzma" "unxz -f"
+)
+
+multijob_init
+
for dir in "$@" ; do
dir=${dir#/}
dir="${ED}${dir}"
@@ -136,14 +146,26 @@ for dir in "$@" ; do
find "${dir}" -type f -name '*.ecompress.file' -print0 | ${XARGS} -0 rm -f
# not uncommon for packages to compress doc files themselves
- funk_up_dir "decompress" ".Z" "gunzip -f"
- funk_up_dir "decompress" ".gz" "gunzip -f"
- funk_up_dir "decompress" ".bz2" "bunzip2 -f"
+ for (( d = 0; d < ${#decompressors[@]}; d += 2 )) ; do
+ # It's faster to parallelize at this stage than to try to
+ # parallelize the compressors. This is because the find|xargs
+ # ends up launching less compressors overall, so the overhead
+ # of forking children ends up dominating.
+ (
+ multijob_child_init
+ funk_up_dir "decompress" "${decompressors[i]}" "${decompressors[i+1]}"
+ ) &
+ multijob_post_fork
+ : $(( ret |= $? ))
+ done
# forcibly break all hard links as some compressors whine about it
find "${dir}" -type f -links +1 -exec env file="{}" sh -c \
'cp -p "${file}" "${file}.ecompress.break" ; mv -f "${file}.ecompress.break" "${file}"' \;
+ multijob_finish
+ : $(( ret |= $? ))
+
# now lets do our work
if [[ -n ${suffix} ]] ; then
vecho "${0##*/}: $(ecompress --bin) /${actual_dir#${ED}}"
diff --git a/bin/ebuild-helpers/prepstrip b/bin/ebuild-helpers/prepstrip
index daaa252..09b0333 100755
--- a/bin/ebuild-helpers/prepstrip
+++ b/bin/ebuild-helpers/prepstrip
@@ -1,8 +1,8 @@
#!/bin/bash
-# Copyright 1999-2011 Gentoo Foundation
+# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
-source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
+source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh
# avoid multiple calls to `has`. this creates things like:
# FEATURES_foo=false
@@ -62,6 +62,8 @@ prepstrip_sources_dir=${EPREFIX}/usr/src/debug/${CATEGORY}/${PF}
type -P debugedit >/dev/null && debugedit_found=true || debugedit_found=false
debugedit_warned=false
+multijob_init
+
unset ${!INODE_*}
inode_var_name() {
@@ -171,6 +173,8 @@ process_elf() {
# We want to log already stripped binaries, as this may be a QA violation.
# They prevent us from getting the splitdebug data.
if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
+ (
+ multijob_child_init
log=$T/scanelf-already-stripped.log
qa_var="QA_PRESTRIPPED_${ARCH/-/_}"
[[ -n ${!qa_var} ]] && QA_PRESTRIPPED="${!qa_var}"
@@ -193,6 +197,8 @@ if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
else
rm -f "$log"
fi
+ ) &
+ multijob_post_fork
fi
# Now we look for unstripped binaries.
@@ -205,8 +211,10 @@ do
banner=true
fi
- f=$(file "${x}") || continue
- [[ -z ${f} ]] && continue
+ (
+ multijob_child_init
+ f=$(file "${x}") || exit 0
+ [[ -z ${f} ]] && exit 0
if ! ${SKIP_STRIP} ; then
# The noglob funk is to support STRIP_MASK="/*/booga" and to keep
@@ -253,6 +261,8 @@ do
if ${was_not_writable} ; then
chmod u-w "${x}"
fi
+ ) &
+ multijob_post_fork
done
if [[ -s ${T}/debug.sources ]] && \
@@ -274,3 +284,5 @@ then
>> "$emptydir"/.keepdir
done < <(find "${D}${prepstrip_sources_dir}/" -type d -empty -print0)
fi
+
+multijob_finish
diff --git a/bin/helper-functions.sh b/bin/helper-functions.sh
new file mode 100644
index 0000000..c69a41a
--- /dev/null
+++ b/bin/helper-functions.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Copyright 1999-2012 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+# For routines we want to use in ebuild-helpers/ but don't want to
+# expose to the general ebuild environment.
+
+source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh
+
+#
+# API functions for doing parallel processing
+#
+numjobs() {
+ # Copied from eutils.eclass:makeopts_jobs()
+ local jobs=$(echo " ${MAKEOPTS} " | \
+ sed -r -n 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p')
+ echo ${jobs:-1}
+}
+
+multijob_init() {
+ # Setup a pipe for children to write their pids to when they finish.
+ mj_control_pipe=$(mktemp -t multijob.XXXXXX)
+ rm "${mj_control_pipe}"
+ mkfifo "${mj_control_pipe}"
+ exec {mj_control_fd}<>${mj_control_pipe}
+ rm -f "${mj_control_pipe}"
+
+ # See how many children we can fork based on the user's settings.
+ mj_max_jobs=$(numjobs)
+ mj_num_jobs=0
+}
+
+multijob_child_init() {
+ trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
+ trap 'exit 1' INT TERM
+}
+
+multijob_finish_one() {
+ local pid ret
+ read -r -u ${mj_control_fd} pid ret
+ : $(( --mj_num_jobs ))
+ return ${ret}
+}
+
+multijob_finish() {
+ local ret=0
+ while [[ ${mj_num_jobs} -gt 0 ]] ; do
+ multijob_finish_one
+ : $(( ret += $? ))
+ done
+ # Let bash clean up its internal child tracking state.
+ wait
+ return ${ret}
+}
+
+multijob_post_fork() {
+ : $(( ++mj_num_jobs ))
+ if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
+ multijob_finish_one
+ fi
+ return 0
+}
--
1.7.9.7
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-11 16:39 [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations Mike Frysinger
@ 2012-05-11 17:32 ` Zac Medico
2012-05-11 18:14 ` Mike Frysinger
2012-05-14 7:33 ` Michael Haubenwallner
2012-06-02 18:53 ` Zac Medico
2 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-05-11 17:32 UTC (permalink / raw
To: gentoo-portage-dev
On 05/11/2012 09:39 AM, Mike Frysinger wrote:
> +multijob_finish() {
> + local ret=0
> + while [[ ${mj_num_jobs} -gt 0 ]] ; do
> + multijob_finish_one
> + : $(( ret += $? ))
> + done
> + # Let bash clean up its internal child tracking state.
> + wait
> + return ${ret}
> +}
Wouldn't it be better to use $(( ret |= $? )) there, in order to avoid a
possible integer overflow? Other than that, the patch looks good to me.
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-11 17:32 ` Zac Medico
@ 2012-05-11 18:14 ` Mike Frysinger
0 siblings, 0 replies; 17+ messages in thread
From: Mike Frysinger @ 2012-05-11 18:14 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 726 bytes --]
On Friday 11 May 2012 13:32:46 Zac Medico wrote:
> On 05/11/2012 09:39 AM, Mike Frysinger wrote:
> > +multijob_finish() {
> > + local ret=0
> > + while [[ ${mj_num_jobs} -gt 0 ]] ; do
> > + multijob_finish_one
> > + : $(( ret += $? ))
> > + done
> > + # Let bash clean up its internal child tracking state.
> > + wait
> > + return ${ret}
> > +}
>
> Wouldn't it be better to use $(( ret |= $? )) there, in order to avoid a
> possible integer overflow? Other than that, the patch looks good to me.
i meant to use |= like the other places ...
not that i'm too worried about overflow here as the exit value is clamped to
[0..255], so it'd take millions of failing processes to cause a problem :).
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-11 16:39 [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations Mike Frysinger
2012-05-11 17:32 ` Zac Medico
@ 2012-05-14 7:33 ` Michael Haubenwallner
2012-05-14 8:44 ` Zac Medico
` (2 more replies)
2012-06-02 18:53 ` Zac Medico
2 siblings, 3 replies; 17+ messages in thread
From: Michael Haubenwallner @ 2012-05-14 7:33 UTC (permalink / raw
To: gentoo-portage-dev
On 05/11/2012 06:39 PM, Mike Frysinger wrote:
> +multijob_child_init() {
> + trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
> + trap 'exit 1' INT TERM
> +}
Just wondering why $! in parent isn't used anywhere, even not for some
integrity check if the child's BASHPID actually was forked by parent.
> +multijob_post_fork() {
> + : $(( ++mj_num_jobs ))
> + if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> + multijob_finish_one
Feels like ignoring this child's exitstatus isn't intentional here.
> + fi
> + return 0
> +}
/haubi/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 7:33 ` Michael Haubenwallner
@ 2012-05-14 8:44 ` Zac Medico
2012-05-14 17:37 ` Mike Frysinger
2012-05-14 8:48 ` Brian Harring
2012-05-14 17:33 ` Mike Frysinger
2 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-05-14 8:44 UTC (permalink / raw
To: gentoo-portage-dev
On 05/14/2012 12:33 AM, Michael Haubenwallner wrote:
>> +multijob_post_fork() {
>> + : $(( ++mj_num_jobs ))
>> + if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
>> + multijob_finish_one
>
> Feels like ignoring this child's exitstatus isn't intentional here.
Thanks, fixed now:
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=2adc44295a5b5c77640c32cd24ebbd8d52e5237b
And here are a couple of more related fixes:
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=b4fba3e9fa2e285244de491f57700978158c1838
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=c534e32f78cf7c543e9203e7fe1c7b1630144ffb
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 7:33 ` Michael Haubenwallner
2012-05-14 8:44 ` Zac Medico
@ 2012-05-14 8:48 ` Brian Harring
2012-05-14 17:33 ` Mike Frysinger
2 siblings, 0 replies; 17+ messages in thread
From: Brian Harring @ 2012-05-14 8:48 UTC (permalink / raw
To: gentoo-portage-dev
On Mon, May 14, 2012 at 09:33:58AM +0200, Michael Haubenwallner wrote:
>
>
> On 05/11/2012 06:39 PM, Mike Frysinger wrote:
> > +multijob_child_init() {
> > + trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
> > + trap 'exit 1' INT TERM
> > +}
>
> Just wondering why $! in parent isn't used anywhere, even not for some
> integrity check if the child's BASHPID actually was forked by parent.
wait'ing on it can fail; roughly bash basically reaps on its own
(uncontrollably so), but bash still will actually do the wait,
basically falling back to its internal list of what it reaped.
That's *roughly* what I got out of it when I wrote what vapier's
ape'ing here, and is exactly the issue that bit me in the ass on a 48
core. If things are moving fast enough, sooner or later that whacky
wait behaviour will intersect a real pid, one that isn't a direct
child, and bash will puke a horrible error.
That pretty much leaves you w/ 'wait jobspec' or 'wait' to clean up
the bash innards. This exact issue is why the code passes the exit
status back.
~harring
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 7:33 ` Michael Haubenwallner
2012-05-14 8:44 ` Zac Medico
2012-05-14 8:48 ` Brian Harring
@ 2012-05-14 17:33 ` Mike Frysinger
2 siblings, 0 replies; 17+ messages in thread
From: Mike Frysinger @ 2012-05-14 17:33 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 761 bytes --]
On Monday 14 May 2012 03:33:58 Michael Haubenwallner wrote:
> On 05/11/2012 06:39 PM, Mike Frysinger wrote:
> > +multijob_child_init() {
> > + trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
> > + trap 'exit 1' INT TERM
> > +}
>
> Just wondering why $! in parent isn't used anywhere, even not for some
> integrity check if the child's BASHPID actually was forked by parent.
i don't know of any cases where this would error out. if there are too many
processes, bash itself will retry a few times before aborting. so checking $!
wouldn't help.
keep in mind, what you're proposing is basically checking the return value of
fork(), and that can fail in very few ways. all of which, afaik, bash does
not bubble up to the script.
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 8:44 ` Zac Medico
@ 2012-05-14 17:37 ` Mike Frysinger
2012-05-14 18:53 ` Mike Frysinger
0 siblings, 1 reply; 17+ messages in thread
From: Mike Frysinger @ 2012-05-14 17:37 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 2524 bytes --]
On Monday 14 May 2012 04:44:12 Zac Medico wrote:
> On 05/14/2012 12:33 AM, Michael Haubenwallner wrote:
> >> +multijob_post_fork() {
> >> + : $(( ++mj_num_jobs ))
> >> + if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> >> + multijob_finish_one
> >
> > Feels like ignoring this child's exitstatus isn't intentional here.
>
> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=2adc44
> 295a5b5c77640c32cd24ebbd8d52e5237b
simpler:
--- a/bin/helper-functions.sh
+++ b/bin/helper-functions.sh
@@ -54,11 +54,9 @@ multijob_finish() {
}
multijob_post_fork() {
- local ret=0
: $(( ++mj_num_jobs ))
if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
multijob_finish_one
- : $(( ret |= $? ))
fi
- return ${ret}
+ return $?
}
> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=b4fba3
> e9fa2e285244de491f57700978158c1838
should really fix it to make the code parallel safe rather than disabling it
completely. i'll work on that.
> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=c534e
> 32f78cf7c543e9203e7fe1c7b1630144ffb
forking & waiting for a single child doesn't make much sense. might as well
not fork at all. but this can still be parallelized a little:
--- a/bin/ebuild-helpers/prepstrip
+++ b/bin/ebuild-helpers/prepstrip
@@ -187,12 +187,15 @@ process_elf() {
# We want to log already stripped binaries, as this may be a QA violation.
# They prevent us from getting the splitdebug data.
if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
+ # We need to do the non-stripped scan serially first before we turn around
+ # and start stripping the files ourselves. The log parsing can be done in
+ # parallel though.
+ log=$T/scanelf-already-stripped.log
+ scanelf -yqRBF '#k%F' -k '!.symtab' "$@" | sed -e "s#^${ED}##" > "$log"
(
multijob_child_init
- log=$T/scanelf-already-stripped.log
qa_var="QA_PRESTRIPPED_${ARCH/-/_}"
[[ -n ${!qa_var} ]] && QA_PRESTRIPPED="${!qa_var}"
- scanelf -yqRBF '#k%F' -k '!.symtab' "$@" | sed -e "s#^${ED}##" > "$log"
if [[ -n $QA_PRESTRIPPED && -s $log && \
${QA_STRICT_PRESTRIPPED-unset} = unset ]] ; then
shopts=$-
@@ -215,9 +218,6 @@ if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
multijob_post_fork
fi
-# Let the Pre-stripped check finish before we start stripping
-multijob_finish
-
# Now we look for unstripped binaries.
for x in \
$(scanelf -yqRBF '#k%F' -k '.symtab' "$@") \
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 17:37 ` Mike Frysinger
@ 2012-05-14 18:53 ` Mike Frysinger
2012-05-14 19:02 ` Zac Medico
0 siblings, 1 reply; 17+ messages in thread
From: Mike Frysinger @ 2012-05-14 18:53 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 3910 bytes --]
On Monday 14 May 2012 13:37:40 Mike Frysinger wrote:
> On Monday 14 May 2012 04:44:12 Zac Medico wrote:
> > http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=b4fb
> > a3 e9fa2e285244de491f57700978158c1838
>
> should really fix it to make the code parallel safe rather than disabling
> it completely. i'll work on that.
this should make it parallel safe
-mike
--- a/bin/ebuild-helpers/prepstrip
+++ b/bin/ebuild-helpers/prepstrip
@@ -62,22 +62,13 @@ prepstrip_sources_dir=${EPREFIX}/usr/src/debug/${CATEGORY}/${PF}
type -P debugedit >/dev/null && debugedit_found=true || debugedit_found=false
debugedit_warned=false
-disable_parallel=false
-${FEATURES_splitdebug} && disable_parallel=true
-${FEATURES_installsources} && \
- ! ${RESTRICT_installsources} && \
- ${debugedit_found} && disable_parallel=true
-
-if ${disable_parallel} ; then
- # Disable parallel processing, in order to prevent interference with
- # temp files like debug.sources or prepstrip.split.debug
- numjobs() {
- echo 1
- }
-fi
-
multijob_init
+# Setup $T filesystem layout that we care about.
+tmpdir="${T}/prepstrip"
+rm -rf "${tmpdir}"
+mkdir -p "${tmpdir}"/{splitdebug,sources}
+
unset ${!INODE_*}
# Usage: inode_var_name: <file>
@@ -112,11 +103,11 @@ save_elf_sources() {
buildid=$(debugedit -i \
-b "${WORKDIR}" \
-d "${prepstrip_sources_dir}" \
- -l "${T}"/debug.sources \
+ -l "${tmpdir}/sources/${x##*/}.${BASHPID}" \
"${x}")
}
-# Usage: save_elf_debug <elf>
+# Usage: save_elf_debug <elf> [splitdebug file]
save_elf_debug() {
${FEATURES_splitdebug} || return 0
@@ -125,6 +116,7 @@ save_elf_debug() {
# twice in this path) in order for gdb's debug-file-directory
# lookup to work correctly.
local x=$1
+ local splitdebug=$2
local y=${ED}usr/lib/debug/${x:${#D}}.debug
# dont save debug info twice
@@ -137,8 +129,8 @@ save_elf_debug() {
ln "${ED}usr/lib/debug/${!inode:${#D}}.debug" "${y}"
else
eval ${inode}=\${x}
- if [[ -e ${T}/prepstrip.split.debug ]] ; then
- mv "${T}"/prepstrip.split.debug "${y}"
+ if [[ -n ${splitdebug} ]] ; then
+ mv "${splitdebug}" "${y}"
else
local objcopy_flags="--only-keep-debug"
${FEATURES_compressdebug} && objcopy_flags+=" --compress-debug-sections"
@@ -175,11 +167,13 @@ process_elf() {
if ${strip_this} ; then
# see if we can split & strip at the same time
if [[ -n ${SPLIT_STRIP_FLAGS} ]] ; then
+ local shortname="${x##*/}.debug"
+ local splitdebug="${tmpdir}/splitdebug/${shortname}.${BASHPID}"
${STRIP} ${strip_flags} \
- -f "${T}"/prepstrip.split.debug \
- -F "${x##*/}.debug" \
+ -f "${splitdebug}" \
+ -F "${shortname}" \
"${x}"
- save_elf_debug "${x}"
+ save_elf_debug "${x}" "${splitdebug}"
else
save_elf_debug "${x}"
${STRIP} ${strip_flags} "${x}"
@@ -194,8 +188,8 @@ if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then
# We need to do the non-stripped scan serially first before we turn around
# and start stripping the files ourselves. The log parsing can be done in
# parallel though.
- log=$T/scanelf-already-stripped.log
- scanelf -yqRBF '#k%F' -k '!.symtab' "$@" | sed -e "s#^${ED}##" > "$log"
+ log=${tmpdir}/scanelf-already-stripped.log
+ scanelf -yqRBF '#k%F' -k '!.symtab' "$@" | sed -e "s#^${ED}##" > "${log}"
(
multijob_child_init
qa_var="QA_PRESTRIPPED_${ARCH/-/_}"
@@ -286,9 +280,11 @@ do
multijob_post_fork
done
-# Let jobs finish before processing ${T}/debug.sources
+# With a bit more work, we could run the rsync processes below in
+# parallel, but not sure that'd be an overall improvement.
multijob_finish
+cat "${tmpdir}"/sources/* > "${tmpdir}/debug.sources" 2>/dev/null
if [[ -s ${T}/debug.sources ]] && \
${FEATURES_installsources} && \
! ${RESTRICT_installsources} && \
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 18:53 ` Mike Frysinger
@ 2012-05-14 19:02 ` Zac Medico
2012-05-14 19:08 ` Zac Medico
0 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-05-14 19:02 UTC (permalink / raw
To: gentoo-portage-dev
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On 05/14/2012 11:53 AM, Mike Frysinger wrote:
> On Monday 14 May 2012 13:37:40 Mike Frysinger wrote:
>> On Monday 14 May 2012 04:44:12 Zac Medico wrote:
>>> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=b4fb
>>>
>>>
a3 e9fa2e285244de491f57700978158c1838
>>
>> should really fix it to make the code parallel safe rather than
>> disabling it completely. i'll work on that.
>
> this should make it parallel safe -mike
Yeah, that looks good.
- --
Thanks,
Zac
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
iEYEARECAAYFAk+xVuEACgkQ/ejvha5XGaNa1ACeLTRHjwNuRRXp9wsLgKeTcKEp
W7QAn2Z642Dx8r2OhDSifoqZtljFn7+E
=piRb
-----END PGP SIGNATURE-----
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 19:02 ` Zac Medico
@ 2012-05-14 19:08 ` Zac Medico
2012-05-14 20:10 ` Mike Frysinger
0 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-05-14 19:08 UTC (permalink / raw
To: gentoo-portage-dev
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
On 05/14/2012 12:02 PM, Zac Medico wrote:
> On 05/14/2012 11:53 AM, Mike Frysinger wrote:
>> On Monday 14 May 2012 13:37:40 Mike Frysinger wrote:
>>> On Monday 14 May 2012 04:44:12 Zac Medico wrote:
>>>> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=b4fb
>>>>
>>>>
>
>>>>
a3 e9fa2e285244de491f57700978158c1838
>>>
>>> should really fix it to make the code parallel safe rather
>>> than disabling it completely. i'll work on that.
>
>> this should make it parallel safe -mike
>
> Yeah, that looks good.
Actually, the inode_var_name thing will not work unless it's all in
one process.
- --
Thanks,
Zac
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
iEYEARECAAYFAk+xWDAACgkQ/ejvha5XGaM8OwCguDf5rKVv4cpEmOYoqwrLBgGM
mr0AniCfHtJiNJRpF+mC4oHquO3nSen1
=3gSf
-----END PGP SIGNATURE-----
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 19:08 ` Zac Medico
@ 2012-05-14 20:10 ` Mike Frysinger
2012-05-14 22:42 ` Zac Medico
0 siblings, 1 reply; 17+ messages in thread
From: Mike Frysinger @ 2012-05-14 20:10 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 522 bytes --]
On Monday 14 May 2012 15:08:32 Zac Medico wrote:
> Actually, the inode_var_name thing will not work unless it's all in
> one process.
hmm, true, but that's the level we currently parallelize at, so it's fine. we
do one subprocess per ELF and that includes the strip/splitdebug/splitsources.
parallelizing more than on a per-ELF basis will require much finer grained
queues which, while possible, would make the file much harder to hack on and
extend. and i'm not sure we'd see that much of a gain.
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 20:10 ` Mike Frysinger
@ 2012-05-14 22:42 ` Zac Medico
2012-05-14 23:13 ` Mike Frysinger
0 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-05-14 22:42 UTC (permalink / raw
To: gentoo-portage-dev
On 05/14/2012 01:10 PM, Mike Frysinger wrote:
> On Monday 14 May 2012 15:08:32 Zac Medico wrote:
>> Actually, the inode_var_name thing will not work unless it's all in
>> one process.
>
> hmm, true, but that's the level we currently parallelize at, so it's fine. we
> do one subprocess per ELF and that includes the strip/splitdebug/splitsources.
>
> parallelizing more than on a per-ELF basis will require much finer grained
> queues which, while possible, would make the file much harder to hack on and
> extend. and i'm not sure we'd see that much of a gain.
> -mike
The thing is, in the case of hardlinks, we're parallelizing multiple
times on the *same* elf. Anyway, I've fixed it by using a directory full
of hardlinks, in these commits:
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=ad944275b88a50d2a1f694826b127cceb9221e78
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=9ed00a9e70a3705164a5349145ff467e5c40ddfd
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-14 22:42 ` Zac Medico
@ 2012-05-14 23:13 ` Mike Frysinger
0 siblings, 0 replies; 17+ messages in thread
From: Mike Frysinger @ 2012-05-14 23:13 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 1254 bytes --]
On Monday 14 May 2012 18:42:07 Zac Medico wrote:
> On 05/14/2012 01:10 PM, Mike Frysinger wrote:
> > On Monday 14 May 2012 15:08:32 Zac Medico wrote:
> >> Actually, the inode_var_name thing will not work unless it's all in
> >> one process.
> >
> > hmm, true, but that's the level we currently parallelize at, so it's
> > fine. we do one subprocess per ELF and that includes the
> > strip/splitdebug/splitsources.
> >
> > parallelizing more than on a per-ELF basis will require much finer
> > grained queues which, while possible, would make the file much harder to
> > hack on and extend. and i'm not sure we'd see that much of a gain.
>
> The thing is, in the case of hardlinks, we're parallelizing multiple
> times on the *same* elf. Anyway, I've fixed it by using a directory full
> of hardlinks, in these commits:
well, realistically speaking, hardlinking has been broken since before the
parallelization work.
https://bugs.gentoo.org/400767
> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=ad9442
> 75b88a50d2a1f694826b127cceb9221e78
> http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=9ed00
> a9e70a3705164a5349145ff467e5c40ddfd
i'll go through it and see what's what
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-05-11 16:39 [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations Mike Frysinger
2012-05-11 17:32 ` Zac Medico
2012-05-14 7:33 ` Michael Haubenwallner
@ 2012-06-02 18:53 ` Zac Medico
2012-06-02 23:54 ` Mike Frysinger
2 siblings, 1 reply; 17+ messages in thread
From: Zac Medico @ 2012-06-02 18:53 UTC (permalink / raw
To: gentoo-portage-dev
On 05/11/2012 09:39 AM, Mike Frysinger wrote:
> + exec {mj_control_fd}<>${mj_control_pipe}
I've heard that this is new in bash 4.1 [1]. Hopefully it doesn't bother
anyone if portage relies on this. The prefix bootstrap script already
includes bash 4.2 [2], so it shouldn't be a problem there.
[1]
http://archives.gentoo.org/gentoo-dev/msg_cd4fa017636d3e6bdf7b146ae8390407.xml
[2]
http://overlays.gentoo.org/proj/alt/browser/trunk/prefix-overlay/scripts/bootstrap-prefix.sh?format=txt
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-06-02 18:53 ` Zac Medico
@ 2012-06-02 23:54 ` Mike Frysinger
2012-06-05 0:29 ` Zac Medico
0 siblings, 1 reply; 17+ messages in thread
From: Mike Frysinger @ 2012-06-02 23:54 UTC (permalink / raw
To: gentoo-portage-dev
[-- Attachment #1: Type: Text/Plain, Size: 434 bytes --]
On Saturday 02 June 2012 14:53:19 Zac Medico wrote:
> On 05/11/2012 09:39 AM, Mike Frysinger wrote:
> > + exec {mj_control_fd}<>${mj_control_pipe}
>
> I've heard that this is new in bash 4.1 [1]. Hopefully it doesn't bother
> anyone if portage relies on this. The prefix bootstrap script already
> includes bash 4.2 [2], so it shouldn't be a problem there.
once the eclass gets sorted out, i'll merge the fixes back
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations
2012-06-02 23:54 ` Mike Frysinger
@ 2012-06-05 0:29 ` Zac Medico
0 siblings, 0 replies; 17+ messages in thread
From: Zac Medico @ 2012-06-05 0:29 UTC (permalink / raw
To: gentoo-portage-dev
On 06/02/2012 04:54 PM, Mike Frysinger wrote:
> On Saturday 02 June 2012 14:53:19 Zac Medico wrote:
>> On 05/11/2012 09:39 AM, Mike Frysinger wrote:
>>> + exec {mj_control_fd}<>${mj_control_pipe}
>>
>> I've heard that this is new in bash 4.1 [1]. Hopefully it doesn't bother
>> anyone if portage relies on this. The prefix bootstrap script already
>> includes bash 4.2 [2], so it shouldn't be a problem there.
>
> once the eclass gets sorted out, i'll merge the fixes back
> -mike
Done now:
http://git.overlays.gentoo.org/gitweb/?p=proj/portage.git;a=commit;h=2c50bd9a82c3bb6dfbc63466ae8bfbd401fb3235
--
Thanks,
Zac
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2012-06-05 3:08 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-11 16:39 [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations Mike Frysinger
2012-05-11 17:32 ` Zac Medico
2012-05-11 18:14 ` Mike Frysinger
2012-05-14 7:33 ` Michael Haubenwallner
2012-05-14 8:44 ` Zac Medico
2012-05-14 17:37 ` Mike Frysinger
2012-05-14 18:53 ` Mike Frysinger
2012-05-14 19:02 ` Zac Medico
2012-05-14 19:08 ` Zac Medico
2012-05-14 20:10 ` Mike Frysinger
2012-05-14 22:42 ` Zac Medico
2012-05-14 23:13 ` Mike Frysinger
2012-05-14 8:48 ` Brian Harring
2012-05-14 17:33 ` Mike Frysinger
2012-06-02 18:53 ` Zac Medico
2012-06-02 23:54 ` Mike Frysinger
2012-06-05 0:29 ` Zac Medico
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox