From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from pigeon.gentoo.org ([208.92.234.80] helo=lists.gentoo.org) by finch.gentoo.org with esmtp (Exim 4.60) (envelope-from ) id 1SSuGj-00061H-Ie for garchives@archives.gentoo.org; Fri, 11 May 2012 18:08:45 +0000 Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 30962E0D53 for ; Fri, 11 May 2012 18:08:45 +0000 (UTC) Received: from smtp.gentoo.org (smtp.gentoo.org [140.211.166.183]) by pigeon.gentoo.org (Postfix) with ESMTP id C87C5E0913 for ; Fri, 11 May 2012 16:39:48 +0000 (UTC) Received: from localhost.localdomain (localhost [127.0.0.1]) by smtp.gentoo.org (Postfix) with ESMTP id 0F3941B4017 for ; Fri, 11 May 2012 16:39:47 +0000 (UTC) From: Mike Frysinger To: gentoo-portage-dev@lists.gentoo.org Subject: [gentoo-portage-dev] [RFC/PATCH] prepstrip/ecompressdir: parallelize operations Date: Fri, 11 May 2012 12:39:59 -0400 Message-Id: <1336754399-559-1-git-send-email-vapier@gentoo.org> X-Mailer: git-send-email 1.7.9.7 Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: 792c2fc1-eb5c-4949-97db-4fa555299bb9 X-Archives-Hash: e08d36702b09daa0475fa20181509db7 Stealing some ideas from ferringb, add a new API for doing parallel processing in bash, and then deploy this with the stripping and compressing stages. For stripping coreutils which has about 100 ELFs, this brings time to strip down from ~7 seconds to ~0.7 seconds on my system. Signed-off-by: Mike Frysinger --- note: i'm not terribly happy with the name "helper-functions.sh", so any better suggestions would be good. i didn't want to use "ebuild-helpers.sh" as that messes up tab completion ;). 
bin/ebuild-helpers/ecompressdir | 30 ++++++++++++++++--- bin/ebuild-helpers/prepstrip | 20 ++++++++++--- bin/helper-functions.sh | 62 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 8 deletions(-) create mode 100644 bin/helper-functions.sh diff --git a/bin/ebuild-helpers/ecompressdir b/bin/ebuild-helpers/ecompressdir index 17ecd80..a2c9e52 100755 --- a/bin/ebuild-helpers/ecompressdir +++ b/bin/ebuild-helpers/ecompressdir @@ -2,7 +2,7 @@ # Copyright 1999-2011 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh if [[ -z $1 ]] ; then helpers_die "${0##*/}: at least one argument needed" @@ -116,6 +116,16 @@ ret=0 rm -rf "${T}"/ecompress-skip +decompressors=( + ".Z" "gunzip -f" + ".gz" "gunzip -f" + ".bz2" "bunzip2 -f" + ".xz" "unxz -f" + ".lzma" "unxz -f" +) + +multijob_init + for dir in "$@" ; do dir=${dir#/} dir="${ED}${dir}" @@ -136,14 +146,26 @@ for dir in "$@" ; do find "${dir}" -type f -name '*.ecompress.file' -print0 | ${XARGS} -0 rm -f # not uncommon for packages to compress doc files themselves - funk_up_dir "decompress" ".Z" "gunzip -f" - funk_up_dir "decompress" ".gz" "gunzip -f" - funk_up_dir "decompress" ".bz2" "bunzip2 -f" + for (( d = 0; d < ${#decompressors[@]}; d += 2 )) ; do + # It's faster to parallelize at this stage than to try to + # parallelize the compressors. This is because the find|xargs + # ends up launching fewer compressors overall, so the overhead + # of forking children ends up dominating. + ( + multijob_child_init + funk_up_dir "decompress" "${decompressors[d]}" "${decompressors[d+1]}" + ) & + multijob_post_fork + : $(( ret |= $? 
)) + done # forcibly break all hard links as some compressors whine about it find "${dir}" -type f -links +1 -exec env file="{}" sh -c \ 'cp -p "${file}" "${file}.ecompress.break" ; mv -f "${file}.ecompress.break" "${file}"' \; + multijob_finish + : $(( ret |= $? )) + # now lets do our work if [[ -n ${suffix} ]] ; then vecho "${0##*/}: $(ecompress --bin) /${actual_dir#${ED}}" diff --git a/bin/ebuild-helpers/prepstrip b/bin/ebuild-helpers/prepstrip index daaa252..09b0333 100755 --- a/bin/ebuild-helpers/prepstrip +++ b/bin/ebuild-helpers/prepstrip @@ -1,8 +1,8 @@ #!/bin/bash -# Copyright 1999-2011 Gentoo Foundation +# Copyright 1999-2012 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/helper-functions.sh # avoid multiple calls to `has`. this creates things like: # FEATURES_foo=false @@ -62,6 +62,8 @@ prepstrip_sources_dir=${EPREFIX}/usr/src/debug/${CATEGORY}/${PF} type -P debugedit >/dev/null && debugedit_found=true || debugedit_found=false debugedit_warned=false +multijob_init + unset ${!INODE_*} inode_var_name() { @@ -171,6 +173,8 @@ process_elf() { # We want to log already stripped binaries, as this may be a QA violation. # They prevent us from getting the splitdebug data. if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then + ( + multijob_child_init log=$T/scanelf-already-stripped.log qa_var="QA_PRESTRIPPED_${ARCH/-/_}" [[ -n ${!qa_var} ]] && QA_PRESTRIPPED="${!qa_var}" @@ -193,6 +197,8 @@ if ! ${RESTRICT_binchecks} && ! ${RESTRICT_strip} ; then else rm -f "$log" fi + ) & + multijob_post_fork fi # Now we look for unstripped binaries. @@ -205,8 +211,10 @@ do banner=true fi - f=$(file "${x}") || continue - [[ -z ${f} ]] && continue + ( + multijob_child_init + f=$(file "${x}") || exit 0 + [[ -z ${f} ]] && exit 0 if ! 
${SKIP_STRIP} ; then # The noglob funk is to support STRIP_MASK="/*/booga" and to keep @@ -253,6 +261,8 @@ do if ${was_not_writable} ; then chmod u-w "${x}" fi + ) & + multijob_post_fork done if [[ -s ${T}/debug.sources ]] && \ @@ -274,3 +284,5 @@ then >> "$emptydir"/.keepdir done < <(find "${D}${prepstrip_sources_dir}/" -type d -empty -print0) fi + +multijob_finish diff --git a/bin/helper-functions.sh b/bin/helper-functions.sh new file mode 100644 index 0000000..c69a41a --- /dev/null +++ b/bin/helper-functions.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright 1999-2012 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 + +# For routines we want to use in ebuild-helpers/ but don't want to +# expose to the general ebuild environment. + +source "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/isolated-functions.sh + +# +# API functions for doing parallel processing +# +numjobs() { + # Copied from eutils.eclass:makeopts_jobs() + local jobs=$(echo " ${MAKEOPTS} " | \ + sed -r -n 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p') + echo ${jobs:-1} +} + +multijob_init() { + # Setup a pipe for children to write their pids to when they finish. + mj_control_pipe=$(mktemp -t multijob.XXXXXX) + rm "${mj_control_pipe}" + mkfifo "${mj_control_pipe}" + exec {mj_control_fd}<>${mj_control_pipe} + rm -f "${mj_control_pipe}" + + # See how many children we can fork based on the user's settings. + mj_max_jobs=$(numjobs) + mj_num_jobs=0 +} + +multijob_child_init() { + trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT + trap 'exit 1' INT TERM +} + +multijob_finish_one() { + local pid ret + read -r -u ${mj_control_fd} pid ret + : $(( --mj_num_jobs )) + return ${ret} +} + +multijob_finish() { + local ret=0 + while [[ ${mj_num_jobs} -gt 0 ]] ; do + multijob_finish_one + : $(( ret += $? )) + done + # Let bash clean up its internal child tracking state. 
+ wait + return ${ret} +} + +multijob_post_fork() { + : $(( ++mj_num_jobs )) + if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then + multijob_finish_one + fi + return 0 +} -- 1.7.9.7