public inbox for gentoo-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
@ 2012-06-01 22:41 Mike Frysinger
  2012-06-01 22:50 ` Mike Frysinger
                   ` (6 more replies)
  0 siblings, 7 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-01 22:41 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 4090 bytes --]

regenerating autotools in packages that have a lot of AC_CONFIG_SUBDIRS is
really slow due to the serialization of all the dirs (which really isn't
required).  so i took some code that i merged into portage semi-recently
(which is based on work by Brian, although i'm not sure he wants to admit it)
and put it into a new multiprocessing.eclass.  this way people can generically
utilize this in their own eclasses/ebuilds.

it doesn't currently support nesting.  not sure if i should fix that.

i'll follow up with an example of parallelizing of eautoreconf.  for
mail-filter/maildrop on my 4 core system, it cuts the time needed to run from
~2.5 min to ~1 min.
-mike

# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $

# @ECLASS: multiprocessing.eclass
# @MAINTAINER:
# base-system@gentoo.org
# @AUTHORS:
# Brian Harring <ferringb@gentoo.org>
# Mike Frysinger <vapier@gentoo.org>
# @BLURB: parallelization with bash (wtf?)
# @DESCRIPTION:
# The multiprocessing eclass contains a suite of functions that allow ebuilds
# to quickly run things in parallel using shell code.

if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"

# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will show "9".
# We can't return the number as bash normalizes it to [0, 255].  If the flags
# haven't specified a -j flag, then "1" is shown as that is the default `make`
# uses.  Since there's no way to represent infinity, we return 999 if the user
# has -j without a number.
makeopts_jobs() {
	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	local jobs=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-j|--jobs)[[:space:]].*:999:p')
	echo ${jobs:-1}
}

# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing things in parallel.
# You must call this before any other multijob function.
multijob_init() {
	# Setup a pipe for children to write their pids to when they finish.
	mj_control_pipe="${T}/multijob.pipe"
	mkfifo "${mj_control_pipe}"
	exec {mj_control_fd}<>${mj_control_pipe}
	rm -f "${mj_control_pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}

# @FUNCTION: multijob_child_init
# @DESCRIPTION:
# You must call this first in the forked child process.
multijob_child_init() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
	trap 'exit 1' INT TERM
}

# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# If the parallel limit has been hit, it will wait for one to finish and
# return the child's exit status.
multijob_post_fork() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	: $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
	fi
	return $?
}

# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Wait for a single process to exit and return its exit code.
multijob_finish_one() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	local pid ret
	read -r -u ${mj_control_fd} pid ret
	: $(( --mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for all pending processes to exit and return the bitwise or
# of all their exit codes.
multijob_finish() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	local ret=0
	while [[ ${mj_num_jobs} -gt 0 ]] ; do
		multijob_finish_one
		: $(( ret |= $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait
	return ${ret}
}

fi

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
@ 2012-06-01 22:50 ` Mike Frysinger
  2012-06-02  4:11 ` Brian Harring
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-01 22:50 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 1851 bytes --]

example conversion of eatureconf
-mike

--- autotools.eclass
+++ autotools.eclass
@@ -16,7 +16,7 @@
 if [[ ${___ECLASS_ONCE_AUTOTOOLS} != "recur -_+^+_- spank" ]] ; then
 ___ECLASS_ONCE_AUTOTOOLS="recur -_+^+_- spank"
 
-inherit libtool
+inherit libtool multiprocessing
 
 # @ECLASS-VARIABLE: WANT_AUTOCONF
 # @DESCRIPTION:
@@ -144,14 +144,24 @@ unset _automake_atom _autoconf_atom
 # Should do a full autoreconf - normally what most people will be interested in.
 # Also should handle additional directories specified by AC_CONFIG_SUBDIRS.
 eautoreconf() {
-	local x g
+	local x g multitop
 
-	if [[ -z ${AT_NO_RECURSIVE} ]]; then
+	if [[ -z ${AT_TOPLEVEL_EAUTORECONF} ]] ; then
+		AT_TOPLEVEL_EAUTORECONF="yes"
+		multitop="yes"
+		multijob_init
+	fi
+
+	if [[ -z ${AT_NO_RECURSIVE} ]] ; then
 		# Take care of subdirs
 		for x in $(autotools_check_macro_val AC_CONFIG_SUBDIRS) ; do
 			if [[ -d ${x} ]] ; then
 				pushd "${x}" >/dev/null
+				(
+				multijob_child_init
 				AT_NOELIBTOOLIZE="yes" eautoreconf
+				) &
+				multijob_post_fork || die
 				popd >/dev/null
 			fi
 		done
@@ -196,11 +206,16 @@ eautoreconf() {
 	eautoheader
 	[[ ${AT_NOEAUTOMAKE} != "yes" ]] && FROM_EAUTORECONF="yes" eautomake ${AM_OPTS}
 
-	[[ ${AT_NOELIBTOOLIZE} == "yes" ]] && return 0
+	if [[ ${AT_NOELIBTOOLIZE} != "yes" ]] ; then
+		# Call it here to prevent failures due to elibtoolize called _before_
+		# eautoreconf.  We set $S because elibtoolize runs on that #265319
+		S=${PWD} elibtoolize --force
+	fi
 
-	# Call it here to prevent failures due to elibtoolize called _before_
-	# eautoreconf.  We set $S because elibtoolize runs on that #265319
-	S=${PWD} elibtoolize --force
+	if [[ -n ${multitop} ]] ; then
+		unset AT_TOPLEVEL_EAUTORECONF
+		multijob_finish || die
+	fi
 
 	return 0
 }

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
  2012-06-01 22:50 ` Mike Frysinger
@ 2012-06-02  4:11 ` Brian Harring
  2012-06-02  4:57   ` Mike Frysinger
  2012-06-02  9:52 ` David Leverton
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: Brian Harring @ 2012-06-02  4:11 UTC (permalink / raw
  To: gentoo-dev

On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
> regenerating autotools in packages that have a lot of AC_CONFIG_SUBDIRS is
> really slow due to the serialization of all the dirs (which really isn't
> required).  so i took some code that i merged into portage semi-recently
> (which is based on work by Brian, although i'm not sure he wants to admit it)

I've come up with worse things in the name of speed (see the 
daemonized ebuild processor...) ;)

> and put it into a new multiprocessing.eclass.  this way people can generically
> utilize this in their own eclasses/ebuilds.
> 
> it doesn't currently support nesting.  not sure if i should fix that.
> 
> i'll follow up with an example of parallelizing of eautoreconf.  for
> mail-filter/maildrop on my 4 core system, it cuts the time needed to run from
> ~2.5 min to ~1 min.

My main concern here is cleanup during uncontrolled shutdown; if the 
backgrounded job has hung itself for some reason, the job *will* just 
sit; I'm not aware of any of the PMs doing process tree killing, or 
cgroups containment; in my copious free time I'm planning on adding a 
'cjobs' tool for others, and adding cgroups awareness into pkgcore; 
that said, none of 'em do this *now*, thus my concern.



> -mike
> 
> # Copyright 1999-2012 Gentoo Foundation
> # Distributed under the terms of the GNU General Public License v2
> # $Header: $
> 
> # @ECLASS: multiprocessing.eclass
> # @MAINTAINER:
> # base-system@gentoo.org
> # @AUTHORS:
> # Brian Harring <ferringb@gentoo.org>
> # Mike Frysinger <vapier@gentoo.org>
> # @BLURB: parallelization with bash (wtf?)
> # @DESCRIPTION:
> # The multiprocessing eclass contains a suite of functions that allow ebuilds
> # to quickly run things in parallel using shell code.
> 
> if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
> ___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"
> 
> # @FUNCTION: makeopts_jobs
> # @USAGE: [${MAKEOPTS}]
> # @DESCRIPTION:
> # Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
> # specified therein.  Useful for running non-make tools in parallel too.
> # i.e. if the user has MAKEOPTS=-j9, this will show "9".
> # We can't return the number as bash normalizes it to [0, 255].  If the flags
> # haven't specified a -j flag, then "1" is shown as that is the default `make`
> # uses.  Since there's no way to represent infinity, we return 999 if the user
> # has -j without a number.
> makeopts_jobs() {
> 	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}
> 	# This assumes the first .* will be more greedy than the second .*
> 	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
> 	local jobs=$(echo " $* " | sed -r -n \
> 		-e 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
> 		-e 's:.*[[:space:]](-j|--jobs)[[:space:]].*:999:p')
> 	echo ${jobs:-1}
> }

This function belongs in eutils, or somewhere similar- pretty sure 
we've got variants of this in multiple spots.  I'd prefer a single 
point to change if/when we add a way to pass parallelism down into the 
env via EAPI.


> # @FUNCTION: multijob_init
> # @USAGE: [${MAKEOPTS}]
> # @DESCRIPTION:
> # Setup the environment for executing things in parallel.
> # You must call this before any other multijob function.
> multijob_init() {
> 	# Setup a pipe for children to write their pids to when they finish.
> 	mj_control_pipe="${T}/multijob.pipe"
> 	mkfifo "${mj_control_pipe}"
> 	exec {mj_control_fd}<>${mj_control_pipe}
> 	rm -f "${mj_control_pipe}"

Nice; hadn't thought to wipe the pipe on the way out.

> 
> 	# See how many children we can fork based on the user's settings.
> 	mj_max_jobs=$(makeopts_jobs "$@")
> 	mj_num_jobs=0
> }
> 
> # @FUNCTION: multijob_child_init
> # @DESCRIPTION:
> # You must call this first in the forked child process.
> multijob_child_init() {
> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> 
> 	trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
> 	trap 'exit 1' INT TERM
> }

Kind of dislike this form since it means consuming code has to be 
aware of, and do the () & trick.

A helper function, something like
multijob_child_job() {
  (
  multijob_child_init
  "$@"
  ) &
  multijob_post_fork || die "game over man, game over"
}

Doing so, would conver your eautoreconf from:
for x in $(autotools_check_macro_val AC_CONFIG_SUBDIRS) ; do
  if [[ -d ${x} ]] ; then
    pushd "${x}" >/dev/null
    (
    multijob_child_init
    AT_NOELIBTOOLIZE="yes" eautoreconf
    ) &
    multijob_post_fork || die
    popd >/dev/null
  fi
done

To:
for x in $(autotools_check_macro_val AC_CONFIG_SUBDIRS) ; do
  if [[ -d ${x} ]]; then
    pushd "${x}" > /dev/null
    AT_NOELIBTOOLIZE="yes" multijob_child_job eautoreconf
    popd
  fi
done


Note, if we used an eval in multijob_child_job, the pushd/popd could 
be folded in.  Debatable.



> # @FUNCTION: multijob_post_fork
> # @DESCRIPTION:
> # You must call this in the parent process after forking a child process.
> # If the parallel limit has been hit, it will wait for one to finish and
> # return the child's exit status.
> multijob_post_fork() {
> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> 
> 	: $(( ++mj_num_jobs ))
> 	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> 		multijob_finish_one
> 	fi
> 	return $?
> }
> 
> # @FUNCTION: multijob_finish_one
> # @DESCRIPTION:
> # Wait for a single process to exit and return its exit code.
> multijob_finish_one() {
> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> 
> 	local pid ret
> 	read -r -u ${mj_control_fd} pid ret

Mildly concerned about the failure case here- specifically if the read 
fails (fd was closed, take your pick).


> 	: $(( --mj_num_jobs ))
> 	return ${ret}
> }
> 
> # @FUNCTION: multijob_finish
> # @DESCRIPTION:
> # Wait for all pending processes to exit and return the bitwise or
> # of all their exit codes.
> multijob_finish() {
> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

Tend to think this should do cleanup, then die if someone invoked the 
api incorrectly; I'd rather see the children reaped before this blows 
up.

> 	local ret=0
> 	while [[ ${mj_num_jobs} -gt 0 ]] ; do
> 		multijob_finish_one
> 		: $(( ret |= $? ))
> 	done
> 	# Let bash clean up its internal child tracking state.
> 	wait
> 	return ${ret}
> }
> 
> fi


~harring



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02  4:11 ` Brian Harring
@ 2012-06-02  4:57   ` Mike Frysinger
  2012-06-02  9:23     ` Cyprien Nicolas
  0 siblings, 1 reply; 29+ messages in thread
From: Mike Frysinger @ 2012-06-02  4:57 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 3862 bytes --]

On Saturday 02 June 2012 00:11:19 Brian Harring wrote:
> On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
> > and put it into a new multiprocessing.eclass.  this way people can
> > generically utilize this in their own eclasses/ebuilds.
> > 
> > it doesn't currently support nesting.  not sure if i should fix that.
> > 
> > i'll follow up with an example of parallelizing of eautoreconf.  for
> > mail-filter/maildrop on my 4 core system, it cuts the time needed to run
> > from ~2.5 min to ~1 min.
> 
> My main concern here is cleanup during uncontrolled shutdown; if the
> backgrounded job has hung itself for some reason, the job *will* just
> sit; I'm not aware of any of the PMs doing process tree killing, or
> cgroups containment; in my copious free time I'm planning on adding a
> 'cjobs' tool for others, and adding cgroups awareness into pkgcore;
> that said, none of 'em do this *now*, thus my concern.

i'm not sure there's much i can do here beyond adding traps

> > makeopts_jobs() {
> 
> This function belongs in eutils, or somewhere similar- pretty sure
> we've got variants of this in multiple spots.  I'd prefer a single
> point to change if/when we add a way to pass parallelism down into the
> env via EAPI.

it's already in eutils.  but i'm moving it out of that and into this since it 
makes more sense in this eclass imo, and avoids this eclass from inheriting 
eutils.

> > multijob_child_init() {
> > 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> > 	trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
> > 	trap 'exit 1' INT TERM
> > }
> 
> Kind of dislike this form since it means consuming code has to be
> aware of, and do the () & trick.
> 
> A helper function, something like
> multijob_child_job() {
>   (
>   multijob_child_init
>   "$@"
>   ) &
>   multijob_post_fork || die "game over man, game over"
> }
> 
> Doing so, would conver your eautoreconf from:
> for x in $(autotools_check_macro_val AC_CONFIG_SUBDIRS) ; do
>   if [[ -d ${x} ]] ; then
>     pushd "${x}" >/dev/null
>     (
>     multijob_child_init
>     AT_NOELIBTOOLIZE="yes" eautoreconf
>     ) &
>     multijob_post_fork || die
>     popd >/dev/null
>   fi
> done
> 
> To:
> for x in $(autotools_check_macro_val AC_CONFIG_SUBDIRS) ; do
>   if [[ -d ${x} ]]; then
>     pushd "${x}" > /dev/null
>     AT_NOELIBTOOLIZE="yes" multijob_child_job eautoreconf
>     popd
>   fi
> done

it depends on the form of the code.  i can see both being useful.  should be 
easy to support both though:
multijob_child_init() {
	if [[ $# -eq 0 ]] ; then
		trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
		trap 'exit 1' INT TERM
	else
		(
		multijob_child_init
		"$@"
		) &
		multijob_post_fork || die
	fi
}

> Note, if we used an eval in multijob_child_job, the pushd/popd could
> be folded in.  Debatable.

i'd lean towards not.  keeps things simple and people don't have to get into 
quoting hell.

> > # @FUNCTION: multijob_finish_one
> > # @DESCRIPTION:
> > # Wait for a single process to exit and return its exit code.
> > multijob_finish_one() {
> > 
> > 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> > 	
> > 	local pid ret
> > 	read -r -u ${mj_control_fd} pid ret
> 
> Mildly concerned about the failure case here- specifically if the read
> fails (fd was closed, take your pick).

read || die ?  not sure what else could be done really.

> > multijob_finish() {
> > 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> 
> Tend to think this should do cleanup, then die if someone invoked the
> api incorrectly; I'd rather see the children reaped before this blows
> up.

sounds good.  along those lines, i could add multijob_finish to 
EBUILD_DEATH_HOOKS so other `die` points also wait by default ...
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02  4:57   ` Mike Frysinger
@ 2012-06-02  9:23     ` Cyprien Nicolas
  0 siblings, 0 replies; 29+ messages in thread
From: Cyprien Nicolas @ 2012-06-02  9:23 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: text/plain, Size: 992 bytes --]

Mike Frysinger wrote:
> On Saturday 02 June 2012 00:11:19 Brian Harring wrote:
>> On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
>>> makeopts_jobs() {
>>
>> This function belongs in eutils, or somewhere similar- pretty sure
>> we've got variants of this in multiple spots.  I'd prefer a single
>> point to change if/when we add a way to pass parallelism down into the
>> env via EAPI.

We do have variants at several places in ebuild/eclass (scons-utils,
waf...). And some failed at some point, see [1].

> it's already in eutils.  but i'm moving it out of that and into this since it 
> makes more sense in this eclass imo, and avoids this eclass from inheriting 
> eutils.

Neat. Thanks for having added it. Lot of build related eclass would need
it, if we want to factorize that code.

We'll have to give maintainers incentive for migrating their code :-)

[1] https://bugs.gentoo.org/show_bug.cgi?id=337831

-- 
Fulax
Gentoo Lisp Contributor


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
  2012-06-01 22:50 ` Mike Frysinger
  2012-06-02  4:11 ` Brian Harring
@ 2012-06-02  9:52 ` David Leverton
  2012-06-02 19:18   ` Mike Frysinger
  2012-06-02 19:54 ` Mike Frysinger
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 29+ messages in thread
From: David Leverton @ 2012-06-02  9:52 UTC (permalink / raw
  To: gentoo-dev

Mike Frysinger wrote:
> 	exec {mj_control_fd}<>${mj_control_pipe}

I'll have to remember that feature, but unfortunately it's new in bash 
4.1, so unless we're giving up 3.2 as the minimum for the tree....

> 	: $(( ++mj_num_jobs ))

Any reason not to do just

         (( ++mj_num_jobs ))

?

> 	: $(( --mj_num_jobs ))

> 		: $(( ret |= $? ))

Same.



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02  9:52 ` David Leverton
@ 2012-06-02 19:18   ` Mike Frysinger
  0 siblings, 0 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-02 19:18 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 422 bytes --]

On Saturday 02 June 2012 05:52:01 David Leverton wrote:
> Mike Frysinger wrote:
> > 	exec {mj_control_fd}<>${mj_control_pipe}
> 
> I'll have to remember that feature, but unfortunately it's new in bash
> 4.1, so unless we're giving up 3.2 as the minimum for the tree....

lame

> > 	: $(( ++mj_num_jobs ))
> 
> Any reason not to do just
> 
>          (( ++mj_num_jobs ))

i prefer the portable form
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
                   ` (2 preceding siblings ...)
  2012-06-02  9:52 ` David Leverton
@ 2012-06-02 19:54 ` Mike Frysinger
  2012-06-02 20:39   ` Zac Medico
  2012-06-02 21:31   ` Michał Górny
  2012-06-02 23:59 ` Brian Harring
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-02 19:54 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 5197 bytes --]

v2
-mike

# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $

# @ECLASS: multiprocessing.eclass
# @MAINTAINER:
# base-system@gentoo.org
# @AUTHOR:
# Brian Harring <ferringb@gentoo.org>
# Mike Frysinger <vapier@gentoo.org>
# @BLURB: parallelization with bash (wtf?)
# @DESCRIPTION:
# The multiprocessing eclass contains a suite of functions that allow ebuilds
# to quickly run things in parallel using shell code.
# @EXAMPLE:
#
# @CODE
# # First initialize things:
# multijob_init
#
# # Then hash a bunch of files in parallel:
# for n in {0..20} ; do
# 	multijob_child_init md5sum data.${n} > data.${n}
# done
#
# # Then wait for all the children to finish:
# multijob_finish
# @CODE

if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"

# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
# number as bash normalizes it to [0, 255].  If the flags haven't specified a
# -j flag, then "1" is shown as that is the default `make` uses.  Since there's
# no way to represent infinity, we return 999 if the user has -j without a number.
makeopts_jobs() {
	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	local jobs=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-j|--jobs)[[:space:]].*:999:p')
	echo ${jobs:-1}
}

# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it.  Store the new
# fd in the specified variable.  Useful for the cases where we don't care
# about the exact fd #.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash provides this functionality.
		eval "exec {${var}}${redir}'${file}'"
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			if [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}'${file}'" && break
			fi
			[[ ${fd} -gt 1024 ]] && return 1 # sanity
			: $(( ++fd ))
		done
		: $(( ${var} = fd ))
	fi
}

# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait"

	# Setup a pipe for children to write their pids to when they finish.
	mj_control_pipe="${T}/multijob.pipe"
	mkfifo "${mj_control_pipe}"
	redirect_alloc_fd mj_control_fd "${mj_control_pipe}"
	rm -f "${mj_control_pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}

# @FUNCTION: multijob_child_init
# @USAGE: [command to run in background]
# @DESCRIPTION:
# This function has two forms.  You can use it to execute a simple command
# in the background (and it takes care of everything else), or you must
# call this first thing in your forked child process.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
#
# # 2nd form: execute multiple stuff in the background:
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# multijob_post_fork
# @CODE
multijob_child_init() {
	if [[ $# -eq 0 ]] ; then
		trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
		trap 'exit 1' INT TERM
	else
		( multijob_child_init ; "$@" ) &
		multijob_post_fork
	fi
}

# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return the its exit status.
multijob_post_fork() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	: $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
	fi
	return $?
}

# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Wait for a single process to exit and return its exit code.
multijob_finish_one() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	local pid ret
	read -r -u ${mj_control_fd} pid ret || die
	: $(( --mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for all pending processes to exit and return the bitwise or
# of all their exit codes.
multijob_finish() {
	local ret=0
	while [[ ${mj_num_jobs} -gt 0 ]] ; do
		multijob_finish_one
		: $(( ret |= $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# Do this after reaping all the children.
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	return ${ret}
}

fi

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 19:54 ` Mike Frysinger
@ 2012-06-02 20:39   ` Zac Medico
  2012-06-02 21:12     ` Mike Frysinger
  2012-06-02 21:31   ` Michał Górny
  1 sibling, 1 reply; 29+ messages in thread
From: Zac Medico @ 2012-06-02 20:39 UTC (permalink / raw
  To: gentoo-dev

On 06/02/2012 12:54 PM, Mike Frysinger wrote:

> # @FUNCTION: redirect_alloc_fd
> # @USAGE: <var> <file> [redirection]
> # @DESCRIPTION:
> # Find a free fd and redirect the specified file via it.  Store the new
> # fd in the specified variable.  Useful for the cases where we don't care
> # about the exact fd #.
> redirect_alloc_fd() {
> 	local var=$1 file=$2 redir=${3:-"<>"}
> 
> 	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
> 		# Newer bash provides this functionality.
> 		eval "exec {${var}}${redir}'${file}'"
> 	else
> 		# Need to provide the functionality ourselves.
> 		local fd=10
> 		while :; do
> 			if [[ ! -L /dev/fd/${fd} ]] ; then
> 				eval "exec ${fd}${redir}'${file}'" && break
> 			fi
> 			[[ ${fd} -gt 1024 ]] && return 1 # sanity
> 			: $(( ++fd ))
> 		done
> 		: $(( ${var} = fd ))
> 	fi
> }

I launched up a GhostBSD livedvd to see what /dev/fd/ looks like on
FreeBSD, and it seems to contain plain character devices instead of
symlinks to character devices:

[ghostbsd@livecd ~]$ uname -a
FreeBSD livecd 9.0-RELEASE FreeBSD 9.0-RELEASE #0: Sun Jan 15 17:17:43
AST 2012
root@ericbsd.ghostbsd.org:/usr/obj/i386.i386/usr/src/sys/GHOSTBSD  i386
[ghostbsd@livecd ~]$ ls -l /dev/fd/
total 0
crw-rw-rw-  1 root  wheel    0,  19 Jun  2 20:15 0
crw-rw-rw-  1 root  wheel    0,  21 Jun  2 20:15 1
crw-rw-rw-  1 root  wheel    0,  23 Jun  2 20:15 2

-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 20:39   ` Zac Medico
@ 2012-06-02 21:12     ` Mike Frysinger
  2012-06-02 23:29       ` Zac Medico
  0 siblings, 1 reply; 29+ messages in thread
From: Mike Frysinger @ 2012-06-02 21:12 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 558 bytes --]

On Saturday 02 June 2012 16:39:16 Zac Medico wrote:
> On 06/02/2012 12:54 PM, Mike Frysinger wrote:
> > 			if [[ ! -L /dev/fd/${fd} ]] ; then
> > 				eval "exec ${fd}${redir}'${file}'" && break
> > 			fi
> 
> I launched up a GhostBSD livedvd to see what /dev/fd/ looks like on
> FreeBSD, and it seems to contain plain character devices instead of
> symlinks to character devices:

i didn't want to use [ -e ] because of broken links, but it seems that Linux 
has diff semantics with /proc and broken symlinks.  `test -e` will return true.
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 19:54 ` Mike Frysinger
  2012-06-02 20:39   ` Zac Medico
@ 2012-06-02 21:31   ` Michał Górny
  2012-06-02 22:50     ` Zac Medico
  1 sibling, 1 reply; 29+ messages in thread
From: Michał Górny @ 2012-06-02 21:31 UTC (permalink / raw
  To: gentoo-dev; +Cc: vapier

[-- Attachment #1: Type: text/plain, Size: 429 bytes --]

On Sat, 2 Jun 2012 15:54:03 -0400
Mike Frysinger <vapier@gentoo.org> wrote:

> # @FUNCTION: redirect_alloc_fd
> # @USAGE: <var> <file> [redirection]
> # @DESCRIPTION:

(...and a lot of code)

I may be wrong but wouldn't it be simpler to just stick with a named
pipe here? Well, at first glance you wouldn't be able to read exactly
one result at a time but is it actually useful?

-- 
Best regards,
Michał Górny

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 316 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 21:31   ` Michał Górny
@ 2012-06-02 22:50     ` Zac Medico
  2012-06-02 23:47       ` Brian Harring
  0 siblings, 1 reply; 29+ messages in thread
From: Zac Medico @ 2012-06-02 22:50 UTC (permalink / raw
  To: gentoo-dev; +Cc: Michał Górny, vapier

On 06/02/2012 02:31 PM, Michał Górny wrote:
> On Sat, 2 Jun 2012 15:54:03 -0400
> Mike Frysinger <vapier@gentoo.org> wrote:
> 
>> # @FUNCTION: redirect_alloc_fd
>> # @USAGE: <var> <file> [redirection]
>> # @DESCRIPTION:
> 
> (...and a lot of code)
> 
> I may be wrong but wouldn't it be simpler to just stick with a named
> pipe here? Well, at first glance you wouldn't be able to read exactly
> one result at a time but is it actually useful?

I'm pretty sure that the pipe has remain constantly open in read mode
(which can only be done by assigning it a file descriptor). Otherwise,
there's a race condition that can occur, where a write is lost because
it's written just before the reader closes the pipe.
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 21:12     ` Mike Frysinger
@ 2012-06-02 23:29       ` Zac Medico
  2012-06-02 23:58         ` Mike Frysinger
  0 siblings, 1 reply; 29+ messages in thread
From: Zac Medico @ 2012-06-02 23:29 UTC (permalink / raw
  To: gentoo-dev

On 06/02/2012 02:12 PM, Mike Frysinger wrote:
> On Saturday 02 June 2012 16:39:16 Zac Medico wrote:
>> On 06/02/2012 12:54 PM, Mike Frysinger wrote:
>>> 			if [[ ! -L /dev/fd/${fd} ]] ; then
>>> 				eval "exec ${fd}${redir}'${file}'" && break
>>> 			fi
>>
>> I launched up a GhostBSD livedvd to see what /dev/fd/ looks like on
>> FreeBSD, and it seems to contain plain character devices instead of
>> symlinks to character devices:
> 
> i didn't want to use [ -e ] because of broken links, but it seems that Linux 
> has diff semantics with /proc and broken symlinks.  `test -e` will return true.
> -mike

How about if we just create a fallback mode for older bash, where no
pipes are involved, and multijob_post_fork just uses `wait` to check
status and effectively causes only one job to execute at a time?
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 22:50     ` Zac Medico
@ 2012-06-02 23:47       ` Brian Harring
  2012-06-03  1:04         ` Zac Medico
  0 siblings, 1 reply; 29+ messages in thread
From: Brian Harring @ 2012-06-02 23:47 UTC (permalink / raw
  To: Micha?? G??rny; +Cc: gentoo-dev

On Sat, Jun 02, 2012 at 03:50:06PM -0700, Zac Medico wrote:
> On 06/02/2012 02:31 PM, Micha?? G??rny wrote:
> > On Sat, 2 Jun 2012 15:54:03 -0400
> > Mike Frysinger <vapier@gentoo.org> wrote:
> > 
> >> # @FUNCTION: redirect_alloc_fd
> >> # @USAGE: <var> <file> [redirection]
> >> # @DESCRIPTION:
> > 
> > (...and a lot of code)
> > 
> > I may be wrong but wouldn't it be simpler to just stick with a named
> > pipe here? Well, at first glance you wouldn't be able to read exactly
> > one result at a time but is it actually useful?
> 
> I'm pretty sure that the pipe has remain constantly open in read mode
> (which can only be done by assigning it a file descriptor). Otherwise,
> there's a race condition that can occur, where a write is lost because
> it's written just before the reader closes the pipe.

There isn't a race; write side, it'll block once it exceeds pipe buf 
size; read side, bash's read functionality is explicitly byte by byte 
reads to avoid consuming data it doesn't need.

That said, Mgorny's suggestion ignores that the the code already is 
pointed at a fifo.  Presume he's suggesting "Just open it everytime 
you need to fuck with it"... which, sure, 'cept that complicates the 
read side (either having to find a free fd, open to it, then close 
it), or abuse cat or $(<) to pull the results and make the reclaim 
code handle multiple results in a single shot.

Frankly, don't see the point in doing that.  The code isn't that 
complex frankly, and we *need* the overhead of this to be minimal- 
the hand off/reclaim is effectively the bottleneck for scaling.

If the jobs you've backgrounded are a second a piece, it matters less; 
if they're quick little bursts of activity, the scaling *will* be 
limited by how fast we can blast off/reclaim jobs.  Keep in mind that 
the main process has to go find more work to queue up between the 
reclaims, thus this matters more than you'd think.


Either way, that limit varies dependent on time required for each job 
vs # of cores; that said, you run code like this on a 48 core and you 
see it start becoming an actual bottleneck (which is why I came up 
with this hacky bash semaphore).

~harring



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 23:29       ` Zac Medico
@ 2012-06-02 23:58         ` Mike Frysinger
  0 siblings, 0 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-02 23:58 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 995 bytes --]

On Saturday 02 June 2012 19:29:29 Zac Medico wrote:
> On 06/02/2012 02:12 PM, Mike Frysinger wrote:
> > On Saturday 02 June 2012 16:39:16 Zac Medico wrote:
> >> On 06/02/2012 12:54 PM, Mike Frysinger wrote:
> >>> 			if [[ ! -L /dev/fd/${fd} ]] ; then
> >>> 				eval "exec ${fd}${redir}'${file}'" && break
> >>> 			fi
> >> 
> >> I launched up a GhostBSD livedvd to see what /dev/fd/ looks like on
> >> FreeBSD, and it seems to contain plain character devices instead of
> > 
> >> symlinks to character devices:
> > i didn't want to use [ -e ] because of broken links, but it seems that
> > Linux has diff semantics with /proc and broken symlinks.  `test -e` will
> > return true.
> 
> How about if we just create a fallback mode for older bash, where no
> pipes are involved, and multijob_post_fork just uses `wait` to check
> status and effectively causes only one job to execute at a time?

hmm, maybe, but i've already written the code to support older versions :)
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
                   ` (3 preceding siblings ...)
  2012-06-02 19:54 ` Mike Frysinger
@ 2012-06-02 23:59 ` Brian Harring
  2012-06-03  5:05   ` Mike Frysinger
  2012-06-03  5:08 ` Mike Frysinger
  2012-06-05  6:14 ` Mike Frysinger
  6 siblings, 1 reply; 29+ messages in thread
From: Brian Harring @ 2012-06-02 23:59 UTC (permalink / raw
  To: Mike Frysinger; +Cc: gentoo-dev

On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
> # @FUNCTION: multijob_post_fork
> # @DESCRIPTION:
> # You must call this in the parent process after forking a child process.
> # If the parallel limit has been hit, it will wait for one to finish and
> # return the child's exit status.
> multijob_post_fork() {
> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> 
> 	: $(( ++mj_num_jobs ))
> 	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> 		multijob_finish_one
> 	fi
> 	return $?
> }

Minor note; the design of this (fork then check), means when a job 
finishes, we'll not be ready with more work.  This implicitly means 
that given a fast job identification step (main thread), and a slower 
job execution (what's backgrounded), we'll not breach #core of 
parallelism, nor will we achieve that level either (meaning 
potentially some idle cycles left on the floor).

Realistically, the main thread (what invokes post_fork) is *likely*, 
(if the consumer isn't fricking retarded) to be doing minor work- 
mostly just poking about figuring out what the next task/arguments 
are to submit to the pool.  That work isn't likely to be a full core 
worth of work, else as I said, the consumer is being a retard.

The original form of this was designed around the assumption that the 
main thread was light, and the backgrounded jobs weren't, thus it 
basically did the equivalent of make -j<cores>+1, allowing #cores 
background jobs running, while allowing the main thread to continue on 
and get the next job ready, once it had that ready, it would block 
waiting for a slot to open, then immediately submit the job once it 
had done a reclaim.

On the surface of it, it's a minor difference, but having the next 
job immediately ready to fire makes it easier to saturate cores.

Unfortunately, that also changes your API a bit; your call.

~harring



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 23:47       ` Brian Harring
@ 2012-06-03  1:04         ` Zac Medico
  2012-06-03  1:10           ` Zac Medico
  2012-06-03  7:15           ` Michał Górny
  0 siblings, 2 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-03  1:04 UTC (permalink / raw
  To: gentoo-dev; +Cc: Brian Harring

[-- Attachment #1: Type: text/plain, Size: 1151 bytes --]

On 06/02/2012 04:47 PM, Brian Harring wrote:
> On Sat, Jun 02, 2012 at 03:50:06PM -0700, Zac Medico wrote:
>> On 06/02/2012 02:31 PM, Micha?? G??rny wrote:
>>> On Sat, 2 Jun 2012 15:54:03 -0400
>>> Mike Frysinger <vapier@gentoo.org> wrote:
>>>
>>>> # @FUNCTION: redirect_alloc_fd
>>>> # @USAGE: <var> <file> [redirection]
>>>> # @DESCRIPTION:
>>>
>>> (...and a lot of code)
>>>
>>> I may be wrong but wouldn't it be simpler to just stick with a named
>>> pipe here? Well, at first glance you wouldn't be able to read exactly
>>> one result at a time but is it actually useful?
>>
>> I'm pretty sure that the pipe has remain constantly open in read mode
>> (which can only be done by assigning it a file descriptor). Otherwise,
>> there's a race condition that can occur, where a write is lost because
>> it's written just before the reader closes the pipe.
> 
> There isn't a race; write side, it'll block once it exceeds pipe buf 
> size; read side, bash's read functionality is explicitly byte by byte 
> reads to avoid consuming data it doesn't need.

I've created a little test case and it seems you're right that nothing
is lost.
-- 
Thanks,
Zac

[-- Attachment #2: named_pipe_check_for_lost_write.sh --]
[-- Type: application/x-shellscript, Size: 394 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  1:04         ` Zac Medico
@ 2012-06-03  1:10           ` Zac Medico
  2012-06-03  7:15           ` Michał Górny
  1 sibling, 0 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-03  1:10 UTC (permalink / raw
  To: gentoo-dev; +Cc: Brian Harring

[-- Attachment #1: Type: text/plain, Size: 1369 bytes --]

On 06/02/2012 06:04 PM, Zac Medico wrote:
> On 06/02/2012 04:47 PM, Brian Harring wrote:
>> On Sat, Jun 02, 2012 at 03:50:06PM -0700, Zac Medico wrote:
>>> On 06/02/2012 02:31 PM, Micha?? G??rny wrote:
>>>> On Sat, 2 Jun 2012 15:54:03 -0400
>>>> Mike Frysinger <vapier@gentoo.org> wrote:
>>>>
>>>>> # @FUNCTION: redirect_alloc_fd
>>>>> # @USAGE: <var> <file> [redirection]
>>>>> # @DESCRIPTION:
>>>>
>>>> (...and a lot of code)
>>>>
>>>> I may be wrong but wouldn't it be simpler to just stick with a named
>>>> pipe here? Well, at first glance you wouldn't be able to read exactly
>>>> one result at a time but is it actually useful?
>>>
>>> I'm pretty sure that the pipe has remain constantly open in read mode
>>> (which can only be done by assigning it a file descriptor). Otherwise,
>>> there's a race condition that can occur, where a write is lost because
>>> it's written just before the reader closes the pipe.
>>
>> There isn't a race; write side, it'll block once it exceeds pipe buf 
>> size; read side, bash's read functionality is explicitly byte by byte 
>> reads to avoid consuming data it doesn't need.
> 
> I've created a little test case and it seems you're right that nothing
> is lost.

Actually, I forgot the mkfifo call, so it was writing a regular file.
With it the fifo, the write appears to be lost, as I originally suspected.
-- 
Thanks,
Zac

[-- Attachment #2: named_pipe_check_for_lost_write.sh --]
[-- Type: application/x-shellscript, Size: 415 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-02 23:59 ` Brian Harring
@ 2012-06-03  5:05   ` Mike Frysinger
  2012-06-03  6:53     ` Zac Medico
  0 siblings, 1 reply; 29+ messages in thread
From: Mike Frysinger @ 2012-06-03  5:05 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 2394 bytes --]

On Saturday 02 June 2012 19:59:02 Brian Harring wrote:
> On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
> > # @FUNCTION: multijob_post_fork
> > # @DESCRIPTION:
> > # You must call this in the parent process after forking a child process.
> > # If the parallel limit has been hit, it will wait for one to finish and
> > # return the child's exit status.
> > multijob_post_fork() {
> > 
> > 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
> > 	
> > 	: $(( ++mj_num_jobs ))
> > 	
> > 	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> > 	
> > 		multijob_finish_one
> > 	
> > 	fi
> > 	return $?
> > 
> > }
> 
> Minor note; the design of this (fork then check), means when a job
> finishes, we'll not be ready with more work.  This implicitly means
> that given a fast job identification step (main thread), and a slower
> job execution (what's backgrounded), we'll not breach #core of
> parallelism, nor will we achieve that level either (meaning
> potentially some idle cycles left on the floor).
> 
> Realistically, the main thread (what invokes post_fork) is *likely*,
> (if the consumer isn't fricking retarded) to be doing minor work-
> mostly just poking about figuring out what the next task/arguments
> are to submit to the pool.  That work isn't likely to be a full core
> worth of work, else as I said, the consumer is being a retard.
> 
> The original form of this was designed around the assumption that the
> main thread was light, and the backgrounded jobs weren't, thus it
> basically did the equivalent of make -j<cores>+1, allowing #cores
> background jobs running, while allowing the main thread to continue on
> and get the next job ready, once it had that ready, it would block
> waiting for a slot to open, then immediately submit the job once it
> had done a reclaim.

the original code i designed this around had a heavier main thread because it 
had series of parallel sections followed by serial followed by parallel where 
the serial regions didn't depend on the parallel finishing right away.  that 
and doing things post meant it was easier to pass up return values because i 
didn't have to save $? anywhere ;).

thinking a bit more, i don't think the two methods are mutually exclusive.  
it's easy to have the code support both, but i'm not sure the extended 
documentation helps.
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
                   ` (4 preceding siblings ...)
  2012-06-02 23:59 ` Brian Harring
@ 2012-06-03  5:08 ` Mike Frysinger
  2012-06-03 22:16   ` Zac Medico
                     ` (2 more replies)
  2012-06-05  6:14 ` Mike Frysinger
  6 siblings, 3 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-03  5:08 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 6819 bytes --]

v3
-mike

# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $

# @ECLASS: multiprocessing.eclass
# @MAINTAINER:
# base-system@gentoo.org
# @AUTHOR:
# Brian Harring <ferringb@gentoo.org>
# Mike Frysinger <vapier@gentoo.org>
# @BLURB: parallelization with bash (wtf?)
# @DESCRIPTION:
# The multiprocessing eclass contains a suite of functions that allow ebuilds
# to quickly run things in parallel using shell code.
#
# It has two modes: pre-fork and post-fork.  If you don't want to dive into any
# more nuts & bolts, just use the pre-fork mode.  For main threads that mostly
# spawn children and then wait for them to finish, use the pre-fork mode.  For
# main threads that do a bit of processing themselves, use the post-fork mode.
# You may mix & match them for longer computation loops.
# @EXAMPLE:
#
# @CODE
# # First initialize things:
# multijob_init
#
# # Then hash a bunch of files in parallel:
# for n in {0..20} ; do
# 	multijob_child_init md5sum data.${n} > data.${n}
# done
#
# # Then wait for all the children to finish:
# multijob_finish
# @CODE

if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"

# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
# number as bash normalizes it to [0, 255].  If the flags haven't specified a
# -j flag, then "1" is shown as that is the default `make` uses.  Since there's
# no way to represent infinity, we return 999 if the user has -j without a number.
makeopts_jobs() {
	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	local jobs=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-j|--jobs)[[:space:]].*:999:p')
	echo ${jobs:-1}
}

# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait"

	# Setup a pipe for children to write their pids to when they finish.
	local pipe="${T}/multijob.pipe"
	mkfifo "${pipe}"
	redirect_alloc_fd mj_control_fd "${pipe}"
	rm -f "${pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}

# @FUNCTION: multijob_child_init
# @USAGE: [--pre|--post] [command to run in background]
# @DESCRIPTION:
# This function has two forms.  You can use it to execute a simple command
# in the background (and it takes care of everything else), or you must
# call this first thing in your forked child process.
#
# The --pre/--post options allow you to select the child generation mode.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
# # Or if you want to use pre/post fork modes:
# multijob_child_init --pre ls /dev
# multijob_child_init --post ls /dev
#
# # 2nd form: execute multiple stuff in the background (post fork):
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# multijob_post_fork
#
# # 2nd form: execute multiple stuff in the background (pre fork):
# multijob_pre_fork
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# @CODE
multijob_child_init() {
	local mode="pre"
	case $1 in
	--pre)  mode="pre" ; shift ;;
	--post) mode="post"; shift ;;
	esac

	if [[ $# -eq 0 ]] ; then
		trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
		trap 'exit 1' INT TERM
	else
		local ret
		[[ ${mode} == "pre" ]] && { multijob_pre_fork; ret=$?; }
		( multijob_child_init ; "$@" ) &
		[[ ${mode} == "post" ]] && { multijob_post_fork; ret=$?; }
		return ${ret}
	fi
}

# @FUNCTION: _multijob_fork
# @INTERNAL
# @DESCRIPTION:
# Do the actual book keeping.
_multijob_fork() {
	[[ $# -eq 1 ]] || die "incorrect number of arguments"

	local ret=0
	[[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
		ret=$?
	fi
	[[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_pre_fork
# @DESCRIPTION:
# You must call this in the parent process before forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_pre_fork() { _multijob_fork pre "$@" ; }

# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_post_fork() { _multijob_fork post "$@" ; }

# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Wait for a single process to exit and return its exit code.
multijob_finish_one() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	local pid ret
	read -r -u ${mj_control_fd} pid ret || die
	: $(( --mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for all pending processes to exit and return the bitwise or
# of all their exit codes.
multijob_finish() {
	local ret=0
	while [[ ${mj_num_jobs} -gt 0 ]] ; do
		multijob_finish_one
		: $(( ret |= $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# Do this after reaping all the children.
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	return ${ret}
}

# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it.  Store the new
# fd in the specified variable.  Useful for the cases where we don't care
# about the exact fd #.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash provides this functionality.
		eval "exec {${var}}${redir}'${file}'"
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			# Make sure the fd isn't open.  It could be a char device,
			# or a symlink (possibly broken) to something else.
			if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}'${file}'" && break
			fi
			[[ ${fd} -gt 1024 ]] && return 1 # sanity
			: $(( ++fd ))
		done
		: $(( ${var} = fd ))
	fi
}

fi

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  5:05   ` Mike Frysinger
@ 2012-06-03  6:53     ` Zac Medico
  0 siblings, 0 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-03  6:53 UTC (permalink / raw
  To: gentoo-dev

On 06/02/2012 10:05 PM, Mike Frysinger wrote:
> On Saturday 02 June 2012 19:59:02 Brian Harring wrote:
>> On Fri, Jun 01, 2012 at 06:41:22PM -0400, Mike Frysinger wrote:
>>> # @FUNCTION: multijob_post_fork
>>> # @DESCRIPTION:
>>> # You must call this in the parent process after forking a child process.
>>> # If the parallel limit has been hit, it will wait for one to finish and
>>> # return the child's exit status.
>>> multijob_post_fork() {
>>>
>>> 	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"
>>> 	
>>> 	: $(( ++mj_num_jobs ))
>>> 	
>>> 	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
>>> 	
>>> 		multijob_finish_one
>>> 	
>>> 	fi
>>> 	return $?
>>>
>>> }
>>
>> Minor note; the design of this (fork then check), means when a job
>> finishes, we'll not be ready with more work.  This implicitly means
>> that given a fast job identification step (main thread), and a slower
>> job execution (what's backgrounded), we'll not breach #core of
>> parallelism, nor will we achieve that level either (meaning
>> potentially some idle cycles left on the floor).
>>
>> Realistically, the main thread (what invokes post_fork) is *likely*,
>> (if the consumer isn't fricking retarded) to be doing minor work-
>> mostly just poking about figuring out what the next task/arguments
>> are to submit to the pool.  That work isn't likely to be a full core
>> worth of work, else as I said, the consumer is being a retard.
>>
>> The original form of this was designed around the assumption that the
>> main thread was light, and the backgrounded jobs weren't, thus it
>> basically did the equivalent of make -j<cores>+1, allowing #cores
>> background jobs running, while allowing the main thread to continue on
>> and get the next job ready, once it had that ready, it would block
>> waiting for a slot to open, then immediately submit the job once it
>> had done a reclaim.
> 
> the original code i designed this around had a heavier main thread because it 
> had series of parallel sections followed by serial followed by parallel where 
> the serial regions didn't depend on the parallel finishing right away.  that 
> and doing things post meant it was easier to pass up return values because i 
> didn't have to save $? anywhere ;).
> 
> thinking a bit more, i don't think the two methods are mutually exclusive.  
> it's easy to have the code support both, but i'm not sure the extended 
> documentation helps.

Can't you just add a multijob_pre_fork function and do your waiting in
there instead of in the multijob_post_fork function?
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  1:04         ` Zac Medico
  2012-06-03  1:10           ` Zac Medico
@ 2012-06-03  7:15           ` Michał Górny
  2012-06-03  7:18             ` Zac Medico
  1 sibling, 1 reply; 29+ messages in thread
From: Michał Górny @ 2012-06-03  7:15 UTC (permalink / raw
  To: gentoo-dev; +Cc: zmedico, Brian Harring

[-- Attachment #1: Type: text/plain, Size: 619 bytes --]

On Sat, 02 Jun 2012 18:04:41 -0700
Zac Medico <zmedico@gentoo.org> wrote:

> #!/usr/bin/env bash
> named_pipe=$(mktemp -d)/fifo
> 
> (
> 	# hold the pipe open in read mode, so
> 	# the writer doesn't block
> 	sleep 3
> ) < "$named_pipe" &

I don't understand this part. This keeps the pipe open for reading
which obviously causes it to lose data. If you open it, you need to
read all that is there and then close.

And writers are supposed to be blocked. They are forked and just write
when done, so there's no problem with keeping them alive for a short
while.

-- 
Best regards,
Michał Górny

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 316 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  7:15           ` Michał Górny
@ 2012-06-03  7:18             ` Zac Medico
  0 siblings, 0 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-03  7:18 UTC (permalink / raw
  To: Michał Górny; +Cc: gentoo-dev, Brian Harring

On 06/03/2012 12:15 AM, Michał Górny wrote:
> On Sat, 02 Jun 2012 18:04:41 -0700
> Zac Medico <zmedico@gentoo.org> wrote:
> 
>> #!/usr/bin/env bash
>> named_pipe=$(mktemp -d)/fifo
>>
>> (
>> 	# hold the pipe open in read mode, so
>> 	# the writer doesn't block
>> 	sleep 3
>> ) < "$named_pipe" &
> 
> I don't understand this part. This keeps the pipe open for reading
> which obviously causes it to lose data. If you open it, you need to
> read all that is there and then close.

The point is, there's always a small window of time between when a
reader reads its last byte, and when it finally closes the file
descriptor. During this window, there's a race where a writer can come
along and write something without blocking, and have that write be
destroyed when the previous reader closes the fd.

> And writers are supposed to be blocked. They are forked and just write
> when done, so there's no problem with keeping them alive for a short
> while.

Yeah, but you need locking if you want to prevent the race that I've
described above.
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  5:08 ` Mike Frysinger
@ 2012-06-03 22:16   ` Zac Medico
  2012-06-05  6:10     ` Mike Frysinger
  2012-06-03 22:21   ` Zac Medico
  2012-06-04  1:41   ` Zac Medico
  2 siblings, 1 reply; 29+ messages in thread
From: Zac Medico @ 2012-06-03 22:16 UTC (permalink / raw
  To: gentoo-dev

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 06/02/2012 10:08 PM, Mike Frysinger wrote:
> # @FUNCTION: _multijob_fork # @INTERNAL # @DESCRIPTION: # Do the
> actual book keeping. _multijob_fork() { [[ $# -eq 1 ]] || die
> "incorrect number of arguments"
> 
> local ret=0 [[ $1 == "pre" ]] && : $(( ++mj_num_jobs )) if [[
> ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then multijob_finish_one 
> ret=$? fi [[ $1 == "post" ]] && : $(( ++mj_num_jobs )) return
> ${ret} }

The "pre" logic seems wrong. Consider an initial state of
mj_num_jobs=0 and mj_max_jobs=1. It will increment mj_num_jobs to 1,
so [[ 1 -ge 1 ]] is true, and then call multijob_finish_one even
though no jobs have started yet? Wouldn't that deadlock
multijob_finish_one, as it waits for a reply from a job that doesn't
exist yet?
- -- 
Thanks,
Zac
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk/L4j4ACgkQ/ejvha5XGaPyuQCfSHRUHA1KoVc97yRZa8FlF+TS
n04An1/c7IQaH4mqUtm8P305WKKDOgvE
=EgJz
-----END PGP SIGNATURE-----



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  5:08 ` Mike Frysinger
  2012-06-03 22:16   ` Zac Medico
@ 2012-06-03 22:21   ` Zac Medico
  2012-06-04  1:41   ` Zac Medico
  2 siblings, 0 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-03 22:21 UTC (permalink / raw
  To: gentoo-dev; +Cc: Mike Frysinger

(re-send without enigmail screwing up the code formatting)

On 06/02/2012 10:08 PM, Mike Frysinger wrote:
> # @FUNCTION: _multijob_fork
> # @INTERNAL
> # @DESCRIPTION:
> # Do the actual book keeping.
> _multijob_fork() {
> 	[[ $# -eq 1 ]] || die "incorrect number of arguments"
> 
> 	local ret=0
> 	[[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
> 	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> 		multijob_finish_one
> 		ret=$?
> 	fi
> 	[[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
> 	return ${ret}
> }

The "pre" logic seems wrong. Consider an initial state of
mj_num_jobs=0 and mj_max_jobs=1. It will increment mj_num_jobs to 1,
so [[ 1 -ge 1 ]] is true, and then call multijob_finish_one even
though no jobs have started yet? Wouldn't that deadlock
multijob_finish_one, as it waits for a reply from a job that doesn't
exist yet?
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03  5:08 ` Mike Frysinger
  2012-06-03 22:16   ` Zac Medico
  2012-06-03 22:21   ` Zac Medico
@ 2012-06-04  1:41   ` Zac Medico
  2 siblings, 0 replies; 29+ messages in thread
From: Zac Medico @ 2012-06-04  1:41 UTC (permalink / raw
  To: gentoo-dev; +Cc: Mike Frysinger

On 06/02/2012 10:08 PM, Mike Frysinger wrote:
> # @FUNCTION: redirect_alloc_fd
> # @USAGE: <var> <file> [redirection]
> # @DESCRIPTION:
> # Find a free fd and redirect the specified file via it.  Store the new
> # fd in the specified variable.  Useful for the cases where we don't care
> # about the exact fd #.
> redirect_alloc_fd() {
> 	local var=$1 file=$2 redir=${3:-"<>"}
> 
> 	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
> 		# Newer bash provides this functionality.
> 		eval "exec {${var}}${redir}'${file}'"
> 	else
> 		# Need to provide the functionality ourselves.
> 		local fd=10
> 		while :; do
> 			# Make sure the fd isn't open.  It could be a char device,
> 			# or a symlink (possibly broken) to something else.
> 			if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
> 				eval "exec ${fd}${redir}'${file}'" && break
> 			fi
> 			[[ ${fd} -gt 1024 ]] && return 1 # sanity
> 			: $(( ++fd ))
> 		done
> 		: $(( ${var} = fd ))
> 	fi
> }
> 
> fi

Where it returns 1 if [[ ${fd} -gt 1024 ]], maybe it would be best to
die there. It shouldn't fail there in practice, but if it does, it would
be really helpful to exactly where it failed.
-- 
Thanks,
Zac



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-03 22:16   ` Zac Medico
@ 2012-06-05  6:10     ` Mike Frysinger
  0 siblings, 0 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-05  6:10 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 914 bytes --]

On Sunday 03 June 2012 18:16:30 Zac Medico wrote:
> On 06/02/2012 10:08 PM, Mike Frysinger wrote:
> > # @FUNCTION: _multijob_fork # @INTERNAL # @DESCRIPTION: # Do the
> > actual book keeping. _multijob_fork() { [[ $# -eq 1 ]] || die
> > "incorrect number of arguments"
> > 
> > local ret=0 [[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
> > if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
> >   multijob_finish_one
> > 	  ret=$?
> > fi
> > [[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
> > return ${ret}
> 
> The "pre" logic seems wrong. Consider an initial state of
> mj_num_jobs=0 and mj_max_jobs=1. It will increment mj_num_jobs to 1,
> so [[ 1 -ge 1 ]] is true, and then call multijob_finish_one even
> though no jobs have started yet? Wouldn't that deadlock
> multijob_finish_one, as it waits for a reply from a job that doesn't
> exist yet?

yes, i inverted the cases in this func
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
                   ` (5 preceding siblings ...)
  2012-06-03  5:08 ` Mike Frysinger
@ 2012-06-05  6:14 ` Mike Frysinger
  2012-06-07  4:57   ` Mike Frysinger
  6 siblings, 1 reply; 29+ messages in thread
From: Mike Frysinger @ 2012-06-05  6:14 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 6842 bytes --]

v4
-mike

# Copyright 1999-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $

# @ECLASS: multiprocessing.eclass
# @MAINTAINER:
# base-system@gentoo.org
# @AUTHOR:
# Brian Harring <ferringb@gentoo.org>
# Mike Frysinger <vapier@gentoo.org>
# @BLURB: parallelization with bash (wtf?)
# @DESCRIPTION:
# The multiprocessing eclass contains a suite of functions that allow ebuilds
# to quickly run things in parallel using shell code.
#
# It has two modes: pre-fork and post-fork.  If you don't want to dive into any
# more nuts & bolts, just use the pre-fork mode.  For main threads that mostly
# spawn children and then wait for them to finish, use the pre-fork mode.  For
# main threads that do a bit of processing themselves, use the post-fork mode.
# You may mix & match them for longer computation loops.
# @EXAMPLE:
#
# @CODE
# # First initialize things:
# multijob_init
#
# # Then hash a bunch of files in parallel:
# for n in {0..20} ; do
# 	multijob_child_init md5sum data.${n} > data.${n}
# done
#
# # Then wait for all the children to finish:
# multijob_finish
# @CODE

if [[ ${___ECLASS_ONCE_MULTIPROCESSING} != "recur -_+^+_- spank" ]] ; then
___ECLASS_ONCE_MULTIPROCESSING="recur -_+^+_- spank"

# @FUNCTION: makeopts_jobs
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Searches the arguments (defaults to ${MAKEOPTS}) and extracts the jobs number
# specified therein.  Useful for running non-make tools in parallel too.
# i.e. if the user has MAKEOPTS=-j9, this will echo "9" -- we can't return the
# number as bash normalizes it to [0, 255].  If the flags haven't specified a
# -j flag, then "1" is shown as that is the default `make` uses.  Since there's
# no way to represent infinity, we return 999 if the user has -j without a number.
makeopts_jobs() {
	[[ $# -eq 0 ]] && set -- ${MAKEOPTS}
	# This assumes the first .* will be more greedy than the second .*
	# since POSIX doesn't specify a non-greedy match (i.e. ".*?").
	local jobs=$(echo " $* " | sed -r -n \
		-e 's:.*[[:space:]](-j|--jobs[=[:space:]])[[:space:]]*([0-9]+).*:\2:p' \
		-e 's:.*[[:space:]](-j|--jobs)[[:space:]].*:999:p')
	echo ${jobs:-1}
}

# @FUNCTION: multijob_init
# @USAGE: [${MAKEOPTS}]
# @DESCRIPTION:
# Setup the environment for executing code in parallel.
# You must call this before any other multijob function.
multijob_init() {
	# When something goes wrong, try to wait for all the children so we
	# don't leave any zombies around.
	has wait ${EBUILD_DEATH_HOOKS} || EBUILD_DEATH_HOOKS+=" wait"

	# Setup a pipe for children to write their pids to when they finish.
	local pipe="${T}/multijob.pipe"
	mkfifo "${pipe}"
	redirect_alloc_fd mj_control_fd "${pipe}"
	rm -f "${pipe}"

	# See how many children we can fork based on the user's settings.
	mj_max_jobs=$(makeopts_jobs "$@")
	mj_num_jobs=0
}

# @FUNCTION: multijob_child_init
# @USAGE: [--pre|--post] [command to run in background]
# @DESCRIPTION:
# This function has two forms.  You can use it to execute a simple command
# in the background (and it takes care of everything else), or you must
# call this first thing in your forked child process.
#
# The --pre/--post options allow you to select the child generation mode.
#
# @CODE
# # 1st form: pass the command line as arguments:
# multijob_child_init ls /dev
# # Or if you want to use pre/post fork modes:
# multijob_child_init --pre ls /dev
# multijob_child_init --post ls /dev
#
# # 2nd form: execute multiple stuff in the background (post fork):
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# multijob_post_fork
#
# # 2nd form: execute multiple stuff in the background (pre fork):
# multijob_pre_fork
# (
# multijob_child_init
# out=`ls`
# if echo "${out}" | grep foo ; then
# 	echo "YEAH"
# fi
# ) &
# @CODE
multijob_child_init() {
	local mode="pre"
	case $1 in
	--pre)  mode="pre" ; shift ;;
	--post) mode="post"; shift ;;
	esac

	if [[ $# -eq 0 ]] ; then
		trap 'echo ${BASHPID} $? >&'${mj_control_fd} EXIT
		trap 'exit 1' INT TERM
	else
		local ret
		[[ ${mode} == "pre" ]] && { multijob_pre_fork; ret=$?; }
		( multijob_child_init ; "$@" ) &
		[[ ${mode} == "post" ]] && { multijob_post_fork; ret=$?; }
		return ${ret}
	fi
}

# @FUNCTION: _multijob_fork
# @INTERNAL
# @DESCRIPTION:
# Do the actual book keeping.
_multijob_fork() {
	[[ $# -eq 1 ]] || die "incorrect number of arguments"

	local ret=0
	[[ $1 == "post" ]] && : $(( ++mj_num_jobs ))
	if [[ ${mj_num_jobs} -ge ${mj_max_jobs} ]] ; then
		multijob_finish_one
		ret=$?
	fi
	[[ $1 == "pre" ]] && : $(( ++mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_pre_fork
# @DESCRIPTION:
# You must call this in the parent process before forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_pre_fork() { _multijob_fork pre "$@" ; }

# @FUNCTION: multijob_post_fork
# @DESCRIPTION:
# You must call this in the parent process after forking a child process.
# If the parallel limit has been hit, it will wait for one child to finish
# and return its exit status.
multijob_post_fork() { _multijob_fork post "$@" ; }

# @FUNCTION: multijob_finish_one
# @DESCRIPTION:
# Wait for a single process to exit and return its exit code.
multijob_finish_one() {
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	local pid ret
	read -r -u ${mj_control_fd} pid ret || die
	: $(( --mj_num_jobs ))
	return ${ret}
}

# @FUNCTION: multijob_finish
# @DESCRIPTION:
# Wait for all pending processes to exit and return the bitwise or
# of all their exit codes.
multijob_finish() {
	local ret=0
	while [[ ${mj_num_jobs} -gt 0 ]] ; do
		multijob_finish_one
		: $(( ret |= $? ))
	done
	# Let bash clean up its internal child tracking state.
	wait

	# Do this after reaping all the children.
	[[ $# -eq 0 ]] || die "${FUNCNAME} takes no arguments"

	return ${ret}
}

# @FUNCTION: redirect_alloc_fd
# @USAGE: <var> <file> [redirection]
# @DESCRIPTION:
# Find a free fd and redirect the specified file via it.  Store the new
# fd in the specified variable.  Useful for the cases where we don't care
# about the exact fd #.
redirect_alloc_fd() {
	local var=$1 file=$2 redir=${3:-"<>"}

	if [[ $(( (BASH_VERSINFO[0] << 8) + BASH_VERSINFO[1] )) -ge $(( (4 << 8) + 1 )) ]] ; then
		# Newer bash provides this functionality.
		eval "exec {${var}}${redir}'${file}'"
	else
		# Need to provide the functionality ourselves.
		local fd=10
		while :; do
			# Make sure the fd isn't open.  It could be a char device,
			# or a symlink (possibly broken) to something else.
			if [[ ! -e /dev/fd/${fd} ]] && [[ ! -L /dev/fd/${fd} ]] ; then
				eval "exec ${fd}${redir}'${file}'" && break
			fi
			[[ ${fd} -gt 1024 ]] && die 'could not locate a free temp fd !?'
			: $(( ++fd ))
		done
		: $(( ${var} = fd ))
	fi
}

fi

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [gentoo-dev] multiprocessing.eclass: doing parallel work in bash
  2012-06-05  6:14 ` Mike Frysinger
@ 2012-06-07  4:57   ` Mike Frysinger
  0 siblings, 0 replies; 29+ messages in thread
From: Mike Frysinger @ 2012-06-07  4:57 UTC (permalink / raw
  To: gentoo-dev

[-- Attachment #1: Type: Text/Plain, Size: 93 bytes --]

On Tuesday 05 June 2012 02:14:36 Mike Frysinger wrote:
> v4

committed with test cases
-mike

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2012-06-07  4:58 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-01 22:41 [gentoo-dev] multiprocessing.eclass: doing parallel work in bash Mike Frysinger
2012-06-01 22:50 ` Mike Frysinger
2012-06-02  4:11 ` Brian Harring
2012-06-02  4:57   ` Mike Frysinger
2012-06-02  9:23     ` Cyprien Nicolas
2012-06-02  9:52 ` David Leverton
2012-06-02 19:18   ` Mike Frysinger
2012-06-02 19:54 ` Mike Frysinger
2012-06-02 20:39   ` Zac Medico
2012-06-02 21:12     ` Mike Frysinger
2012-06-02 23:29       ` Zac Medico
2012-06-02 23:58         ` Mike Frysinger
2012-06-02 21:31   ` Michał Górny
2012-06-02 22:50     ` Zac Medico
2012-06-02 23:47       ` Brian Harring
2012-06-03  1:04         ` Zac Medico
2012-06-03  1:10           ` Zac Medico
2012-06-03  7:15           ` Michał Górny
2012-06-03  7:18             ` Zac Medico
2012-06-02 23:59 ` Brian Harring
2012-06-03  5:05   ` Mike Frysinger
2012-06-03  6:53     ` Zac Medico
2012-06-03  5:08 ` Mike Frysinger
2012-06-03 22:16   ` Zac Medico
2012-06-05  6:10     ` Mike Frysinger
2012-06-03 22:21   ` Zac Medico
2012-06-04  1:41   ` Zac Medico
2012-06-05  6:14 ` Mike Frysinger
2012-06-07  4:57   ` Mike Frysinger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox