From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from lists.gentoo.org (pigeon.gentoo.org [208.92.234.80]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by finch.gentoo.org (Postfix) with ESMTPS id 51AA11396D0 for ; Mon, 14 Aug 2017 07:39:53 +0000 (UTC) Received: from pigeon.gentoo.org (localhost [127.0.0.1]) by pigeon.gentoo.org (Postfix) with SMTP id 42114E0829; Mon, 14 Aug 2017 07:39:52 +0000 (UTC) Received: from smtp.gentoo.org (mail.gentoo.org [IPv6:2001:470:ea4a:1:5054:ff:fec7:86e4]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by pigeon.gentoo.org (Postfix) with ESMTPS id E7DDCE0823 for ; Mon, 14 Aug 2017 07:39:51 +0000 (UTC) Received: from localhost.localdomain (unknown [100.42.98.197]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-SHA256 (128/128 bits)) (No client certificate requested) (Authenticated sender: zmedico) by smtp.gentoo.org (Postfix) with ESMTPSA id 89FE43417F4; Mon, 14 Aug 2017 07:39:50 +0000 (UTC) From: Zac Medico To: gentoo-portage-dev@lists.gentoo.org Cc: Hidehiko Abe Subject: [gentoo-portage-dev] [PATCH] Rewrite doins in python (bug 624526) Date: Mon, 14 Aug 2017 00:39:21 -0700 Message-Id: <20170814073921.20837-1-zmedico@gentoo.org> X-Mailer: git-send-email 2.13.0 Precedence: bulk List-Post: List-Help: List-Unsubscribe: List-Subscribe: List-Id: Gentoo Linux mail X-BeenThere: gentoo-portage-dev@lists.gentoo.org Reply-to: gentoo-portage-dev@lists.gentoo.org X-Archives-Salt: 99bb79d5-05fa-492b-82f6-cac8ae67a80a X-Archives-Hash: a4f97b134b161a20addfd3249ac3a727 From: Hidehiko Abe doins is written in bash. However, specifically in case that too many files are installed, it is very slow. This CL rewrites the script in python for performance. BUG=chromium:712659 TEST=time (./setup_board --forace && \ ./build_package --withdev && \ ./build_image --noenable_rootfs_verification test) ===Before=== real 21m35.445s user 93m40.588s sys 21m31.224s ===After=== real 17m30.106s user 94m1.812s sys 20m13.468s Change-Id: Ib10f623961ba316753d58397cff5e72fbc343339 Reviewed-on: https://chromium-review.googlesource.com/559225 X-Chromium-Bug: 712659 X-Chromium-Bug-url: https://bugs.chromium.org/p/chromium/issues/detail?id=712659 X-Gentoo-Bug: 624526 X-Gentoo-Bug-url: https://bugs.gentoo.org/624526 --- bin/doins.py | 341 +++++++++++++++++++++++++++++++++++++++++++++++ bin/ebuild-helpers/doins | 123 ++--------------- 2 files changed, 353 insertions(+), 111 deletions(-) create mode 100644 bin/doins.py diff --git a/bin/doins.py b/bin/doins.py new file mode 100644 index 000000000..4a17287ca --- /dev/null +++ b/bin/doins.py @@ -0,0 +1,341 @@ +#!/usr/bin/python -b +# Copyright 2017 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from __future__ import print_function + +import argparse +import errno +import grp +import logging +import os +import pwd +import re +import shlex +import shutil +import stat +import subprocess +import sys + +from portage.util import movefile + +# Change back to original cwd _after_ all imports (bug #469338). +# See also bin/ebuild-helpers/doins, which sets the cwd. +os.chdir(os.environ["__PORTAGE_HELPER_CWD"]) + +_PORTAGE_ACTUAL_DISTDIR = ( + (os.environb if sys.version_info.major >= 3 else os.environ).get( + b'PORTAGE_ACTUAL_DISTDIR', b'') + b'/') + + +def _parse_install_options( + options, inprocess_runner_class, subprocess_runner_class): + """Parses command line arguments for install command.""" + parser = argparse.ArgumentParser() + parser.add_argument('-g', '--group', default=-1, type=_parse_group) + parser.add_argument('-o', '--owner', default=-1, type=_parse_user) + # "install"'s --mode option is complicated. So here is partially + # supported. + parser.add_argument('-m', '--mode', type=_parse_mode) + split_options = shlex.split(options) + namespace, remaining = parser.parse_known_args(split_options) + if remaining: + print('Unknown install options: %s, %r' % (options, remaining), + file=sys.stderr) + if os.environ.get('DOINSSTRICTOPTION', '') == '1': + sys.exit(1) + print('Continue with falling back to \'install\' ' + 'command execution, which can be slower.', + file=sys.stderr) + return subprocess_runner_class(split_options) + return inprocess_runner_class(namespace) + + +def _parse_group(group): + """Parses gid.""" + g = grp.getgrnam(group) + if g: + return g.gr_gid + return int(group) + + +def _parse_user(user): + """Parses uid.""" + u = pwd.getpwnam(user) + if u: + return u.pw_uid + return int(user) + + +def _parse_mode(mode): + # "install"'s --mode option is complicated. So here is partially + # supported. + # In Python 3, the prefix of octal int must be '0o' rather than '0'. + # So, set base explicitly in that case. + return int(mode, 8 if re.search(r'^0[0-7]*$', mode) else 0) + + +def _set_attributes(options, path): + """Sets attributes the file/dir at given |path|. + + Args: + options: object which has |owner|, |group| and |mode| fields. + |owner| is int value representing uid. Similary |group| + represents gid. + If -1 is set, just unchanged. + |mode| is the bits of permissions. + path: File/directory path. + """ + if options.owner != -1 or options.group != -1: + os.lchown(path, options.owner, options.group) + if options.mode is not None: + os.chmod(path, options.mode) + + +class _InsInProcessInstallRunner(object): + def __init__(self, parsed_options): + self._parsed_options = parsed_options + self._copy_xattr = ( + 'xattr' in os.environ.get('FEATURES', '').split()) + if self._copy_xattr: + self._xattr_exclude = os.environ.get( + 'PORTAGE_XATTR_EXCLUDE', + 'security.* system.nfs4_acl') + + def run(self, source, dest_dir): + """Installs a file at |source| into |dest_dir| in process.""" + dest = os.path.join(dest_dir, os.path.basename(source)) + try: + # TODO: Consider to use portage.util.file_copy.copyfile + # introduced by + # https://gitweb.gentoo.org/proj/portage.git/commit/ + # ?id=8ab5c8835931fd9ec098dbf4c5f416eb32e4a3a4 + # after uprev. + shutil.copyfile(source, dest) + _set_attributes(self._parsed_options, dest) + if self._copy_xattr: + movefile._copyxattr( + source, dest, + exclude=self._xattr_exclude) + except Exception: + logging.exception( + 'Failed to copy file: ' + '_parsed_options=%r, source=%r, dest_dir=%r', + self._parsed_options, source, dest_dir) + return False + return True + + +class _InsSubprocessInstallRunner(object): + def __init__(self, split_options): + self._split_options = split_options + + def run(self, source, dest_dir): + """Installs a file at |source| into |dest_dir| by 'install'.""" + command = ['install'] + self._split_options + [source, dest_dir] + return subprocess.call(command) == 0 + + +class _DirInProcessInstallRunner(object): + def __init__(self, parsed_options): + self._parsed_options = parsed_options + + def run(self, dest): + """Installs a dir into |dest| in process.""" + try: + os.makedirs(dest) + except OSError as e: + if e.errno != errno.EEXIST or not os.path.isdir(dest): + raise + _set_attributes(self._parsed_options, dest) + + +class _DirSubprocessInstallRunner(object): + """Runs real 'install' command to install files or directories.""" + + def __init__(self, split_options): + self._split_options = split_options + + def run(self, dest): + """Installs a dir into |dest| by 'install' command.""" + command = ['install', '-d'] + self._split_options + [dest] + subprocess.call(command) + + +class _InstallRunner(object): + def __init__(self): + self._ins_runner = _parse_install_options( + os.environ.get('INSOPTIONS', ''), + _InsInProcessInstallRunner, + _InsSubprocessInstallRunner) + self._dir_runner = _parse_install_options( + os.environ.get('DIROPTIONS', ''), + _DirInProcessInstallRunner, + _DirSubprocessInstallRunner) + + def install_file(self, source, dest_dir): + """Installs a file at |source| into |dest_dir| directory.""" + return self._ins_runner.run(source, dest_dir) + + def install_dir(self, dest): + """Creates a directory at |dest|.""" + self._dir_runner.run(dest) + + +def _doins(args, install_runner, relpath, source_root): + """Installs a file as if 'install' command runs. + + Installs a file at |source_root|/|relpath| into |args.dest|/|relpath|. + If |args.preserve_symlinks| is set, creates symlink if the source is a + symlink. + + Args: + args: parsed arguments. It should have following fields. + - preserve_symlinks: bool representing whether symlinks + needs to be preserved. + - dest: Destination root directory. + - insoptions: options for 'install' command. + intall_runner: _InstallRunner instance for file install. + relpath: Relative path of the file being installed. + source_root: Source root directory. + + Returns: True on success. + """ + source = os.path.join(source_root, relpath) + dest = os.path.join(args.dest, relpath) + if os.path.islink(source): + # Our fake $DISTDIR contains symlinks that should not be + # reproduced inside $D. In order to ensure that things like + # dodoc "$DISTDIR"/foo.pdf work as expected, we dereference + # symlinked files that refer to absolute paths inside + # $PORTAGE_ACTUAL_DISTDIR/. + try: + if (args.preserve_symlinks and + not os.readlink(source).startswith( + _PORTAGE_ACTUAL_DISTDIR)): + linkto = os.readlink(source) + shutil.rmtree(dest, ignore_errors=True) + os.symlink(linkto, dest) + return True + except Exception: + logging.exception( + 'Failed to create symlink: ' + 'args=%r, relpath=%r, source_root=%r', + args, relpath, source_root) + return False + + return install_runner.install_file(source, os.path.dirname(dest)) + + +def _parse_args(): + """Parses the command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--recursive', action='store_true', + help='If set, installs files recursively. Otherwise, ' + 'just skips directories.') + parser.add_argument( + '--preserve_symlinks', action='store_true', + help='If set, a symlink will be installed as symlink.') + # If helper is dodoc, it changes the behavior for the directory + # install without --recursive. + parser.add_argument('--helper', help='Name of helper.') + parser.add_argument( + '--dest', + help='Destination where the files are installed.') + parser.add_argument( + 'sources', nargs='*', + help='Source file/directory paths to be installed.') + + args = parser.parse_args() + + # Encode back to the original byte stream. Please see + # http://bugs.python.org/issue8776. + if sys.version_info.major >= 3: + args.dest = os.fsencode(args.dest) + args.sources = [os.fsencode(source) for source in args.sources] + + return args + + +def _install_dir(args, install_runner, source): + """Installs directory at |source|. + + Returns: + True on success, False on failure, or None on skipped. + """ + if not args.recursive: + if args.helper == 'dodoc': + print('!!! %s: %s is a directory' % ( + args.helper, source), + file=sys.stderr) + return False + # Neither success nor fail. Return None to indicate skipped. + return None + + # Strip trailing '/'s. + source = source.rstrip(b'/') + source_root = os.path.dirname(source) + dest_dir = os.path.join(args.dest, os.path.basename(source)) + install_runner.install_dir(dest_dir) + + relpath_list = [] + for dirpath, dirnames, filenames in os.walk(source): + for dirname in dirnames: + relpath = os.path.relpath( + os.path.join(dirpath, dirname), + source_root) + dest = os.path.join(args.dest, relpath) + install_runner.install_dir(dest) + relpath_list.extend( + os.path.relpath( + os.path.join(dirpath, filename), source_root) + for filename in filenames) + + if not relpath_list: + # NOTE: Even if only an empty directory is installed here, it + # still counts as success, since an empty directory given as + # an argument to doins -r should not trigger failure. + return True + success = True + for relpath in relpath_list: + if not _doins(args, install_runner, relpath, source_root): + success = False + return success + + +def main(): + args = _parse_args() + install_runner = _InstallRunner() + + if not os.path.isdir(args.dest): + install_runner.install_dir(args.dest) + + any_success = False + any_failure = False + for source in args.sources: + if (os.path.isdir(source) and + (not args.preserve_symlinks or + not os.path.islink(source))): + ret = _install_dir(args, install_runner, source) + if ret is None: + continue + if ret: + any_success = True + else: + any_failure = True + else: + if _doins( + args, install_runner, + os.path.basename(source), + os.path.dirname(source)): + any_success = True + else: + any_failure = True + + return 0 if not any_failure and any_success else 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/bin/ebuild-helpers/doins b/bin/ebuild-helpers/doins index 93052c2ea..de559780c 100755 --- a/bin/ebuild-helpers/doins +++ b/bin/ebuild-helpers/doins @@ -24,10 +24,10 @@ if [ $# -lt 1 ] ; then fi if [[ "$1" == "-r" ]] ; then - DOINSRECUR=y + RECURSIVE_OPTION='--recursive' shift else - DOINSRECUR=n + RECURSIVE_OPTION='' fi if ! ___eapi_has_prefix_variables; then @@ -44,116 +44,17 @@ if [[ ${INSDESTTREE#${ED}} != "${INSDESTTREE}" ]]; then fi if ___eapi_doins_and_newins_preserve_symlinks; then - PRESERVE_SYMLINKS=y + SYMLINK_OPTION='--preserve_symlinks' else - PRESERVE_SYMLINKS=n + SYMLINK_OPTION='' fi -export TMP=$(mktemp -d "${T}/.doins_tmp_XXXXXX") -# Use separate directories to avoid potential name collisions. -mkdir -p "$TMP"/{1,2} +# Use safe cwd, avoiding unsafe import for bug #469338. +export __PORTAGE_HELPER_CWD=${PWD} +cd "${PORTAGE_PYM_PATH:-/usr/lib/portage/pym}" -[[ ! -d ${ED}${INSDESTTREE} ]] && dodir "${INSDESTTREE}" - -_doins() { - local mysrc="$1" mydir="$2" cleanup="" rval - - if [ -L "$mysrc" ] ; then - # Our fake $DISTDIR contains symlinks that should - # not be reproduced inside $D. In order to ensure - # that things like dodoc "$DISTDIR"/foo.pdf work - # as expected, we dereference symlinked files that - # refer to absolute paths inside - # $PORTAGE_ACTUAL_DISTDIR/. - if [ $PRESERVE_SYMLINKS = y ] && \ - ! [[ $(readlink "$mysrc") == "$PORTAGE_ACTUAL_DISTDIR"/* ]] ; then - rm -rf "${ED}$INSDESTTREE/$mydir/${mysrc##*/}" || return $? - cp -P "$mysrc" "${ED}$INSDESTTREE/$mydir/${mysrc##*/}" - return $? - else - cp "$mysrc" "$TMP/2/${mysrc##*/}" || return $? - mysrc="$TMP/2/${mysrc##*/}" - cleanup=$mysrc - fi - fi - - install ${INSOPTIONS} "${mysrc}" "${ED}${INSDESTTREE}/${mydir}" - rval=$? - [[ -n ${cleanup} ]] && rm -f "${cleanup}" - [ $rval -ne 0 ] && echo "!!! ${helper}: $mysrc does not exist" 1>&2 - return $rval -} - -_xdoins() { - local -i failed=0 - while read -r -d $'\0' x ; do - _doins "$x" "${x%/*}" - ((failed|=$?)) - done - return $failed -} - -success=0 -failed=0 - -for x in "$@" ; do - if [[ $PRESERVE_SYMLINKS = n && -d $x ]] || \ - [[ $PRESERVE_SYMLINKS = y && -d $x && ! -L $x ]] ; then - if [ "${DOINSRECUR}" == "n" ] ; then - if [[ ${helper} == dodoc ]] ; then - echo "!!! ${helper}: $x is a directory" 1>&2 - ((failed|=1)) - fi - continue - fi - - while [ "$x" != "${x%/}" ] ; do - x=${x%/} - done - if [ "$x" = "${x%/*}" ] ; then - pushd "$PWD" >/dev/null - else - pushd "${x%/*}" >/dev/null - fi - x=${x##*/} - x_orig=$x - # Follow any symlinks recursively until we've got - # a normal directory for 'find' to traverse. The - # name of the symlink will be used for the name - # of the installed directory, as discussed in - # bug #239529. - while [ -L "$x" ] ; do - pushd "$(readlink "$x")" >/dev/null - x=${PWD##*/} - pushd "${PWD%/*}" >/dev/null - done - if [[ $x != $x_orig ]] ; then - mv "$x" "$TMP/1/$x_orig" - pushd "$TMP/1" >/dev/null - fi - find "$x_orig" -type d -exec dodir "${INSDESTTREE}/{}" \; - find "$x_orig" \( -type f -or -type l \) -print0 | _xdoins - if [[ ${PIPESTATUS[1]} -eq 0 ]] ; then - # NOTE: Even if only an empty directory is installed here, it - # still counts as success, since an empty directory given as - # an argument to doins -r should not trigger failure. - ((success|=1)) - else - ((failed|=1)) - fi - if [[ $x != $x_orig ]] ; then - popd >/dev/null - mv "$TMP/1/$x_orig" "$x" - fi - while popd >/dev/null 2>&1 ; do true ; done - else - _doins "${x}" - if [[ $? -eq 0 ]] ; then - ((success|=1)) - else - ((failed|=1)) - fi - fi -done -rm -rf "$TMP" -[[ $failed -ne 0 || $success -eq 0 ]] && { __helpers_die "${helper} failed"; exit 1; } || exit 0 +"${PORTAGE_PYTHON:-/usr/bin/python}" \ + "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/doins.py \ + ${RECURSIVE_OPTION} ${SYMLINK_OPTION} \ + --helper "${helper}" --dest "${ED}${INSDESTTREE}" "$@" || \ +{ __helpers_die "${helper} failed"; exit 1; } -- 2.13.0