public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/portage:master commit in: lib/portage/tests/process/, lib/portage/package/ebuild/, /, man/, lib/portage/
@ 2023-03-11 18:54 Sam James
  0 siblings, 0 replies; only message in thread
From: Sam James @ 2023-03-11 18:54 UTC (permalink / raw
  To: gentoo-commits

commit:     7c5a0866663cc27fdc8a80630f18f954532d561d
Author:     Florian Schmaus <flow <AT> gentoo <DOT> org>
AuthorDate: Fri Feb 17 09:04:03 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sat Mar 11 18:52:27 2023 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=7c5a0866

Add FEATURE="warn-on-large-env": warn on large child process environments

This adds a new feature to portage which issues a warning if portage
is about to execute a new child process with a large environment.

Signed-off-by: Florian Schmaus <flow <AT> gentoo.org>
Closes: https://github.com/gentoo/portage/pull/963
Signed-off-by: Sam James <sam <AT> gentoo.org>

 NEWS                                               |  3 +
 lib/portage/const.py                               |  1 +
 lib/portage/package/ebuild/doebuild.py             |  1 +
 lib/portage/process.py                             | 69 ++++++++++++++++------
 .../tests/process/test_spawn_warn_large_env.py     | 46 +++++++++++++++
 man/make.conf.5                                    |  3 +
 6 files changed, 106 insertions(+), 17 deletions(-)

diff --git a/NEWS b/NEWS
index e67e488b6..a6daf1138 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,6 @@
+* New portage FEATURE warn-on-large-env, to emit a warning if portage
+  executes an ebuild-related child process with a large environment.
+
 portage-3.0.45.2 (2023-03-04)
 --------------
 

diff --git a/lib/portage/const.py b/lib/portage/const.py
index 99206fe2c..10a208ceb 100644
--- a/lib/portage/const.py
+++ b/lib/portage/const.py
@@ -213,6 +213,7 @@ SUPPORTED_FEATURES = frozenset(
         "userpriv",
         "usersandbox",
         "usersync",
+        "warn-on-large-env",
         "webrsync-gpg",
         "xattr",
     )

diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py
index 2226812e3..380f8f98d 100644
--- a/lib/portage/package/ebuild/doebuild.py
+++ b/lib/portage/package/ebuild/doebuild.py
@@ -209,6 +209,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, **kwargs):
     kwargs["pidns"] = (
         "pid-sandbox" in settings.features and phase not in _global_pid_phases
     )
+    kwargs["warn_on_large_env"] = "warn-on-large-env" in settings.features
 
     if phase == "depend":
         kwargs["droppriv"] = "userpriv" in settings.features

diff --git a/lib/portage/process.py b/lib/portage/process.py
index 94850b6e7..4a1baedc6 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -6,6 +6,7 @@
 import atexit
 import errno
 import fcntl
+import logging
 import multiprocessing
 import platform
 import signal
@@ -15,6 +16,9 @@ import sys
 import traceback
 import os as _os
 
+from dataclasses import dataclass
+from functools import lru_cache
+
 from portage import os
 from portage import _encodings
 from portage import _unicode_encode
@@ -22,7 +26,7 @@ import portage
 
 portage.proxy.lazyimport.lazyimport(
     globals(),
-    "portage.util:dump_traceback,writemsg",
+    "portage.util:dump_traceback,writemsg,writemsg_level",
 )
 
 from portage.const import BASH_BINARY, SANDBOX_BINARY, FAKEROOT_BINARY
@@ -241,6 +245,37 @@ def cleanup():
     pass
 
 
+@dataclass(frozen=True)
+class EnvStats:
+    env_size: int
+    env_largest_name: str
+    env_largest_size: int
+
+
+def calc_env_stats(env) -> EnvStats:
+    @lru_cache(1024)
+    def encoded_length(s):
+        return len(os.fsencode(s))
+
+    env_size = 0
+    env_largest_name = None
+    env_largest_size = 0
+    for env_name, env_value in env.items():
+        env_name_size = encoded_length(env_name)
+        env_value_size = encoded_length(env_value)
+        # Add two for '=' and the terminating null byte.
+        total_size = env_name_size + env_value_size + 2
+        if total_size > env_largest_size:
+            env_largest_name = env_name
+            env_largest_size = total_size
+        env_size += total_size
+
+    return EnvStats(env_size, env_largest_name, env_largest_size)
+
+
+env_too_large_warnings = 0
+
+
 def spawn(
     mycommand,
     env=None,
@@ -261,6 +296,7 @@ def spawn(
     unshare_mount=False,
     unshare_pid=False,
     cgroup=None,
+    warn_on_large_env=False,
 ):
     """
     Spawns a given command.
@@ -322,6 +358,18 @@ def spawn(
 
     env = os.environ if env is None else env
 
+    env_stats = None
+    if warn_on_large_env:
+        env_stats = calc_env_stats(env)
+
+        global env_too_large_warnings
+        if env_stats.env_size > 1024 * 96 and env_too_large_warnings < 3:
+            env_too_large_warnings += 1
+            writemsg_level(
+                f"WARNING: New process environment is large, executing {mycommand} may fail. Size: {env_stats.env_size} bytes. Largest environment variable: {env_stats.env_largest_name} ({env_stats.env_largest_size} bytes)",
+                logging.WARNING,
+            )
+
     # If an absolute path to an executable file isn't given
     # search for it unless we've been told not to.
     binary = mycommand[0]
@@ -445,24 +493,11 @@ def spawn(
                     # culprit. See also
                     # - https://bugs.gentoo.org/721088
                     # - https://bugs.gentoo.org/830187
-                    def encoded_length(s):
-                        return len(os.fsencode(s))
-
-                    env_size = 0
-                    env_largest_name = None
-                    env_largest_size = 0
-                    for env_name, env_value in env.items():
-                        env_name_size = encoded_length(env_name)
-                        env_value_size = encoded_length(env_value)
-                        # Add two for '=' and the terminating null byte.
-                        total_size = env_name_size + env_value_size + 2
-                        if total_size > env_largest_size:
-                            env_largest_name = env_name
-                            env_largest_size = total_size
-                        env_size += total_size
+                    if not env_stats:
+                        env_stats = calc_env_stats(env)
 
                     writemsg(
-                        f"ERROR: Executing {mycommand} failed with E2BIG. Child process environment size: {env_size} bytes. Largest environment variable: {env_largest_name} ({env_largest_size} bytes)\n"
+                        f"ERROR: Executing {mycommand} failed with E2BIG. Child process environment size: {env_stats.env_size} bytes. Largest environment variable: {env_stats.env_largest_name} ({env_stats.env_largest_size} bytes)\n"
                     )
 
                 # We need to catch _any_ exception so that it doesn't

diff --git a/lib/portage/tests/process/test_spawn_warn_large_env.py b/lib/portage/tests/process/test_spawn_warn_large_env.py
new file mode 100644
index 000000000..185344881
--- /dev/null
+++ b/lib/portage/tests/process/test_spawn_warn_large_env.py
@@ -0,0 +1,46 @@
+# Copyright 2023 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import platform
+import tempfile
+
+from pathlib import Path
+
+import portage.process
+
+from portage import shutil
+from portage.tests import TestCase
+
+
+class SpawnWarnLargeEnvTestCase(TestCase):
+    def testSpawnWarnLargeEnv(self):
+        if platform.system() != "Linux":
+            self.skipTest("not Linux")
+
+        env = dict()
+        env["LARGE_ENV_VAR"] = "X" * 1024 * 96
+
+        tmpdir = tempfile.mkdtemp()
+        previous_env_too_large_warnings = portage.process.env_too_large_warnings
+        try:
+            logfile = tmpdir / Path("logfile")
+            echo_output = "This is an echo process with a large env"
+            retval = portage.process.spawn(
+                ["echo", echo_output],
+                env=env,
+                logfile=logfile,
+                warn_on_large_env=True,
+            )
+
+            with open(logfile) as f:
+                logfile_content = f.read()
+                self.assertIn(
+                    echo_output,
+                    logfile_content,
+                )
+            self.assertTrue(
+                portage.process.env_too_large_warnings > previous_env_too_large_warnings
+            )
+            self.assertEqual(retval, 0)
+        finally:
+            shutil.rmtree(tmpdir)

diff --git a/man/make.conf.5 b/man/make.conf.5
index 05832ce1c..f3a6d8e27 100644
--- a/man/make.conf.5
+++ b/man/make.conf.5
@@ -813,6 +813,9 @@ It is the user's responsibility to ensure correct ownership, since otherwise
 Portage would have to waste time validating ownership for each and every sync
 operation.
 .TP
+.B warn-on-large-env
+Warn if portage is about to execute a child process with a large environment.
+.TP
 .B webrsync-gpg
 Enable GPG verification when using \fIemerge\-webrsync\fR. This feature is
 deprecated and has been replaced by the \fBrepos.conf\fR


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-11 18:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-03-11 18:54 [gentoo-commits] proj/portage:master commit in: lib/portage/tests/process/, lib/portage/package/ebuild/, /, man/, lib/portage/ Sam James

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox