public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/portage:master commit in: lib/_emerge/, lib/portage/, lib/portage/tests/ebuild/
@ 2024-02-03 22:54 Zac Medico
  0 siblings, 0 replies; only message in thread
From: Zac Medico @ 2024-02-03 22:54 UTC (permalink / raw
  To: gentoo-commits

commit:     305612d1b04aa06d3d1a1c8b51d046a644742fd5
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Feb  3 21:27:45 2024 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sat Feb  3 21:27:45 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=305612d1

process.spawn: Use multiprocessing.Process for returnproc

Use multiprocessing.Process for returnproc, so that
fork will stop being used when python makes "spawn"
the default multiprocessing start method.

Continue to use _start_fork when returnproc is not
enabled, for backward compatibility. Ultimately,
it can be removed at the same time as the returnpid
parameter.

Bug: https://bugs.gentoo.org/916566
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 lib/_emerge/SpawnProcess.py                        |  4 +-
 lib/portage/process.py                             | 72 ++++++++++++++++++----
 lib/portage/tests/ebuild/test_doebuild_fd_pipes.py |  6 +-
 3 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/lib/_emerge/SpawnProcess.py b/lib/_emerge/SpawnProcess.py
index 7f4a23892b..5e582e3322 100644
--- a/lib/_emerge/SpawnProcess.py
+++ b/lib/_emerge/SpawnProcess.py
@@ -224,7 +224,9 @@ class SpawnProcess(SubProcess):
         got_pty, master_fd, slave_fd = _create_pty_or_pipe(copy_term_size=stdout_pipe)
         return (master_fd, slave_fd)
 
-    def _spawn(self, args: list[str], **kwargs) -> portage.process.Process:
+    def _spawn(
+        self, args: list[str], **kwargs
+    ) -> portage.process.MultiprocessingProcess:
         spawn_func = portage.process.spawn
 
         if self._selinux_type is not None:

diff --git a/lib/portage/process.py b/lib/portage/process.py
index 01426179d7..d64ffa924f 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -19,7 +19,7 @@ import warnings
 
 from dataclasses import dataclass
 from functools import lru_cache
-from typing import Any, Optional, Callable
+from typing import Any, Optional, Callable, Union
 
 from portage import os
 from portage import _encodings
@@ -28,6 +28,7 @@ import portage
 
 portage.proxy.lazyimport.lazyimport(
     globals(),
+    "portage.util._async.ForkProcess:ForkProcess",
     "portage.util._eventloop.global_event_loop:global_event_loop",
     "portage.util.futures:asyncio",
     "portage.util:dump_traceback,writemsg,writemsg_level",
@@ -296,12 +297,19 @@ class AbstractProcess:
 
 class Process(AbstractProcess):
     """
-    An object that wraps OS processes created by spawn.
-    In the future, spawn will return objects of a different type
-    but with a compatible interface to this class, in order
-    to encapsulate implementation-dependent objects like
-    multiprocessing.Process which are designed to manage
-    the process lifecycle and need to persist until it exits.
+    An object that wraps OS processes which do not have an
+    associated multiprocessing.Process instance. Ultimately,
+    we need to stop using os.fork() to create these processes
+    because it is unsafe for threaded processes as discussed
+    in https://github.com/python/cpython/issues/84559.
+
+    Note that if subprocess.Popen is used without pass_fds
+    or preexec_fn parameters, then it avoids using os.fork()
+    by instead using posix_spawn. This approach is not used
+    by spawn because it needs to execute python code prior
+    to exec, so it instead uses multiprocessing.Process,
+    which only uses os.fork() when the multiprocessing start
+    method is fork.
     """
 
     def __init__(self, pid: int):
@@ -461,7 +469,7 @@ def spawn(
     unshare_mount=False,
     unshare_pid=False,
     warn_on_large_env=False,
-):
+) -> Union[int, MultiprocessingProcess, list[int]]:
     """
     Spawns a given command.
 
@@ -479,8 +487,8 @@ def spawn(
     @param returnpid: Return the Process IDs for a successful spawn.
     NOTE: This requires the caller clean up all the PIDs, otherwise spawn will clean them.
     @type returnpid: Boolean
-    @param returnproc: Return a Process object for a successful spawn (conflicts with logfile parameter).
-    NOTE: This requires the caller to asynchronously wait for the Process.
+    @param returnproc: Return a MultiprocessingProcess instance (conflicts with logfile parameter).
+    NOTE: This requires the caller to asynchronously wait for the MultiprocessingProcess instance.
     @type returnproc: Boolean
     @param uid: User ID to spawn as; useful for dropping privilages
     @type uid: Integer
@@ -623,7 +631,9 @@ def spawn(
     # fork, so that the result is cached in the main process.
     bool(groups)
 
-    pid = _start_fork(
+    start_func = _start_proc if returnproc else _start_fork
+
+    pid = start_func(
         _exec_wrapper,
         args=(
             binary,
@@ -649,6 +659,10 @@ def spawn(
         close_fds=close_fds,
     )
 
+    if returnproc:
+        # _start_proc returns a MultiprocessingProcess instance.
+        return pid
+
     if not isinstance(pid, int):
         raise AssertionError(f"fork returned non-integer: {repr(pid)}")
 
@@ -670,8 +684,6 @@ def spawn(
                 stacklevel=1,
             )
         return mypids
-    if returnproc:
-        return Process(mypids[0])
 
     # Otherwise we clean them up.
     while mypids:
@@ -1370,6 +1382,40 @@ def _start_fork(
     return pid
 
 
+def _start_proc(
+    target: Callable[..., None],
+    args: Optional[tuple[Any, ...]] = (),
+    kwargs: Optional[dict[str, Any]] = {},
+    fd_pipes: Optional[dict[int, int]] = None,
+    close_fds: Optional[bool] = False,
+) -> MultiprocessingProcess:
+    """
+    Execute the target function using multiprocess.Process.
+    If the close_fds parameter is True then NotImplementedError
+    is raised, since it is risky to forcefully close file
+    descriptors that have references (bug 374335), and PEP 446
+    should ensure that any relevant file descriptors are
+    non-inheritable and therefore automatically closed on exec.
+    """
+    if close_fds:
+        raise NotImplementedError(
+            "close_fds is not supported (since file descriptors are non-inheritable by default for exec)"
+        )
+
+    proc = ForkProcess(
+        scheduler=global_event_loop(),
+        target=target,
+        args=args,
+        kwargs=kwargs,
+        fd_pipes=fd_pipes,
+    )
+    proc.start()
+
+    # ForkProcess conveniently holds a MultiprocessingProcess
+    # instance that is suitable to return here.
+    return proc._proc
+
+
 def find_binary(binary):
     """
     Given a binary name, find the binary in PATH

diff --git a/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py b/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
index 678486ed16..c3b47d4e89 100644
--- a/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
+++ b/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2023 Gentoo Authors
+# Copyright 2013-2024 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
 import multiprocessing
@@ -167,6 +167,10 @@ class DoebuildFdPipesTestCase(TestCase):
     @staticmethod
     def _doebuild(db, pw, *args, **kwargs):
         QueryCommand._db = db
+        # Somehow pw.fileno() is inheritable when doebuild is
+        # implemented with os.fork(), but not when it is implemented
+        # with multiprocessing.Process.
+        os.set_inheritable(pw.fileno(), True)
         kwargs["fd_pipes"] = {
             DoebuildFdPipesTestCase.output_fd: pw.fileno(),
         }


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-03 22:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-02-03 22:54 [gentoo-commits] proj/portage:master commit in: lib/_emerge/, lib/portage/, lib/portage/tests/ebuild/ Zac Medico

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox