* [gentoo-commits] proj/portage:master commit in: lib/_emerge/, lib/portage/, lib/portage/tests/ebuild/
@ 2024-02-03 22:54 Zac Medico
0 siblings, 0 replies; only message in thread
From: Zac Medico @ 2024-02-03 22:54 UTC (permalink / raw
To: gentoo-commits
commit: 305612d1b04aa06d3d1a1c8b51d046a644742fd5
Author: Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Feb 3 21:27:45 2024 +0000
Commit: Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sat Feb 3 21:27:45 2024 +0000
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=305612d1
process.spawn: Use multiprocessing.Process for returnproc
Use multiprocessing.Process for returnproc, so that
fork will stop being used when python makes "spawn"
the default multiprocessing start method.
Continue to use _start_fork when returnproc is not
enabled, for backward compatibility. Ultimately,
it can be removed at the same time as the returnpid
parameter.
Bug: https://bugs.gentoo.org/916566
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>
lib/_emerge/SpawnProcess.py | 4 +-
lib/portage/process.py | 72 ++++++++++++++++++----
lib/portage/tests/ebuild/test_doebuild_fd_pipes.py | 6 +-
3 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/lib/_emerge/SpawnProcess.py b/lib/_emerge/SpawnProcess.py
index 7f4a23892b..5e582e3322 100644
--- a/lib/_emerge/SpawnProcess.py
+++ b/lib/_emerge/SpawnProcess.py
@@ -224,7 +224,9 @@ class SpawnProcess(SubProcess):
got_pty, master_fd, slave_fd = _create_pty_or_pipe(copy_term_size=stdout_pipe)
return (master_fd, slave_fd)
- def _spawn(self, args: list[str], **kwargs) -> portage.process.Process:
+ def _spawn(
+ self, args: list[str], **kwargs
+ ) -> portage.process.MultiprocessingProcess:
spawn_func = portage.process.spawn
if self._selinux_type is not None:
diff --git a/lib/portage/process.py b/lib/portage/process.py
index 01426179d7..d64ffa924f 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -19,7 +19,7 @@ import warnings
from dataclasses import dataclass
from functools import lru_cache
-from typing import Any, Optional, Callable
+from typing import Any, Optional, Callable, Union
from portage import os
from portage import _encodings
@@ -28,6 +28,7 @@ import portage
portage.proxy.lazyimport.lazyimport(
globals(),
+ "portage.util._async.ForkProcess:ForkProcess",
"portage.util._eventloop.global_event_loop:global_event_loop",
"portage.util.futures:asyncio",
"portage.util:dump_traceback,writemsg,writemsg_level",
@@ -296,12 +297,19 @@ class AbstractProcess:
class Process(AbstractProcess):
"""
- An object that wraps OS processes created by spawn.
- In the future, spawn will return objects of a different type
- but with a compatible interface to this class, in order
- to encapsulate implementation-dependent objects like
- multiprocessing.Process which are designed to manage
- the process lifecycle and need to persist until it exits.
+ An object that wraps OS processes which do not have an
+ associated multiprocessing.Process instance. Ultimately,
+ we need to stop using os.fork() to create these processes
+ because it is unsafe for threaded processes as discussed
+ in https://github.com/python/cpython/issues/84559.
+
+ Note that if subprocess.Popen is used without pass_fds
+ or preexec_fn parameters, then it avoids using os.fork()
+ by instead using posix_spawn. This approach is not used
+ by spawn because it needs to execute python code prior
+ to exec, so it instead uses multiprocessing.Process,
+ which only uses os.fork() when the multiprocessing start
+ method is fork.
"""
def __init__(self, pid: int):
@@ -461,7 +469,7 @@ def spawn(
unshare_mount=False,
unshare_pid=False,
warn_on_large_env=False,
-):
+) -> Union[int, MultiprocessingProcess, list[int]]:
"""
Spawns a given command.
@@ -479,8 +487,8 @@ def spawn(
@param returnpid: Return the Process IDs for a successful spawn.
NOTE: This requires the caller clean up all the PIDs, otherwise spawn will clean them.
@type returnpid: Boolean
- @param returnproc: Return a Process object for a successful spawn (conflicts with logfile parameter).
- NOTE: This requires the caller to asynchronously wait for the Process.
+ @param returnproc: Return a MultiprocessingProcess instance (conflicts with logfile parameter).
+ NOTE: This requires the caller to asynchronously wait for the MultiprocessingProcess instance.
@type returnproc: Boolean
@param uid: User ID to spawn as; useful for dropping privilages
@type uid: Integer
@@ -623,7 +631,9 @@ def spawn(
# fork, so that the result is cached in the main process.
bool(groups)
- pid = _start_fork(
+ start_func = _start_proc if returnproc else _start_fork
+
+ pid = start_func(
_exec_wrapper,
args=(
binary,
@@ -649,6 +659,10 @@ def spawn(
close_fds=close_fds,
)
+ if returnproc:
+ # _start_proc returns a MultiprocessingProcess instance.
+ return pid
+
if not isinstance(pid, int):
raise AssertionError(f"fork returned non-integer: {repr(pid)}")
@@ -670,8 +684,6 @@ def spawn(
stacklevel=1,
)
return mypids
- if returnproc:
- return Process(mypids[0])
# Otherwise we clean them up.
while mypids:
@@ -1370,6 +1382,40 @@ def _start_fork(
return pid
+def _start_proc(
+ target: Callable[..., None],
+ args: Optional[tuple[Any, ...]] = (),
+ kwargs: Optional[dict[str, Any]] = {},
+ fd_pipes: Optional[dict[int, int]] = None,
+ close_fds: Optional[bool] = False,
+) -> MultiprocessingProcess:
+ """
+ Execute the target function using multiprocess.Process.
+ If the close_fds parameter is True then NotImplementedError
+ is raised, since it is risky to forcefully close file
+ descriptors that have references (bug 374335), and PEP 446
+ should ensure that any relevant file descriptors are
+ non-inheritable and therefore automatically closed on exec.
+ """
+ if close_fds:
+ raise NotImplementedError(
+ "close_fds is not supported (since file descriptors are non-inheritable by default for exec)"
+ )
+
+ proc = ForkProcess(
+ scheduler=global_event_loop(),
+ target=target,
+ args=args,
+ kwargs=kwargs,
+ fd_pipes=fd_pipes,
+ )
+ proc.start()
+
+ # ForkProcess conveniently holds a MultiprocessingProcess
+ # instance that is suitable to return here.
+ return proc._proc
+
+
def find_binary(binary):
"""
Given a binary name, find the binary in PATH
diff --git a/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py b/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
index 678486ed16..c3b47d4e89 100644
--- a/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
+++ b/lib/portage/tests/ebuild/test_doebuild_fd_pipes.py
@@ -1,4 +1,4 @@
-# Copyright 2013-2023 Gentoo Authors
+# Copyright 2013-2024 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
import multiprocessing
@@ -167,6 +167,10 @@ class DoebuildFdPipesTestCase(TestCase):
@staticmethod
def _doebuild(db, pw, *args, **kwargs):
QueryCommand._db = db
+ # Somehow pw.fileno() is inheritable when doebuild is
+ # implemented with os.fork(), but not when it is implemented
+ # with multiprocessing.Process.
+ os.set_inheritable(pw.fileno(), True)
kwargs["fd_pipes"] = {
DoebuildFdPipesTestCase.output_fd: pw.fileno(),
}
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2024-02-03 22:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-02-03 22:54 [gentoo-commits] proj/portage:master commit in: lib/_emerge/, lib/portage/, lib/portage/tests/ebuild/ Zac Medico
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox