* [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334)
@ 2020-08-08 4:08 Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08 4:08 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Use a separate connection instance for each pid, since
it is not safe to use a connection created in a parent
process.
See: https://www.sqlite.org/howtocorrupt.html
Bug: https://bugs.gentoo.org/736334
Zac Medico (3):
Add cached portage.getpid() function
sqlite: add lazy connection init
sqlite: fork safety (bug 736334)
lib/portage/__init__.py | 14 +++++++++
lib/portage/cache/sqlite.py | 30 +++++++++++++++----
lib/portage/tests/dbapi/test_auxdb.py | 13 ++++++--
.../tests/process/test_AsyncFunction.py | 24 +++++++++++++++
4 files changed, 74 insertions(+), 7 deletions(-)
--
2.25.3
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function
2020-08-08 4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
@ 2020-08-08 4:08 ` Zac Medico
2020-08-08 5:51 ` Michał Górny
2020-08-08 6:07 ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico
2 siblings, 2 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08 4:08 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Since getpid is a syscall, cache results, and update them
via an after fork hook.
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
lib/portage/__init__.py | 14 +++++++++++
.../tests/process/test_AsyncFunction.py | 24 +++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
index 916c93510..52902ba7d 100644
--- a/lib/portage/__init__.py
+++ b/lib/portage/__init__.py
@@ -14,6 +14,7 @@ try:
if not hasattr(errno, 'ESTALE'):
# ESTALE may not be defined on some systems, such as interix.
errno.ESTALE = -1
+ import multiprocessing.util
import re
import types
import platform
@@ -368,6 +369,19 @@ _internal_caller = False
_sync_mode = False
+def _fork_watcher(_fork_watcher):
+ _fork_watcher.current_pid = _os.getpid()
+
+_fork_watcher(_fork_watcher)
+
+multiprocessing.util.register_after_fork(_fork_watcher, _fork_watcher)
+
+def getpid():
+ """
+ Cached version of os.getpid(). ForkProcess updates the cache.
+ """
+ return _fork_watcher.current_pid
+
def _get_stdin():
"""
Buggy code in python's multiprocessing/process.py closes sys.stdin
diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
index 55857026d..3b360e02f 100644
--- a/lib/portage/tests/process/test_AsyncFunction.py
+++ b/lib/portage/tests/process/test_AsyncFunction.py
@@ -3,6 +3,7 @@
import sys
+import portage
from portage import os
from portage.tests import TestCase
from portage.util._async.AsyncFunction import AsyncFunction
@@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
def testAsyncFunctionStdin(self):
loop = asyncio._wrap_loop()
loop.run_until_complete(self._testAsyncFunctionStdin(loop))
+
+ def _test_getpid_fork(self):
+ """
+ Verify that portage.getpid() cache is updated in a forked child process.
+ """
+ loop = asyncio._wrap_loop()
+ proc = AsyncFunction(scheduler=loop, target=portage.getpid)
+ proc.start()
+ proc.wait()
+ self.assertEqual(proc.pid, proc.result)
+
+ def test_getpid_fork(self):
+ self._test_getpid_fork()
+
+ def test_getpid_double_fork(self):
+ """
+ Verify that portage.getpid() cache is updated correctly after
+ two forks.
+ """
+ loop = asyncio._wrap_loop()
+ proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
+ proc.start()
+ self.assertEqual(proc.wait(), 0)
--
2.25.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init
2020-08-08 4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
@ 2020-08-08 4:08 ` Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico
2 siblings, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08 4:08 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
lib/portage/cache/sqlite.py | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/lib/portage/cache/sqlite.py b/lib/portage/cache/sqlite.py
index 55ae8f0e5..0395dd516 100644
--- a/lib/portage/cache/sqlite.py
+++ b/lib/portage/cache/sqlite.py
@@ -1,6 +1,7 @@
# Copyright 1999-2020 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
+import collections
import re
from portage.cache import fs_template
@@ -23,6 +24,9 @@ class database(fs_template.FsBased):
# equation: cache_bytes = page_bytes * page_count
cache_bytes = 1024 * 1024 * 10
+ _connection_info_entry = collections.namedtuple('_connection_info_entry',
+ ('connection', 'cursor'))
+
def __init__(self, *args, **config):
super(database, self).__init__(*args, **config)
self._import_sqlite()
@@ -44,8 +48,8 @@ class database(fs_template.FsBased):
# Set longer timeout for throwing a "database is locked" exception.
# Default timeout in sqlite3 module is 5.0 seconds.
config.setdefault("timeout", 15)
- self._db_init_connection(config)
- self._db_init_structures()
+ self._config = config
+ self._db_connection_info = None
def _import_sqlite(self):
# sqlite3 is optional with >=python-2.5
@@ -65,7 +69,20 @@ class database(fs_template.FsBased):
s = str(s)
return "'%s'" % s.replace("'", "''")
- def _db_init_connection(self, config):
+ @property
+ def _db_cursor(self):
+ if self._db_connection_info is None:
+ self._db_init_connection()
+ return self._db_connection_info.cursor
+
+ @property
+ def _db_connection(self):
+ if self._db_connection_info is None:
+ self._db_init_connection()
+ return self._db_connection_info.connection
+
+ def _db_init_connection(self):
+ config = self._config
self._dbpath = self.location + ".sqlite"
#if os.path.exists(self._dbpath):
# os.unlink(self._dbpath)
@@ -74,14 +91,16 @@ class database(fs_template.FsBased):
try:
if not self.readonly:
self._ensure_dirs()
- self._db_connection = self._db_module.connect(
+ connection = self._db_module.connect(
database=_unicode_decode(self._dbpath), **connection_kwargs)
- self._db_cursor = self._db_connection.cursor()
+ cursor = connection.cursor()
+ self._db_connection_info = self._connection_info_entry(connection, cursor)
self._db_cursor.execute("PRAGMA encoding = %s" % self._db_escape_string("UTF-8"))
if not self.readonly and not self._ensure_access(self._dbpath):
raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
self._db_init_cache_size(config["cache_bytes"])
self._db_init_synchronous(config["synchronous"])
+ self._db_init_structures()
except self._db_error as e:
raise cache_errors.InitializationError(self.__class__, e)
--
2.25.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334)
2020-08-08 4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
@ 2020-08-08 4:08 ` Zac Medico
2 siblings, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08 4:08 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Use a separate connection instance for each pid, since
it is not safe to use a connection created in a parent
process.
See: https://www.sqlite.org/howtocorrupt.html
Bug: https://bugs.gentoo.org/736334
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
lib/portage/cache/sqlite.py | 9 +++++----
lib/portage/tests/dbapi/test_auxdb.py | 13 +++++++++++--
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/portage/cache/sqlite.py b/lib/portage/cache/sqlite.py
index 0395dd516..36a4f049e 100644
--- a/lib/portage/cache/sqlite.py
+++ b/lib/portage/cache/sqlite.py
@@ -4,6 +4,7 @@
import collections
import re
+import portage
from portage.cache import fs_template
from portage.cache import cache_errors
from portage import os
@@ -25,7 +26,7 @@ class database(fs_template.FsBased):
cache_bytes = 1024 * 1024 * 10
_connection_info_entry = collections.namedtuple('_connection_info_entry',
- ('connection', 'cursor'))
+ ('connection', 'cursor', 'pid'))
def __init__(self, *args, **config):
super(database, self).__init__(*args, **config)
@@ -71,13 +72,13 @@ class database(fs_template.FsBased):
@property
def _db_cursor(self):
- if self._db_connection_info is None:
+ if self._db_connection_info is None or self._db_connection_info.pid != portage.getpid():
self._db_init_connection()
return self._db_connection_info.cursor
@property
def _db_connection(self):
- if self._db_connection_info is None:
+ if self._db_connection_info is None or self._db_connection_info.pid != portage.getpid():
self._db_init_connection()
return self._db_connection_info.connection
@@ -94,7 +95,7 @@ class database(fs_template.FsBased):
connection = self._db_module.connect(
database=_unicode_decode(self._dbpath), **connection_kwargs)
cursor = connection.cursor()
- self._db_connection_info = self._connection_info_entry(connection, cursor)
+ self._db_connection_info = self._connection_info_entry(connection, cursor, portage.getpid())
self._db_cursor.execute("PRAGMA encoding = %s" % self._db_escape_string("UTF-8"))
if not self.readonly and not self._ensure_access(self._dbpath):
raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
diff --git a/lib/portage/tests/dbapi/test_auxdb.py b/lib/portage/tests/dbapi/test_auxdb.py
index 5c79357d7..7865c3564 100644
--- a/lib/portage/tests/dbapi/test_auxdb.py
+++ b/lib/portage/tests/dbapi/test_auxdb.py
@@ -4,7 +4,8 @@
from portage.tests import TestCase
from portage.tests.resolver.ResolverPlayground import ResolverPlayground
from portage.util.futures import asyncio
-from portage.util.futures.compat_coroutine import coroutine
+from portage.util.futures.compat_coroutine import coroutine, coroutine_return
+from portage.util.futures.executor.fork import ForkExecutor
class AuxdbTestCase(TestCase):
@@ -61,8 +62,14 @@ class AuxdbTestCase(TestCase):
portdb = playground.trees[playground.eroot]["porttree"].dbapi
+ def test_func():
+ return asyncio._wrap_loop().run_until_complete(self._test_mod_async(
+ ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb))
+
+ self.assertTrue(test_func())
+
loop = asyncio._wrap_loop()
- loop.run_until_complete(self._test_mod_async(ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb))
+ self.assertTrue(loop.run_until_complete(loop.run_in_executor(ForkExecutor(), test_func)))
@coroutine
def _test_mod_async(self, ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb):
@@ -73,3 +80,5 @@ class AuxdbTestCase(TestCase):
self.assertEqual(depend, eclass_depend)
self.assertEqual(eapi, metadata['EAPI'])
self.assertEqual(frozenset(inherited.split()), ebuild_inherited)
+
+ coroutine_return(True)
--
2.25.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
@ 2020-08-08 5:51 ` Michał Górny
2020-08-08 6:07 ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
1 sibling, 0 replies; 6+ messages in thread
From: Michał Górny @ 2020-08-08 5:51 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
[-- Attachment #1: Type: text/plain, Size: 2871 bytes --]
On Fri, 2020-08-07 at 21:08 -0700, Zac Medico wrote:
> Since getpid is a syscall, cache results, and update them
> via an after fork hook.
>
> Signed-off-by: Zac Medico <zmedico@gentoo.org>
> ---
> lib/portage/__init__.py | 14 +++++++++++
> .../tests/process/test_AsyncFunction.py | 24 +++++++++++++++++++
> 2 files changed, 38 insertions(+)
>
> diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
> index 916c93510..52902ba7d 100644
> --- a/lib/portage/__init__.py
> +++ b/lib/portage/__init__.py
> @@ -14,6 +14,7 @@ try:
> if not hasattr(errno, 'ESTALE'):
> # ESTALE may not be defined on some systems, such as interix.
> errno.ESTALE = -1
> + import multiprocessing.util
> import re
> import types
> import platform
> @@ -368,6 +369,19 @@ _internal_caller = False
>
> _sync_mode = False
>
> +def _fork_watcher(_fork_watcher):
> + _fork_watcher.current_pid = _os.getpid()
I don't really like the idea of putting variables on functions. Would
there be any problem with using a proper class here?
> +
> +_fork_watcher(_fork_watcher)
> +
> +multiprocessing.util.register_after_fork(_fork_watcher, _fork_watcher)
> +
> +def getpid():
> + """
> + Cached version of os.getpid(). ForkProcess updates the cache.
> + """
> + return _fork_watcher.current_pid
> +
> def _get_stdin():
> """
> Buggy code in python's multiprocessing/process.py closes sys.stdin
> diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
> index 55857026d..3b360e02f 100644
> --- a/lib/portage/tests/process/test_AsyncFunction.py
> +++ b/lib/portage/tests/process/test_AsyncFunction.py
> @@ -3,6 +3,7 @@
>
> import sys
>
> +import portage
> from portage import os
> from portage.tests import TestCase
> from portage.util._async.AsyncFunction import AsyncFunction
> @@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
> def testAsyncFunctionStdin(self):
> loop = asyncio._wrap_loop()
> loop.run_until_complete(self._testAsyncFunctionStdin(loop))
> +
> + def _test_getpid_fork(self):
> + """
> + Verify that portage.getpid() cache is updated in a forked child process.
> + """
> + loop = asyncio._wrap_loop()
> + proc = AsyncFunction(scheduler=loop, target=portage.getpid)
> + proc.start()
> + proc.wait()
> + self.assertEqual(proc.pid, proc.result)
> +
> + def test_getpid_fork(self):
> + self._test_getpid_fork()
> +
> + def test_getpid_double_fork(self):
> + """
> + Verify that portage.getpid() cache is updated correctly after
> + two forks.
> + """
> + loop = asyncio._wrap_loop()
> + proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
> + proc.start()
> + self.assertEqual(proc.wait(), 0)
--
Best regards,
Michał Górny
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 618 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* [gentoo-portage-dev] [PATCH 1/3 v2] Add cached portage.getpid() function
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
2020-08-08 5:51 ` Michał Górny
@ 2020-08-08 6:07 ` Zac Medico
1 sibling, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08 6:07 UTC (permalink / raw
To: gentoo-portage-dev; +Cc: Zac Medico
Since getpid is a syscall, cache results, and update them
via an after fork hook.
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
lib/portage/__init__.py | 16 +++++++++++++
.../tests/process/test_AsyncFunction.py | 24 +++++++++++++++++++
2 files changed, 40 insertions(+)
diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
index 916c93510..4d4b590a8 100644
--- a/lib/portage/__init__.py
+++ b/lib/portage/__init__.py
@@ -14,6 +14,7 @@ try:
if not hasattr(errno, 'ESTALE'):
# ESTALE may not be defined on some systems, such as interix.
errno.ESTALE = -1
+ import multiprocessing.util
import re
import types
import platform
@@ -368,6 +369,21 @@ _internal_caller = False
_sync_mode = False
+class _ForkWatcher:
+ @staticmethod
+ def hook(_ForkWatcher):
+ _ForkWatcher.current_pid = _os.getpid()
+
+_ForkWatcher.hook(_ForkWatcher)
+
+multiprocessing.util.register_after_fork(_ForkWatcher, _ForkWatcher.hook)
+
+def getpid():
+ """
+ Cached version of os.getpid(). ForkProcess updates the cache.
+ """
+ return _ForkWatcher.current_pid
+
def _get_stdin():
"""
Buggy code in python's multiprocessing/process.py closes sys.stdin
diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
index 55857026d..3b360e02f 100644
--- a/lib/portage/tests/process/test_AsyncFunction.py
+++ b/lib/portage/tests/process/test_AsyncFunction.py
@@ -3,6 +3,7 @@
import sys
+import portage
from portage import os
from portage.tests import TestCase
from portage.util._async.AsyncFunction import AsyncFunction
@@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
def testAsyncFunctionStdin(self):
loop = asyncio._wrap_loop()
loop.run_until_complete(self._testAsyncFunctionStdin(loop))
+
+ def _test_getpid_fork(self):
+ """
+ Verify that portage.getpid() cache is updated in a forked child process.
+ """
+ loop = asyncio._wrap_loop()
+ proc = AsyncFunction(scheduler=loop, target=portage.getpid)
+ proc.start()
+ proc.wait()
+ self.assertEqual(proc.pid, proc.result)
+
+ def test_getpid_fork(self):
+ self._test_getpid_fork()
+
+ def test_getpid_double_fork(self):
+ """
+ Verify that portage.getpid() cache is updated correctly after
+ two forks.
+ """
+ loop = asyncio._wrap_loop()
+ proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
+ proc.start()
+ self.assertEqual(proc.wait(), 0)
--
2.25.3
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2020-08-08 6:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-08-08 4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
2020-08-08 5:51 ` Michał Górny
2020-08-08 6:07 ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
2020-08-08 4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox