public inbox for gentoo-portage-dev@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334)
@ 2020-08-08  4:08 Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08  4:08 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

Use a separate connection instance for each pid, since
it is not safe to use a connection created in a parent
process.

See: https://www.sqlite.org/howtocorrupt.html
Bug: https://bugs.gentoo.org/736334

Zac Medico (3):
  Add cached portage.getpid() function
  sqlite: add lazy connection init
  sqlite: fork safety (bug 736334)

 lib/portage/__init__.py                       | 14 +++++++++
 lib/portage/cache/sqlite.py                   | 30 +++++++++++++++----
 lib/portage/tests/dbapi/test_auxdb.py         | 13 ++++++--
 .../tests/process/test_AsyncFunction.py       | 24 +++++++++++++++
 4 files changed, 74 insertions(+), 7 deletions(-)

-- 
2.25.3



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function
  2020-08-08  4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
@ 2020-08-08  4:08 ` Zac Medico
  2020-08-08  5:51   ` Michał Górny
  2020-08-08  6:07   ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico
  2 siblings, 2 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08  4:08 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

Since getpid is a syscall, cache results, and update them
via an after fork hook.

Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
 lib/portage/__init__.py                       | 14 +++++++++++
 .../tests/process/test_AsyncFunction.py       | 24 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
index 916c93510..52902ba7d 100644
--- a/lib/portage/__init__.py
+++ b/lib/portage/__init__.py
@@ -14,6 +14,7 @@ try:
 	if not hasattr(errno, 'ESTALE'):
 		# ESTALE may not be defined on some systems, such as interix.
 		errno.ESTALE = -1
+	import multiprocessing.util
 	import re
 	import types
 	import platform
@@ -368,6 +369,19 @@ _internal_caller = False
 
 _sync_mode = False
 
+def _fork_watcher(_fork_watcher):
+	_fork_watcher.current_pid = _os.getpid()
+
+_fork_watcher(_fork_watcher)
+
+multiprocessing.util.register_after_fork(_fork_watcher, _fork_watcher)
+
+def getpid():
+	"""
+	Cached version of os.getpid(). ForkProcess updates the cache.
+	"""
+	return _fork_watcher.current_pid
+
 def _get_stdin():
 	"""
 	Buggy code in python's multiprocessing/process.py closes sys.stdin
diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
index 55857026d..3b360e02f 100644
--- a/lib/portage/tests/process/test_AsyncFunction.py
+++ b/lib/portage/tests/process/test_AsyncFunction.py
@@ -3,6 +3,7 @@
 
 import sys
 
+import portage
 from portage import os
 from portage.tests import TestCase
 from portage.util._async.AsyncFunction import AsyncFunction
@@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
 	def testAsyncFunctionStdin(self):
 		loop = asyncio._wrap_loop()
 		loop.run_until_complete(self._testAsyncFunctionStdin(loop))
+
+	def _test_getpid_fork(self):
+		"""
+		Verify that portage.getpid() cache is updated in a forked child process.
+		"""
+		loop = asyncio._wrap_loop()
+		proc = AsyncFunction(scheduler=loop, target=portage.getpid)
+		proc.start()
+		proc.wait()
+		self.assertEqual(proc.pid, proc.result)
+
+	def test_getpid_fork(self):
+		self._test_getpid_fork()
+
+	def test_getpid_double_fork(self):
+		"""
+		Verify that portage.getpid() cache is updated correctly after
+		two forks.
+		"""
+		loop = asyncio._wrap_loop()
+		proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
+		proc.start()
+		self.assertEqual(proc.wait(), 0)
-- 
2.25.3



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init
  2020-08-08  4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
@ 2020-08-08  4:08 ` Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico
  2 siblings, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08  4:08 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
 lib/portage/cache/sqlite.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/lib/portage/cache/sqlite.py b/lib/portage/cache/sqlite.py
index 55ae8f0e5..0395dd516 100644
--- a/lib/portage/cache/sqlite.py
+++ b/lib/portage/cache/sqlite.py
@@ -1,6 +1,7 @@
 # Copyright 1999-2020 Gentoo Authors
 # Distributed under the terms of the GNU General Public License v2
 
+import collections
 import re
 
 from portage.cache import fs_template
@@ -23,6 +24,9 @@ class database(fs_template.FsBased):
 	# equation: cache_bytes = page_bytes * page_count
 	cache_bytes = 1024 * 1024 * 10
 
+	_connection_info_entry = collections.namedtuple('_connection_info_entry',
+		('connection', 'cursor'))
+
 	def __init__(self, *args, **config):
 		super(database, self).__init__(*args, **config)
 		self._import_sqlite()
@@ -44,8 +48,8 @@ class database(fs_template.FsBased):
 		# Set longer timeout for throwing a "database is locked" exception.
 		# Default timeout in sqlite3 module is 5.0 seconds.
 		config.setdefault("timeout", 15)
-		self._db_init_connection(config)
-		self._db_init_structures()
+		self._config = config
+		self._db_connection_info = None
 
 	def _import_sqlite(self):
 		# sqlite3 is optional with >=python-2.5
@@ -65,7 +69,20 @@ class database(fs_template.FsBased):
 			s = str(s)
 		return "'%s'" % s.replace("'", "''")
 
-	def _db_init_connection(self, config):
+	@property
+	def _db_cursor(self):
+		if self._db_connection_info is None:
+			self._db_init_connection()
+		return self._db_connection_info.cursor
+
+	@property
+	def _db_connection(self):
+		if self._db_connection_info is None:
+			self._db_init_connection()
+		return self._db_connection_info.connection
+
+	def _db_init_connection(self):
+		config = self._config
 		self._dbpath = self.location + ".sqlite"
 		#if os.path.exists(self._dbpath):
 		#	os.unlink(self._dbpath)
@@ -74,14 +91,16 @@ class database(fs_template.FsBased):
 		try:
 			if not self.readonly:
 				self._ensure_dirs()
-			self._db_connection = self._db_module.connect(
+			connection = self._db_module.connect(
 				database=_unicode_decode(self._dbpath), **connection_kwargs)
-			self._db_cursor = self._db_connection.cursor()
+			cursor = connection.cursor()
+			self._db_connection_info = self._connection_info_entry(connection, cursor)
 			self._db_cursor.execute("PRAGMA encoding = %s" % self._db_escape_string("UTF-8"))
 			if not self.readonly and not self._ensure_access(self._dbpath):
 				raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
 			self._db_init_cache_size(config["cache_bytes"])
 			self._db_init_synchronous(config["synchronous"])
+			self._db_init_structures()
 		except self._db_error as e:
 			raise cache_errors.InitializationError(self.__class__, e)
 
-- 
2.25.3



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334)
  2020-08-08  4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
@ 2020-08-08  4:08 ` Zac Medico
  2 siblings, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08  4:08 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

Use a separate connection instance for each pid, since
it is not safe to use a connection created in a parent
process.

See: https://www.sqlite.org/howtocorrupt.html
Bug: https://bugs.gentoo.org/736334
Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
 lib/portage/cache/sqlite.py           |  9 +++++----
 lib/portage/tests/dbapi/test_auxdb.py | 13 +++++++++++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/lib/portage/cache/sqlite.py b/lib/portage/cache/sqlite.py
index 0395dd516..36a4f049e 100644
--- a/lib/portage/cache/sqlite.py
+++ b/lib/portage/cache/sqlite.py
@@ -4,6 +4,7 @@
 import collections
 import re
 
+import portage
 from portage.cache import fs_template
 from portage.cache import cache_errors
 from portage import os
@@ -25,7 +26,7 @@ class database(fs_template.FsBased):
 	cache_bytes = 1024 * 1024 * 10
 
 	_connection_info_entry = collections.namedtuple('_connection_info_entry',
-		('connection', 'cursor'))
+		('connection', 'cursor', 'pid'))
 
 	def __init__(self, *args, **config):
 		super(database, self).__init__(*args, **config)
@@ -71,13 +72,13 @@ class database(fs_template.FsBased):
 
 	@property
 	def _db_cursor(self):
-		if self._db_connection_info is None:
+		if self._db_connection_info is None or self._db_connection_info.pid != portage.getpid():
 			self._db_init_connection()
 		return self._db_connection_info.cursor
 
 	@property
 	def _db_connection(self):
-		if self._db_connection_info is None:
+		if self._db_connection_info is None or self._db_connection_info.pid != portage.getpid():
 			self._db_init_connection()
 		return self._db_connection_info.connection
 
@@ -94,7 +95,7 @@ class database(fs_template.FsBased):
 			connection = self._db_module.connect(
 				database=_unicode_decode(self._dbpath), **connection_kwargs)
 			cursor = connection.cursor()
-			self._db_connection_info = self._connection_info_entry(connection, cursor)
+			self._db_connection_info = self._connection_info_entry(connection, cursor, portage.getpid())
 			self._db_cursor.execute("PRAGMA encoding = %s" % self._db_escape_string("UTF-8"))
 			if not self.readonly and not self._ensure_access(self._dbpath):
 				raise cache_errors.InitializationError(self.__class__, "can't ensure perms on %s" % self._dbpath)
diff --git a/lib/portage/tests/dbapi/test_auxdb.py b/lib/portage/tests/dbapi/test_auxdb.py
index 5c79357d7..7865c3564 100644
--- a/lib/portage/tests/dbapi/test_auxdb.py
+++ b/lib/portage/tests/dbapi/test_auxdb.py
@@ -4,7 +4,8 @@
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.util.futures import asyncio
-from portage.util.futures.compat_coroutine import coroutine
+from portage.util.futures.compat_coroutine import coroutine, coroutine_return
+from portage.util.futures.executor.fork import ForkExecutor
 
 
 class AuxdbTestCase(TestCase):
@@ -61,8 +62,14 @@ class AuxdbTestCase(TestCase):
 
 		portdb = playground.trees[playground.eroot]["porttree"].dbapi
 
+		def test_func():
+			return asyncio._wrap_loop().run_until_complete(self._test_mod_async(
+				ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb))
+
+		self.assertTrue(test_func())
+
 		loop = asyncio._wrap_loop()
-		loop.run_until_complete(self._test_mod_async(ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb))
+		self.assertTrue(loop.run_until_complete(loop.run_in_executor(ForkExecutor(), test_func)))
 
 	@coroutine
 	def _test_mod_async(self, ebuilds, ebuild_inherited, eclass_defined_phases, eclass_depend, portdb):
@@ -73,3 +80,5 @@ class AuxdbTestCase(TestCase):
 			self.assertEqual(depend, eclass_depend)
 			self.assertEqual(eapi, metadata['EAPI'])
 			self.assertEqual(frozenset(inherited.split()), ebuild_inherited)
+
+		coroutine_return(True)
-- 
2.25.3



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
@ 2020-08-08  5:51   ` Michał Górny
  2020-08-08  6:07   ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
  1 sibling, 0 replies; 6+ messages in thread
From: Michał Górny @ 2020-08-08  5:51 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

[-- Attachment #1: Type: text/plain, Size: 2871 bytes --]

On Fri, 2020-08-07 at 21:08 -0700, Zac Medico wrote:
> Since getpid is a syscall, cache results, and update them
> via an after fork hook.
> 
> Signed-off-by: Zac Medico <zmedico@gentoo.org>
> ---
>  lib/portage/__init__.py                       | 14 +++++++++++
>  .../tests/process/test_AsyncFunction.py       | 24 +++++++++++++++++++
>  2 files changed, 38 insertions(+)
> 
> diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
> index 916c93510..52902ba7d 100644
> --- a/lib/portage/__init__.py
> +++ b/lib/portage/__init__.py
> @@ -14,6 +14,7 @@ try:
>  	if not hasattr(errno, 'ESTALE'):
>  		# ESTALE may not be defined on some systems, such as interix.
>  		errno.ESTALE = -1
> +	import multiprocessing.util
>  	import re
>  	import types
>  	import platform
> @@ -368,6 +369,19 @@ _internal_caller = False
>  
>  _sync_mode = False
>  
> +def _fork_watcher(_fork_watcher):
> +	_fork_watcher.current_pid = _os.getpid()

I don't really like the idea of putting variables on functions.  Would
there be any problem with using a proper class here?

> +
> +_fork_watcher(_fork_watcher)
> +
> +multiprocessing.util.register_after_fork(_fork_watcher, _fork_watcher)
> +
> +def getpid():
> +	"""
> +	Cached version of os.getpid(). ForkProcess updates the cache.
> +	"""
> +	return _fork_watcher.current_pid
> +
>  def _get_stdin():
>  	"""
>  	Buggy code in python's multiprocessing/process.py closes sys.stdin
> diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
> index 55857026d..3b360e02f 100644
> --- a/lib/portage/tests/process/test_AsyncFunction.py
> +++ b/lib/portage/tests/process/test_AsyncFunction.py
> @@ -3,6 +3,7 @@
>  
>  import sys
>  
> +import portage
>  from portage import os
>  from portage.tests import TestCase
>  from portage.util._async.AsyncFunction import AsyncFunction
> @@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
>  	def testAsyncFunctionStdin(self):
>  		loop = asyncio._wrap_loop()
>  		loop.run_until_complete(self._testAsyncFunctionStdin(loop))
> +
> +	def _test_getpid_fork(self):
> +		"""
> +		Verify that portage.getpid() cache is updated in a forked child process.
> +		"""
> +		loop = asyncio._wrap_loop()
> +		proc = AsyncFunction(scheduler=loop, target=portage.getpid)
> +		proc.start()
> +		proc.wait()
> +		self.assertEqual(proc.pid, proc.result)
> +
> +	def test_getpid_fork(self):
> +		self._test_getpid_fork()
> +
> +	def test_getpid_double_fork(self):
> +		"""
> +		Verify that portage.getpid() cache is updated correctly after
> +		two forks.
> +		"""
> +		loop = asyncio._wrap_loop()
> +		proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
> +		proc.start()
> +		self.assertEqual(proc.wait(), 0)

-- 
Best regards,
Michał Górny


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 618 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [gentoo-portage-dev] [PATCH 1/3 v2] Add cached portage.getpid() function
  2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
  2020-08-08  5:51   ` Michał Górny
@ 2020-08-08  6:07   ` Zac Medico
  1 sibling, 0 replies; 6+ messages in thread
From: Zac Medico @ 2020-08-08  6:07 UTC (permalink / raw
  To: gentoo-portage-dev; +Cc: Zac Medico

Since getpid is a syscall, cache results, and update them
via an after fork hook.

Signed-off-by: Zac Medico <zmedico@gentoo.org>
---
 lib/portage/__init__.py                       | 16 +++++++++++++
 .../tests/process/test_AsyncFunction.py       | 24 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/lib/portage/__init__.py b/lib/portage/__init__.py
index 916c93510..4d4b590a8 100644
--- a/lib/portage/__init__.py
+++ b/lib/portage/__init__.py
@@ -14,6 +14,7 @@ try:
 	if not hasattr(errno, 'ESTALE'):
 		# ESTALE may not be defined on some systems, such as interix.
 		errno.ESTALE = -1
+	import multiprocessing.util
 	import re
 	import types
 	import platform
@@ -368,6 +369,21 @@ _internal_caller = False
 
 _sync_mode = False
 
+class _ForkWatcher:
+	@staticmethod
+	def hook(_ForkWatcher):
+		_ForkWatcher.current_pid = _os.getpid()
+
+_ForkWatcher.hook(_ForkWatcher)
+
+multiprocessing.util.register_after_fork(_ForkWatcher, _ForkWatcher.hook)
+
+def getpid():
+	"""
+	Cached version of os.getpid(). ForkProcess updates the cache.
+	"""
+	return _ForkWatcher.current_pid
+
 def _get_stdin():
 	"""
 	Buggy code in python's multiprocessing/process.py closes sys.stdin
diff --git a/lib/portage/tests/process/test_AsyncFunction.py b/lib/portage/tests/process/test_AsyncFunction.py
index 55857026d..3b360e02f 100644
--- a/lib/portage/tests/process/test_AsyncFunction.py
+++ b/lib/portage/tests/process/test_AsyncFunction.py
@@ -3,6 +3,7 @@
 
 import sys
 
+import portage
 from portage import os
 from portage.tests import TestCase
 from portage.util._async.AsyncFunction import AsyncFunction
@@ -36,3 +37,26 @@ class AsyncFunctionTestCase(TestCase):
 	def testAsyncFunctionStdin(self):
 		loop = asyncio._wrap_loop()
 		loop.run_until_complete(self._testAsyncFunctionStdin(loop))
+
+	def _test_getpid_fork(self):
+		"""
+		Verify that portage.getpid() cache is updated in a forked child process.
+		"""
+		loop = asyncio._wrap_loop()
+		proc = AsyncFunction(scheduler=loop, target=portage.getpid)
+		proc.start()
+		proc.wait()
+		self.assertEqual(proc.pid, proc.result)
+
+	def test_getpid_fork(self):
+		self._test_getpid_fork()
+
+	def test_getpid_double_fork(self):
+		"""
+		Verify that portage.getpid() cache is updated correctly after
+		two forks.
+		"""
+		loop = asyncio._wrap_loop()
+		proc = AsyncFunction(scheduler=loop, target=self._test_getpid_fork)
+		proc.start()
+		self.assertEqual(proc.wait(), 0)
-- 
2.25.3



^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-08-08  6:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-08-08  4:08 [gentoo-portage-dev] [PATCH 0/3] sqlite: fork safety (bug 736334) Zac Medico
2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 1/3] Add cached portage.getpid() function Zac Medico
2020-08-08  5:51   ` Michał Górny
2020-08-08  6:07   ` [gentoo-portage-dev] [PATCH 1/3 v2] " Zac Medico
2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 2/3] sqlite: add lazy connection init Zac Medico
2020-08-08  4:08 ` [gentoo-portage-dev] [PATCH 3/3] sqlite: fork safety (bug 736334) Zac Medico

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox