* [gentoo-commits] proj/portage:master commit in: lib/portage/util/, lib/portage/util/file_copy/, src/
@ 2024-03-15 20:06 Mike Gilbert
0 siblings, 0 replies; only message in thread
From: Mike Gilbert @ 2024-03-15 20:06 UTC (permalink / raw
To: gentoo-commits
commit: 23529ee81964665107400e87fc3d49c256e915c0
Author: Mike Gilbert <floppym <AT> gentoo <DOT> org>
AuthorDate: Fri Mar 1 15:45:58 2024 +0000
Commit: Mike Gilbert <floppym <AT> gentoo <DOT> org>
CommitDate: Fri Mar 15 20:05:34 2024 +0000
URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=23529ee8
Replace linux_reflink extension module
Python 3.8 added support for copy_file_range in the os module,
so we can just call that directly.
Also, we can use the FICLONE ioctl for fast file clones on supported
filesystems (btrfs).
Signed-off-by: Mike Gilbert <floppym <AT> gentoo.org>
lib/portage/util/file_copy.py | 137 ++++++++++
lib/portage/util/file_copy/__init__.py | 36 ---
lib/portage/util/file_copy/meson.build | 7 -
lib/portage/util/meson.build | 2 +-
src/meson.build | 20 --
src/portage_util_file_copy_reflink_linux.c | 396 -----------------------------
6 files changed, 138 insertions(+), 460 deletions(-)
diff --git a/lib/portage/util/file_copy.py b/lib/portage/util/file_copy.py
new file mode 100644
index 0000000000..e3926d8ef6
--- /dev/null
+++ b/lib/portage/util/file_copy.py
@@ -0,0 +1,137 @@
+# Copyright 2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+import errno
+import fcntl
+import logging
+import os
+import platform
+import shutil
+import sys
+
+
+logger = logging.getLogger(__name__)
+
+# Added in Python 3.12
+FICLONE = getattr(fcntl, "FICLONE", 0x40049409)
+
+# Unavailable in PyPy
+SEEK_DATA = getattr(os, "SEEK_DATA", 3)
+SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
+
+
def _get_chunks(src):
    """
    Generate (offset, length) tuples describing the data extents of an
    open file, located with lseek SEEK_DATA / SEEK_HOLE.

    A tuple whose length is 0 is only produced when the file ends in a
    hole; its offset is the end-of-file position. When SEEK_DATA is
    rejected with EINVAL, a single tuple spanning the whole file,
    (0, end-of-file), is produced instead.

    The file offset of src is moved as a side effect of the lseek calls.

    @param src: file descriptor of the source file
    @type src: int
    @rtype: generator of (int, int) tuples
    @raise OSError: for any lseek failure other than the handled
        ENXIO / EINVAL cases; the failure is logged before re-raising
    """
    try:
        position = 0
        while True:
            try:
                # Start of the next data extent at or after position
                data_start = os.lseek(src, position, SEEK_DATA)
            except OSError as err:
                # Only ENXIO and EINVAL are handled here
                if err.errno != errno.ENXIO and err.errno != errno.EINVAL:
                    raise

                file_end = os.lseek(src, 0, os.SEEK_END)

                if err.errno == errno.EINVAL:
                    # SEEK_DATA failed with EINVAL, return the whole file
                    yield (0, file_end)
                elif file_end > position:
                    # ENXIO means no more data: report the hole at end of file
                    yield (file_end, 0)

                return

            # The data extent runs up to the next hole
            position = os.lseek(src, data_start, SEEK_HOLE)
            yield (data_start, position - data_start)

    except OSError:
        logger.warning("_get_chunks failed unexpectedly", exc_info=sys.exc_info())
        raise
+
+
def _do_copy_file_range(src, dst, offset, count):
    """
    Copy count bytes from src to dst with os.copy_file_range, using the
    same starting offset in both files.

    @param src: file descriptor to read from
    @type src: int
    @param dst: file descriptor to write to
    @type dst: int
    @param offset: position at which to start in both files
    @type offset: int
    @param count: number of bytes to copy
    @type count: int
    @raise OSError: if os.copy_file_range fails, or with EOPNOTSUPP if
        it reports that zero bytes were copied
    """
    position = offset
    remaining = count
    while remaining > 0:
        # A single request must fit in ssize_t
        request = remaining if remaining < sys.maxsize else sys.maxsize
        copied = os.copy_file_range(src, dst, request, position, position)
        if not copied:
            # https://bugs.gentoo.org/828844
            raise OSError(errno.EOPNOTSUPP, os.strerror(errno.EOPNOTSUPP))
        position += copied
        remaining -= copied
+
+
+def _do_sendfile(src, dst, offset, count):
+ os.lseek(dst, offset, os.SEEK_SET)
+ while count > 0:
+ # count must fit in ssize_t
+ c = min(count, sys.maxsize)
+ written = os.sendfile(dst, src, offset, c)
+ offset += written
+ count -= written
+
+
def _fastcopy(src, dst):
    """
    Copy the contents of the file named src to the file named dst using
    in-kernel mechanisms.

    On Linux a FICLONE ioctl is attempted first. If that fails (or on
    other platforms), the chunks reported by _get_chunks are copied one
    by one, preferring os.copy_file_range and falling back to
    os.sendfile. Once copy_file_range has failed it is not tried again
    for the remaining chunks. A chunk of length 0 truncates dst to the
    chunk's offset.

    @param src: path of source file
    @type src: str
    @param dst: path of destination file
    @type dst: str
    @raise OSError: if the files cannot be opened, if _get_chunks fails,
        or if the sendfile fallback fails
    """
    with (
        open(src, "rb", buffering=0) as source,
        open(dst, "wb", buffering=0) as target,
    ):
        in_fd = source.fileno()
        out_fd = target.fileno()

        if platform.system() == "Linux":
            try:
                fcntl.ioctl(out_fd, FICLONE, in_fd)
            except OSError:
                # Clone failed; continue with chunk-wise copying
                pass
            else:
                return

        cfr_usable = hasattr(os, "copy_file_range")
        # errno values for which the copy_file_range failure is not logged
        quiet_errnos = (errno.EXDEV, errno.ENOSYS, errno.EOPNOTSUPP)

        for start, length in _get_chunks(in_fd):
            if length == 0:
                os.ftruncate(out_fd, start)
                continue

            if cfr_usable:
                try:
                    _do_copy_file_range(in_fd, out_fd, start, length)
                except OSError as err:
                    # Do not attempt copy_file_range for later chunks
                    cfr_usable = False
                    if err.errno not in quiet_errnos:
                        logger.warning(
                            "_do_copy_file_range failed unexpectedly",
                            exc_info=sys.exc_info(),
                        )
                else:
                    continue

            try:
                _do_sendfile(in_fd, out_fd, start, length)
            except OSError:
                logger.warning(
                    "_do_sendfile failed unexpectedly", exc_info=sys.exc_info()
                )
                raise
+
+
def copyfile(src, dst):
    """
    Copy the data of the file named src into a file named dst. File
    metadata is not copied.

    The copy is first attempted with _fastcopy. If that raises OSError,
    the copy is redone with shutil.copyfile.

    @param src: path of source file
    @type src: str
    @param dst: path of destination file
    @type dst: str
    """

    try:
        _fastcopy(src, dst)
    except OSError:
        # Fast path failed; fall back to the standard library copy
        shutil.copyfile(src, dst)
diff --git a/lib/portage/util/file_copy/__init__.py b/lib/portage/util/file_copy/__init__.py
deleted file mode 100644
index f88d4d9d59..0000000000
--- a/lib/portage/util/file_copy/__init__.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2017 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-
-import shutil
-
-try:
- from portage.util.file_copy.reflink_linux import file_copy as _file_copy
-except ImportError:
- _file_copy = None
-
-
-def _optimized_copyfile(src, dst):
- """
- Copy the contents (no metadata) of the file named src to a file
- named dst.
-
- If possible, copying is done within the kernel, and uses
- "copy acceleration" techniques (such as reflinks). This also
- supports sparse files.
-
- @param src: path of source file
- @type src: str
- @param dst: path of destination file
- @type dst: str
- """
- with (
- open(src, "rb", buffering=0) as src_file,
- open(dst, "wb", buffering=0) as dst_file,
- ):
- _file_copy(src_file.fileno(), dst_file.fileno())
-
-
-if _file_copy is None:
- copyfile = shutil.copyfile
-else:
- copyfile = _optimized_copyfile
diff --git a/lib/portage/util/file_copy/meson.build b/lib/portage/util/file_copy/meson.build
deleted file mode 100644
index 3e1d98f333..0000000000
--- a/lib/portage/util/file_copy/meson.build
+++ /dev/null
@@ -1,7 +0,0 @@
-py.install_sources(
- [
- '__init__.py',
- ],
- subdir : 'portage/util/file_copy',
- pure : not native_extensions
-)
diff --git a/lib/portage/util/meson.build b/lib/portage/util/meson.build
index b1e4a1c807..8a60617d6f 100644
--- a/lib/portage/util/meson.build
+++ b/lib/portage/util/meson.build
@@ -10,6 +10,7 @@ py.install_sources(
'cpuinfo.py',
'digraph.py',
'env_update.py',
+ 'file_copy.py',
'formatter.py',
'hooks.py',
'install_mask.py',
@@ -41,7 +42,6 @@ py.install_sources(
subdir('elf')
subdir('endian')
-subdir('file_copy')
subdir('futures')
subdir('iterators')
subdir('_async')
diff --git a/src/meson.build b/src/meson.build
index 6a36724ceb..0220e8d56b 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -19,23 +19,3 @@ run_command(
capture : false,
check : true
)
-
-if host_machine.system() == 'linux'
- reflink_ext = py.extension_module(
- 'reflink_linux',
- 'portage_util_file_copy_reflink_linux.c',
- dependencies : py.dependency(),
- subdir : 'portage' / 'util' / 'file_copy',
- install : true
- )
-
- run_command(
- [
- 'ln', '-srnf',
- reflink_ext.full_path(),
- meson.project_source_root() / 'lib' / 'portage' / 'util' / 'file_copy/'
- ],
- capture : false,
- check : true
- )
-endif
diff --git a/src/portage_util_file_copy_reflink_linux.c b/src/portage_util_file_copy_reflink_linux.c
deleted file mode 100644
index e98db3db88..0000000000
--- a/src/portage_util_file_copy_reflink_linux.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Copyright 2017-2023 Gentoo Authors
- * Distributed under the terms of the GNU General Public License v2
- */
-
-#include <Python.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <sys/sendfile.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-static PyObject * _reflink_linux_file_copy(PyObject *, PyObject *);
-
-static PyMethodDef reflink_linuxMethods[] = {
- {
- .ml_name = "file_copy",
- .ml_meth = _reflink_linux_file_copy,
- .ml_flags = METH_VARARGS,
- .ml_doc = "Copy between two file descriptors with reflink and sparse file support."
- },
- {NULL, NULL, 0, NULL}
-};
-
-static struct PyModuleDef moduledef = {
- PyModuleDef_HEAD_INIT,
- .m_name = "reflink_linux",
- .m_doc = "Module for reflink_linux copy operations",
- .m_size = -1,
- .m_methods = reflink_linuxMethods,
-};
-
-PyMODINIT_FUNC
-PyInit_reflink_linux(void)
-{
- PyObject *m;
- m = PyModule_Create(&moduledef);
- return m;
-}
-
-
-/**
- * cfr_wrapper - A copy_file_range syscall wrapper function, having a
- * function signature that is compatible with sf_wrapper.
- * @fd_out: output file descriptor
- * @fd_in: input file descriptor
- * @off_out: must point to a buffer that specifies the starting offset
- * where bytes will be copied to fd_out, and this buffer is adjusted by
- * the number of bytes copied.
- * @len: number of bytes to copy between the file descriptors
- *
- * Bytes are copied from fd_in starting from *off_out, and the file
- * offset of fd_in is not changed. Effects on the file offset of
- * fd_out are undefined.
- *
- * Return: Number of bytes written to out_fd on success, -1 on failure
- * (errno is set appropriately).
- */
-static ssize_t
-cfr_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
-{
-#ifdef __NR_copy_file_range
- off_t off_in = *off_out;
- return syscall(__NR_copy_file_range, fd_in, &off_in, fd_out,
- off_out, len, 0);
-#else
- /* This is how it fails at runtime when the syscall is not supported. */
- errno = ENOSYS;
- return -1;
-#endif
-}
-
-/**
- * sf_wrapper - A sendfile wrapper function, having a function signature
- * that is compatible with cfr_wrapper.
- * @fd_out: output file descriptor
- * @fd_in: input file descriptor
- * @off_out: must point to a buffer that specifies the starting offset
- * where bytes will be copied to fd_out, and this buffer is adjusted by
- * the number of bytes copied.
- * @len: number of bytes to copy between the file descriptors
- *
- * Bytes are copied from fd_in starting from *off_out, and the file
- * offset of fd_in is not changed. Effects on the file offset of
- * fd_out are undefined.
- *
- * Return: Number of bytes written to out_fd on success, -1 on failure
- * (errno is set appropriately).
- */
-static ssize_t
-sf_wrapper(int fd_out, int fd_in, off_t *off_out, size_t len)
-{
- ssize_t ret;
- off_t off_in = *off_out;
- /* The sendfile docs do not specify behavior of the output file
- * offset, therefore it must be adjusted with lseek.
- */
- if (lseek(fd_out, *off_out, SEEK_SET) < 0)
- return -1;
- ret = sendfile(fd_out, fd_in, &off_in, len);
- if (ret > 0)
- *off_out += ret;
- return ret;
-}
-
-
-/**
- * do_lseek_data - Adjust file offsets to the next location containing
- * data, creating sparse empty blocks in the output file as needed.
- * @fd_in: input file descriptor
- * @fd_out: output file descriptor
- * @off_out: offset of the output file
- *
- * Use lseek SEEK_DATA to adjust off_out to the next location from fd_in
- * containing data (creates sparse empty blocks when appropriate). Effects
- * on file offsets are undefined.
- *
- * Return: On success, the number of bytes to copy before the next hole,
- * and -1 on failure (errno is set appropriately). Returns 0 when fd_in
- * reaches EOF.
- */
-static off_t
-do_lseek_data(int fd_out, int fd_in, off_t *off_out) {
-#ifdef SEEK_DATA
- /* Use lseek SEEK_DATA/SEEK_HOLE for sparse file support,
- * as suggested in the copy_file_range man page.
- */
- off_t offset_data, offset_hole;
-
- offset_data = lseek(fd_in, *off_out, SEEK_DATA);
- if (offset_data < 0) {
- if (errno == ENXIO) {
- /* EOF - If the file ends with a hole, then use lseek SEEK_END
- * to find the end offset, and create sparse empty blocks in
- * the output file. It's the caller's responsibility to
- * truncate the file.
- */
- offset_hole = lseek(fd_in, 0, SEEK_END);
- if (offset_hole < 0) {
- return -1;
- } else if (offset_hole != *off_out) {
- if (lseek(fd_out, offset_hole, SEEK_SET) < 0) {
- return -1;
- }
- *off_out = offset_hole;
- }
- return 0;
- }
- return -1;
- }
-
- /* Create sparse empty blocks in the output file, up
- * until the next location that will contain data.
- */
- if (offset_data != *off_out) {
- if (lseek(fd_out, offset_data, SEEK_SET) < 0) {
- return -1;
- }
- *off_out = offset_data;
- }
-
- /* Locate the next hole, so that we know when to
- * stop copying. There is an implicit hole at the
- * end of the file. This should never result in ENXIO
- * after SEEK_DATA has succeeded above.
- */
- offset_hole = lseek(fd_in, offset_data, SEEK_HOLE);
- if (offset_hole < 0) {
- return -1;
- }
-
- return offset_hole - offset_data;
-#else
- /* This is how it fails at runtime when lseek SEEK_DATA is not supported. */
- errno = EINVAL;
- return -1;
-#endif
-}
-
-
-/**
- * _reflink_linux_file_copy - Copy between two file descriptors, with
- * reflink and sparse file support.
- * @fd_in: input file descriptor
- * @fd_out: output file descriptor
- *
- * When supported, this uses copy_file_range for reflink support,
- * and lseek SEEK_DATA for sparse file support. It has graceful
- * fallbacks when support is unavailable for copy_file_range, lseek
- * SEEK_DATA, or sendfile operations. When all else fails, it uses
- * a plain read/write loop that works in any kernel version.
- *
- * If a syscall is interrupted by a signal, then the function will
- * automatically resume copying a the appropriate location which is
- * tracked internally by the offset_out variable.
- *
- * Return: The length of the output file on success. Raise OSError
- * on failure.
- */
-static PyObject *
-_reflink_linux_file_copy(PyObject *self, PyObject *args)
-{
- int eintr_retry, error, fd_in, fd_out, stat_in_acquired, stat_out_acquired;
- int lseek_works, sendfile_works;
- off_t offset_out, len;
- ssize_t buf_bytes, buf_offset, copyfunc_ret;
- struct stat stat_in, stat_out;
- char* buf;
- ssize_t (*copyfunc)(int, int, off_t *, size_t);
-
- if (!PyArg_ParseTuple(args, "ii", &fd_in, &fd_out))
- return NULL;
-
- eintr_retry = 1;
- error = 0;
- offset_out = 0;
- stat_in_acquired = 0;
- stat_out_acquired = 0;
- buf = NULL;
- buf_bytes = 0;
- buf_offset = 0;
- copyfunc = cfr_wrapper;
- lseek_works = 1;
- sendfile_works = 1;
-
- while (eintr_retry) {
-
- Py_BEGIN_ALLOW_THREADS
-
- /* Linux 3.1 and later support SEEK_DATA (for sparse file support).
- * This code uses copy_file_range if possible, and falls back to
- * sendfile for cross-device or when the copy_file_range syscall
- * is not available (less than Linux 4.5). This will fail for
- * Linux less than 3.1, which does not support the lseek SEEK_DATA
- * parameter.
- */
- if (sendfile_works && lseek_works) {
- error = 0;
-
- while (1) {
- len = do_lseek_data(fd_out, fd_in, &offset_out);
- if (!len) {
- /* EOF */
- break;
- } else if (len < 0) {
- error = errno;
- if ((errno == EINVAL || errno == EOPNOTSUPP) && !offset_out) {
- lseek_works = 0;
- }
- break;
- }
-
- copyfunc_ret = copyfunc(fd_out,
- fd_in,
- &offset_out,
- len);
-
- if (copyfunc_ret <= 0) {
- error = errno;
- if ((errno == EXDEV || errno == ENOSYS || errno == EOPNOTSUPP || copyfunc_ret == 0) &&
- copyfunc == cfr_wrapper) {
- /* Use sendfile instead of copy_file_range for
- * cross-device copies, or when the copy_file_range
- * syscall is not available (less than Linux 4.5),
- * or when copy_file_range copies zero bytes.
- */
- error = 0;
- copyfunc = sf_wrapper;
- copyfunc_ret = copyfunc(fd_out,
- fd_in,
- &offset_out,
- len);
-
- if (copyfunc_ret < 0) {
- error = errno;
- /* On Linux, if lseek succeeded above, then
- * sendfile should have worked here too, so
- * don't bother to fallback for EINVAL here.
- */
- break;
- }
- } else {
- break;
- }
- }
- }
- }
-
- /* Less than Linux 3.1 does not support SEEK_DATA or copy_file_range,
- * so just use sendfile for in-kernel copy. This will fail for Linux
- * versions from 2.6.0 to 2.6.32, because sendfile does not support
- * writing to regular files.
- */
- if (sendfile_works && !lseek_works) {
- error = 0;
-
- if (!stat_in_acquired && fstat(fd_in, &stat_in) < 0) {
- error = errno;
- } else {
- stat_in_acquired = 1;
-
- while (offset_out < stat_in.st_size) {
- copyfunc_ret = sf_wrapper(fd_out,
- fd_in,
- &offset_out,
- stat_in.st_size - offset_out);
-
- if (copyfunc_ret < 0) {
- error = errno;
- if (errno == EINVAL && !offset_out) {
- sendfile_works = 0;
- }
- break;
- }
- }
- }
- }
-
- /* This implementation will work on any kernel. */
- if (!sendfile_works) {
- error = 0;
-
- if (!stat_out_acquired && fstat(fd_in, &stat_out) < 0) {
- error = errno;
- } else {
- stat_out_acquired = 1;
- if (buf == NULL)
- buf = malloc(stat_out.st_blksize);
- if (buf == NULL) {
- error = errno;
-
- /* For the read call, the fd_in file offset must be exactly
- * equal to offset_out + buf_bytes, where buf_bytes is the
- * amount of buffered data that has not been written to
- * to the output file yet. Use lseek to ensure correct state,
- * in case an EINTR retry caused it to get out of sync
- * somewhow.
- */
- } else if (lseek(fd_in, offset_out + buf_bytes, SEEK_SET) < 0) {
- error = errno;
- } else {
- while (1) {
- /* Some bytes may still be buffered from the
- * previous iteration of the outer loop.
- */
- if (!buf_bytes) {
- buf_offset = 0;
- buf_bytes = read(fd_in, buf, stat_out.st_blksize);
-
- if (!buf_bytes) {
- /* EOF */
- break;
-
- } else if (buf_bytes < 0) {
- error = errno;
- buf_bytes = 0;
- break;
- }
- }
-
- copyfunc_ret = write(fd_out,
- buf + buf_offset,
- buf_bytes);
-
- if (copyfunc_ret < 0) {
- error = errno;
- break;
- }
-
- buf_bytes -= copyfunc_ret;
- buf_offset += copyfunc_ret;
- offset_out += copyfunc_ret;
- }
- }
- }
- }
-
- if (!error && ftruncate(fd_out, offset_out) < 0)
- error = errno;
-
- Py_END_ALLOW_THREADS
-
- if (!(error == EINTR && PyErr_CheckSignals() == 0))
- eintr_retry = 0;
- }
-
- if (buf != NULL)
- free(buf);
-
- if (error)
- return PyErr_SetFromErrno(PyExc_OSError);
-
- return Py_BuildValue("i", offset_out);
-}
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2024-03-15 20:06 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-15 20:06 [gentoo-commits] proj/portage:master commit in: lib/portage/util/, lib/portage/util/file_copy/, src/ Mike Gilbert
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox