public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
* [gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/
@ 2024-04-28  0:04 Sam James
  0 siblings, 0 replies; 4+ messages in thread
From: Sam James @ 2024-04-28  0:04 UTC (permalink / raw)
  To: gentoo-commits

commit:     7e93192fda22594b9e9d223c54a39b4bad0554f9
Author:     Alexey Gladkov <legion <AT> kernel <DOT> org>
AuthorDate: Mon Mar 11 00:25:07 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Apr 28 00:04:08 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=7e93192f

sync/zipfile: Add retrieve_head to return archive checksum

Since we have an ETag, we can return the checksum of the archive. It
will be a replacement for the head commit of the repository.

Suggested-by: Zac Medico <zmedico <AT> gentoo.org>
Signed-off-by: Alexey Gladkov <legion <AT> kernel.org>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/sync/modules/zipfile/__init__.py | 3 ++-
 lib/portage/sync/modules/zipfile/zipfile.py  | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/portage/sync/modules/zipfile/__init__.py b/lib/portage/sync/modules/zipfile/__init__.py
index 19fe3af412..e44833088c 100644
--- a/lib/portage/sync/modules/zipfile/__init__.py
+++ b/lib/portage/sync/modules/zipfile/__init__.py
@@ -21,10 +21,11 @@ module_spec = {
             "sourcefile": "zipfile",
             "class": "ZipFile",
             "description": doc,
-            "functions": ["sync"],
+            "functions": ["sync", "retrieve_head"],
             "func_desc": {
                 "sync": "Performs an archived http download of the "
                 + "repository, then unpacks it.",
+                "retrieve_head": "Returns the checksum of the unpacked archive.",
             },
             "validate_config": CheckSyncConfig,
             "module_specific_options": (),

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py
index bb78b39243..3cd210a64b 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -26,6 +26,15 @@ class ZipFile(SyncBase):
     def __init__(self):
         SyncBase.__init__(self, "emerge", ">=sys-apps/portage-2.3")
 
+    def retrieve_head(self, **kwargs):
+        """Get information about the checksum of the unpacked archive"""
+        if kwargs:
+            self._kwargs(kwargs)
+        info = portage.grabdict(os.path.join(self.repo.location, ".info"))
+        if "etag" in info:
+            return (os.EX_OK, info["etag"][0])
+        return (1, False)
+
     def sync(self, **kwargs):
         """Sync the repository"""
         if kwargs:


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/
@ 2024-04-28  0:04 Sam James
  0 siblings, 0 replies; 4+ messages in thread
From: Sam James @ 2024-04-28  0:04 UTC (permalink / raw)
  To: gentoo-commits

commit:     ced2e6d4f4ac95b8e17cf7dae964a64037a85bf0
Author:     Alexey Gladkov <legion <AT> kernel <DOT> org>
AuthorDate: Mon Mar 11 17:09:05 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Apr 28 00:04:09 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=ced2e6d4

sync/zipfile: Recycle files that have not changed

We can check whether the content of files from the archive differs from
the current revision. This will give us several advantages:

* This will make the mtime of files meaningful, since it will
prevent the timestamps of unmodified files from being changed.

* This will also get rid of recreating self.repo.location, which will
allow sync with FEATURES=usersync because self.repo.location is reused.

Suggested-by: Zac Medico <zmedico <AT> gentoo.org>
Signed-off-by: Alexey Gladkov <legion <AT> kernel.org>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/sync/modules/zipfile/zipfile.py | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py
index 3cd210a64b..edfb5aa681 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -35,6 +35,16 @@ class ZipFile(SyncBase):
             return (os.EX_OK, info["etag"][0])
         return (1, False)
 
+    def _do_cmp(self, f1, f2):
+        bufsize = 8 * 1024
+        while True:
+            b1 = f1.read(bufsize)
+            b2 = f2.read(bufsize)
+            if b1 != b2:
+                return False
+            if not b1:
+                return True
+
     def sync(self, **kwargs):
         """Sync the repository"""
         if kwargs:
@@ -76,7 +86,15 @@ class ZipFile(SyncBase):
             return (1, False)
 
         # Drop previous tree
-        shutil.rmtree(self.repo.location)
+        tempdir = tempfile.mkdtemp(prefix=".temp", dir=self.repo.location)
+        tmpname = os.path.basename(tempdir)
+
+        for name in os.listdir(self.repo.location):
+            if name != tmpname:
+                os.rename(
+                    os.path.join(self.repo.location, name),
+                    os.path.join(tempdir, name),
+                )
 
         with zipfile.ZipFile(zip_file) as archive:
             strip_comp = 0
@@ -101,9 +119,21 @@ class ZipFile(SyncBase):
                     continue
 
                 with archive.open(n) as srcfile:
+                    prvpath = os.path.join(tempdir, *parts[strip_comp:])
+
+                    if os.path.exists(prvpath):
+                        with open(prvpath, "rb") as prvfile:
+                            if self._do_cmp(prvfile, srcfile):
+                                os.rename(prvpath, dstpath)
+                                continue
+                        srcfile.seek(0)
+
                     with open(dstpath, "wb") as dstfile:
                         shutil.copyfileobj(srcfile, dstfile)
 
+        # Drop previous tree
+        shutil.rmtree(tempdir)
+
         with open(os.path.join(self.repo.location, ".info"), "w") as infofile:
             if etag:
                 infofile.write(f"etag {etag}\n")


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/
@ 2024-04-28  0:04 Sam James
  0 siblings, 0 replies; 4+ messages in thread
From: Sam James @ 2024-04-28  0:04 UTC (permalink / raw)
  To: gentoo-commits

commit:     80445d9b00bfcd1eb4955cf3ecb397b4c02663ba
Author:     Alexey Gladkov <legion <AT> kernel <DOT> org>
AuthorDate: Mon Feb 12 13:59:40 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Apr 28 00:04:07 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=80445d9b

sync: Add method to download zip archives

Add a simple method for synchronizing repository as a snapshot in a zip
archive. The implementation does not require external utilities to
download and unpack archive. This makes the method very cheap.

The main use case being considered is obtaining snapshots of github
repositories, but many other web interfaces for git also support
receiving snapshots in zip format.

For example, to get a snapshot of the master branch:

  https://github.com/gentoo/portage/archive/refs/heads/master.zip
  https://gitweb.gentoo.org/proj/portage.git/snapshot/portage-master.zip

or a link to a snapshot of the tag:

  https://github.com/gentoo/portage/archive/refs/tags/portage-3.0.61.zip

Signed-off-by: Alexey Gladkov <legion <AT> kernel.org>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/sync/modules/zipfile/__init__.py | 33 +++++++++++
 lib/portage/sync/modules/zipfile/zipfile.py  | 82 ++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/lib/portage/sync/modules/zipfile/__init__.py b/lib/portage/sync/modules/zipfile/__init__.py
new file mode 100644
index 0000000000..19fe3af412
--- /dev/null
+++ b/lib/portage/sync/modules/zipfile/__init__.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2024  Alexey Gladkov <gladkov.alexey@gmail.com>
+
+doc = """Zipfile plug-in module for portage.
+Performs a http download of a portage snapshot and unpacks it to the repo
+location."""
+__doc__ = doc[:]
+
+
+import os
+
+from portage.sync.config_checks import CheckSyncConfig
+
+
+module_spec = {
+    "name": "zipfile",
+    "description": doc,
+    "provides": {
+        "zipfile-module": {
+            "name": "zipfile",
+            "sourcefile": "zipfile",
+            "class": "ZipFile",
+            "description": doc,
+            "functions": ["sync"],
+            "func_desc": {
+                "sync": "Performs an archived http download of the "
+                + "repository, then unpacks it.",
+            },
+            "validate_config": CheckSyncConfig,
+            "module_specific_options": (),
+        },
+    },
+}

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py
new file mode 100644
index 0000000000..1762d2c8f1
--- /dev/null
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2024  Alexey Gladkov <gladkov.alexey@gmail.com>
+
+import os
+import os.path
+import logging
+import zipfile
+import shutil
+import tempfile
+import urllib.request
+
+import portage
+from portage.util import writemsg_level
+from portage.sync.syncbase import SyncBase
+
+
+class ZipFile(SyncBase):
+    """ZipFile sync module"""
+
+    short_desc = "Perform sync operations on GitHub repositories"
+
+    @staticmethod
+    def name():
+        return "ZipFile"
+
+    def __init__(self):
+        SyncBase.__init__(self, "emerge", ">=sys-apps/portage-2.3")
+
+    def sync(self, **kwargs):
+        """Sync the repository"""
+        if kwargs:
+            self._kwargs(kwargs)
+
+        # initial checkout
+        zip_uri = self.repo.sync_uri
+
+        with urllib.request.urlopen(zip_uri) as response:
+            with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+                shutil.copyfileobj(response, tmp_file)
+            zip_file = tmp_file.name
+
+        if not zipfile.is_zipfile(zip_file):
+            msg = "!!! file is not a zip archive."
+            self.logger(self.xterm_titles, msg)
+            writemsg_level(msg + "\n", noiselevel=-1, level=logging.ERROR)
+
+            os.unlink(zip_file)
+
+            return (1, False)
+
+        # Drop previous tree
+        shutil.rmtree(self.repo.location)
+
+        with zipfile.ZipFile(zip_file) as archive:
+            strip_comp = 0
+
+            for f in archive.namelist():
+                f = os.path.normpath(f)
+                if os.path.basename(f) == "profiles":
+                    strip_comp = f.count("/")
+                    break
+
+            for n in archive.infolist():
+                p = os.path.normpath(n.filename)
+
+                if os.path.isabs(p):
+                    continue
+
+                parts = p.split("/")
+                dstpath = os.path.join(self.repo.location, *parts[strip_comp:])
+
+                if n.is_dir():
+                    os.makedirs(dstpath, mode=0o755, exist_ok=True)
+                    continue
+
+                with archive.open(n) as srcfile:
+                    with open(dstpath, "wb") as dstfile:
+                        shutil.copyfileobj(srcfile, dstfile)
+
+        os.unlink(zip_file)
+
+        return (os.EX_OK, True)


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/
@ 2024-04-28  0:04 Sam James
  0 siblings, 0 replies; 4+ messages in thread
From: Sam James @ 2024-04-28  0:04 UTC (permalink / raw)
  To: gentoo-commits

commit:     8c6e5d06afbf6fca1893cff5ed777e44f93b7a5d
Author:     Alexey Gladkov <legion <AT> kernel <DOT> org>
AuthorDate: Sun Mar  3 16:41:08 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Apr 28 00:04:08 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=8c6e5d06

sync/zipfile: Handle ETag header

Most services add an ETag header, which can be used to determine whether
the locally cached version of the URL has expired. So we can add ETag
processing to avoid unnecessary downloading and unpacking of the zip archive.

Signed-off-by: Alexey Gladkov <legion <AT> kernel.org>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/sync/modules/zipfile/zipfile.py | 36 +++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py
index 1762d2c8f1..bb78b39243 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -10,7 +10,7 @@ import tempfile
 import urllib.request
 
 import portage
-from portage.util import writemsg_level
+from portage.util import writemsg_level, writemsg_stdout
 from portage.sync.syncbase import SyncBase
 
 
@@ -31,13 +31,31 @@ class ZipFile(SyncBase):
         if kwargs:
             self._kwargs(kwargs)
 
-        # initial checkout
-        zip_uri = self.repo.sync_uri
+        req = urllib.request.Request(url=self.repo.sync_uri)
 
-        with urllib.request.urlopen(zip_uri) as response:
-            with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
-                shutil.copyfileobj(response, tmp_file)
-            zip_file = tmp_file.name
+        info = portage.grabdict(os.path.join(self.repo.location, ".info"))
+        if "etag" in info:
+            req.add_header("If-None-Match", info["etag"][0])
+
+        try:
+            with urllib.request.urlopen(req) as response:
+                with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+                    shutil.copyfileobj(response, tmp_file)
+
+                zip_file = tmp_file.name
+                etag = response.headers.get("etag")
+
+        except urllib.error.HTTPError as resp:
+            if resp.code == 304:
+                writemsg_stdout(">>> The repository has not changed.\n", noiselevel=-1)
+                return (os.EX_OK, False)
+
+            writemsg_level(
+                f"!!! Unable to obtain zip archive: {resp}\n",
+                noiselevel=-1,
+                level=logging.ERROR,
+            )
+            return (1, False)
 
         if not zipfile.is_zipfile(zip_file):
             msg = "!!! file is not a zip archive."
@@ -77,6 +95,10 @@ class ZipFile(SyncBase):
                     with open(dstpath, "wb") as dstfile:
                         shutil.copyfileobj(srcfile, dstfile)
 
+        with open(os.path.join(self.repo.location, ".info"), "w") as infofile:
+            if etag:
+                infofile.write(f"etag {etag}\n")
+
         os.unlink(zip_file)
 
         return (os.EX_OK, True)


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-04-28  0:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-04-28  0:04 [gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/ Sam James
  -- strict thread matches above, loose matches on Subject: below --
2024-04-28  0:04 Sam James
2024-04-28  0:04 Sam James
2024-04-28  0:04 Sam James

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox