public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Fabian Groffen" <grobian@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] repo/proj/prefix:master commit in: scripts/rsync-generation/
Date: Wed, 28 Feb 2018 18:44:49 +0000 (UTC)	[thread overview]
Message-ID: <1519843411.a11406710f526f2b28e68f5544e6cd9e47710058.grobian@gentoo> (raw)

commit:     a11406710f526f2b28e68f5544e6cd9e47710058
Author:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Wed Feb 28 18:43:31 2018 +0000
Commit:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Wed Feb 28 18:43:31 2018 +0000
URL:        https://gitweb.gentoo.org/repo/proj/prefix.git/commit/?id=a1140671

scripts/rsync-generation/hashgen: allow verification to be massively parallel

Save up the subdirs that still need handling, so we can loop over them
in a plain for-loop, which OpenMP can cheaply parallelise for us (for free).

 scripts/rsync-generation/hashgen.c | 109 +++++++++++++++++++++++++++----------
 1 file changed, 79 insertions(+), 30 deletions(-)

diff --git a/scripts/rsync-generation/hashgen.c b/scripts/rsync-generation/hashgen.c
index 3f7aaf65d6..b2ba8b0f5a 100644
--- a/scripts/rsync-generation/hashgen.c
+++ b/scripts/rsync-generation/hashgen.c
@@ -919,6 +919,12 @@ compare_strings(const void *l, const void *r)
 
 static char verify_manifest(const char *dir, const char *manifest);
 
+struct subdir_workload {
+	size_t subdirlen;
+	size_t elemslen;
+	char **elems;
+};
+
 #define LISTSZ 64
 static char
 verify_dir(
@@ -940,6 +946,9 @@ verify_dir(
 	char etpe;
 	char ret = 0;
 	int cmp;
+	struct subdir_workload **subdir = NULL;
+	size_t subdirsize = 0;
+	size_t subdirlen = 0;
 
 	/* shortcut a single Manifest entry pointing to the same dir
 	 * (happens at top-level) */
@@ -1013,40 +1022,41 @@ verify_dir(
 			/* handle subdirs first */
 			if ((slash = strchr(entry, '/')) != NULL) {
 				size_t sublen = slash - entry;
-				char ndir[8192];
-
-				if (etpe == 'M') {
-					size_t skiplen = strlen(dir) + 1 + sublen;
-					/* sub-Manifest, we need to do a proper recurse */
-					slash = strrchr(entry, '/');  /* cannot be NULL */
-					snprintf(ndir, sizeof(ndir), "%s/%s", dir, entry);
-					ndir[skiplen] = '\0';
-					slash = strchr(ndir + skiplen + 1, ' ');
-					if (slash != NULL)  /* path should fit in ndir ... */
-						*slash = '\0';
-					if (verify_file(dir, entry, mfest) != 0 ||
-						verify_manifest(ndir, ndir + skiplen + 1) != 0)
-						ret |= 1;
-				} else {
-					int elemstart = curelem;
-					char **subelems = &elems[curelem];
-					/* collect all entries like this one (same subdir) into
-					 * a sub-list that we can verify */
+				int elemstart = curelem;
+				char **subelems = &elems[curelem];
+
+				/* collect all entries like this one (same subdir) into
+				 * a sub-list that we can verify */
+				curelem++;
+				while (curelem < elemslen &&
+						strncmp(entry, elems[curelem] + 2 + skippath,
+							sublen + 1) == 0)
 					curelem++;
-					while (curelem < elemslen &&
-							strncmp(entry, elems[curelem] + 2 + skippath,
-								sublen + 1) == 0)
-						curelem++;
-					snprintf(ndir, sizeof(ndir), "%s/%.*s", dir,
-							(int)sublen, elems[elemstart] + 2 + skippath);
-					ret |= verify_dir(ndir, subelems,
-							curelem - elemstart, skippath + sublen + 1, mfest);
-					curelem--; /* move back, see below */
+
+				if (subdirlen == subdirsize) {
+					subdirsize += LISTSZ;
+					subdir = realloc(subdir,
+							subdirsize * sizeof(subdir[0]));
+					if (subdir == NULL) {
+						fprintf(stderr, "out of memory\n");
+						return 1;
+					}
+				}
+				subdir[subdirlen] = malloc(sizeof(struct subdir_workload));
+				if (subdir[subdirlen] == NULL) {
+					fprintf(stderr, "out of memory\n");
+					return 1;
 				}
-				
+				subdir[subdirlen]->subdirlen = sublen;
+				subdir[subdirlen]->elemslen = curelem - elemstart;
+				subdir[subdirlen]->elems = subelems;
+				subdirlen++;
+
+				curelem--; /* move back, see below */
+
 				/* modify the last entry to be the subdir, such that we
 				 * can let the code below synchronise with dentries */
-				elems[curelem][2 + skippath + sublen] = '\0';
+				elems[curelem][2 + skippath + sublen] = ' ';
 				entry = elems[curelem] + 2 + skippath;
 				etpe = 'S';  /* flag this was a subdir */
 			}
@@ -1083,6 +1093,8 @@ verify_dir(
 						*slash = '\0';
 					fprintf(stderr, "%s: missing %s file: %s\n",
 							mfest, etpe == 'M' ? "MANIFEST" : "DATA", entry);
+					if (slash != NULL)
+						*slash = ' ';
 				}
 				curelem++;
 			} else if (cmp > 0) {
@@ -1098,6 +1110,43 @@ verify_dir(
 			free(dentries[dentrieslen]);
 		free(dentries);
 
+#pragma omp parallel for shared(ret) private(entry, etpe, slash)
+		for (cmp = 0; cmp < subdirlen; cmp++) {
+			char ndir[8192];
+
+			entry = subdir[cmp]->elems[0] + 2 + skippath;
+			etpe = subdir[cmp]->elems[0][0];
+
+			/* restore original entry format */
+			subdir[cmp]->elems[subdir[cmp]->elemslen - 1]
+				[2 + skippath + subdir[cmp]->subdirlen] = '/';
+
+			if (etpe == 'M') {
+				size_t skiplen = strlen(dir) + 1 + subdir[cmp]->subdirlen;
+				/* sub-Manifest, we need to do a proper recurse */
+				slash = strrchr(entry, '/');  /* cannot be NULL */
+				snprintf(ndir, sizeof(ndir), "%s/%s", dir, entry);
+				ndir[skiplen] = '\0';
+				slash = strchr(ndir + skiplen + 1, ' ');
+				if (slash != NULL)  /* path should fit in ndir ... */
+					*slash = '\0';
+				if (verify_file(dir, entry, mfest) != 0 ||
+						verify_manifest(ndir, ndir + skiplen + 1) != 0)
+					ret |= 1;
+			} else {
+				snprintf(ndir, sizeof(ndir), "%s/%.*s", dir,
+						(int)subdir[cmp]->subdirlen, entry);
+				ret |= verify_dir(ndir, subdir[cmp]->elems,
+						subdir[cmp]->elemslen,
+						skippath + subdir[cmp]->subdirlen + 1, mfest);
+			}
+
+			free(subdir[cmp]);
+		}
+
+		if (subdir)
+			free(subdir);
+
 		return ret;
 	} else {
 		return 1;


             reply	other threads:[~2018-02-28 18:44 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-28 18:44 Fabian Groffen [this message]
  -- strict thread matches above, loose matches on Subject: below --
2025-09-07 10:46 [gentoo-commits] repo/proj/prefix:master commit in: scripts/rsync-generation/ Fabian Groffen
2024-07-10 18:24 Fabian Groffen
2024-06-14 20:13 Fabian Groffen
2024-03-31 10:26 Fabian Groffen
2024-03-31 10:09 Fabian Groffen
2024-03-31  8:53 Fabian Groffen
2024-03-31  8:46 Fabian Groffen
2024-03-31  8:27 Fabian Groffen
2024-03-30 18:44 Fabian Groffen
2024-03-30 11:46 Fabian Groffen
2024-03-29 10:31 Fabian Groffen
2024-03-29 10:31 Fabian Groffen
2024-03-28 16:12 Fabian Groffen
2024-03-28 16:12 Fabian Groffen
2024-03-28 16:12 Fabian Groffen
2024-03-28 16:12 Fabian Groffen
2023-09-11 10:39 Fabian Groffen
2023-08-01  2:40 Benda XU
2023-04-09 16:06 Fabian Groffen
2022-08-17 19:27 Fabian Groffen
2022-07-24 20:11 Fabian Groffen
2019-06-07  5:44 Fabian Groffen
2019-06-07  5:44 Fabian Groffen
2018-05-14 15:54 Fabian Groffen
2018-03-29  5:55 Fabian Groffen
2018-03-27 14:03 Fabian Groffen
2018-03-17 20:59 Fabian Groffen
2018-03-17 20:59 Fabian Groffen
2018-03-12 10:06 Fabian Groffen
2018-03-10 15:04 Fabian Groffen
2018-03-07 18:04 Fabian Groffen
2018-03-03 21:42 Fabian Groffen
2018-03-01 16:36 Fabian Groffen
2018-03-01 14:03 Fabian Groffen
2018-03-01 13:00 Fabian Groffen
2018-03-01 10:55 Fabian Groffen
2018-03-01  6:42 Fabian Groffen
2018-02-28 19:09 Fabian Groffen
2018-02-28 18:44 Fabian Groffen
2018-02-28 18:44 Fabian Groffen
2018-02-28 14:44 Fabian Groffen
2018-02-28 14:44 Fabian Groffen
2018-02-22 19:45 Fabian Groffen
2018-02-22  7:29 Fabian Groffen
2018-02-21  8:53 Fabian Groffen
2018-02-17 17:19 Fabian Groffen
2018-02-17  8:13 Fabian Groffen
2017-12-01 13:45 Fabian Groffen
2017-11-29 21:36 Fabian Groffen
2017-11-29 21:36 Fabian Groffen
2017-11-29 19:30 Fabian Groffen
2017-11-29 19:30 Fabian Groffen
2017-11-29 19:30 Fabian Groffen
2017-11-29 16:46 Fabian Groffen
2017-11-29 16:46 Fabian Groffen
2017-11-29 14:38 Fabian Groffen
2017-11-27 14:10 Fabian Groffen
2017-11-27 13:07 Fabian Groffen
2017-11-27 13:07 Fabian Groffen
2017-09-09 18:39 Fabian Groffen
2016-10-12  7:24 Fabian Groffen
2016-09-09 13:38 Fabian Groffen
2016-09-07 11:02 Fabian Groffen
2016-08-17  4:26 Fabian Groffen
2016-08-16  7:57 Fabian Groffen
2016-07-29  9:01 Fabian Groffen
2016-07-29  8:08 Fabian Groffen
2016-05-03 18:35 Fabian Groffen
2016-05-03 16:08 Fabian Groffen
2016-04-14 15:38 Fabian Groffen
2016-04-14 13:39 Fabian Groffen
2016-04-06 12:32 Fabian Groffen
2016-04-06 11:28 Fabian Groffen
2016-04-06 11:28 Fabian Groffen
2016-04-06 10:50 Fabian Groffen
2016-04-06 10:49 Fabian Groffen
2016-01-05 19:08 Fabian Groffen
2015-08-31 18:53 Fabian Groffen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1519843411.a11406710f526f2b28e68f5544e6cd9e47710058.grobian@gentoo \
    --to=grobian@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox