From: "Kent Fredric" <kentfredric@gmail.com>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/
Date: Wed, 18 Apr 2012 03:32:25 +0000 (UTC) [thread overview]
Message-ID: <1334719551.b2800087a6719b8b9df1732d7ecdac3f5fab8b06.kent@gentoo> (raw)
commit: b2800087a6719b8b9df1732d7ecdac3f5fab8b06
Author: Kent Fredric <kentfredric <AT> gmail <DOT> com>
AuthorDate: Wed Apr 18 03:25:51 2012 +0000
Commit: Kent Fredric <kentfredric <AT> gmail <DOT> com>
CommitDate: Wed Apr 18 03:25:51 2012 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=b2800087
[scripts/package_map_all.pl] hacks to get around the abysmal speed I experienced today with the API, request batching and ssl stuff
---
scripts/package_map_all.pl | 120 +++++++++++++++++++++++++------------------
1 files changed, 70 insertions(+), 50 deletions(-)
diff --git a/scripts/package_map_all.pl b/scripts/package_map_all.pl
index 351cd63..8bb260e 100755
--- a/scripts/package_map_all.pl
+++ b/scripts/package_map_all.pl
@@ -28,7 +28,8 @@ if ( $optparse->has_long_opt('root') ) {
$root = Path::Class::Dir->new( $optparse->long_opt('root') );
}
-my $size = 1000;
+my $size = 500;
+my $scroll_time = '20m';
my $metadata = $root->subdir( 'metadata', 'perl' );
my $distmap = $metadata->subdir('distmap');
@@ -52,7 +53,7 @@ my %g_repos;
for ( keys %{$nodes} ) {
my $records = $nodes->{$_};
$lookup{$_}++;
- for my $rec ( @{ $records }) {
+ for my $rec ( @{$records} ) {
my $repo = $rec->{repository};
$repos{$repo}++;
}
@@ -72,58 +73,17 @@ my %g_repos;
my @dists = keys %lookup;
-my $search = {};
-$search->{query} = { constant_score => { filter => { terms => { distribution => [@dists] } } } };
-$search->{sort} = [ { 'date' => 'desc', }, ];
-$search->{size} = $size;
-$search->{fields} = [
- qw(
- abstract
- archive
- author
- authorized
- date
- distribution
- download_url
- license
- maturity
- name
- status
- version
- )
-];
-
-$ENV{WWW_MECH_NOCACHE} = 1;
-
-my $results_string = mcpan->ua->request(
- 'POST',
- mcpan->base_url . 'release/_search?search_type=scan&scroll=30s&size=' . $size,
- {
- headers => { 'Accept-Encoding' => 'gzip', },
- content => $encoder->encode($search),
- }
-);
-
-say $results_string->{content};
-
-my $results = $decoder->decode( $results_string->{content} );
-my $scroll_id = $results->{_scroll_id};
+my $dtree;
-my $total_results = $results->{hits}->{total};
+my $seen = 0;
-say "Found: $total_results releases";
+use List::MoreUtils qw( natatime );
-my $dtree;
-my $seen = 0;
+my $it = natatime 500, @dists;
-while (1) {
- my ( $result, $scroll ) = scroll($scroll_id);
- last unless scalar @{ $result->{hits}->{hits} };
- collate_resultset($result);
- $scroll_id = $scroll;
- say "Seen $seen of $total_results";
+while ( my @dists_batch = $it->() ) {
+ get_data_for(@dists_batch);
}
-
for my $package ( sort keys %{$dtree} ) {
say "Sorting $package";
$dtree->{$package} = [ sort { $b->{date} cmp $a->{date} } @{ $dtree->{$package} } ];
@@ -134,14 +94,74 @@ $fh->print( $encoder->encode($dtree) );
exit 0;
+sub get_data_for {
+ my (@items) = @_;
+ my $search = {};
+ $search->{query} = { constant_score => { filter => { terms => { distribution => [@items] } } } };
+ $search->{sort} = [ { 'date' => 'desc', }, ];
+ $search->{size} = $size;
+ $search->{fields} = [
+ qw(
+ abstract
+ archive
+ author
+ authorized
+ date
+ distribution
+ download_url
+ license
+ maturity
+ name
+ status
+ version
+ )
+ ];
+
+ $ENV{WWW_MECH_NOCACHE} = 1;
+
+ my $results_string = mcpan->ua->request(
+ 'POST',
+ 'https://api.metacpan.org/release/_search?search_type=scan&scroll=' . $scroll_time . '&size=' . $size,
+ {
+ headers => { 'Accept-Encoding' => 'gzip', },
+ content => $encoder->encode($search),
+ }
+ );
+
+ my $results = $decoder->decode( $results_string->{content} );
+ my $scroll_id = $results->{_scroll_id};
+
+ my $total_results = $results->{hits}->{total};
+
+ say "Found: $total_results releases";
+ $seen = 0;
+ while (1) {
+ my ( $result, $scroll ) = scroll($scroll_id);
+ last unless scalar @{ $result->{hits}->{hits} };
+ collate_resultset($result);
+ $scroll_id = $scroll;
+ say "Seen $seen of $total_results";
+ }
+
+}
+
sub scroll {
my ($id) = @_;
my $result = mcpan->ua->request(
'GET',
- 'http://api.metacpan.org/_search/scroll/?scroll=30s&size=' . $size . '&scroll_id=' . $id,
+ 'https://api.metacpan.org/_search/scroll/?scroll=' . $scroll_time . '&size=' . $size . '&scroll_id=' . $id,
{ headers => { 'Accept-Encoding' => 'gzip', } }
);
+ if ( $result->{content} =~ /Server Error/ ) {
+ require Data::Dump;
+ Data::Dump::pp( { result => $result, size => $size, scroll_id => $id } );
+ die;
+ }
+ else {
+ #require Data::Dump;
+ #Data::Dump::pp( { result => { %{$result}, content => '...' }, size => $size, scroll_id => $id } );
+ }
my $data = $decoder->decode( $result->{content} );
return $data, $data->{_scroll_id};
}
next reply | other threads: [~2012-04-18 3:32 UTC | newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-18 3:32 Kent Fredric [this message]
-- strict thread matches above, loose matches on Subject: below --
2017-09-16 22:36 [gentoo-commits] proj/perl-overlay:master commit in: scripts/ Kent Fredric
2015-02-28 23:17 Kent Fredric
2015-02-28 23:17 Kent Fredric
2013-12-23 15:28 Kent Fredric
2013-05-01 23:03 Kent Fredric
2013-05-01 23:03 Kent Fredric
2012-10-24 15:49 Kent Fredric
2012-09-15 23:19 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-07-31 3:04 Kent Fredric
2012-07-12 19:23 Torsten Veller
2012-06-22 7:34 Kent Fredric
2012-06-08 17:14 Kent Fredric
2012-05-27 2:30 Kent Fredric
2012-04-28 10:40 Kent Fredric
2012-04-18 3:32 Kent Fredric
2012-04-18 3:32 Kent Fredric
2012-04-12 19:46 Kent Fredric
2012-04-09 16:05 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-05 10:02 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-01 11:38 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:06 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-24 7:13 Kent Fredric
2012-02-24 7:13 Kent Fredric
2012-02-12 7:22 Kent Fredric
2012-02-12 7:22 Kent Fredric
2011-12-05 21:45 Kent Fredric
2011-11-14 2:57 Kent Fredric
2011-11-14 2:57 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 8:46 Kent Fredric
2011-10-31 7:10 Kent Fredric
2011-10-31 4:52 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-24 21:17 Kent Fredric
2011-10-24 18:26 Kent Fredric
2011-10-24 9:09 Kent Fredric
2011-09-23 6:17 Kent Fredric
2011-08-29 5:44 Kent Fredric
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1334719551.b2800087a6719b8b9df1732d7ecdac3f5fab8b06.kent@gentoo \
--to=kentfredric@gmail.com \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.