public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Kent Fredric" <kentfredric@gmail.com>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/
Date: Wed, 18 Apr 2012 03:32:25 +0000 (UTC)	[thread overview]
Message-ID: <1334719551.b2800087a6719b8b9df1732d7ecdac3f5fab8b06.kent@gentoo> (raw)

commit:     b2800087a6719b8b9df1732d7ecdac3f5fab8b06
Author:     Kent Fredric <kentfredric <AT> gmail <DOT> com>
AuthorDate: Wed Apr 18 03:25:51 2012 +0000
Commit:     Kent Fredric <kentfredric <AT> gmail <DOT> com>
CommitDate: Wed Apr 18 03:25:51 2012 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=b2800087

[scripts/package_map_all.pl] hacks to get around the abysmal speed I experienced today with the API, request batching and ssl stuff

---
 scripts/package_map_all.pl |  120 +++++++++++++++++++++++++------------------
 1 files changed, 70 insertions(+), 50 deletions(-)

diff --git a/scripts/package_map_all.pl b/scripts/package_map_all.pl
index 351cd63..8bb260e 100755
--- a/scripts/package_map_all.pl
+++ b/scripts/package_map_all.pl
@@ -28,7 +28,8 @@ if ( $optparse->has_long_opt('root') ) {
   $root = Path::Class::Dir->new( $optparse->long_opt('root') );
 }
 
-my $size = 1000;
+my $size = 500;
+my $scroll_time = '20m';
 
 my $metadata = $root->subdir( 'metadata', 'perl' );
 my $distmap = $metadata->subdir('distmap');
@@ -52,7 +53,7 @@ my %g_repos;
     for ( keys %{$nodes} ) {
       my $records = $nodes->{$_};
       $lookup{$_}++;
-      for my $rec ( @{ $records }) {
+      for my $rec ( @{$records} ) {
         my $repo = $rec->{repository};
         $repos{$repo}++;
       }
@@ -72,58 +73,17 @@ my %g_repos;
 
 my @dists = keys %lookup;
 
-my $search = {};
-$search->{query} = { constant_score => { filter => { terms => { distribution => [@dists] } } } };
-$search->{sort}   = [ { 'date' => 'desc', }, ];
-$search->{size}   = $size;
-$search->{fields} = [
-  qw(
-    abstract
-    archive
-    author
-    authorized
-    date
-    distribution
-    download_url
-    license
-    maturity
-    name
-    status
-    version
-    )
-];
-
-$ENV{WWW_MECH_NOCACHE} = 1;
-
-my $results_string = mcpan->ua->request(
-  'POST',
-  mcpan->base_url . 'release/_search?search_type=scan&scroll=30s&size=' . $size,
-  {
-    headers => { 'Accept-Encoding' => 'gzip', },
-    content => $encoder->encode($search),
-  }
-);
-
-say $results_string->{content};
-
-my $results   = $decoder->decode( $results_string->{content} );
-my $scroll_id = $results->{_scroll_id};
+my $dtree;
 
-my $total_results = $results->{hits}->{total};
+my $seen = 0;
 
-say "Found: $total_results releases";
+use List::MoreUtils qw( natatime );
 
-my $dtree;
-my $seen = 0;
+my $it = natatime 500, @dists;
 
-while (1) {
-  my ( $result, $scroll ) = scroll($scroll_id);
-  last unless scalar @{ $result->{hits}->{hits} };
-  collate_resultset($result);
-  $scroll_id = $scroll;
-  say "Seen $seen of $total_results";
+while ( my @dists_batch = $it->() ) {
+  get_data_for(@dists_batch);
 }
-
 for my $package ( sort keys %{$dtree} ) {
   say "Sorting $package";
   $dtree->{$package} = [ sort { $b->{date} cmp $a->{date} } @{ $dtree->{$package} } ];
@@ -134,14 +94,74 @@ $fh->print( $encoder->encode($dtree) );
 
 exit 0;
 
+sub get_data_for {
+  my (@items) = @_;
+  my $search = {};
+  $search->{query} = { constant_score => { filter => { terms => { distribution => [@items] } } } };
+  $search->{sort}   = [ { 'date' => 'desc', }, ];
+  $search->{size}   = $size;
+  $search->{fields} = [
+    qw(
+      abstract
+      archive
+      author
+      authorized
+      date
+      distribution
+      download_url
+      license
+      maturity
+      name
+      status
+      version
+      )
+  ];
+
+  $ENV{WWW_MECH_NOCACHE} = 1;
+
+  my $results_string = mcpan->ua->request(
+    'POST',
+    'https://api.metacpan.org/release/_search?search_type=scan&scroll=' . $scroll_time . '&size=' . $size,
+    {
+      headers => { 'Accept-Encoding' => 'gzip', },
+      content => $encoder->encode($search),
+    }
+  );
+
+  my $results   = $decoder->decode( $results_string->{content} );
+  my $scroll_id = $results->{_scroll_id};
+
+  my $total_results = $results->{hits}->{total};
+
+  say "Found: $total_results releases";
+  $seen = 0;
+  while (1) {
+    my ( $result, $scroll ) = scroll($scroll_id);
+    last unless scalar @{ $result->{hits}->{hits} };
+    collate_resultset($result);
+    $scroll_id = $scroll;
+    say "Seen $seen of $total_results";
+  }
+
+}
+
 sub scroll {
   my ($id) = @_;
   my $result = mcpan->ua->request(
     'GET',
-    'http://api.metacpan.org/_search/scroll/?scroll=30s&size=' . $size . '&scroll_id=' . $id,
+    'https://api.metacpan.org/_search/scroll/?scroll=' . $scroll_time . '&size=' . $size . '&scroll_id=' . $id,
     { headers => { 'Accept-Encoding' => 'gzip', } }
   );
 
+  if ( $result->{content} =~ /Server Error/ ) {
+    require Data::Dump;
+    Data::Dump::pp( { result => $result, size => $size, scroll_id => $id } );
+    die;
+  }
+  else {
+    #require Data::Dump;
+    #Data::Dump::pp( { result => { %{$result}, content => '...' }, size => $size, scroll_id => $id } );
+  }
   my $data = $decoder->decode( $result->{content} );
   return $data, $data->{_scroll_id};
 }



             reply	other threads:[~2012-04-18  3:32 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-18  3:32 Kent Fredric [this message]
  -- strict thread matches above, loose matches on Subject: below --
2017-09-16 22:36 [gentoo-commits] proj/perl-overlay:master commit in: scripts/ Kent Fredric
2015-02-28 23:17 Kent Fredric
2015-02-28 23:17 Kent Fredric
2013-12-23 15:28 Kent Fredric
2013-05-01 23:03 Kent Fredric
2013-05-01 23:03 Kent Fredric
2012-10-24 15:49 Kent Fredric
2012-09-15 23:19 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-07-31  3:04 Kent Fredric
2012-07-12 19:23 Torsten Veller
2012-06-22  7:34 Kent Fredric
2012-06-08 17:14 Kent Fredric
2012-05-27  2:30 Kent Fredric
2012-04-28 10:40 Kent Fredric
2012-04-18  3:32 Kent Fredric
2012-04-18  3:32 Kent Fredric
2012-04-12 19:46 Kent Fredric
2012-04-09 16:05 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-05 10:02 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-01 11:38 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:06 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-24  7:13 Kent Fredric
2012-02-24  7:13 Kent Fredric
2012-02-12  7:22 Kent Fredric
2012-02-12  7:22 Kent Fredric
2011-12-05 21:45 Kent Fredric
2011-11-14  2:57 Kent Fredric
2011-11-14  2:57 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31  8:46 Kent Fredric
2011-10-31  7:10 Kent Fredric
2011-10-31  4:52 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-24 21:17 Kent Fredric
2011-10-24 18:26 Kent Fredric
2011-10-24  9:09 Kent Fredric
2011-09-23  6:17 Kent Fredric
2011-08-29  5:44 Kent Fredric

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1334719551.b2800087a6719b8b9df1732d7ecdac3f5fab8b06.kent@gentoo \
    --to=kentfredric@gmail.com \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.