public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Kent Fredric" <kentfredric@gmail.com>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/, scripts/lib/
Date: Sun,  8 Apr 2012 23:11:59 +0000 (UTC)	[thread overview]
Message-ID: <1333926029.cf746e3c3f2fe8dca044012cca603464688cd289.kent@gentoo> (raw)

commit:     cf746e3c3f2fe8dca044012cca603464688cd289
Author:     Kent Fredric <kentfredric <AT> gmail <DOT> com>
AuthorDate: Sun Apr  8 23:00:29 2012 +0000
Commit:     Kent Fredric <kentfredric <AT> gmail <DOT> com>
CommitDate: Sun Apr  8 23:00:29 2012 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=cf746e3c

[scripts] add package_map_all which creates a JSON listing of all basic metadata of all available versions of all tracked packages

---
 scripts/lib/optparse.pm    |   17 ++++-
 scripts/package_map_all.pl |  184 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 199 insertions(+), 2 deletions(-)

diff --git a/scripts/lib/optparse.pm b/scripts/lib/optparse.pm
index 296184b..a12ccc9 100644
--- a/scripts/lib/optparse.pm
+++ b/scripts/lib/optparse.pm
@@ -12,8 +12,20 @@ use Moose;
 has 'help' => ( isa => 'CodeRef',  is => 'rw', required => 1 );
 has 'argv' => ( isa => 'ArrayRef', is => 'rw', required => 1 );
 
-has 'long_opts'  => ( isa => 'HashRef',  is => 'rw', 'lazy_build' => 1 );
-has 'opts'       => ( isa => 'HashRef',  is => 'rw', lazy_build   => 1 );
+has 'long_opts'  => ( isa => 'HashRef',  is => 'rw', 'lazy_build' => 1 ,
+  traits => [qw( Hash )],
+  handles => { 
+    has_long_opt => 'exists',
+    long_opt => 'get',
+  },
+);
+has 'opts'       => ( isa => 'HashRef',  is => 'rw', lazy_build   => 1, 
+  traits => [qw( Hash )],
+  handles => { 
+    has_opt => 'exists',
+    opt => 'get',
+  },
+);
 has 'extra_opts' => ( isa => 'ArrayRef', is => 'rw', 'lazy_build' => 1 );
 
 sub _build_extra_opts {
@@ -21,6 +33,7 @@ sub _build_extra_opts {
   return [ grep { $_ !~ /^--(.+)/ and $_ !~ /^-(\w+)/ } @{ $self->argv } ];
 }
 
+
 sub _build_opts {
   my $self = shift;
   my $hash = {};

diff --git a/scripts/package_map_all.pl b/scripts/package_map_all.pl
new file mode 100755
index 0000000..62a9c5b
--- /dev/null
+++ b/scripts/package_map_all.pl
@@ -0,0 +1,184 @@
+#!/usr/bin/env perl
+
+eval 'echo "Called with something not perl"' && exit 1    # Non-Perl protection.
+  if 0;
+
+use 5.12.2;
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+
+use env::gentoo::perl_experimental;
+use metacpan qw( mcpan );
+use Try::Tiny;
+use utf8;
+use optparse;
+use Path::Class::Dir;
+my $optparse = optparse->new(
+  argv => \@ARGV,
+  help => sub { print help(); },
+);
+
+my $env  = env::gentoo::perl_experimental->new();
+my $root = $env->root;
+
+if ( $optparse->has_long_opt('root') ) {
+  $root = Path::Class::Dir->new( $optparse->long_opt('root') );
+}
+
+my $size = 300;
+
+my $metadata = $root->subdir( 'metadata', 'perl' );
+my $distmap  = $metadata->subdir('distmap');
+#my $distinfo = $metadata->subdir('distinfo');
+$distinfo->mkpath();
+my (@json_files) = grep { not $_->is_dir and $_->basename =~ /\.json$/ } $distmap->children();
+
+use JSON;
+my $decoder = JSON->new()->utf8->relaxed;
+my $encoder = JSON->new()->pretty->utf8->canonical;
+
+my %lookup;
+
+{
+  for my $file (@json_files) {
+    say "* Reading " . $file->relative;
+    my $hash = $decoder->decode( scalar $file->slurp );
+    say "  Found " . ( scalar keys %{$hash} ) . " repositories indexed in " . $file->relative;
+    for my $repo ( keys %{$hash} ) {
+      my $nodes = $hash->{$repo};
+      say "   ${repo}: " . ( scalar keys %{$nodes} ) . " distributions";
+      $lookup{$_}++ for keys %{$nodes};
+    }
+  }
+  say "* Found: " . ( scalar keys %lookup ) . " unique distributions";
+  my ( @dup ) = grep { $lookup{$_} > 1 } keys %lookup;
+  if ( @dup > 0 ) {
+    say "   " . ( scalar @dup ) ." items listed more than once";
+    say "    > $_" for @dup;
+  }
+}
+
+my @dists = keys %lookup;
+
+my $search = {};
+$search->{query} = { constant_score => { filter => { terms => { 
+        distribution => [ @dists ] 
+} } } };
+$search->{sort}   = [ { 'date' => 'desc', }, ];
+$search->{size}   = $size;
+$search->{fields} = [
+  qw(
+    abstract
+    archive
+    author
+    authorized
+    date
+    distribution
+    download_url
+    license
+    maturity
+    name
+    status
+    version
+    )
+];
+
+$ENV{WWW_MECH_NOCACHE} = 1;
+
+my $results_string = mcpan->ua->request(
+  'POST',
+  mcpan->base_url . 'release/_search?search_type=scan&scroll=30s&size=' . $size,
+  {
+    content => $encoder->encode( $search ),
+  }
+);
+
+say $results_string->{content};
+
+my $results = $decoder->decode( $results_string->{content} );
+my $scroll_id = $results->{_scroll_id};
+
+my $total_results = $results->{hits}->{total};
+
+say "Found: $total_results releases";
+
+
+my $dtree;
+my $seen = 0;
+
+while( 1 ) { 
+  my ( $result, $scroll ) = scroll( $scroll_id );
+  last unless scalar @{$result->{hits}->{hits}};
+  collate_resultset( $result );
+  $scroll_id = $scroll;
+  say "Seen $seen of $total_results";
+}
+
+for my $package ( sort keys %{$dtree} ) {
+  say "Sorting $package";
+  $dtree->{$package} = [ sort { $b->{date} cmp $a->{date} } @{ $dtree->{$package} } ];
+}
+
+my $fh = $metadata->file('distinfo.json')->openw;
+$fh->print( $encoder->encode( $dtree ));
+
+exit 0;
+
+sub scroll {
+  my ( $id ) = @_ ;
+  my $result = mcpan->ua->request(
+    'GET',
+    'http://api.metacpan.org/_search/scroll/?scroll=30s&size=' . $size . '&scroll_id=' . $id
+  );
+
+  my $data = $decoder->decode( $result->{content} );
+  return $data, $data->{_scroll_id};
+}
+
+sub collate_resultset {
+  my ( $results ) = @_;
+  for my $result ( @{ $results->{hits}->{hits} } ) {
+    if ( not $result->{fields} ) {
+      $result->{fields} = $result->{_source};
+    }
+    delete $result->{fields}->{dependency} if exists $result->{fields}->{dependency};
+    my $fields = $result->{fields};
+
+    my $cversion = $fields->{name};
+    my $cdistrib = $fields->{distribution};
+    $cversion =~ s/^${cdistrib}-//;
+    $seen++;
+    $fields->{version_canon}  = $cversion;
+    $fields->{version_gentoo} = scalar try { gv( $cversion, { lax => 1 } ) };
+    $fields->{archive_canon}  = $fields->{author} . '/' . $fields->{archive};
+    #say $fields->{author} . '/' . $fields->{archive};
+    $dtree->{$cdistrib} = [] unless exists $dtree->{$cdistrib};
+    push @{ $dtree->{$cdistrib} }, $fields;
+  }
+}
+
+# Utils
+
+sub gv { require Gentoo::PerlMod::Version; goto \&Gentoo::PerlMod::Version::gentooize_version }
+
+sub help {
+  return <<"EOF";
+package_map_all.pl
+
+USAGE:
+
+  package_map_all.pl [--help]
+
+  ie:
+
+  package_map_all.pl
+
+    --help        Show this message
+
+EOF
+
+}
+



             reply	other threads:[~2012-04-08 23:12 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-08 23:11 Kent Fredric [this message]
  -- strict thread matches above, loose matches on Subject: below --
2013-05-01 22:23 [gentoo-commits] proj/perl-overlay:master commit in: scripts/, scripts/lib/ Kent Fredric
2012-04-08 23:12 Kent Fredric
2012-04-06 20:43 Kent Fredric
2012-02-25 22:14 Kent Fredric
2012-02-24  7:13 Kent Fredric
2012-01-06 16:38 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31  2:48 Kent Fredric

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1333926029.cf746e3c3f2fe8dca044012cca603464688cd289.kent@gentoo \
    --to=kentfredric@gmail.com \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox