From: "Kent Fredric" <kentfredric@gmail.com>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/, scripts/lib/
Date: Sun, 8 Apr 2012 23:11:59 +0000 (UTC) [thread overview]
Message-ID: <1333926029.cf746e3c3f2fe8dca044012cca603464688cd289.kent@gentoo> (raw)
commit: cf746e3c3f2fe8dca044012cca603464688cd289
Author: Kent Fredric <kentfredric <AT> gmail <DOT> com>
AuthorDate: Sun Apr 8 23:00:29 2012 +0000
Commit: Kent Fredric <kentfredric <AT> gmail <DOT> com>
CommitDate: Sun Apr 8 23:00:29 2012 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=cf746e3c
[scripts] add package_map_all which creates a JSON listing of all basic metadata of all available versions of all tracked packages
---
scripts/lib/optparse.pm | 17 ++++-
scripts/package_map_all.pl | 184 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 199 insertions(+), 2 deletions(-)
diff --git a/scripts/lib/optparse.pm b/scripts/lib/optparse.pm
index 296184b..a12ccc9 100644
--- a/scripts/lib/optparse.pm
+++ b/scripts/lib/optparse.pm
@@ -12,8 +12,20 @@ use Moose;
has 'help' => ( isa => 'CodeRef', is => 'rw', required => 1 );
has 'argv' => ( isa => 'ArrayRef', is => 'rw', required => 1 );
-has 'long_opts' => ( isa => 'HashRef', is => 'rw', 'lazy_build' => 1 );
-has 'opts' => ( isa => 'HashRef', is => 'rw', lazy_build => 1 );
+has 'long_opts' => ( isa => 'HashRef', is => 'rw', 'lazy_build' => 1 ,
+ traits => [qw( Hash )],
+ handles => {
+ has_long_opt => 'exists',
+ long_opt => 'get',
+ },
+);
+has 'opts' => ( isa => 'HashRef', is => 'rw', lazy_build => 1,
+ traits => [qw( Hash )],
+ handles => {
+ has_opt => 'exists',
+ opt => 'get',
+ },
+);
has 'extra_opts' => ( isa => 'ArrayRef', is => 'rw', 'lazy_build' => 1 );
sub _build_extra_opts {
@@ -21,6 +33,7 @@ sub _build_extra_opts {
return [ grep { $_ !~ /^--(.+)/ and $_ !~ /^-(\w+)/ } @{ $self->argv } ];
}
+
sub _build_opts {
my $self = shift;
my $hash = {};
diff --git a/scripts/package_map_all.pl b/scripts/package_map_all.pl
new file mode 100755
index 0000000..62a9c5b
--- /dev/null
+++ b/scripts/package_map_all.pl
@@ -0,0 +1,184 @@
+#!/usr/bin/env perl
+
+eval 'echo "Called with something not perl"' && exit 1 # Non-Perl protection.
+ if 0;
+
+use 5.12.2;
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+
+use env::gentoo::perl_experimental;
+use metacpan qw( mcpan );
+use Try::Tiny;
+use utf8;
+use optparse;
+use Path::Class::Dir;
+my $optparse = optparse->new(
+ argv => \@ARGV,
+ help => sub { print help(); },
+);
+
+my $env = env::gentoo::perl_experimental->new();
+my $root = $env->root;
+
+if ( $optparse->has_long_opt('root') ) {
+ $root = Path::Class::Dir->new( $optparse->long_opt('root') );
+}
+
+my $size = 300;
+
+my $metadata = $root->subdir( 'metadata', 'perl' );
+my $distmap = $metadata->subdir('distmap');
+#my $distinfo = $metadata->subdir('distinfo');
+$distinfo->mkpath();
+my (@json_files) = grep { not $_->is_dir and $_->basename =~ /\.json$/ } $distmap->children();
+
+use JSON;
+my $decoder = JSON->new()->utf8->relaxed;
+my $encoder = JSON->new()->pretty->utf8->canonical;
+
+my %lookup;
+
+{
+ for my $file (@json_files) {
+ say "* Reading " . $file->relative;
+ my $hash = $decoder->decode( scalar $file->slurp );
+ say " Found " . ( scalar keys %{$hash} ) . " repositories indexed in " . $file->relative;
+ for my $repo ( keys %{$hash} ) {
+ my $nodes = $hash->{$repo};
+ say " ${repo}: " . ( scalar keys %{$nodes} ) . " distributions";
+ $lookup{$_}++ for keys %{$nodes};
+ }
+ }
+ say "* Found: " . ( scalar keys %lookup ) . " unique distributions";
+ my ( @dup ) = grep { $lookup{$_} > 1 } keys %lookup;
+ if ( @dup > 0 ) {
+ say " " . ( scalar @dup ) ." items listed more than once";
+ say " > $_" for @dup;
+ }
+}
+
+my @dists = keys %lookup;
+
+my $search = {};
+$search->{query} = { constant_score => { filter => { terms => {
+ distribution => [ @dists ]
+} } } };
+$search->{sort} = [ { 'date' => 'desc', }, ];
+$search->{size} = $size;
+$search->{fields} = [
+ qw(
+ abstract
+ archive
+ author
+ authorized
+ date
+ distribution
+ download_url
+ license
+ maturity
+ name
+ status
+ version
+ )
+];
+
+$ENV{WWW_MECH_NOCACHE} = 1;
+
+my $results_string = mcpan->ua->request(
+ 'POST',
+ mcpan->base_url . 'release/_search?search_type=scan&scroll=30s&size=' . $size,
+ {
+ content => $encoder->encode( $search ),
+ }
+);
+
+say $results_string->{content};
+
+my $results = $decoder->decode( $results_string->{content} );
+my $scroll_id = $results->{_scroll_id};
+
+my $total_results = $results->{hits}->{total};
+
+say "Found: $total_results releases";
+
+
+my $dtree;
+my $seen = 0;
+
+while( 1 ) {
+ my ( $result, $scroll ) = scroll( $scroll_id );
+ last unless scalar @{$result->{hits}->{hits}};
+ collate_resultset( $result );
+ $scroll_id = $scroll;
+ say "Seen $seen of $total_results";
+}
+
+for my $package ( sort keys %{$dtree} ) {
+ say "Sorting $package";
+ $dtree->{$package} = [ sort { $b->{date} cmp $a->{date} } @{ $dtree->{$package} } ];
+}
+
+my $fh = $metadata->file('distinfo.json')->openw;
+$fh->print( $encoder->encode( $dtree ));
+
+exit 0;
+
+sub scroll {
+ my ( $id ) = @_ ;
+ my $result = mcpan->ua->request(
+ 'GET',
+ 'http://api.metacpan.org/_search/scroll/?scroll=30s&size=' . $size . '&scroll_id=' . $id
+ );
+
+ my $data = $decoder->decode( $result->{content} );
+ return $data, $data->{_scroll_id};
+}
+
+sub collate_resultset {
+ my ( $results ) = @_;
+ for my $result ( @{ $results->{hits}->{hits} } ) {
+ if ( not $result->{fields} ) {
+ $result->{fields} = $result->{_source};
+ }
+ delete $result->{fields}->{dependency} if exists $result->{fields}->{dependency};
+ my $fields = $result->{fields};
+
+ my $cversion = $fields->{name};
+ my $cdistrib = $fields->{distribution};
+ $cversion =~ s/^${cdistrib}-//;
+ $seen++;
+ $fields->{version_canon} = $cversion;
+ $fields->{version_gentoo} = scalar try { gv( $cversion, { lax => 1 } ) };
+ $fields->{archive_canon} = $fields->{author} . '/' . $fields->{archive};
+ #say $fields->{author} . '/' . $fields->{archive};
+ $dtree->{$cdistrib} = [] unless exists $dtree->{$cdistrib};
+ push @{ $dtree->{$cdistrib} }, $fields;
+ }
+}
+
+# Utils
+
+sub gv { require Gentoo::PerlMod::Version; goto \&Gentoo::PerlMod::Version::gentooize_version }
+
+sub help {
+ return <<"EOF";
+package_map_all.pl
+
+USAGE:
+
+ package_map_all.pl [--help]
+
+ ie:
+
+ package_map_all.pl
+
+ --help Show this message
+
+EOF
+
+}
+
next reply other threads:[~2012-04-08 23:12 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-08 23:11 Kent Fredric [this message]
-- strict thread matches above, loose matches on Subject: below --
2013-05-01 22:23 [gentoo-commits] proj/perl-overlay:master commit in: scripts/, scripts/lib/ Kent Fredric
2012-04-08 23:12 Kent Fredric
2012-04-06 20:43 Kent Fredric
2012-02-25 22:14 Kent Fredric
2012-02-24 7:13 Kent Fredric
2012-01-06 16:38 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31 2:48 Kent Fredric
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1333926029.cf746e3c3f2fe8dca044012cca603464688cd289.kent@gentoo \
--to=kentfredric@gmail.com \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox