public inbox for gentoo-commits@lists.gentoo.org
 help / color / mirror / Atom feed
From: "Torsten Veller" <tove@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/
Date: Thu, 12 Jul 2012 19:23:26 +0000 (UTC)	[thread overview]
Message-ID: <1342120882.0585507945741fe6a1a0f347154cf69c9b9235cf.tove@gentoo> (raw)

commit:     0585507945741fe6a1a0f347154cf69c9b9235cf
Author:     Torsten Veller <tove <AT> gentoo <DOT> org>
AuthorDate: Thu Jul 12 19:21:22 2012 +0000
Commit:     Torsten Veller <tove <AT> gentoo <DOT> org>
CommitDate: Thu Jul 12 19:21:22 2012 +0000
URL:        http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=05855079

Add script for metacpan metadata updates

---
 scripts/metadata-cpan-update.pl |  230 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 230 insertions(+), 0 deletions(-)

diff --git a/scripts/metadata-cpan-update.pl b/scripts/metadata-cpan-update.pl
new file mode 100755
index 0000000..a4a5654
--- /dev/null
+++ b/scripts/metadata-cpan-update.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/env perl
+
+eval 'echo "Called with something not perl"' && exit 1    # Non-Perl protection.
+  if 0;
+
+use strict;
+use warnings;
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use XML::Smart;
+use File::Slurp;
+use Data::Dumper;
+use CHI;
+use WWW::Mechanize::Cached;
+use HTTP::Tiny::Mech;
+use MetaCPAN::API;
+use Gentoo::Ebuild::ParseVariables qw(gentoo_ebuild_var);
+use CPAN::DistnameInfo;
+use Path::Class;
+use PortageXS;
+
+use metacpan qw( mcpan );
+my $mcpan = mcpan;
+#my $mcpan = MetaCPAN::API->new(
+#    ua => HTTP::Tiny::Mech->new(
+#        mechua => WWW::Mechanize::Cached->new(
+#            cache => CHI->new(
+#                driver   => 'File',
+#                root_dir => '/tmp/metacpan-cache',
+#            ),
+#        ),
+#    ),
+#);
+
+#my $portdir = '/var/gentoo/portage';
+my $portdir = PortageXS->new->getPortdir();
+my $dtd     = $portdir . "/metadata/dtd/metadata.dtd";
+
+while (@ARGV) {
+    my $md = shift @ARGV;
+    #print "$md\n";
+    my $dist;
+    my %cpan_modules;
+    my $metadata_old = read_file($md, binmode => ':utf8');
+    my $indent_level = indent($metadata_old);
+    my $metadata_new;
+    my $XML = XML::Smart->new($metadata_old);
+
+    #$XML->apply_dtd($dtd);
+    if ( check_remote_id($XML)
+        and not $XML->{pkgmetadata}->{upstream}
+        ->{'remote-id'}( 'type', 'eq', 'cpan' )->null() )
+    {
+        $dist =
+          $XML->{pkgmetadata}->{upstream}
+          ->{'remote-id'}( 'type', 'eq', 'cpan' )->content();
+    }
+    my $dist_src_uri = distname($md);
+    my $cpan_dist_failure;
+    if ( defined $dist and $dist ne '' and $dist ne $dist_src_uri ) {
+        print "Dist is wrong!\n";
+        print "'$dist' vs '$dist_src_uri'\n";
+        $cpan_dist_failure = 1;
+        $dist              = $dist_src_uri;
+    } elsif ( not defined $dist ) {
+	$dist = $dist_src_uri;
+    }
+
+    my $result = $mcpan->post(
+        'module/_search',
+        {
+            "fields" => [ "module.name", "release" ],
+            "query"  => {
+                "constant_score" => {
+                    "filter" => {
+                        "and" => [
+                            { "term" => { "distribution"      => "$dist" } },
+                            { "term" => { "status"            => "latest" } },
+                            { "term" => { "mime"              => "text/x-script.perl-module" } },
+                            { "term" => { "indexed"           => "true" } },
+                            { "term" => { "module.authorized" => "true" } }
+                        ]
+                    }
+                }
+            },
+            "size" => 990
+        }
+    );
+
+    return unless $result->{'hits'}->{'hits'};
+    for my $file ( @{ $result->{'hits'}->{'hits'} } ) {
+        if ( ref $file->{'fields'}->{'module.name'} eq 'ARRAY' ) {
+            for my $module ( @{ $file->{'fields'}->{'module.name'} } ) {
+                $cpan_modules{$module} += 2;
+            }
+        }
+        else {
+            $cpan_modules{ $file->{'fields'}->{'module.name'} } += 2;
+        }
+    }
+    drop_former_modules($XML, \%cpan_modules, $cpan_dist_failure);
+
+    if ( defined $dist and $dist ne '' ) {
+        if ( $XML->{pkgmetadata}->{upstream}
+            ->{'remote-id'}( 'type', 'eq', 'cpan' )->null() )
+        {
+            push @{ $XML->{pkgmetadata}->{upstream}->{"remote-id"} },
+              { type => 'cpan', content => "$dist" };
+        }
+        else {
+            if ( $XML->{pkgmetadata}->{upstream}
+                ->{'remote-id'}( 'type', 'eq', 'cpan' )->content() ne $dist )
+            {
+                push @{ $XML->{pkgmetadata}->{upstream}->{"remote-id"} },
+                  { type => 'cpan', content => "$dist" };
+            }
+        }
+    }
+
+    for my $module ( sort keys %cpan_modules ) {
+
+        print "Removed : $module\n" if $cpan_modules{$module} == 1;
+        print "Added   : $module\n" if $cpan_modules{$module} == 2;
+
+        push(
+            @{ $XML->{pkgmetadata}->{upstream}->{'remote-id'} },
+            { type => 'cpan-module', content => "$module" }
+        ) if $cpan_modules{$module} >= 2;
+    }
+    $metadata_new = '<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+';
+    $metadata_new .= $XML->data( nometagen => 1, nodtd => 1, noheader => 1 );
+    $metadata_new =~ s/\n\Z//sm;
+
+    my $metadata_newnew;
+    for my $line ( split /\n/, $metadata_new ) {
+	if ( $line =~ m,^[ ]{4}(?=\<), ) {
+	    $line =~ s,^[ ]{4}(?=\<),$indent_level->{'second'},mseg;
+	} elsif ( $line =~ m,^[ ]{2}(?=\<), ) {
+	$line =~ s,^[ ]{2}(?=\<),$indent_level->{'first'},mseg;
+	}
+	$metadata_newnew .= $line . "\n";
+    }
+
+    write_file( "$md.new",{binmode => ':utf8'}, $metadata_newnew );
+    system("diff -ur $md $md.new");
+    rename "$md.new", "$md" or die "Can't rename $md.new: $!\n";
+
+}
+
+sub indent {
+    my $metadata = shift;
+    my %tags;
+    foreach my $tag (qw( herd maintainer longdescription use upstream )) {
+        $tags{"first"} = $1 if $metadata =~ m,.*?^([ \t]*)\<$tag,ms;
+    }
+    foreach my $tag (qw( remote-id name email )) {
+        $tags{"second"} = $1 if $metadata =~ m,.*?^([ \t]*)\<$tag,ms;
+    }
+    $tags{"second"} = "$tags{'first'}$tags{'first'}"
+      if not exists $tags{"second"};
+    return \%tags;
+}
+
+sub distname {
+    my $fd = shift;
+    $fd =~ s,metadata.xml,*.ebuild,;
+    my @ebuilds = glob "$fd";
+#    @ebuilds = reverse @ebuilds;
+    foreach my $ebuild ( reverse @ebuilds) {
+        $ebuild = file($ebuild)->absolute;
+        my $ebuild_hash = gentoo_ebuild_var(
+            "$ebuild",
+	    #[qw( MY_PN SRC_URI MY_PV MODULE_VERSION MODULE_A )],
+            [qw( SRC_URI )],
+	    #file($ebuild)->absolute->dir->parent->parent
+	    $portdir
+        );
+	next unless $ebuild_hash->{'SRC_URI'};
+        my @src_uri = split /\s/, $ebuild_hash->{'SRC_URI'};
+        foreach my $uri (@src_uri) {
+
+            next unless $uri =~ m,authors/id,;
+            return CPAN::DistnameInfo->new("$uri")->dist();
+        }
+	print Dumper $ebuild_hash;
+    }
+}
+
+sub check_remote_id {
+    my $xml = shift;
+    if ( $xml->{pkgmetadata}->null() ) {
+        print "metadata.xml: pkgmetadata does not exist\n";
+        return;
+    }
+    if ( $xml->{pkgmetadata}->{upstream}->null() ) {
+        print "metadata.xml: upstream does not exist\n";
+        return;
+    }
+    if ( $xml->{pkgmetadata}->{upstream}->{'remote-id'}->null() ) {
+        print "metadata.xml: remote-id does not exist\n";
+        return;
+    }
+    return 1;
+}
+
+sub drop_former_modules {
+    my $xml = shift;
+    my $cpan_modules = shift;
+    my $cpan_dist_failure = shift;
+    return unless check_remote_id($xml);
+    for my $remote ( @{ $xml->{pkgmetadata}->{upstream}->{'remote-id'} } ) {
+
+        #     print "Remote: $remote\n";
+        next unless exists $remote->{type};
+        if ( $remote->{type} eq 'cpan-module' ) {
+            $cpan_modules->{ $remote->content() } += 1;
+            undef $remote;
+        }
+        elsif ( $remote->{type} eq 'cpan' ) {
+            if ($cpan_dist_failure) {
+                print "undef cpan\n";
+
+                undef $remote;
+                undef $xml->{pkgmetadata}->{upstream};
+            }
+        }
+    }
+}



             reply	other threads:[~2012-07-12 19:23 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-12 19:23 Torsten Veller [this message]
  -- strict thread matches above, loose matches on Subject: below --
2017-09-16 22:36 [gentoo-commits] proj/perl-overlay:master commit in: scripts/ Kent Fredric
2015-02-28 23:17 Kent Fredric
2015-02-28 23:17 Kent Fredric
2013-12-23 15:28 Kent Fredric
2013-05-01 23:03 Kent Fredric
2013-05-01 23:03 Kent Fredric
2012-10-24 15:49 Kent Fredric
2012-09-15 23:19 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-07-31  3:04 Kent Fredric
2012-06-22  7:34 Kent Fredric
2012-06-08 17:14 Kent Fredric
2012-05-27  2:30 Kent Fredric
2012-04-28 10:40 Kent Fredric
2012-04-18  3:32 Kent Fredric
2012-04-18  3:32 Kent Fredric
2012-04-18  3:32 Kent Fredric
2012-04-12 19:46 Kent Fredric
2012-04-09 16:05 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-05 10:02 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-27  1:26 Kent Fredric
2012-03-01 11:38 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:06 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-24  7:13 Kent Fredric
2012-02-24  7:13 Kent Fredric
2012-02-12  7:22 Kent Fredric
2012-02-12  7:22 Kent Fredric
2011-12-05 21:45 Kent Fredric
2011-11-14  2:57 Kent Fredric
2011-11-14  2:57 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31  8:46 Kent Fredric
2011-10-31  7:10 Kent Fredric
2011-10-31  4:52 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-31  2:48 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-24 21:17 Kent Fredric
2011-10-24 18:26 Kent Fredric
2011-10-24  9:09 Kent Fredric
2011-09-23  6:17 Kent Fredric
2011-08-29  5:44 Kent Fredric

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1342120882.0585507945741fe6a1a0f347154cf69c9b9235cf.tove@gentoo \
    --to=tove@gentoo.org \
    --cc=gentoo-commits@lists.gentoo.org \
    --cc=gentoo-dev@lists.gentoo.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox