From: "Torsten Veller" <tove@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/perl-overlay:master commit in: scripts/
Date: Thu, 12 Jul 2012 19:23:26 +0000 (UTC) [thread overview]
Message-ID: <1342120882.0585507945741fe6a1a0f347154cf69c9b9235cf.tove@gentoo> (raw)
commit: 0585507945741fe6a1a0f347154cf69c9b9235cf
Author: Torsten Veller <tove <AT> gentoo <DOT> org>
AuthorDate: Thu Jul 12 19:21:22 2012 +0000
Commit: Torsten Veller <tove <AT> gentoo <DOT> org>
CommitDate: Thu Jul 12 19:21:22 2012 +0000
URL: http://git.overlays.gentoo.org/gitweb/?p=proj/perl-overlay.git;a=commit;h=05855079
Add script for metacpan metadata updates
---
scripts/metadata-cpan-update.pl | 230 +++++++++++++++++++++++++++++++++++++++
1 files changed, 230 insertions(+), 0 deletions(-)
diff --git a/scripts/metadata-cpan-update.pl b/scripts/metadata-cpan-update.pl
new file mode 100755
index 0000000..a4a5654
--- /dev/null
+++ b/scripts/metadata-cpan-update.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/env perl
+
+eval 'echo "Called with something not perl"' && exit 1 # Non-Perl protection.
+ if 0;
+
+use strict;
+use warnings;
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use XML::Smart;
+use File::Slurp;
+use Data::Dumper;
+use CHI;
+use WWW::Mechanize::Cached;
+use HTTP::Tiny::Mech;
+use MetaCPAN::API;
+use Gentoo::Ebuild::ParseVariables qw(gentoo_ebuild_var);
+use CPAN::DistnameInfo;
+use Path::Class;
+use PortageXS;
+
+use metacpan qw( mcpan );
+my $mcpan = mcpan;
+#my $mcpan = MetaCPAN::API->new(
+# ua => HTTP::Tiny::Mech->new(
+# mechua => WWW::Mechanize::Cached->new(
+# cache => CHI->new(
+# driver => 'File',
+# root_dir => '/tmp/metacpan-cache',
+# ),
+# ),
+# ),
+#);
+
+#my $portdir = '/var/gentoo/portage';
+my $portdir = PortageXS->new->getPortdir();
+my $dtd = $portdir . "/metadata/dtd/metadata.dtd";
+
+while (@ARGV) {
+ my $md = shift @ARGV;
+ #print "$md\n";
+ my $dist;
+ my %cpan_modules;
+ my $metadata_old = read_file($md, binmode => ':utf8');
+ my $indent_level = indent($metadata_old);
+ my $metadata_new;
+ my $XML = XML::Smart->new($metadata_old);
+
+ #$XML->apply_dtd($dtd);
+ if ( check_remote_id($XML)
+ and not $XML->{pkgmetadata}->{upstream}
+ ->{'remote-id'}( 'type', 'eq', 'cpan' )->null() )
+ {
+ $dist =
+ $XML->{pkgmetadata}->{upstream}
+ ->{'remote-id'}( 'type', 'eq', 'cpan' )->content();
+ }
+ my $dist_src_uri = distname($md);
+ my $cpan_dist_failure;
+ if ( defined $dist and $dist ne '' and $dist ne $dist_src_uri ) {
+ print "Dist is wrong!\n";
+ print "'$dist' vs '$dist_src_uri'\n";
+ $cpan_dist_failure = 1;
+ $dist = $dist_src_uri;
+ } elsif ( not defined $dist ) {
+ $dist = $dist_src_uri;
+ }
+
+ my $result = $mcpan->post(
+ 'module/_search',
+ {
+ "fields" => [ "module.name", "release" ],
+ "query" => {
+ "constant_score" => {
+ "filter" => {
+ "and" => [
+ { "term" => { "distribution" => "$dist" } },
+ { "term" => { "status" => "latest" } },
+ { "term" => { "mime" => "text/x-script.perl-module" } },
+ { "term" => { "indexed" => "true" } },
+ { "term" => { "module.authorized" => "true" } }
+ ]
+ }
+ }
+ },
+ "size" => 990
+ }
+ );
+
+ return unless $result->{'hits'}->{'hits'};
+ for my $file ( @{ $result->{'hits'}->{'hits'} } ) {
+ if ( ref $file->{'fields'}->{'module.name'} eq 'ARRAY' ) {
+ for my $module ( @{ $file->{'fields'}->{'module.name'} } ) {
+ $cpan_modules{$module} += 2;
+ }
+ }
+ else {
+ $cpan_modules{ $file->{'fields'}->{'module.name'} } += 2;
+ }
+ }
+ drop_former_modules($XML, \%cpan_modules, $cpan_dist_failure);
+
+ if ( defined $dist and $dist ne '' ) {
+ if ( $XML->{pkgmetadata}->{upstream}
+ ->{'remote-id'}( 'type', 'eq', 'cpan' )->null() )
+ {
+ push @{ $XML->{pkgmetadata}->{upstream}->{"remote-id"} },
+ { type => 'cpan', content => "$dist" };
+ }
+ else {
+ if ( $XML->{pkgmetadata}->{upstream}
+ ->{'remote-id'}( 'type', 'eq', 'cpan' )->content() ne $dist )
+ {
+ push @{ $XML->{pkgmetadata}->{upstream}->{"remote-id"} },
+ { type => 'cpan', content => "$dist" };
+ }
+ }
+ }
+
+ for my $module ( sort keys %cpan_modules ) {
+
+ print "Removed : $module\n" if $cpan_modules{$module} == 1;
+ print "Added : $module\n" if $cpan_modules{$module} == 2;
+
+ push(
+ @{ $XML->{pkgmetadata}->{upstream}->{'remote-id'} },
+ { type => 'cpan-module', content => "$module" }
+ ) if $cpan_modules{$module} >= 2;
+ }
+ $metadata_new = '<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+';
+ $metadata_new .= $XML->data( nometagen => 1, nodtd => 1, noheader => 1 );
+ $metadata_new =~ s/\n\Z//sm;
+
+ my $metadata_newnew;
+ for my $line ( split /\n/, $metadata_new ) {
+ if ( $line =~ m,^[ ]{4}(?=\<), ) {
+ $line =~ s,^[ ]{4}(?=\<),$indent_level->{'second'},mseg;
+ } elsif ( $line =~ m,^[ ]{2}(?=\<), ) {
+ $line =~ s,^[ ]{2}(?=\<),$indent_level->{'first'},mseg;
+ }
+ $metadata_newnew .= $line . "\n";
+ }
+
+ write_file( "$md.new",{binmode => ':utf8'}, $metadata_newnew );
+ system("diff -ur $md $md.new");
+ rename "$md.new", "$md" or die "Can't rename $md.new: $!\n";
+
+}
+
+sub indent {
+ my $metadata = shift;
+ my %tags;
+ foreach my $tag (qw( herd maintainer longdescription use upstream )) {
+ $tags{"first"} = $1 if $metadata =~ m,.*?^([ \t]*)\<$tag,ms;
+ }
+ foreach my $tag (qw( remote-id name email )) {
+ $tags{"second"} = $1 if $metadata =~ m,.*?^([ \t]*)\<$tag,ms;
+ }
+ $tags{"second"} = "$tags{'first'}$tags{'first'}"
+ if not exists $tags{"second"};
+ return \%tags;
+}
+
+sub distname {
+ my $fd = shift;
+ $fd =~ s,metadata.xml,*.ebuild,;
+ my @ebuilds = glob "$fd";
+# @ebuilds = reverse @ebuilds;
+ foreach my $ebuild ( reverse @ebuilds) {
+ $ebuild = file($ebuild)->absolute;
+ my $ebuild_hash = gentoo_ebuild_var(
+ "$ebuild",
+ #[qw( MY_PN SRC_URI MY_PV MODULE_VERSION MODULE_A )],
+ [qw( SRC_URI )],
+ #file($ebuild)->absolute->dir->parent->parent
+ $portdir
+ );
+ next unless $ebuild_hash->{'SRC_URI'};
+ my @src_uri = split /\s/, $ebuild_hash->{'SRC_URI'};
+ foreach my $uri (@src_uri) {
+
+ next unless $uri =~ m,authors/id,;
+ return CPAN::DistnameInfo->new("$uri")->dist();
+ }
+ print Dumper $ebuild_hash;
+ }
+}
+
+sub check_remote_id {
+ my $xml = shift;
+ if ( $xml->{pkgmetadata}->null() ) {
+ print "metadata.xml: pkgmetadata does not exist\n";
+ return;
+ }
+ if ( $xml->{pkgmetadata}->{upstream}->null() ) {
+ print "metadata.xml: upstream does not exist\n";
+ return;
+ }
+ if ( $xml->{pkgmetadata}->{upstream}->{'remote-id'}->null() ) {
+ print "metadata.xml: remote-id does not exist\n";
+ return;
+ }
+ return 1;
+}
+
+sub drop_former_modules {
+ my $xml = shift;
+ my $cpan_modules = shift;
+ my $cpan_dist_failure = shift;
+ return unless check_remote_id($xml);
+ for my $remote ( @{ $xml->{pkgmetadata}->{upstream}->{'remote-id'} } ) {
+
+ # print "Remote: $remote\n";
+ next unless exists $remote->{type};
+ if ( $remote->{type} eq 'cpan-module' ) {
+ $cpan_modules->{ $remote->content() } += 1;
+ undef $remote;
+ }
+ elsif ( $remote->{type} eq 'cpan' ) {
+ if ($cpan_dist_failure) {
+ print "undef cpan\n";
+
+ undef $remote;
+ undef $xml->{pkgmetadata}->{upstream};
+ }
+ }
+ }
+}
next reply other threads:[~2012-07-12 19:23 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-07-12 19:23 Torsten Veller [this message]
-- strict thread matches above, loose matches on Subject: below --
2017-09-16 22:36 [gentoo-commits] proj/perl-overlay:master commit in: scripts/ Kent Fredric
2015-02-28 23:17 Kent Fredric
2015-02-28 23:17 Kent Fredric
2013-12-23 15:28 Kent Fredric
2013-05-01 23:03 Kent Fredric
2013-05-01 23:03 Kent Fredric
2012-10-24 15:49 Kent Fredric
2012-09-15 23:19 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-08-02 11:46 Kent Fredric
2012-07-31 3:04 Kent Fredric
2012-06-22 7:34 Kent Fredric
2012-06-08 17:14 Kent Fredric
2012-05-27 2:30 Kent Fredric
2012-04-28 10:40 Kent Fredric
2012-04-18 3:32 Kent Fredric
2012-04-18 3:32 Kent Fredric
2012-04-18 3:32 Kent Fredric
2012-04-12 19:46 Kent Fredric
2012-04-09 16:05 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-08 13:20 Kent Fredric
2012-04-05 10:02 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-27 1:26 Kent Fredric
2012-03-01 11:38 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:22 Kent Fredric
2012-02-29 12:06 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-28 21:55 Kent Fredric
2012-02-24 7:13 Kent Fredric
2012-02-24 7:13 Kent Fredric
2012-02-12 7:22 Kent Fredric
2012-02-12 7:22 Kent Fredric
2011-12-05 21:45 Kent Fredric
2011-11-14 2:57 Kent Fredric
2011-11-14 2:57 Kent Fredric
2011-11-11 14:38 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 18:05 Kent Fredric
2011-10-31 8:46 Kent Fredric
2011-10-31 7:10 Kent Fredric
2011-10-31 4:52 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-31 2:48 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-25 19:46 Kent Fredric
2011-10-24 21:17 Kent Fredric
2011-10-24 18:26 Kent Fredric
2011-10-24 9:09 Kent Fredric
2011-09-23 6:17 Kent Fredric
2011-08-29 5:44 Kent Fredric
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1342120882.0585507945741fe6a1a0f347154cf69c9b9235cf.tove@gentoo \
--to=tove@gentoo.org \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox