Mini mirror de CPAN

Juan Jose Natera Abreu juanjose at lunarpages.com
Fri Apr 18 17:05:11 CDT 2003


Hola Gente,

Recuerdo que hace un tiempo, Cristobal necesitaba uno de estos, pero el Sr. 
Randal Schwartz no nos habia iluminado en esa epoca:

http://www.stonehenge.com/merlyn/LinuxMag/col42.html

Lo mejor del script es que solo se baja las versiones mas recientes de cada 
modulo asi que pueden tener su mini mirror de menos de 300 MB y segun Randal, 
actualizarlo luego solo toma en promedio de 2 a 5 minutos diarios con una 
linea de 28kbps. 

Saludos,

Juan Jose

A continuacion, el mismo script pero con Getopt::Std incluido :)

#!/usr/bin/perl -w
use strict;
$|++;
 
### CONFIG
use Getopt::Std;
my %options;
getopts('m:l:d', \%options);

my $REMOTE = $options{'m'} || "http://cpan.valueclick.com/";

## warning: unknown files below this dir are deleted!
my $LOCAL = $options{'l'} || "/usr/local/cpan/";

my $TRACE = $options{'d'} || 0;

### END CONFIG

## core -
use File::Path qw(mkpath);
use File::Basename qw(dirname);
use File::Spec::Functions qw(catfile);
use File::Find qw(find);

## LWP -
use URI ();
use LWP::Simple qw(mirror RC_OK RC_NOT_MODIFIED);

## Compress::Zlib -
use Compress::Zlib qw(gzopen $gzerrno);

## Read the options -


## first, get index files
my_mirror($_) for qw(
                     authors/01mailrc.txt.gz
                     modules/02packages.details.txt.gz
                     modules/03modlist.data.gz
                    );

## now walk the packages list
my $details = catfile($LOCAL, qw(modules 02packages.details.txt.gz));
my $gz = gzopen($details, "rb") or die "Cannot open details: $gzerrno";
my $inheader = 1;
while ($gz->gzreadline($_) > 0) {
  if ($inheader) {
    $inheader = 0 unless /\S/;
    next;
  }
  my ($module, $version, $path) = split;
  next if $path =~ m{/perl-5};  # skip Perl distributions
  my_mirror("authors/id/$path", 1);
}

## finally, clean the files we didn't stick there
clean_unmirrored();

exit 0;

BEGIN {
  ## %mirrored tracks the already done, keyed by filename
  ## 1 = local-checked, 2 = remote-mirrored
  my %mirrored;

  sub my_mirror {
    my $path = shift;           # partial URL
    my $skip_if_present = shift; # true/false

    my $remote_uri = URI->new_abs($path, $REMOTE)->as_string; # full URL
    my $local_file = catfile($LOCAL, split "/", $path); # native absolute file
    my $checksum_might_be_up_to_date = 1;

    if ($skip_if_present and -f $local_file) {
      ## upgrade to checked if not already
      $mirrored{$local_file} = 1 unless $mirrored{$local_file};
    } elsif (($mirrored{$local_file} || 0) < 2) {
      ## upgrade to full mirror
      $mirrored{$local_file} = 2;

      mkpath(dirname($local_file), $TRACE, 0711);
      print $path if $TRACE;
      my $status = mirror($remote_uri, $local_file);

      if ($status == RC_OK) {
        $checksum_might_be_up_to_date = 0;
        print " ... updated\n" if $TRACE;
      } elsif ($status != RC_NOT_MODIFIED) {
        warn "\n$remote_uri: $status\n";
        return;
      } else {
        print " ... up to date\n" if $TRACE;
      }
    }

    if ($path =~ m{^authors/id}) { # maybe fetch CHECKSUMS
      my $checksum_path =
        URI->new_abs("CHECKSUMS", $remote_uri)->rel($REMOTE);
      if ($path ne $checksum_path) {
        my_mirror($checksum_path, $checksum_might_be_up_to_date);
      }
   }
 }
 
 sub clean_unmirrored {
   find sub {
     return unless -f and not $mirrored{$File::Find::name};
     print "$File::Find::name ... removed\n" if $TRACE;
     unlink $_ or warn "Cannot remove $File::Find::name: $!";
   }, $LOCAL;
 }
}
------------------------------------------------------------------------
Enviar e-mail a <majordomo at pm.org> colocando en el cuerpo:
"UNSUBSCRIBE caracas-pm-list" para desuscribirse.
"INFO caracas-pm-list" para conocer las reglas de etiqueta.
------------------------------------------------------------------------



More information about the caracas-pm mailing list