#!/usr/bin/perl $^W=1; use strict; no utf8; # vi(1) se: tabstop=4 $ENV{LC_ALL}='C'; my $method='https'; # http or https my $domain='snapshot.debian.org'; my $max_retries=4; # max retries at some possibly transient errors my $retries_sleep=5; # how long we sleep between retries # Read (.deb) file argument(s), # for each, get size and calculate digest, # then try to locate in $method://$domain/ # and report results. my $my_exit=0; # exit value use File::Basename; my $prog=$0; $prog = basename($0); # block size (in bytes) we'll use my $bs=4096; # 4 KiB my $http_status_ok=200; my $fivexx='\A50[034]\z'; # http 5xx status codes we may retry @ARGV or die("usage: $prog file [...]\n"); # We use fstat to avoid race conditions between getting relevant [l]stat # data and open(2). # Perl doesn't itself give us fstat, so we get that via use POSIX. # Likewise since we need fd for fstat, we use open and close from POSIX. use POSIX(); use Digest::SHA; my $sha_alg=1; use WWW::Mechanize(); use URL::Encode(); FILE: for my $file (@ARGV){ my $f=basename($file); my $ignq='[!#-~]'; # RE for IsGraph but Not Quote (") my $ignu='[!-^`-~]'; # RE for IsGraph but Not Underscore my $archre='(?:all|amd64|i386)'; # all + dpkg-architecture -L for full list $f =~ / \A ($ignu+) _ ($ignu+) # see also: deb-version(7) (?: _ ($archre) )? # Some (old) debian package files may lack architeture in file name. \.deb \z /x ; my ($pkgn,$pkgvr,$arch)=($1,$2,$3); my $uarch; if(defined($arch)){ $uarch= '_' . $arch; }else{ $uarch= ''; }; if(!defined($pkgn)){ warn "$prog: bad file name: $file ($f), expecting *?.deb of" . " possible name, version, and generally known architecture, skipping\n"; $my_exit=1; next; }; my $pkgve=$pkgvr; $pkgve =~ s/%3a/:/g; # "cooked" package version to handle epoch # See also: deb-version(7) # Note despite deb-version(7) epoch may be multiple digits e.g.: # fonts-sil-gentium_20081126:1.03-1_all.deb my $pkgvc; $pkgve =~ /\A((?:\d+:)?)(.+)\z/; ($pkgve,$pkgvc)=($1,$2); my $fd=POSIX::open($file,&POSIX::O_RDONLY); if(!defined($fd)){ warn "$prog: open($file) failed: $!, skipping\n"; $my_exit=1; next; }; my $size=(POSIX::fstat($fd))[7]; if(!defined($size)){ warn "$prog: fstat(open($file)) failed: $!, skipping\n"; $my_exit=1; defined(POSIX::close($fd)) or warn "$prog: close(open($file)) failed: $!, skipping\n"; next; }; my $digest=Digest::SHA->new($sha_alg); while(1){ my $bytes=POSIX::read($fd,$_,$bs); if(!defined($bytes) or $bytes<0 or !($bytes==0 or $bytes>=1)){ # error (known or unknown?) warn "$prog: read(open($file)) failed: $!, skipping\n"; $my_exit=1; defined(POSIX::close($fd)) or warn "$prog: close(open($file)) failed: $!, skipping\n"; next FILE; }elsif($bytes>=1){ $digest->add($_); }else{ # $bytes==0 (EOF) if(!defined(POSIX::close($fd))){ warn "$prog: close(open($file)) failed: $!, skipping\n"; $my_exit=1; }; last; # calculated $digest, fall through }; }; $digest=$digest->hexdigest; $pkgn =~ /\A([^_]+)\z/; my $mech=WWW::Mechanize->new(); my $url= "$method://$domain/" . 'binary/?bin=' . URL::Encode::url_encode($pkgn); $mech->autocheck(0); my $status; my $retries=$max_retries; while(1){ if(!defined($mech->get($url))){ warn "$prog: !defined(\$mech->get(\$url)), skipping $file\n"; $my_exit=1; next FILE; }elsif(!defined($status=$mech->status())){ warn "$prog: !defined(\$mech->status())\n, skipping $file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); next; }; warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; if(!defined($mech->follow_link(text_regex => qr/\A\Q$pkgve$pkgvc (\Esource: /))){ # Try with (other) possible epoch, # may have been updated/added later. if(!defined($mech->follow_link(text_regex => qr/\A\d+:\Q$pkgvc (\Esource: /))){ warn "$prog: failed to find/follow link for $file, skipping\n"; $my_exit=1; next; }; }; my $retries=$max_retries; while(1){ if(!defined($status=$mech->status())){ warn "$prog: !defined(\$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); $mech->reload(); next; }; warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; my $s='[\t\n\r ]'; # RE for my more limited whitespace characters my $uri; my $re_before_uri= '\A.*?' . $digest . ':[\t\n\r ]*?
' . $s . '*?
' . $s . '*?
'; my $re_more_after_uri= ' \.deb
' . $s . '*?
' . $s . '*? Seen\ in\ [-a-z]+\ on\ \d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2}\ in' . $s . '*? ' . $ignq . '+\.' . $s . '*?' . $s . '*? Size:\ ' . $size . '(?:\D|\z) ' ; if($uarch){ $mech->content() =~ qr! $re_before_uri ([^"]+) $re_right_after_uri \Q$pkgn\E_\Q$pkgvc$uarch\E $re_more_after_uri !msx ; $uri=$1; }else{ $mech->content() =~ qr! $re_before_uri ([^"]+) $re_right_after_uri \Q$pkgn\E_\Q$pkgvc\E(?:_$archre)? $re_more_after_uri !msx ; $uri=$1; }; if(!defined($uri)){ warn "$prog: failed to find/follow link for $file, skipping\n"; $my_exit=1; next; }; if(not $uri =~ m!\A/.+!){ warn "$prog: unexpected link: $uri for $file, skipping\n"; $my_exit=1; next; }; # $uri is bare absolute path, add method and domain: $url="$method://$domain$uri"; my $retries=$max_retries; while(1){ if(!defined($mech->head($url))){ warn "$prog: !defined(\$mech->head(\$url)), skipping $file\n"; $my_exit=1; next FILE; }elsif(!defined($status=$mech->status())){ warn "$prog: !defined(\$mech->status())\n, skipping $file\n"; $my_exit=1; next FILE; }elsif($status =~ /$fivexx/){ # this error may be transient, so we may retry if($retries-- > 0){ sleep($retries_sleep); next; }; warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }elsif($status != $http_status_ok){ warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n"; $my_exit=1; next FILE; }; last; }; # HEAD OK, give file and URL. # Note that etag may not be digest based, # so we can't use that to compare to our calculated digest print "$file $url\n"; }; exit($my_exit);