#!/usr/bin/perl
$^W=1;
use strict;
no utf8;
# vi(1) se: tabstop=4
$ENV{LC_ALL}='C';
my $method='https'; # http or https
my $domain='snapshot.debian.org';
my $max_retries=4; # max retries at some possibly transient errors
my $retries_sleep=5; # how long we sleep between retries
# Read (.deb) file argument(s),
# for each, get size and calculate digest,
# then try to locate in $method://$domain/
# and report results.
my $my_exit=0; # exit value
use File::Basename;
my $prog=$0;
$prog = basename($0);
# block size (in bytes) we'll use
my $bs=4096; # 4 KiB
my $http_status_ok=200;
my $fivexx='\A50[034]\z'; # http 5xx status codes we may retry
@ARGV or die("usage: $prog file [...]\n");
# We use fstat to avoid race conditions between getting relevant [l]stat
# data and open(2).
# Perl doesn't itself give us fstat, so we get that via use POSIX.
# Likewise since we need fd for fstat, we use open and close from POSIX.
use POSIX();
use Digest::SHA;
my $sha_alg=1;
use WWW::Mechanize();
use URL::Encode();
FILE:
for my $file (@ARGV){
my $f=basename($file);
my $ignq='[!#-~]'; # RE for IsGraph but Not Quote (")
my $ignu='[!-^`-~]'; # RE for IsGraph but Not Underscore
my $archre='(?:all|amd64|i386)'; # all + dpkg-architecture -L for full list
$f =~
/
\A
($ignu+)
_
($ignu+) # see also: deb-version(7)
(?:
_
($archre)
)? # Some (old) debian package files may lack architeture in file name.
\.deb
\z
/x
;
my ($pkgn,$pkgvr,$arch)=($1,$2,$3);
my $uarch;
if(defined($arch)){
$uarch= '_' . $arch;
}else{
$uarch= '';
};
if(!defined($pkgn)){
warn "$prog: bad file name: $file ($f), expecting *?.deb of" .
" possible name, version, and generally known architecture, skipping\n";
$my_exit=1;
next;
};
my $pkgve=$pkgvr;
$pkgve =~ s/%3a/:/g; # "cooked" package version to handle epoch
# See also: deb-version(7)
# Note despite deb-version(7) epoch may be multiple digits e.g.:
# fonts-sil-gentium_20081126:1.03-1_all.deb
my $pkgvc;
$pkgve =~ /\A((?:\d+:)?)(.+)\z/;
($pkgve,$pkgvc)=($1,$2);
my $fd=POSIX::open($file,&POSIX::O_RDONLY);
if(!defined($fd)){
warn "$prog: open($file) failed: $!, skipping\n";
$my_exit=1;
next;
};
my $size=(POSIX::fstat($fd))[7];
if(!defined($size)){
warn "$prog: fstat(open($file)) failed: $!, skipping\n";
$my_exit=1;
defined(POSIX::close($fd)) or
warn "$prog: close(open($file)) failed: $!, skipping\n";
next;
};
my $digest=Digest::SHA->new($sha_alg);
while(1){
my $bytes=POSIX::read($fd,$_,$bs);
if(!defined($bytes) or $bytes<0 or !($bytes==0 or $bytes>=1)){
# error (known or unknown?)
warn "$prog: read(open($file)) failed: $!, skipping\n";
$my_exit=1;
defined(POSIX::close($fd)) or
warn "$prog: close(open($file)) failed: $!, skipping\n";
next FILE;
}elsif($bytes>=1){
$digest->add($_);
}else{
# $bytes==0 (EOF)
if(!defined(POSIX::close($fd))){
warn "$prog: close(open($file)) failed: $!, skipping\n";
$my_exit=1;
};
last; # calculated $digest, fall through
};
};
$digest=$digest->hexdigest;
$pkgn =~ /\A([^_]+)\z/;
my $mech=WWW::Mechanize->new();
my $url=
"$method://$domain/" .
'binary/?bin=' .
URL::Encode::url_encode($pkgn);
$mech->autocheck(0);
my $status;
my $retries=$max_retries;
while(1){
if(!defined($mech->get($url))){
warn "$prog: !defined(\$mech->get(\$url)), skipping $file\n";
$my_exit=1;
next FILE;
}elsif(!defined($status=$mech->status())){
warn "$prog: !defined(\$mech->status())\n, skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status =~ /$fivexx/){
# this error may be transient, so we may retry
if($retries-- > 0){
sleep($retries_sleep);
next;
};
warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status != $http_status_ok){
warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n";
$my_exit=1;
next FILE;
};
last;
};
if(!defined($mech->follow_link(text_regex => qr/\A\Q$pkgve$pkgvc (\Esource: /))){
# Try with (other) possible epoch,
# may have been updated/added later.
if(!defined($mech->follow_link(text_regex => qr/\A\d+:\Q$pkgvc (\Esource: /))){
warn "$prog: failed to find/follow link for $file, skipping\n";
$my_exit=1;
next;
};
};
my $retries=$max_retries;
while(1){
if(!defined($status=$mech->status())){
warn "$prog: !defined(\$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status =~ /$fivexx/){
# this error may be transient, so we may retry
if($retries-- > 0){
sleep($retries_sleep);
$mech->reload();
next;
};
warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n";
$my_exit=1;
next FILE;
}elsif($status != $http_status_ok){
warn "$prog: \$mech->status() != $http_status_ok (=$status), skipping $file\n";
$my_exit=1;
next FILE;
};
last;
};
my $s='[\t\n\r ]'; # RE for my more limited whitespace characters
my $uri;
my $re_before_uri=
'\A.*?' .
$digest .
'
:[\t\n\r ]*?
';
my $re_more_after_uri=
'
\.deb