#!/usr/bin/perl

# Print patent from IBM server

# Usage: prpat [ --scale <scale-factor> ] <patnum>
# scale factor defaults to 0.5

# http://www.delphion.com/gifcache/US05325479__.tif.1.s0.35.r0.gif

# http://www.delphion.com/fcgi-bin/any2html?FILENAME=%2Fcache%2F79%2F54%2FUS05325479__.tif&PAGE=13&USER_HTML=%253CA%2BHREF%253D%2Forder%253Fpn%3Dus05325479__%253EOrderPatent%253C%2FA%253E&SCALE=0.35

# http://www.delphion.com/fcgi-bin/any2html?FILENAME=%2Fcache%2F79%2F54%2FUS05325479__.tif&PAGE=1&USER_HTML=%253CA%2BHREF%253D%2Forder%253Fpn%3Dus05325479__%253EOrderPatent%253C%2FA%253E&SCALE=1.00

# http://www.delphion.com/cgi-bin/viewpat.cmd/US05325479__

$tmpdir = '/tmp';

sub get_url {
    my ($url, $fn) = @_;

    if (-s $fn) {
	# in cache, don't retrieve
	return 0;
    }
    pdb ("getting $url\n");
    $qurl = shell_quote ($url);
    return system "lynx -source $qurl > $fn";
}

sub shell_quote {
# $quoted_string = &shell_quote ($raw_string)
    my ($raw) = @_;

    if ($raw eq '') { return '""'; }
    $raw =~ s/(\W)/\\$1/g;
    return $raw;
}

sub pdb {
    print @_;
}

sub prpage {
    my ($patnum, $page, $url) = @_;
    my $imgurl;

    $url =~ s/SCALE=[\d\.]+/SCALE=$scale/;
    $url =~ s/PAGE=\d+/PAGE=$page/;
    my $fn = "$tmpdir/pat-$patnum-page$page.html";
    get_url ($url, $fn);
    open PAGE, $fn;
    while (<PAGE>) {
	if (!$imgurl && /\<IMG SRC=\"(\/gifcache[^\"]+)\"/) {
	    $imgurl = $1;
	    if ($imgurl =~ /^\//) {
		$imgurl = "http://www.delphion.com$imgurl";
	    }
	}
    }
    close PAGE;
    if ($imgurl) {
	my $imgfn = "$tmpdir/pat-$patnum-page$page.gif";
	get_url ($imgurl, $imgfn);
	my $s = 0.25 / $scale;
	system "giftopnm $imgfn | pnmcrop | pnmtops -scale $s -noturn | lpr";
    }
}

sub prpat {
    my ($patnum) = @_;
    my $npages;
    my $baseurl;

    $patnum =~ s/,//g;
    $patnum += 0;
    my $url = sprintf ("http://www.delphion.com/cgi-bin/viewpat.cmd/US%08d__", $patnum);
    my $fn = "$tmpdir/pat-$patnum-root.html";
    get_url ($url, $fn);
    open ROOT, $fn;
    while (<ROOT>) {
	if (!$npages && /ALT=\"1\/(\d+) /) {
	    $npages = $1;
	    pdb ("$npages pages\n");
	} elsif (!$baseurl && /\<A HREF=\"(\/[^\"]+)\"/) {
	    # The above regex is designed to math the /fcgi-bin urls
	    # for the patent pages, but reject the ad.doubleclick.net
	    # ad banners that IBM is now running.
	    $baseurl = $1;
	    if ($baseurl =~ /^\//) {
		$baseurl = "http://www.delphion.com$baseurl";
	    }
	}
    }
    close ROOT;
    if ($npages) {
	for my $i (1..$npages) {
	    if (!(($select eq 'even' && $i & 1) ||
		  ($select eq 'odd' && !($i & 1)))) {
		prpage ($patnum, $i, $baseurl);
	    }
	}
    }
}

sub usage {
    print "Usage: prpat pat #";
}

@args = ();

while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg =~ /^--scale$/) {
	$scale = sprintf ("%.2f", shift);
	print "$scale\n";
    } elsif ($arg eq '-e') {
	$select = 'even';
    } elsif ($arg eq '-o') {
	$select = 'odd';
    } else {
	push @args, $arg;
    }
}

if ($scale < 0.10) {
    $scale = '0.50';
}

if (@args) {
    prpat ($args[0]);
} else {
    usage ();
}
