#!/usr/bin/perl -l
#
# This script retrieves book information from Google Books
#
# (c) 2009 by Miek Gieben <miek@miek.nl>
# GPL licensed

use LWP::UserAgent;
use HTML::Scrubber;
use warnings;
use strict;

# The search term (an ISBN) is taken from the first command-line argument.
die "Need an ISBN number" if !$ARGV[0];

# Only <div> tags are allowed through the scrubber, so the scrubbed page can
# be split into chunks on the div markers below.
my $scrub = HTML::Scrubber->new( allow => [ qw/div/ ] );
# Requests are sent with a Links browser user agent string.
my $ua = LWP::UserAgent->new( agent => 
	"Links (2.2; Linux 2.6.29-02062904-generic i686; 100x40" );

my $resp = $ua->get(
	"http://books.google.com/books?q=$ARGV[0]"
	);

# Stop with a diagnostic when the request failed, instead of scraping an
# error body as if it were a results page.
die "Could not fetch book information: " . $resp->status_line
    if !$resp->is_success;

# Break the scrubbed page into the pieces between <div> and </div> markers.
my @divs = split /<.?div>/, $scrub->scrub($resp->content);

#print "@divs";

# Scan the chunks that lie between the "cover view" and "about this book"
# markers.  A non-empty chunk is taken as the details line when it follows a
# run of more than five empty chunks (5 seems to work ok); the last such
# chunk wins.
my $details;
my $blank_run = 0;
for my $chunk (@divs) {
    # The flip-flop turns on at the first marker and off after the second.
    next unless ( ($chunk =~ /cover view/i) .. ($chunk =~ /about this book/i) );

    if (length($chunk) == 0) {
	$blank_run++;
	next;
    }

    # after some whitespace we get to the goodies
    $details = $chunk if $blank_run > 5;
    $blank_run = 0;
}
# Nothing usable was found: finish without printing anything.
exit unless $details;

# Split the details line into the title and the remainder.  The limit of 2
# keeps the remainder intact when it contains further ';' characters (for
# example from an HTML entity), so later fields are not silently dropped.
my ($title, $rest) = split /;/, $details, 2;
$title = '' if !defined $title;
$rest  = '' if !defined $rest;

# The remainder is a ' - ' separated list: author, genre, year.
my ($author, $genre, $year) = split / - /, $rest;
$author = '' if !defined $author;

# Drop the entity fragment left at the end of the title by the ';' split,
# the leading "By " and anything after the first comma in the author field.
$title =~ s/&#....$//;
$author =~ s/By //i;
$author =~ s/,.*$//;

# we might not have gotten a genre, then genre holds
# the year of publishing
if ($genre && $genre =~ /\d+/) {
    $year = $genre;
    $genre = "none";
}
# A missing genre is treated as "none" rather than lower-casing undef,
# which would emit an "uninitialized value" warning.
$genre = defined $genre ? lc $genre : "none";

# Lower-cased genre labels, grouped under the category each one is
# reported as.
my %labels_for = (
    'none'            => [ 'none' ],
    'misc'            => [ 'misc' ],
    'comics'          => [ 'comics & graphic novels', 'games' ],
    'art'             => [ 'language arts & disciplines', 'art', 'music' ],
    'recreation'      => [ 'sports & recreation' ],
    'cooking'         => [ 'cooking' ],
    'literature'      => [ 'literature' ],
    'science'         => [
        'mathematics',
        'technology',
        'science',
        'political science',
        'social science',
        'business & economics',
        'psychology',
    ],
    'computer'        => [ 'computer viruses', 'computers', 'computer programming' ],
    'fiction'         => [ 'juvenile fiction', 'fiction', 'humor' ],
    'science fiction' => [ 'science fiction' ],
    'fantasy'         => [ 'fantasy' ],
    'encyclopedia'    => [ 'dictionary' ],
    'travel'          => [ 'travel' ],
);

# Flatten the groups into the label => category lookup table.
my %genre;
for my $category (keys %labels_for) {
    my @labels = @{ $labels_for{$category} };
    @genre{@labels} = ($category) x @labels;
}

# Emit one field per print call: title, author, mapped genre, year, and
# finally the raw details line prefixed with '#'.
print $title;
print $author;
# Genres without an entry in the lookup table are reported as "none".
print( $genre{$genre} || "none" );
# A missing year is reported as "0".
print( $year || "0" );
print "#", $details;