Subject: | Add support for companies and movie connections |
Purpose: Add support for movie connections and companies
Attachment: Patch based on 0.32 (might need tweaking due to tab
differences)
Testing: My own simple tests; regularly updates my own collections
to construct automatically organised views on my movies.
Hiya,
For my own collection I wanted to automatically organise my films by
the sequence they occur in. This meant obtaining that sequence from the
'Movie Connections' page on IMDB.
I have extended the 0.32 IMDB::Film class to support this, and
additionally to support the fetching of the company details (simple and
the full list).
There's also a fix for the rating problem that I see has already been
fixed in 0.35. The diff file attached may not apply cleanly due to tab
differences in my editor, but I thought it was useful to submit anyhow.
Hope this helps; let me know if there are any problems.
Subject: | film.diff |
--- BACKUP/Film-032.pm 2009-01-24 12:52:04.000000000 +0000
+++ Film.pm 2009-01-24 12:54:40.000000000 +0000
@@ -78,6 +78,9 @@
_official_sites
_release_dates
_aspect_ratio
+ _company
+ _connections
+ _fullcompanies
full_plot_url
);
@@ -395,6 +398,156 @@
}
+=item connections()
+
+Retrieve connections for the movie -
+{ follows => [ { id => <id>, film => <name>, year => <year> } ],
+ followedby => ditto
+ references => ditto
+ referencedin => ditto
+ featuredin => ditto
+ spoofedby => ditto
+}
+
+ my %connections = %{ $film->connections() };
+
+=cut
+sub connections {
+ my CLASS_NAME $self = shift;
+
+ unless($self->{_connections}) {
+ my $page;
+ $page = $self->_cacheObj->get($self->code . '_connections') if $self->_cache;
+
+ unless($page) {
+ my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/movieconnections";
+ $self->_show_message("URL for movie connections is $url ...", 'DEBUG');
+
+ $page = $self->_get_page_from_internet($url);
+ $self->_cacheObj->set($self->code.'_connections', $page, $self->_cache_exp) if $self->_cache;
+ }
+
+ my $parser = $self->_parser(FORCED, \$page);
+ my $group = undef;
+ my %result;
+ my @lookFor = ('h5');
+ while (my $tag = $parser->get_tag(@lookFor)) {
+ if ($tag->[0] eq 'h5')
+ {
+ $group = $parser->get_text;
+ $group =~ tr/A-Z/a-z/;
+ $group =~ s/[^a-z]//g;
+ $result{$group} = [];
+ @lookFor = ('h5', 'a', 'hr', 'hr/');
+ }
+ elsif ($tag->[0] eq 'a')
+ {
+ my ($id) = $tag->[1]->{href} =~ /(\d+)/;
+ my $name = $parser->get_trimmed_text;
+
+ # Handle series episodes (usually in 'referenced' sections)
+ my ($series,$t,$s,$e) = ($name =~ /^"(.*?): *(.*?) *\(#(\d+)\.(\d+)\)"$/);
+ if (defined $series)
+ {
+ $name = $series;
+ }
+ $tag = $parser->get_tag('/a');
+ my $next = $parser->get_trimmed_text();
+ my %film = ( 'id' => $id,
+ 'film' => $name );
+ if (defined $t)
+ {
+ $film{'title'} = $t;
+ $film{'season'} = $s;
+ $film{'episode'} = $e;
+ }
+ if ($next =~ /\((\d{4})\)/)
+ {
+ $film{'year'} = $1;
+ }
+ next if ($next =~ /\(VG\)/);
+ push @{$result{$group}}, \%film;
+ }
+ else
+ {
+ # Stop when we hit the divider
+ last;
+ }
+ }
+ $self->{_connections} = \%result;
+ }
+
+ return $self->{_connections};
+}
+
+
+=item fullcompanies()
+
+Retrieve companies for the movie -
+{ production => [ { name => <company name>, url => <imdb url>, extra => <specific task> } ],
+ distributors => ditto
+ specialeffects => ditto
+ other => ditto
+}
+
+ my %fullcompanies = %{ $film->fullcompanies() };
+
+=cut
+sub fullcompanies {
+ my CLASS_NAME $self = shift;
+
+ unless($self->{_fullcompanies}) {
+ my $page;
+ $page = $self->_cacheObj->get($self->code . '_fullcompanies') if $self->_cache;
+
+ unless($page) {
+ my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/companycredits";
+ $self->_show_message("URL for company credits is $url ...", 'DEBUG');
+
+ $page = $self->_get_page_from_internet($url);
+ $self->_cacheObj->set($self->code.'_fullcompanies', $page, $self->_cache_exp) if $self->_cache;
+ }
+
+ my $parser = $self->_parser(FORCED, \$page);
+ my $group = undef;
+ my %result;
+ my @lookFor = ('h2');
+ while (my $tag = $parser->get_tag(@lookFor)) {
+ if ($tag->[0] eq 'h2')
+ {
+ $group = $parser->get_text;
+ $group =~ s/ compan(y|ies)//i;
+ $group =~ tr/A-Z/a-z/;
+ $group =~ s/[^a-z]//g;
+ $result{$group} = [];
+ @lookFor = ('h2', 'a', 'hr', 'hr/');
+ }
+ elsif ($tag->[0] eq 'a')
+ {
+ my ($url) = $tag->[1]->{href};
+ my $name = $parser->get_trimmed_text;
+
+ $tag = $parser->get_tag('/a');
+ my $next = $parser->get_trimmed_text();
+ $next =~ s/^[\t \xA0]+//; # nbsp comes out as \xA0
+ my %company = ( 'url' => $url,
+ 'name' => $name,
+ 'extra' => $next );
+ push @{$result{$group}}, \%company;
+ }
+ else
+ {
+ # Stop when we hit the divider
+ last;
+ }
+ }
+ $self->{_fullcompanies} = \%result;
+ }
+
+ return $self->{_fullcompanies};
+}
+
+
=item episodes()
Retrieve episodes info list each element of which is hash reference for tv series -
@@ -703,14 +856,15 @@
if($forced) {
my $parser = $self->_parser(FORCED);
- while(my $tag = $parser->get_tag('b')) {
- last if $parser->get_text =~ /rating/i;
+ while(my $tag = $parser->get_tag('div')) {
+ last if $tag->[1]->{class} &&
+ $tag->[1]->{class} =~ /general rating/;
}
my $tag = $parser->get_tag('b');
my $text = $parser->get_trimmed_text('b', '/a');
- my($rating, $val) = $text =~ m!(\d*\.?\d*)\/.*?\((\d*\,?\d*)\s.*?\)?!;
+ my($rating, $val) = $text =~ m!(\d*\.?\d*)\/.*?(\d+\,?\d*)\svot!;
$val =~ s/\,// if $val;
$self->{_rating} = [$rating, $val];
@@ -946,6 +1100,24 @@
return $self->{_aspect_ratio};
}
+=item company()
+
+Returns an company given for a specified movie:
+
+ my $company = $film->company();
+
+=cut
+sub company {
+ my CLASS_NAME $self = shift;
+
+ return $self->{_company} if defined $self->{_company};
+
+ $self->{_company} = $self->_get_simple_prop('company');
+ $self->{_company} =~ s/ +more *$//;
+
+ return $self->{_company};
+}
+
=item summary()
Retrieve film user summary: