Subject: | [PATCH] add https support |
Add HTTPS support
Subject: | https.diff |
--- Film.pm 2013-01-02 10:28:08.000000000 -0700
+++ Film.pm 2018-07-30 12:14:47.014524133 -0600
@@ -138,7 +138,7 @@
timeout => 10,
user_agent => 'Mozilla/5.0',
decode_html => 1,
- full_plot_url => 'http://www.imdb.com/rg/title-tease/plotsummary/title/tt',
+ full_plot_url => 'https://www.imdb.com/rg/title-tease/plotsummary/title/tt',
_also_known_as => [],
_official_sites => [],
_release_dates => [],
@@ -219,7 +219,7 @@
defines proxy server name and port:
- proxy => 'http://proxy.myhost.com:80'
+ proxy => 'https://proxy.myhost.com:80'
By default object tries to get proxy from environment
@@ -366,7 +366,7 @@
}
if($title) {
- $self->retrieve_code($parser, 'http://www.imdb.com/title/tt(\d+)') unless $self->code;
+ $self->retrieve_code($parser, 'https://www.imdb.com/title/tt(\d+)') unless $self->code;
$title =~ s/\*/\\*/g;
$title = $self->_decode_special_symbols($title);
@@ -378,13 +378,17 @@
unless($self->{_title}) {
($self->{_title}, $self->{_kind}, $self->{_year}) = $title =~ m!(.*?)\s+\((.*?)?\s?([0-9\-]*\s?)\)!;
}
+
+ # Another whack at the year.
+ $self->{_year} = $1 if(!$self->{_year} && $title =~ /\((\d{4})\)/);
+
$self->{_kind} = 'Movie' unless $self->{_kind}; # Default kind should be movie
- # "The Series" An Episode (2005)
+ # "The Series" An Episode (2005)
# "The Series" (2005)
- if( $self->{_title} =~ /\"[^\"]+\"(\s+.+\s+)?/ ) {
- $self->{_kind} = $1 ? 'E' : 'S';
- }
+ if( $self->{_title} =~ /\"[^\"]+\"(\s+.+\s+)?/ ) {
+ $self->{_kind} = $1 ? 'E' : 'S';
+ }
}
}
@@ -444,7 +448,7 @@
$page = $self->_cacheObj->get($self->code . '_connections') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/trivia?tab=mc";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/trivia?tab=mc";
$self->_show_message("URL for movie connections is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -522,7 +526,7 @@
$page = $self->_cacheObj->get($self->code . '_full_companies') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/companycredits";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/companycredits";
$self->_show_message("URL for company credits is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -607,7 +611,7 @@
$page = $self->_cacheObj->get($self->code . '_episodes') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/epcast";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/epcast";
$self->_show_message("URL for episodes is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -1029,16 +1033,16 @@
}
while($tag = $parser->get_tag()) {
last if $tag->[0] eq 'a' && $tag->[1]{href} && $tag->[1]{href} =~ /fullcredits/i;
- if($tag->[0] eq 'td' && $tag->[1]{class} && $tag->[1]{class} eq 'name') {
+ # if($tag->[0] eq 'td' && $tag->[1]{class} && $tag->[1]{class} eq 'name') {
$tag = $parser->get_tag('a');
if($tag->[1]{href} && $tag->[1]{href} =~ m#name/nm(\d+?)/#) {
$person = $parser->get_text;
$id = $1;
my $text = $parser->get_trimmed_text('/tr');
- ($role) = $text =~ /.*?\s+(.*)$/;
- push @cast, {id => $id, name => $person, role => $role};
+ ($role) = $text =~ /\.\.\. (.*)$/;
+ push @cast, {id => $id, name => $person, role => $role} if $person;
}
- }
+ # }
}
$self->{_cast} = \@cast;
@@ -1399,7 +1403,7 @@
}
}
if($self->{'_big_cover_page'}) {
- my $page = $self->_get_page_from_internet('http://' . $self->{'host'} . $self->{'_big_cover_page'});
+ my $page = $self->_get_page_from_internet('https://' . $self->{'host'} . $self->{'_big_cover_page'});
return unless $page;
my $parser = $self->_parser(FORCED, \$page);
@@ -1435,7 +1439,7 @@
$page = $self->_cacheObj->get($self->code . '_sites') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/officialsites";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/officialsites";
$self->_show_message("URL for sites is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -1483,7 +1487,7 @@
$page = $self->_cacheObj->get($self->code . '_dates') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/releaseinfo";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/releaseinfo";
$self->_show_message("URL for sites is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -1538,7 +1542,7 @@
$page = $self->_cacheObj->get($self->code . '_keywords') if $self->_cache;
unless($page) {
- my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/keywords";
+ my $url = "https://". $self->{host} . "/" . $self->{query} . $self->code . "/keywords";
$self->_show_message("URL for sites is $url ...", 'DEBUG');
$page = $self->_get_page_from_internet($url);
@@ -1616,7 +1620,7 @@
=head1 BUGS
Please, send me any found bugs by email: stepanov.michael@gmail.com or create
-a bug report: http://rt.cpan.org/NoAuth/Bugs.html?Dist=IMDB-Film
+a bug report: https://rt.cpan.org/NoAuth/Bugs.html?Dist=IMDB-Film
=head1 SEE ALSO
@@ -1626,7 +1630,7 @@
IMDB::Movie
HTML::TokeParser
-http://videoguide.sf.net
+https://videoguide.sf.net
=head1 AUTHOR
--- BaseClass.pm 2013-01-02 10:39:53.000000000 -0700
+++ BaseClass.pm 2018-07-30 12:26:28.798403737 -0600
@@ -24,6 +24,7 @@
use Text::Unidecode qw(unidecode);
use HTML::Entities;
use Carp;
+use IO::Socket::SSL;
use Data::Dumper;
@@ -406,7 +407,7 @@
} else {
$self->_show_message("Retrieving page from internet ...", 'DEBUG');
- my $url = 'http://'.$self->_host().'/'.
+ my $url = 'https://'.$self->_host().'/'.
($crit =~ /^\d+$/ && length($crit) >= ID_LENGTH ? $self->_query() : $self->_search()) . $crit;
$page = $self->_get_page_from_internet($url);
@@ -431,7 +432,7 @@
$self->_show_message("URL is [$url]...", 'DEBUG');
- my $page = get($url);
+ my $page = _get($url);
unless($page) {
$self->error("Cannot retieve an url: [$url]!");
@@ -441,6 +442,23 @@
return $page;
}
+sub _get {
+ my ($URL) = @_;
+
+ $ENV{'PERL_LWP_SSL_VERIFY_HOSTNAME'} = 0;
+
+ my $ua = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0,
+ SSL_verify_mode => IO::Socket::SSL::SSL_VERIFY_NONE,
+ });
+
+ $ua->agent('Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)'); #'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)');
+
+ my $req = HTTP::Request->new( GET => $URL);
+
+ my $response = $ua->request($req);
+ return $response->content;
+}
+
=item _parser()
Setup HTML::TokeParser and store. To have possibility to inherite that class