The thumb_link and image_link URLs appear to have broken recently, on
both Amazon US and UK. The first URL is still available in the
registerImage call, but the second appears to have disappeared.
Attached are patches which at least grab the first image, tweak the test
case appropriately, and comment out the test for the missing image. That
causes tests to pass for me again.
Note that I'm not sure what semantics are attached to thumb_link vs.
image_link, though - the one we're getting is 300x300, which seems big
for a thumbnail, so I've treated it as image_link. Please correct that
if it's wrong, of course.
I'm not sure whether the other link is still available on that page. It
wasn't obvious that it was, anyway.
Cheers,
Gavin
Subject: | perl-WWW-Scraper-ISBN-Amazon_Driver-0.26-image-fixes.patch |
From 5538eb824fc5aa767fce9f7bd2ee7b38135c30a6 Mon Sep 17 00:00:00 2001
From: Gavin Carr <gavin@openfusion.com.au>
Date: Mon, 19 Sep 2011 21:29:39 +1000
Subject: [PATCH] Adjust image url regexes to new Amazon html, but losing thumb_link.
---
lib/WWW/Scraper/ISBN/AmazonUK_Driver.pm | 3 +--
lib/WWW/Scraper/ISBN/AmazonUS_Driver.pm | 3 +--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/lib/WWW/Scraper/ISBN/AmazonUK_Driver.pm b/lib/WWW/Scraper/ISBN/AmazonUK_Driver.pm
index 7b8661a..df80587 100644
--- a/lib/WWW/Scraper/ISBN/AmazonUK_Driver.pm
+++ b/lib/WWW/Scraper/ISBN/AmazonUK_Driver.pm
@@ -127,8 +127,7 @@ sub search {
$data->{content} =~ s/: Books.*//i;
($data->{title},$data->{author}) = ($data->{content} =~ /\s*(.*?)(?:\s+by|,|:)\s+([^:]+)\s*$/) unless($data->{author});
- ($data->{thumb_link},$data->{image_link})
- = $html =~ m!registerImage\("original_image",\s*"([^"]+)",\s*"<a href="\+'"'\+"([^"]+)"\+!;
+ ($data->{image_link}) = $html =~ m!registerImage\("original_image",\s*"([^"]+)"!;
($data->{publisher},$data->{pubdate}) = ($data->{published} =~ /\s*(.*?)(?:;.*?)?\s+\((.*?)\)/) if($data->{published});
$data->{isbn10} =~ s/[^\dX]+//g if($data->{isbn10});
diff --git a/lib/WWW/Scraper/ISBN/AmazonUS_Driver.pm b/lib/WWW/Scraper/ISBN/AmazonUS_Driver.pm
index 64b9962..da0e827 100644
--- a/lib/WWW/Scraper/ISBN/AmazonUS_Driver.pm
+++ b/lib/WWW/Scraper/ISBN/AmazonUS_Driver.pm
@@ -128,8 +128,7 @@ sub search {
($data->{description}) = $html =~ m!<h3 class="productDescriptionSource">Product Description</h3>\s*<div class="productDescriptionWrapper">\s*<p>([^<]+)!si;
($data->{description}) = $html =~ m!<h3 class="productDescriptionSource">Product Description</h3>\s*<div class="productDescriptionWrapper">\s*([^<]+)!si unless($data->{description});
- ($data->{thumb_link},$data->{image_link})
- = $html =~ m!registerImage\("original_image",\s*"([^"]+)",\s*"<a href="\+'"'\+"([^"]+)"\+!;
+ ($data->{image_link}) = $html =~ m!registerImage\("original_image",\s*"([^"]+)"!;
($data->{publisher},$data->{pubdate}) = ($data->{published} =~ /\s*(.*?)(?:;.*?)?\s+\((.*?)\)/) if($data->{published});
$data->{isbn10} =~ s/[^\dX]+//g if($data->{isbn10});
--
1.7.1
Subject: | perl-WWW-Scraper-ISBN-Amazon_Driver-0.26-image-tests.patch |
From 279a9eb7c4d696e7e0ac01656ba0440077da01af Mon Sep 17 00:00:00 2001
From: Gavin Carr <gavin@openfusion.com.au>
Date: Mon, 19 Sep 2011 21:31:29 +1000
Subject: [PATCH] Comment out thumb_link checks in tests, adjust image_link regexes.
---
t/10objectus.t | 10 +++++-----
t/11objectuk.t | 10 +++++-----
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/t/10objectus.t b/t/10objectus.t
index 1baa959..cf17aa5 100644
--- a/t/10objectus.t
+++ b/t/10objectus.t
@@ -2,7 +2,7 @@
use strict;
use lib './t';
-use Test::More tests => 45;
+use Test::More tests => 43;
use WWW::Scraper::ISBN;
###########################################################
@@ -25,8 +25,8 @@ my %tests = (
[ 'is', 'width', 175 ],
[ 'is', 'height', 228 ],
[ 'is', 'weight', undef ],
- [ 'like', 'image_link', qr!^http://www.amazon.com/gp/product/images! ],
- [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
+ [ 'like', 'image_link', qr!^http://ecx.images-amazon.com/images/! ],
+# [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
[ 'like', 'description', qr|This book is about taking over Perl code| ],
[ 'like', 'book_link', qr!^http://www.amazon.com/(Perl-Medic|.*?field-keywords=(0201795264|9780201795264))! ]
],
@@ -44,8 +44,8 @@ my %tests = (
[ 'is', 'width', 187 ],
[ 'is', 'height', 231 ],
[ 'is', 'weight', undef ],
- [ 'like', 'image_link', qr!^http://www.amazon.com/gp/product/images! ],
- [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
+ [ 'like', 'image_link', qr!^http://ecx.images-amazon.com/images/! ],
+# [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
[ 'like', 'description', qr|Perl Developer's Dictionary is a complete| ],
[ 'like', 'book_link', qr!http://www.amazon.com/(Perl-Developers-Dictionary|.*?field-keywords=(0672320673|9780672320675))! ]
],
diff --git a/t/11objectuk.t b/t/11objectuk.t
index 8bfbc4c..1f49e5e 100644
--- a/t/11objectuk.t
+++ b/t/11objectuk.t
@@ -2,7 +2,7 @@
use strict;
use lib './t';
-use Test::More tests => 45;
+use Test::More tests => 43;
use WWW::Scraper::ISBN;
###########################################################
@@ -25,8 +25,8 @@ my %tests = (
[ 'is', 'width', 175 ],
[ 'is', 'height', 229 ],
[ 'is', 'weight', undef ],
- [ 'like', 'image_link', qr!http://www.amazon.co.uk/gp/product/images! ],
- [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
+ [ 'like', 'image_link', qr!http://ecx.images-amazon.com/images/! ],
+# [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
[ 'like', 'description', qr|This book is about taking over Perl code| ],
[ 'like', 'book_link', qr!^http://www.amazon.co.uk/(Perl-Medic|.*?field-keywords=(0201795264|9780201795264))! ]
],
@@ -44,8 +44,8 @@ my %tests = (
[ 'is', 'width', 188 ],
[ 'is', 'height', 231 ],
[ 'is', 'weight', undef ],
- [ 'like', 'image_link', qr!http://www.amazon.co.uk/gp/product/images! ],
- [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
+ [ 'like', 'image_link', qr!http://ecx.images-amazon.com/images/! ],
+# [ 'like', 'thumb_link', qr!http://[-\w]+.images-amazon.com/images/[-\w/.]+\.jpg! ],
[ 'like', 'description', qr|Perl Developer's Dictionary is a complete| ],
[ 'like', 'book_link', qr!^http://www.amazon.co.uk/(Perl-Developers-Dictionary|.*?field-keywords=(0672320673|9780672320675))! ]
],
--
1.7.1