Subject: | [PATCH] Add HTTP::Response->filename |
The attached patch against current git adds a new filename() method to
HTTP::Response and makes lwp-download use it.
Subject: | 0003-Add-response-filename-use-it-in-lwp-download.patch |
From 6178d299cfccd32442e714e5b4a849b12d0747df Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Ville=20Skytt=C3=A4?= <ville.skytta@iki.fi>
Date: Sun, 27 Apr 2008 00:18:42 +0300
Subject: [PATCH] Add $response->filename, use it in lwp-download.
---
bin/lwp-download | 13 +-----
lib/HTTP/Response.pm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 109 insertions(+), 11 deletions(-)
diff --git a/bin/lwp-download b/bin/lwp-download
index c36fde1..a746967 100755
--- a/bin/lwp-download
+++ b/bin/lwp-download
@@ -105,17 +105,8 @@ my $res = $ua->request(HTTP::Request->new(GET => $url),
}
unless (defined $argfile) {
- # must find a suitable name to use. First thing
- # to do is to look for the "Content-Disposition"
- # header defined by RFC1806. This is also supported
- # by Netscape
- my $cd = $res->header("Content-Disposition");
- if ($cd && $cd =~ /\bfilename\s*=\s*(\S+)/) {
- $file = $1;
- $file =~ s/;$//;
- $file =~ s/^([\"\'])(.*)\1$/$2/;
- $file =~ s,.*[\\/],,; # basename
- }
+ # find a suitable name to use
+ $file = $res->filename;
# if this fails we try to make something from the URL
unless ($file) {
diff --git a/lib/HTTP/Response.pm b/lib/HTTP/Response.pm
index f55b7a8..b25bf8e 100644
--- a/lib/HTTP/Response.pm
+++ b/lib/HTTP/Response.pm
@@ -96,6 +96,78 @@ sub base
}
+sub filename
+{
+ my $self = shift;
+ my $file;
+
+ my $cd = $self->header('Content-Disposition');
+ if ($cd) {
+ require HTTP::Headers::Util;
+ if (my @cd = HTTP::Headers::Util::split_header_words($cd)) {
+ my ($disposition, undef, %cd_param) = @{$cd[-1]};
+ $file = $cd_param{filename};
+
+ # RFC 2047 encoded?
+ if ($file && $file =~ /^=\?(.+?)\?(.+?)\?(.+)\?=$/) {
+ my $charset = $1;
+ my $encoding = uc($2);
+ my $encfile = $3;
+
+ if ($encoding eq 'Q' || $encoding eq 'B') {
+ local($SIG{__DIE__});
+ eval {
+ if ($encoding eq 'Q') {
+ $encfile =~ s/_/ /g;
+ require MIME::QuotedPrint;
+ $encfile = MIME::QuotedPrint::decode($encfile);
+ }
+ else { # $encoding eq 'B'
+ require MIME::Base64;
+ $encfile = MIME::Base64::decode($encfile);
+ }
+
+ require Encode;
+ require encoding;
+ # This is ugly use of non-public API, but is there
+ # a better way to accomplish what we want (locally
+ # as-is usable filename string)?
+ my $locale_charset = encoding::_get_locale_encoding();
+ Encode::from_to($encfile, $charset, $locale_charset);
+ };
+
+ $file = $encfile unless $@;
+ }
+ }
+ }
+ }
+
+ my $uri;
+ unless (defined($file) && length($file)) {
+ if (my $cl = $self->header('Content-Location')) {
+ $uri = URI->new($cl);
+ }
+ elsif (my $request = $self->request) {
+ $uri = $request->uri;
+ }
+
+ if ($uri) {
+ $file = ($uri->path_segments)[-1];
+ }
+ }
+
+ if ($file) {
+ $file =~ s,.*[\\/],,; # basename
+ }
+
+ if ($file && !length($file)) {
+ $file = undef;
+ }
+
+ $file;
+}
+
+
sub as_string
{
require HTTP::Status;
@@ -384,6 +456,41 @@ initialized the "Content-Base:" header. This means that this method
only performs the last 2 steps (the content is not always available
either).
+=item $r->filename
+
+Returns a filename for this response. Note that doing sanity checks
+on the returned filename (eg. removing characters that cannot be used
+on the target filesystem where the filename would be used, and
+laundering it for security purposes) are the caller's responsibility;
+the only related thing done by this method is that it makes a simple
+attempt to return a plain filename with no preceding path segments.
+
+The filename is obtained from one the following sources (in priority
+order):
+
+=over 4
+
+=item 1.
+
+A "Content-Disposition:" header in the response. Proper decoding of
+RFC 2047 encoded filenames requires the C<MIME::QuotedPrint> (for "Q"
+encoding), C<MIME::Base64> (for "B" encoding), and C<Encode> modules.
+
+=item 2.
+
+A "Content-Location:" header in the response.
+
+=item 3.
+
+The URI used to request this response. This might not be the original
+URI that was passed to $ua->request() method, because we might have
+received some redirect responses first.
+
+=back
+
+If a filename cannot be derived from any of these sources, undef is
+returned.
+
=item $r->as_string
=item $r->as_string( $eol )
--
1.5.4.3