Subject: | lwp-* command line argument charset issue |
The lwp-* scripts do not appear to process command line arguments correctly
with respect to their character encoding. This results in problems with IDN
support; for example, in a UTF-8 environment:
lwp-dump http://bücher.ch
...results in incorrect punycode being used for the hostname, and the
command fails. The attached patch appears to fix it for me, but I'm
not sure whether it is the best approach. What do you think?
Subject: | 0001-Parse-lwp-command-line-arguments-according-to-locale.patch |
From c8757d1c471b17f3dfa191a99675f0b6927bdf39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Skytt=C3=A4?= <ville.skytta@iki.fi>
Date: Sat, 6 Nov 2010 19:46:27 +0200
Subject: [PATCH] Parse lwp-* command line arguments according to locale.
---
bin/lwp-download | 9 ++++++++-
bin/lwp-dump | 7 +++++++
bin/lwp-mirror | 7 +++++++
bin/lwp-request | 6 ++++++
bin/lwp-rget | 7 +++++++
5 files changed, 35 insertions(+), 1 deletions(-)
diff --git a/bin/lwp-download b/bin/lwp-download
index 180a0e0..880f346 100755
--- a/bin/lwp-download
+++ b/bin/lwp-download
@@ -62,12 +62,19 @@ use LWP::MediaTypes qw(guess_media_type media_suffix);
use URI ();
use HTTP::Date ();
+eval {
+ require I18N::Langinfo;
+ my $encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+ require Encode;
+ @ARGV = map { Encode::decode($encoding, $_) } @ARGV;
+};
+
my $progname = $0;
$progname =~ s,.*/,,; # only basename left in progname
$progname =~ s,.*\\,, if $^O eq "MSWin32";
$progname =~ s/\.\w*$//; # strip extension if any
-#parse option
+# parse options
use Getopt::Std;
my %opt;
unless (getopts('as', \%opt)) {
diff --git a/bin/lwp-dump b/bin/lwp-dump
index 1805eb5..321e619 100755
--- a/bin/lwp-dump
+++ b/bin/lwp-dump
@@ -4,6 +4,13 @@ use strict;
use LWP::UserAgent ();
use Getopt::Long qw(GetOptions);
+eval {
+ require I18N::Langinfo;
+ my $encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+ require Encode;
+ @ARGV = map { Encode::decode($encoding, $_) } @ARGV;
+};
+
my $VERSION = "5.827";
GetOptions(\my %opt,
diff --git a/bin/lwp-mirror b/bin/lwp-mirror
index 13da797..3ac1e6b 100755
--- a/bin/lwp-mirror
+++ b/bin/lwp-mirror
@@ -41,6 +41,13 @@ Gisle Aas <gisle@aas.no>
use LWP::Simple qw(mirror is_success status_message $ua);
use Getopt::Std;
+eval {
+ require I18N::Langinfo;
+ my $encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+ require Encode;
+ @ARGV = map { Encode::decode($encoding, $_) } @ARGV;
+};
+
$progname = $0;
$progname =~ s,.*/,,; # use basename only
$progname =~ s/\.\w*$//; #strip extension if any
diff --git a/bin/lwp-request b/bin/lwp-request
index ee9dbf8..b47ee2e 100755
--- a/bin/lwp-request
+++ b/bin/lwp-request
@@ -191,6 +191,12 @@ use URI::Heuristic qw(uf_uri);
use HTTP::Status qw(status_message);
use HTTP::Date qw(time2str str2time);
+eval {
+ require I18N::Langinfo;
+ my $encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+ require Encode;
+ @ARGV = map { Encode::decode($encoding, $_) } @ARGV;
+};
# This table lists the methods that are allowed. It should really be
# a superset for all methods supported for every scheme that may be
diff --git a/bin/lwp-rget b/bin/lwp-rget
index 2ac798f..e989a99 100755
--- a/bin/lwp-rget
+++ b/bin/lwp-rget
@@ -148,6 +148,13 @@ use HTML::Entities ();
use vars qw($VERSION);
use vars qw($MAX_DEPTH $MAX_DOCS $PREFIX $REFERER $VERBOSE $QUIET $SLEEP $HIER $AUTH $IIS $TOLOWER $NOSPACE %KEEPEXT);
+eval {
+ require I18N::Langinfo;
+ my $encoding = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());
+ require Encode;
+ @ARGV = map { Encode::decode($encoding, $_) } @ARGV;
+};
+
my $progname = $0;
$progname =~ s|.*/||; # only basename left
$progname =~ s/\.\w*$//; #strip extension if any
--
1.7.2.3