Subject: | UTF-8 filenames are bad |
This patch adds an option to URL-escape local filenames. Docs and tests are included.
The default behaviour is unchanged.
diff -ruN WWW-Mediawiki-Client-0.27/lib/WWW/Mediawiki/Client.pm WWW-Mediawiki-Client-0.27-kap/lib/WWW/Mediawiki/Client.pm
--- WWW-Mediawiki-Client-0.27/lib/WWW/Mediawiki/Client.pm Sat Jun 4 16:54:15 2005
+++ WWW-Mediawiki-Client-0.27-kap/lib/WWW/Mediawiki/Client.pm Tue Aug 2 19:19:12 2005
@@ -292,7 +292,7 @@
use constant COOKIE_FILE => '.mediawiki_cookies.dat';
use constant SAVED_ATTRIBUTES => (
qw(site_url host language_code space_substitute username password
- wiki_path watch encoding minor_edit)
+ wiki_path watch encoding minor_edit escape_filenames)
); # It's important that host goes first since it has side effects
@@ -434,6 +434,29 @@
return $self->{space_substitute};
}
+=head2 escape_filenames
+
+  my $do_escape = $mvs->escape_filenames($do_escape);
+
+Mediawiki allows article names to be in UTF-8, and most international
+Wikipedias use this feature.  That leads to UTF-8 encoded file names,
+which not all filesystems can handle.  Set this option to a true value
+to URL-escape the local file names of wiki articles, or to a defined
+false value (such as 0) to turn escaping off again.  Defaults to 0.
+
+=cut
+
+sub escape_filenames {
+    my ($self, $do_escape) = @_;
+    # Test definedness, not truth, so that an explicit 0 disables escaping.
+    if (defined $do_escape) {
+        $self->{escape_filenames} = $do_escape;
+    } elsif (!defined $self->{escape_filenames}) {
+        $self->{escape_filenames} = 0;
+    }
+    return $self->{escape_filenames};
+}
+
=head2 wiki_path
my $path = $mvs->wiki_path($path);
@@ -1279,6 +1302,9 @@
my ($self, $name) = @_;
$self->_check_path($name);
$name =~ s/.wiki$//;
+
+ $self->{escape_filenames} and $name = decode('UTF-8', URI::Escape::uri_unescape($name));
+
$name =~ s/_/ /g;
return ucfirst $name;
}
@@ -1319,6 +1345,9 @@
sub pagename_to_filename {
 my ($self, $name) = @_;
 $name =~ s/ /_/;
+    # When the option is on, URL-escape the name (UTF-8 bytes become %XX).
+    $name = URI::Escape::uri_escape_utf8($name) if $self->{escape_filenames};
+
 $name .= '.wiki';
 return $name;
}
diff -ruN WWW-Mediawiki-Client-0.27/t/client.t WWW-Mediawiki-Client-0.27-kap/t/client.t
--- WWW-Mediawiki-Client-0.27/t/client.t Sat May 28 18:13:23 2005
+++ WWW-Mediawiki-Client-0.27-kap/t/client.t Tue Aug 2 19:05:14 2005
@@ -1,9 +1,11 @@
#!/usr/bin/perl -w
use strict;
-use Test::More tests => 95;
+use Test::More tests => 102;
use Test::Differences;
+use utf8;
+
BEGIN {
use_ok('WWW::Mediawiki::Client', ':options');
}
@@ -207,6 +209,12 @@
isa_ok($@, 'WWW::Mediawiki::Client::ReadOnlyFieldException',
'... and throws an exception if you try to set it');
+# test the escape_filenames accessor
+$mvs = WWW::Mediawiki::Client->new(host => 'www.wikifoo.org');
+is($mvs->escape_filenames, 0, 'Does the default escape_filenames get set?');
+is($mvs->escape_filenames(1), 1, '... and can we change it');
+is($mvs->escape_filenames, 1, '... and get back the flag we changed it to');
+
# test get_local_page
open(OUT, '>:utf8', 'Paris.wiki');
print OUT $WikiData;
@@ -264,7 +272,19 @@
'pagename_to_filename can convert a page name into a filename');
is($mvs->pagename_to_filename('User:Mark/Maps'), 'User:Mark/Maps.wiki',
'... even the sub-page of a User page.');
-
+
+$mvs->escape_filenames(0);  # plain mode: no URL-escaping expected
+is($mvs->pagename_to_filename('Нижний Новгород'), 'Нижний_Новгород.wiki',
+    'pagename_to_filename with Unicode');
+is($mvs->filename_to_pagename('Нижний_Новгород.wiki'), 'Нижний Новгород',
+    'filename_to_pagename with Unicode');
+
+$mvs->escape_filenames(1);  # escaped mode: names are %XX-encoded UTF-8
+is($mvs->pagename_to_filename('Нижний Новгород'), '%D0%9D%D0%B8%D0%B6%D0%BD%D0%B8%D0%B9_%D0%9D%D0%BE%D0%B2%D0%B3%D0%BE%D1%80%D0%BE%D0%B4.wiki',
+    'pagename_to_filename with Unicode escaping');
+is($mvs->filename_to_pagename('%D0%9D%D0%B8%D0%B6%D0%BD%D0%B8%D0%B9_%D0%9D%D0%BE%D0%B2%D0%B3%D0%BE%D1%80%D0%BE%D0%B4.wiki'), 'Нижний Новгород',
+    'filename_to_pagename with Unicode escaping');
+
# test url_to_filename
$mvs->space_substitute('+');
is($mvs->url_to_filename('http://www.wikifoo.org/wiki/en/wiki.phtml?action=edit&title=San+Francisco'),