Skip Menu |

This queue is for tickets about the WWW-Mediawiki-Client CPAN distribution.

Report information
The Basics
Id: 13987
Status: resolved
Priority: 0/
Queue: WWW-Mediawiki-Client

People
Owner: markj [...] cpan.org
Requestors: alex [...] kapranoff.ru
Cc:
AdminCc:

Bug Information
Severity: Wishlist
Broken in: (no value)
Fixed in: 0.28



Subject: UTF-8 filenames are bad
This is a patch to add an option to url-escape local filenames. Docs and tests included. Default behaviour not changed.
diff -ruN WWW-Mediawiki-Client-0.27/lib/WWW/Mediawiki/Client.pm WWW-Mediawiki-Client-0.27-kap/lib/WWW/Mediawiki/Client.pm --- WWW-Mediawiki-Client-0.27/lib/WWW/Mediawiki/Client.pm Sat Jun 4 16:54:15 2005 +++ WWW-Mediawiki-Client-0.27-kap/lib/WWW/Mediawiki/Client.pm Tue Aug 2 19:19:12 2005 @@ -292,7 +292,7 @@ use constant COOKIE_FILE => '.mediawiki_cookies.dat'; use constant SAVED_ATTRIBUTES => ( qw(site_url host language_code space_substitute username password - wiki_path watch encoding minor_edit) + wiki_path watch encoding minor_edit escape_filenames) ); # It's important that host goes first since it has side effects @@ -434,6 +434,29 @@ return $self->{space_substitute}; } +=head2 escape_filenames + + my $char = $mvs->escape_filenames($do_escape); + +Mediawiki allows article names to be in UTF-8 and most international +Wikipedias use this feature. That leads us to UTF-8 encoded file names +and not all filesystems can handle them. So you can set this option to +some true value to make all your local file names with wiki articles +URL-escaped. 
+ +=cut + +sub escape_filenames { + my ($self, $do_escape) = @_; + if ($do_escape) { + $self->{escape_filenames} = $do_escape; + } elsif (!defined $self->{escape_filenames}) { + $self->{escape_filenames} = 0; + } + + return $self->{escape_filenames}; +} + =head2 wiki_path my $path = $mvs->wiki_path($path); @@ -1279,6 +1302,9 @@ my ($self, $name) = @_; $self->_check_path($name); $name =~ s/.wiki$//; + + $self->{escape_filenames} and $name = decode('UTF-8', URI::Escape::uri_unescape($name)); + $name =~ s/_/ /g; return ucfirst $name; } @@ -1319,6 +1345,9 @@ sub pagename_to_filename { my ($self, $name) = @_; $name =~ s/ /_/; + + $self->{escape_filenames} and $name = URI::Escape::uri_escape_utf8($name); + $name .= '.wiki'; return $name; } diff -ruN WWW-Mediawiki-Client-0.27/t/client.t WWW-Mediawiki-Client-0.27-kap/t/client.t --- WWW-Mediawiki-Client-0.27/t/client.t Sat May 28 18:13:23 2005 +++ WWW-Mediawiki-Client-0.27-kap/t/client.t Tue Aug 2 19:05:14 2005 @@ -1,9 +1,11 @@ #!/usr/bin/perl -w use strict; -use Test::More tests => 95; +use Test::More tests => 102; use Test::Differences; +use utf8; + BEGIN { use_ok('WWW::Mediawiki::Client', ':options'); } @@ -207,6 +209,12 @@ isa_ok($@, 'WWW::Mediawiki::Client::ReadOnlyFieldException', '... and throws an exception if you try to set it'); +# test the escape_filenames accessor +$mvs = WWW::Mediawiki::Client->new(host => 'www.wikifoo.org'); +is($mvs->escape_filenames, 0, 'Does the default escape_filenames get set?'); +ok($mvs->escape_filenames(1), '... and can we change it'); +is($mvs->escape_filenames, 1, '... and get back the string we changed it to'); + # test get_local_page open(OUT, '>:utf8', 'Paris.wiki'); print OUT $WikiData; @@ -264,7 +272,19 @@ 'pagename_to_filename can convert a page name into a filename'); is($mvs->pagename_to_filename('User:Mark/Maps'), 'User:Mark/Maps.wiki', '... 
even the sub-page of a User page.'); - + +$mvs->escape_filenames(0); +is($mvs->pagename_to_filename('Нижний Новгород'), 'Нижний_Новгород.wiki', + 'pagename_to_filename with Unicode'); +is($mvs->filename_to_pagename('Нижний_Новгород.wiki'), 'Нижний Новгород', + 'filename_to_pagename with Unicode'); + +$mvs->escape_filenames(1); +is($mvs->pagename_to_filename('Нижний Новгород'), '%D0%9D%D0%B8%D0%B6%D0%BD%D0%B8%D0%B9_%D0%9D%D0%BE%D0%B2%D0%B3%D0%BE%D1%80%D0%BE%D0%B4.wiki', + 'pagename_to_filename with Unicode escaping'); +is($mvs->filename_to_pagename('%D0%9D%D0%B8%D0%B6%D0%BD%D0%B8%D0%B9_%D0%9D%D0%BE%D0%B2%D0%B3%D0%BE%D1%80%D0%BE%D0%B4.wiki'), 'Нижний Новгород', + 'filename_to_pagename with Unicode escaping'); + # test url_to_filename $mvs->space_substitute('+'); is($mvs->url_to_filename('http://www.wikifoo.org/wiki/en/wiki.phtml?action=edit&title=San+Francisco'),
[KAPPA - Tue Aug 2 11:25:56 2005]: Show quoted text
> This is a patch to add an option to url-escape local filenames. Docs
> and tests included.
>
> Default behaviour not changed.
I've applied your patch. Thanks! -mark