Subject: | _compile_alispec does not take into account unicode graphemes that take up != one column |
Perl's length() when operating on unicode strings returns the length in "logical characters". However, not every unicode character takes up the same amount of space on output to a terminal*. As such, Text::Aligner mis-computes the "graphical length" for some unicode strings, and as a result the output text isn't really aligned.
Attached is an example that demonstrates the problem. Note that you should use a terminal that expects utf-8 on stdout from perl. I performed my test with rxvt-unicode version 9.19 on Ubuntu 14.04 LTS.
Also demonstrated is a fix that depends on Unicode::GCString. This module provides a ->columns method which will tell you the "display length" of the string. I copied and modified the align() procedure in the example to change the {width} member to use this method instead of length. As you can see the right side of the table is justified when myalign() is used.
Note that at least some of the {pos} implementations have a similar issue, they use length, when they really mean "display columns".
Subject: | BadAlign.pl |
use v5.20;
use strict;
use Text::Aligner qw(align);
use List::Util qw(sum max);
use charnames qw();
# On a linux terminal. Your mileage will vary on Windows
binmode STDOUT, ':encoding(utf-8)';
my @cphex = qw(
299F 2220 299E 29A4 031A 0349 2E16 2770
276E 2771 276F 3008 2991 300A 2329 29FC
00AB 27E8 27EA 27E9 27EB 2221 299B 29AB
29AA 29AF 29AD 29AE 29AC 29A9 29A8 299D
276C 276D A71A 29A7 29A6 FE3F FE3D FE40
FE3E 29A3 29A5 221F 3009 2992 2E01 2E00
299C 22BE 237C 300B 232A 29FD 00BB
);
my @codepoints = map { oct("0x$_") } @cphex;
my @cpdec = map { "$_" } @codepoints;
my @names = map { charnames::viacode($_) } @codepoints;
my @chars = map { chr($_) } @codepoints;
my @cols = (
[ "Name", @names],
[ "Hex Code", @cphex],
[ "Dec Code", @cpdec],
[ "Char", @chars],
);
for my $aligner ( \&align, \&myalign) {
for my $dir (qw(left center right)) {
my @newcols = map {
[ $aligner->($dir, @$_) ]
} @cols;
my @rows = _transpose(\@newcols);
foreach (@rows) {
print '|.', join('.|.', @$_), '.|', "\n";
}
print "\n\n";
}
}
#
# Taken from current Text::Aligner, but modified to change
# $ali->{width} to use Unicode::GCString->columns
#
sub myalign ($@) {
my $ali = Text::Aligner->new( shift);
use Unicode::GCString;
# Rather than calculate the length of the string, calculate the
# number of columns of grapheme clusters
$ali->{width} = sub {
my $s = @_ ? $_[0] : $_;
Unicode::GCString->new($s)->columns;
};
# Note that the $pos routines in Text::Aligner::_compile_alispec
# also need to change to use the Unicode::GCString->columns rather
# than the length() of the string when computing padding
$ali->_alloc( map ref eq 'SCALAR' ? $$_ : $_, @_);
if ( defined wantarray ) {
my @just = map $ali->_justify( ref eq 'SCALAR' ? $$_ : $_), @_;
return @just if wantarray;
return join "\n", @just, '';
} else {
for ( @_ ) {
$_ = $ali->_justify( $_) for ref eq 'SCALAR' ? $$_ : $_; # one-shot
}
}
}
#
# Taken from Text::Table
#
# destructively transpose a number of lines/cols from an array of arrayrefs
sub _transpose_n {
my ($n, $cols) = @_;
return map { [ map { shift @$_ } @$cols] } 1 .. $n;
}
# like _transpose_n, but find the number to transpose from max of given
sub _transpose
{
my ($cols) = @_;
my $m = max ( map { scalar(@$_) } @$cols, []);
return _transpose_n( $m, $cols);
}