Subject: | Incorrect handling of references to elements of arrays and elements of hashes |
I recently wrote a column for The Perl Journal describing how I submitted a bug three years ago regarding Data::Dumper's handling of references to array elements and hash elements. Here's a portion of the text from the article:
** START **
However, while I was staring at C<Data::Dumper>'s guts, I noticed that
there seemed to be no provision for noticing that a scalar reference
was a reference to a scalar that existed as the value of another array
or hash element, and thus dumped those values incorrectly. For
example:
use Data::Dumper;
$Data::Dumper::Purity = 1; # try your hardest
my @values = qw(zero one two three);
my $ref_to_element = \$values[1];
my $all = [$ref_to_element, \@values];
print Dumper($all);
which results in:
$VAR1 = [
\'one',
[
'zero',
${$VAR1->[0]},
'two',
'three'
]
];
The problem is that C<< $VAR1->[0] >> is a reference to one copy
of C<'one'>, while C<< $VAR->[1]->[1] >> is a B<different> copy
of C<'one'>, so changing one won't change the other. The link
between the two elements has been severed.
** END **
So, the bug is that Data::Dumper doesn't notice that a scalar ref references an array element or a hash element, and therefore dumps a copy of the data instead of the original scalar, breaking the necessary linkage.
I've written model code to fix the problem, but the design would have to be retrofitted to Data::Dumper's guts. I've attached the .pm file if it's helpful.
package Data::Stringer;
use 5.006;
use strict;
use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(uneval);
our $VERSION = '0.01';
require overload;
my %stab;
## $stab{'@x0x123456'} = \@thevalue
## $stab{'%x0x123456'} = \%thevalue
## $stab{'$x0x123456'} = [\$thevalue]
## $stab{'$x0x123456'} = [\$thevalue, $aggregate, $index] # for elements
BEGIN {
my @queue;
sub uneval {
%stab = @queue = ();
my $label = pass_1_item(\@_); # prime the pump
pass_1_item(shift @queue) while @queue; # drain the pump
return pass_2($label); # dump the result
}
sub pass_1_item {
my $ref = shift;
my $label = ref_to_label($ref);
return $label if $stab{$label}; # already seen
if ($label =~ /^\$/) { # scalar
$stab{$label}[0] = $ref;
push @queue, $$ref if ref $$ref;
} elsif ($label =~ /^\@/) { # array
$stab{$label} = $ref;
for my $index (0..$#$ref) {
for ($ref->[$index]) { # carefully creating alias, not copy
my $thislabel = ref_to_label(\$_);
$stab{$thislabel} = [\$_, $label, $index];
push @queue, $_ if ref $_;
}
}
} elsif ($label =~ /^%/) { # hash
$stab{$label} = $ref;
for my $key (keys %$ref) {
for ($ref->{$key}) { # carefully creating alias, not copy
my $thislabel = ref_to_label(\$_);
$stab{$thislabel} = [\$_, $label, $key];
push @queue, $_ if ref $_;
}
}
} else {
die "Cannot process $label yet";
}
return $label;
}
}
BEGIN {
my @deferred;
sub pass_2 {
my $result_label = shift;
@deferred = ();
return join("",
pass_2_declarations(),
pass_2_initializations(),
map("$_\n", @deferred),
pass_2_blessings(),
"$result_label;\n",
);
}
sub pass_2_value {
my $value = shift;
my $set_place = shift;
my $set_index = shift;
if (ref $value) {
my $label = ref_to_label($value);
if ($label =~ /^\$/) { # it is a scalar, so it might be an element
(my ($value, $place, $index) = @{$stab{$label}}) >= 1 or die;
if ($place) {
if ($place =~ /^[@%]/) {
push(@deferred,
element_of($set_place, $set_index) . " = \\" .
element_of($place, $index) . ";");
return "00"; # placeholder for a deferred action
} else {
die "dunno place $place";
}
} else {
return "\\$label"; # no place in particular
}
} else {
return "\\$label";
}
} else {
return quote_scalar($value);
}
}
}
sub pass_2_declarations {
return join("",
"my (",
join(", ",
grep {
/^[\@%]/ or /^\$/ and not $stab{$_}[1]
} keys %stab),
");\n");
}
sub pass_2_initializations {
return join("",
map(pass_2_initialization($_, $stab{$_}),
sort keys %stab),
);
}
sub pass_2_blessings {
return join("",
map(pass_2_blessing($_, $stab{$_}),
sort keys %stab),
);
}
sub pass_2_initialization {
my $label = shift;
my $value = shift;
if ($label =~ /^\$/) { # scalar
if (@$value > 1) { # it's an element:
return "";
} else {
return "$label = ".pass_2_value(${$value->[0]}).";\n";
}
} elsif ($label =~ /^\@/) { # array
return "$label = (".join(", ",
map {
pass_2_value($value->[$_], $label, $_);
} 0..$#$value,
).");\n";
} elsif ($label =~ /^%/) { # hash
return "$label = (".join(", ",
map {
pass_2_value($_) .
" => " .
pass_2_value($value->{$_}, $label, $_);
} keys %$value,
).");\n";
} else {
die "Cannot process $label yet";
}
}
sub pass_2_blessing {
my $label = shift;
my $value = shift;
## get to the proper location of an element for scalars
if ($label =~ /^\$/) {
$label = element_of($value->[1], $value->[2]) if @$value > 1;
$value = $value->[0];
}
my ($package) = overload::StrVal($value) =~ /^(.*)=/;
if (defined $package) { # it's blessed
return "bless \\$label, ".quote_scalar($package), ";\n";
} else {
return "";
}
}
sub element_of {
my $label = shift;
my $index = shift;
if ($label =~ s/^\@/\$/) {
return "$label\[".quote_scalar($index)."\]";
} elsif ($label =~ s/^%/\$/) {
return "$label\{".quote_scalar($index)."\}";
} else {
die "Cannot take element_of($label, $index)";
}
}
sub ref_to_label {
my $ref = shift;
## eventually do something with $realpack
my ($realpack, $realtype, $id) =
(overload::StrVal($ref) =~ /^(?:(.*)\=)?([^=]*)\(([^\(]*)\)$/) or die;
s/^0x/X/ or s/^/X/ for $id;
if ($realtype eq "SCALAR" or $realtype eq "REF") {
return "\$$id";
} elsif ($realtype eq "ARRAY") {
return "\@$id";
} elsif ($realtype eq "HASH") {
return "%$id";
} else {
die "dunno $ref => $realpack $realtype $id";
}
}
sub quote_scalar {
local $_ = shift;
if (!defined($_)) {
return "undef";
}
{
no warnings;
if ($_ + 0 eq $_) { # safe as a number...
return $_;
}
if ("$_" == $_) { # safe as a string...
s/([\\\'])/\\$1/g;
return '\'' . $_ . '\'';
}
}
die "$_ is not safe as either a number or a string";
}
1;
__END__
# Below is stub documentation for your module. You'd better edit it!
=head1 NAME
Data::Stringer - Create a string from structured data
=head1 SYNOPSIS
use Data::Stringer;
my $string = uneval($scalar, \@array, \%hash);
...
my ($newscalar, $newarrayref, $newhashref) = eval $string;
=head1 DESCRIPTION
C<uneval> turns a list of values into a single string, such that using
C<eval> later on the string returns the same list in a list context.
(The scalar context return value is undefined at the moment, but
eventually will return the last element of the list for consistency
with the scalar comma operator).
=head2 EXPORT
=over
=item $string = uneval(@list)
Create Perl code in $string to reconstruct @list.
=back
=head1 SEE ALSO
L<Data::Dumper>
=head1 AUTHOR
Randal L. Schwartz, E<lt>merlyn@stonehenge.comE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2004 by Randal L. Schwartz
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.3 or,
at your option, any later version of Perl 5 you may have available.
=cut