Subject: | Bug in _rank function of RankCorrelation 0.0502 |
Dear Mr Boggs;
The perl version I am using are Perl 5.004_04 for irix-n32
I think I spot a mistake in the way you sort data in the _rank function.
If I am wrong, please forgive me.
#################################################
## --- lets analyse what you are doing:
sub _rank { # {{{
my $u = shift;
# Rank the sorted vector with an HoL.
my %rank;
## --- the line above you are creting a hash containing as keys the
## --- unique values in the list and as values their respective indexes
## --- in the listYou will use then this hash afterwards to see if
## --- there are ties
push @{ $rank{$u->[$_]} }, $_ + 1 for 0 .. @$u - 1;
# Set the ranks and average any tied data.
my @ranks;
for my $x (sort { $a <=> $b } keys %rank) {
# Get the number of ties.
my $ties = @{ $rank{$x} };
if ($ties > 1) {
# Average the tied data.
my $average = 0;
$average += $_ / $ties for @{ $rank{$x} };
# Add the tied rank averages to the array of ranks.
push @ranks, ($average) x $ties;
}
else {
# Add the single rank to the list of ranks.
## --- If there is no tie you just return the index that
## --- this value had in the root list, but there is no
## --- sorting at all...
push @ranks, $rank{$x}[0];
}
}
return \@ranks;
} # }}}
## --- The code below is a corrected version of the _rank subroutine
## --- The code is perhaps not so beautiful or not so subtle but appears
## --- to me to correctly rank the list
sub _rank {
my $u = shift;
# get a list of the indexes of the sorted values
my @indexes =
map $_->[0],
sort { $a->[1] <=> $b->[1] }
map [ $_, $u->[$_] ],
0 .. @$u - 1;
# get a list of the ranks in the same order as the values
my @indexed_ranks =
map $_->[0],
sort { $a->[1] <=> $b->[1] }
map [ $_ + 1, $indexes[$_] ],
0 .. @indexes - 1;
# Rank the sorted vector with an HoL.
my %rank;
push @{ $rank{$u->[$_]} }, $indexed_ranks[$_] for 0 .. @$u - 1;
# Set the ranks and average any tied data.
my @ranks;
my $counter = 0;
for my $x (sort {$a <=> $b} keys %rank) {
# Get the number of ties.
my $ties = @{ $rank{$x} };
if ($ties > 1) {
# Average the tied data.
my $average = 0;
$average += $_ / $ties for @{ $rank{$x} };
# Add the tied rank averages to the array of ranks.
$ranks[ $indexes[$counter++] ] = $average for 1 .. $ties
}
else {
# Add the single rank to the list of ranks.
$ranks[ $indexes[$counter++] ] = $rank{$x}[0];
}
}
return \@ranks;
}
## --- The code below allows to calculate the kendall's tau coefficient
## --- You could (if you are interrested...) add this code to your module
## --- Details can be found in "Nonparametric Statistics for the Behavioral
## --- Sciences" by Sidney Siegel, N. John Castellan Jr.
# Return Kendall's tau correlartion coefficient
sub kendall_tau {
my $self = shift;
# initialise number of concordant and number of discordant
my $concordant = 0;
my $discordant = 0;
# get a list of the order of index of the sorted ranks of the first list
my @list =
map $_->[0] + 1,
sort { $a->[1] <=> $b->[1] }
map [ $_, $self->{x_rank}[$_] ],
0 .. $self->{size} - 1;
# calculate number of concordant and number of discordant
while (my $index = shift @list) {
for (@list) {
$concordant++ if $self->{x_rank}[$index-1] < $self->{y_rank}[$_-1];
$discordant++ if $self->{y_rank}[$index-1] > $self->{y_rank}[$_-1] ;
}
}
return ( 2 * ($concordant - $discordant ) ) /
( $self->{size} * ($self->{size} - 1) );
}
###############################################
I hope this is helpful.
Regards,
Jerome.