Hi Shlomi,
Here's a candidate patch, including Test::Count annotations.
Regards,
Shawn.
On Wed Feb 08 12:53:43 2012, SHLOMIF wrote:
Show quoted text> Hi Shawn,
>
> On Sun Feb 05 01:13:21 2012, SLAFFAN wrote:
> > Hello Shlomi,
> >
> > If one creates a new stats object and then calls methods on it, the mean
> > and median return a value of zero. This should more logically be undef.
> > (This probably applies to other methods, but I haven't fully checked).
>
> Yes, that sounds correct. Please implement it.
>
> >
> > Some example test code is below.
> >
> > The one issue I can think of is where existing modules that use S::D
> > expect the zero to be returned. That said, the effect won't be too
> > dramatic as such a change will issue a warning when strict and warnings
> > are on, and the value will be silently converted to zero if they are
not.
Show quoted text> >
> > I'm happy to provide a patch if you'd like.
>
> That would be nice. See:
>
>
https://bitbucket.org/shlomif/perl-statistics-descriptive
>
> You can clone the repository, modify it and send a pull request, or a
> patch would be OK too. I can write the patch myself, but I'd like to
> encourage people to contribute.
>
> >
> > Regards,
> > Shawn.
> >
> >
> > {
> > use Test::More tests => 2;
> >
> > my $stat = Statistics::Descriptive::Full->new();
> > my $result;
> >
>
> This script needs Test-Count annotations:
>
>
http://search.cpan.org/dist/Test-Count/
>
> > $result = $stat->mean();
> > ok (!defined ($result), "Mean is undefined when object has no data.
> > Got $result.");
> >
> > $result = $stat->median();
> > ok (!defined ($result), "Median is undefined when object has no
> > data. Got $result.");
> > }
>
> Regards,
>
> -- Shlomi Fish
diff -r 6653104d5110 Statistics-Descriptive/lib/Statistics/Descriptive.pm
--- a/Statistics-Descriptive/lib/Statistics/Descriptive.pm Thu Nov 17 20:07:19 2011 +0200
+++ b/Statistics-Descriptive/lib/Statistics/Descriptive.pm Sat Feb 11 10:07:28 2012 +1100
@@ -76,15 +76,15 @@
##Define the fields to be used as methods
%fields = (
count => 0,
- mean => 0,
- sum => 0,
- sumsq => 0,
+ mean => undef,
+ sum => undef,
+ sumsq => undef,
min => undef,
max => undef,
mindex => undef,
maxdex => undef,
sample_range => undef,
- variance => undef,
+ variance => undef,
);
__PACKAGE__->_make_accessors( [ grep { $_ ne "variance" } keys(%fields) ] );
@@ -176,19 +176,22 @@
$self->count($count);
##indicator the value is not cached. Variance isn't commonly enough
##used to recompute every single data add.
- $self->_variance(undef());
+ $self->_variance(undef);
return 1;
}
sub standard_deviation {
my $self = shift; ##Myself
- return undef if (!$self->count());
+ return if (!$self->count());
return sqrt($self->variance());
}
##Return variance; if needed, compute and cache it.
sub variance {
my $self = shift; ##Myself
+
+ return if (!$self->count());
+
my $div = @_ ? 0 : 1;
my $count = $self->count();
if ($count < 1 + $div) {
@@ -380,7 +383,7 @@
if ((! $count) || ($percentile < 100 / $count))
{
- return undef;
+ return; # allow for both scalar and list context
}
$self->sort_data();
@@ -415,6 +418,8 @@
sub median {
my $self = shift;
+ return if !$self->count;
+
##Cached?
if (! defined($self->_median()))
{
@@ -432,6 +437,9 @@
return;
}
+ # check data count after the args are checked - should help debugging
+ return if !$self->count;
+
$self->sort_data();
return $self->_data->[0] if ( $QuantileNumber == 0 );
@@ -494,6 +502,9 @@
($lower,$upper) = ($_[0],$_[1]);
}
+ # check data count after the args
+ return if !$self->count;
+
##Cache
my $thistm = join ':',$lower,$upper;
my $cache = $self->_trimmed_mean_cache();
@@ -585,6 +596,8 @@
sub geometric_mean {
my $self = shift;
+
+ return if !$self->count;
if (!defined($self->_geometric_mean()))
{
diff -r 6653104d5110 Statistics-Descriptive/t/descr.t
--- a/Statistics-Descriptive/t/descr.t Thu Nov 17 20:07:19 2011 +0200
+++ b/Statistics-Descriptive/t/descr.t Sat Feb 11 10:07:28 2012 +1100
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use Test::More tests => 29;
+use Test::More tests => 50;
use Benchmark;
use Statistics::Descriptive;
@@ -80,7 +80,7 @@
my $stat = Statistics::Descriptive::Full->new();
my @results = $stat->least_squares_fit();
# TEST
- ok (!scalar(@results), "Results on an non-filled object are empty.");
+ ok (!scalar(@results), "Least-squares results on a non-filled object are empty.");
# test #2
# data are y = 2*x - 1
@@ -424,3 +424,53 @@
# TEST
ok (!defined($ret), 'Returns undef and does not die.');
}
+
+
+
+# test stats when no data have been added
+{
+ my $stat = Statistics::Descriptive::Full->new();
+ my ($result, $str);
+
+ # An accessor method for _permitted would be handy,
+ # or one to get all the stats methods
+ my @methods = qw {
+ mean sum variance standard_deviation
+ min mindex max maxdex sample_range
+ skewness kurtosis median
+ harmonic_mean geometric_mean
+ mode least_squares_fit
+ percentile frequency_distribution
+ };
+ # least_squares_fit is handled in an earlier test, so is actually a duplicate here
+
+ #diag 'Results are undef when no data added';
+ # need to update next line when new methods are tested here
+ # TEST:$method_count=18
+ foreach my $method (sort @methods) {
+ $result = $stat->$method;
+ # TEST*$method_count
+ ok (!defined ($result), "$method is undef when object has no data.");
+ }
+
+ # quantile and trimmed_mean require valid args, so don't test in the method loop
+ my $method = 'quantile';
+ $result = $stat->$method(1);
+ # TEST
+ ok (!defined ($result), "$method is undef when object has no data.");
+
+ $method = 'trimmed_mean';
+ $result = $stat->$method(0.1);
+ # TEST
+ ok (!defined ($result), "$method is undef when object has no data.");
+}
+
+# test SD when only one value added
+{
+ my $stat = Statistics::Descriptive::Full->new();
+ $stat->add_data( 1 );
+
+ my $result = $stat->standard_deviation();
+ # TEST
+ ok ($result == 0, "SD is zero when object has one record.");
+}