Subject: | patch for skewness and kurtosis functions |
Attached is a diff file to add skewness and kurtosis functions in
Statistics::Descriptive::Full (3.0100). It includes regression tests
and POD updates.
It would be nice to have these available via S::D::Sparse using higher
order moments, but that can be patched in when developed. The OO
inheritance means users won't notice anything when it happens.
Shawn Laffan.
Subject: | skew_and_kurt_patch.diff |
Index: lib/Statistics/Descriptive.pm
===================================================================
--- lib/Statistics/Descriptive.pm (revision 4433)
+++ lib/Statistics/Descriptive.pm (working copy)
@@ -10,7 +10,7 @@
##Perl5. 01-03 weren't bug free.
use vars (qw($VERSION $Tolerance));
-$VERSION = '3.0100';
+$VERSION = '3.0200';
$Tolerance = 0.0;
@@ -247,6 +247,7 @@
__PACKAGE__->_make_private_accessors(
[qw(data frequency geometric_mean harmonic_mean
least_squares_fit median mode
+ skewness kurtosis
)
]
);
@@ -599,6 +600,71 @@
return $self->_geometric_mean();
}
+sub skewness {
+ my $self = shift;
+
+ if (!defined($self->_skewness()))
+ {
+ my $n = $self->count();
+ my $sd = $self->standard_deviation();
+
+ my $skew;
+
+ # skip if insufficient records
+ if ( $sd && $n > 2) {
+
+ my $mean = $self->mean();
+
+ my $sum_pow3;
+
+ foreach my $rec ( $self->get_data ) {
+ my $value = (($rec - $mean) / $sd);
+ $sum_pow3 += $value ** 3;
+ }
+
+ my $correction = $n / ( ($n-1) * ($n-2) );
+
+ $skew = $correction * $sum_pow3;
+ }
+
+ $self->_skewness($skew);
+ }
+
+ return $self->_skewness();
+}
+
+sub kurtosis {
+ my $self = shift;
+
+ if (!defined($self->_kurtosis()))
+ {
+ my $kurt;
+
+ my $n = $self->count();
+ my $sd = $self->standard_deviation();
+
+ if ( $sd && $n > 3) {
+
+ my $mean = $self->mean();
+
+ my $sum_pow4;
+ foreach my $rec ( $self->get_data ) {
+ $sum_pow4 += ( ($rec - $mean ) / $sd ) ** 4;
+ }
+
+ my $correction1 = ( $n * ($n+1) ) / ( ($n-1) * ($n-2) * ($n-3) );
+ my $correction2 = ( 3 * ($n-1) ** 2) / ( ($n-2) * ($n-3) );
+
+ $kurt = ( $correction1 * $sum_pow4 ) - $correction2;
+ }
+
+ $self->_kurtosis($kurt);
+ }
+
+ return $self->_kurtosis();
+}
+
+
sub frequency_distribution_ref
{
my $self = shift;
@@ -895,6 +961,19 @@
is called. Calling the method without an argument returns the value of
the flag.
+=item $stat->skewness();
+
+Returns the skewness of the data.
+A value of zero is no skew, negative is a left skewed tail,
+positive is a right skewed tail.
+This is consistent with Excel.
+
+=item $stat->kurtosis();
+
+Returns the kurtosis of the data.
+Positive is peaked, negative is flattened.
+
+
=item $x = $stat->percentile(25);
=item ($x, $index) = $stat->percentile(25);
@@ -1170,6 +1249,10 @@
=over 4
+=item v3.0200
+
+Add skewness and kurtosis.
+
=item v2.3
Rolled into November 1998
Index: t/descr.t
===================================================================
--- t/descr.t (revision 4433)
+++ t/descr.t (working copy)
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use Test::More tests => 23;
+use Test::More;
use Benchmark;
use Statistics::Descriptive;
@@ -372,3 +372,47 @@
)
}
+{
+ my $stat = Statistics::Descriptive::Full->new();
+
+ $stat->add_data(1 .. 9, 100);
+
+ # TEST
+ is ($stat->skewness(),
+ 3.11889574523909,
+ 'Skewness of 3.11889574523909'
+ );
+
+ # TEST
+ is ($stat->kurtosis(),
+ 9.79924471616366,
+ 'Kurtosis of 9.79924471616366'
+ );
+
+}
+
+{
+ my $stat = Statistics::Descriptive::Full->new();
+
+ $stat->add_data(1,2);
+ my $def;
+
+ # TEST
+ $def = defined $stat->skewness() ? 1 : 0;
+ is ($def,
+ 0,
+ 'Skewness is undef for 2 samples'
+ );
+
+ $stat->add_data (1);
+
+ # TEST
+ $def = defined $stat->kurtosis() ? 1 : 0;
+ is ($def,
+ 0,
+ 'Kurtosis is undef for 3 samples'
+ );
+
+}
+
+done_testing();