Subject: | [patch] uniq_by implementation |
The TODO list in the documentation mentions the possibility of a
"uniq_by BLOCK LIST", and I missed it a lot, so here's a possible
implementation. (This would also solve #50208.)
Subject: | List-MoreUtils-uniq_by.diff |
commit fe6981804ada9b0bcc3dfac8676d75442273fb27
Author: Norbert Buchmuller <norbi@nix.hu>
Date: Thu Mar 4 02:37:59 2010 +0100
Implemented uniq_by.
diff --git a/Changes b/Changes
index 17bb28a..2418ffb 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,8 @@
Revision history for Perl extension List::Any/List::MoreUtils.
+0.25_03
+ - Implemented uniq_by(&@) (Norbert Buchmuller <norbi@nix.hu>
+
0.25_02 Sat Aug 1 06:41:55 EDT 2009
- MS VC++ 7 doesn't like inline nor 'long long'
(patch provided by Taro Nishino (taro DOT nishino AT gmail.com)
diff --git a/MoreUtils.xs b/MoreUtils.xs
index eb08465..e505c78 100644
--- a/MoreUtils.xs
+++ b/MoreUtils.xs
@@ -1302,6 +1302,84 @@ uniq (...)
}
void
+uniq_by (code, ...)
+ SV *code;
+PROTOTYPE: &@
+CODE:
+{
+ dMULTICALL; dSTACK;
+ register int i, count = 0, seen_undef = 0;
+ HV *seen_values = newHV();
+ HV *stash;
+ GV *gv;
+ I32 gimme = G_SCALAR;
+ CV *cv;
+
+ COPY_STACK;
+
+ cv = sv_2cv(code, &stash, &gv, 0);
+ PUSH_MULTICALL(cv);
+ SAVESPTR(GvSV(PL_defgv));
+
+ /* don't build return list in scalar context */
+ if (GIMME == G_SCALAR) {
+ for (i = 1; i < items; i++) {
+ GvSV(PL_defgv) = STA(i);
+ MULTICALL;
+ if (SvOK(*PL_stack_sp)) {
+ if (!hv_exists_ent(seen_values, *PL_stack_sp, 0)) {
+ count++;
+ hv_store_ent(seen_values, *PL_stack_sp, &PL_sv_yes, 0);
+ }
+ } else {
+ if (!seen_undef) {
+ count++;
+ seen_undef = 1;
+ }
+ }
+ }
+
+ POP_MULTICALL;
+ FREE_STACK;
+
+ SvREFCNT_dec(seen_values);
+
+ ST(0) = sv_2mortal(newSViv(count));
+ XSRETURN(1);
+ }
+
+ /* list context: populate SP with mortal copies */
+ for (i = 1; i < items; i++) {
+ GvSV(PL_defgv) = STA(i);
+ MULTICALL;
+ if (SvOK(*PL_stack_sp)) {
+ if (!hv_exists_ent(seen_values, *PL_stack_sp, 0)) {
+ ST(count) = sv_2mortal(newSVsv(ST(i)));
+ /* POP_MULTICALL further down will decrement it by one */
+ SvREFCNT_inc(ST(count));
+ count++;
+ hv_store_ent(seen_values, *PL_stack_sp, &PL_sv_yes, 0);
+ }
+ } else {
+ if (!seen_undef) {
+ ST(count) = sv_2mortal(newSVsv(ST(i)));
+ /* POP_MULTICALL further down will decrement it by one */
+ SvREFCNT_inc(ST(count));
+ count++;
+ seen_undef = 1;
+ }
+ }
+ }
+
+ POP_MULTICALL;
+ FREE_STACK;
+
+ SvREFCNT_dec(seen_values);
+
+ XSRETURN(count);
+}
+
+void
minmax (...)
PROTOTYPE: @
CODE:
diff --git a/lib/List/MoreUtils.pm b/lib/List/MoreUtils.pm
index a380138..024ce3d 100644
--- a/lib/List/MoreUtils.pm
+++ b/lib/List/MoreUtils.pm
@@ -14,7 +14,7 @@ use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS);
all => [ qw(any all none notall true false firstidx first_index lastidx
last_index insert_after insert_after_string apply after after_incl before
before_incl indexes firstval first_value lastval last_value each_array
- each_arrayref pairwise natatime mesh zip uniq minmax part bsearch) ],
+ each_arrayref pairwise natatime mesh zip uniq uniq_by minmax part bsearch) ],
);
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
@@ -288,6 +288,19 @@ sub uniq (@) {
map { $h{defined $_ ? $_ : $ref}++ == 0 ? $_ : () } @_;
}
+sub uniq_by (&@) {
+ my $code = shift;
+
+ my %seen_value;
+ my $seen_undef;
+
+ grep {
+ my $compare_by = $code->();
+
+ defined $compare_by ? !$seen_value{$compare_by}++ : !$seen_undef++
+ } @_;
+}
+
sub minmax (@) {
return if ! @_;
my $min = my $max = $_[0];
@@ -653,6 +666,17 @@ returns the number of unique elements in LIST.
my @x = uniq 1, 1, 2, 2, 3, 5, 3, 4; # returns 1 2 3 5 4
my $x = uniq 1, 1, 2, 2, 3, 5, 3, 4; # returns 5
+=item uniq_by BLOCK LIST
+
+Returns a new list of those elements of LIST that are unique on the keys that
+BLOCK returns: evaluates BLOCK for all the elements of LIST, and only keeps
+those elements for which BLOCK returns a value it has not returned yet. The
+order of elements in the returned list is the same as in LIST. In scalar
+context, returns the number of elements the unique list would contain.
+
+ my @x = uniq_by { $_ % 3 } 4, 4, 5, 5, 7, 6, 8, 9; # returns 4, 5, 6
+ my $x = uniq_by { $_ % 3 } 4, 4, 5, 5, 7, 6, 8, 9; # returns 3
+
=item minmax LIST
Calculates the minimum and maximum of LIST and returns a two element list with
@@ -793,11 +817,6 @@ mailbox. This includes:
=over 4
-=item * uniq_by(&@)
-
-Use code-reference to extract a key based on which the uniqueness is
-determined. Suggested by Aaron Crane.
-
=item * delete_index
=item * random_item
diff --git a/t/List-MoreUtils-pp.t b/t/List-MoreUtils-pp.t
index 93a0e0c..ed55490 100644
--- a/t/List-MoreUtils-pp.t
+++ b/t/List-MoreUtils-pp.t
@@ -485,6 +485,19 @@ BEGIN { $TESTS += 4 }
ok($u, 1);
}
+BEGIN { $TESTS += 3 }
+{
+ my @a = map { (0 .. 10000) } 0 .. 1;
+ my @u = uniq_by { int($_ / 3) } @a;
+ ok(arrayeq(\@u, [ map { 3 * $_ } 0 .. 3333 ]));
+
+ my $u = uniq_by { int($_ / 3) } @a;
+ ok(3334, $u);
+
+ my @uniq = uniq_by { $_ } (1, 2, 1, 3, undef, '');
+ ok(arrayeq(\@uniq, [ 1, 2, 3, undef, '']));
+}
+
BEGIN { $TESTS += 8 }
{
my @list = reverse 0 .. 100_000;
diff --git a/t/List-MoreUtils.t b/t/List-MoreUtils.t
index 6289db6..39dac65 100644
--- a/t/List-MoreUtils.t
+++ b/t/List-MoreUtils.t
@@ -526,6 +526,19 @@ BEGIN { $TESTS += 4 }
ok($u, 1);
}
+BEGIN { $TESTS += 3 }
+{
+ my @a = map { (0 .. 10000) } 0 .. 1;
+ my @u = uniq_by { int($_ / 3) } @a;
+ ok(arrayeq(\@u, [ map { 3 * $_ } 0 .. 3333 ]));
+
+ my $u = uniq_by { int($_ / 3) } @a;
+ ok(3334, $u);
+
+ my @uniq = uniq_by { $_ } (1, 2, 1, 3, undef, '');
+ ok(arrayeq(\@uniq, [ 1, 2, 3, undef, '']));
+}
+
BEGIN { $TESTS += 8 }
{
my @list = reverse 0 .. 100_000;