Subject: | [patch] grep_pairs, map_pairs |
Filtering key-value pairs of a hash (then storing the results into a new
hash) and transforming key-value pairs of a hash (and storing the
results into a new hash) seems to be a repetitive problem for me.
(During the last few months I met this same problem at least 3 times.)
I'd like to be able to write code like this:
my %having_uppercase_key = grep_pairs { $a =~ /^[A-Z]+$/ } %h;
my %having_odd_value = grep_pairs { $b % 2 != 0 } %h;
my %with_uppercase_key = map_pairs { (uc $a => $b) } %h;
my %with_doubled_value = map_pairs { ( $a => 2 * $b) } %h;
See the patch for the implementation of these grep_pairs and map_pairs
functions.
Subject: | List-MoreUtils-grep_pairs-map_pairs.diff |
commit 392a9ac628991aadd1f202e9fadd397769d6c386
Author: Norbert Buchmuller <norbi@nix.hu>
Date: Thu Mar 4 03:32:23 2010 +0100
Implemented grep_pairs and map_pairs.
diff --git a/Changes b/Changes
index 17bb28a..c0d7718 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,9 @@
Revision history for Perl extension List::Any/List::MoreUtils.
+0.25_03
+ - Implemented grep_pairs(&@) and map_pairs(&@)
+ (Norbert Buchmuller <norbi@nix.hu>
+
0.25_02 Sat Aug 1 06:41:55 EDT 2009
- MS VC++ 7 doesn't like inline nor 'long long'
(patch provided by Taro Nishino (taro DOT nishino AT gmail.com)
diff --git a/MoreUtils.xs b/MoreUtils.xs
index eb08465..e8b3ccf 100644
--- a/MoreUtils.xs
+++ b/MoreUtils.xs
@@ -1024,6 +1024,131 @@ each_arrayref (...)
OUTPUT:
RETVAL
+void
+grep_pairs (code, ...)
+ SV *code;
+PROTOTYPE: &@
+CODE:
+{
+ dMULTICALL; dSTACK;
+ register int i, j;
+ HV *stash;
+ GV *gv;
+ I32 gimme = G_SCALAR;
+ CV *cv;
+
+ if ((items & 1) == 0) { /* should be odd b/c BLOCK counts as one */
+ croak("grep_pairs: odd number of elements in the list");
+ }
+
+ if (in_pad("a", code) || in_pad("b", code)) {
+ croak("Can't use lexical $a or $b in grep_pairs code block");
+ }
+
+ if (!PL_firstgv || !PL_secondgv) {
+ SAVESPTR(PL_firstgv);
+ SAVESPTR(PL_secondgv);
+ PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
+ PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
+ }
+
+ COPY_STACK;
+
+ cv = sv_2cv(code, &stash, &gv, 0);
+ PUSH_MULTICALL(cv);
+
+ for (i = 1, j = 0; i < items; i += 2) {
+ GvSV(PL_firstgv) = STA(i);
+ GvSV(PL_secondgv) = STA(i+1);
+ MULTICALL;
+ if (SvTRUE(*PL_stack_sp)) {
+ ST(j) = sv_2mortal(newSVsv(ST(i)));
+ /* POP_MULTICALL further down will decrement it by one */
+ SvREFCNT_inc(ST(j++));
+
+ ST(j) = sv_2mortal(newSVsv(ST(i+1)));
+ /* POP_MULTICALL further down will decrement it by one */
+ SvREFCNT_inc(ST(j++));
+ }
+ }
+
+ POP_MULTICALL;
+ FREE_STACK;
+
+ XSRETURN(j);
+}
+
+void
+map_pairs (code, ...)
+ SV *code;
+PROTOTYPE: &@
+PPCODE:
+{
+ /* See the comment before 'pairwise' about efficiency. */
+ register int i, j;
+ SV **oldsp;
+ register SV **buf, **p; /* gather return values here and later copy down to SP */
+ int alloc;
+ int nitems = 0;
+ register int d;
+
+ if ((items & 1) == 0) { /* should be odd b/c BLOCK counts as one */
+ croak("map_pairs: odd number of elements in the list");
+ }
+
+ if (in_pad("a", code) || in_pad("b", code)) {
+ croak("Can't use lexical $a or $b in map_pairs code block");
+ }
+
+ if (!PL_firstgv || !PL_secondgv) {
+ SAVESPTR(PL_firstgv);
+ SAVESPTR(PL_secondgv);
+ PL_firstgv = gv_fetchpv("a", TRUE, SVt_PV);
+ PL_secondgv = gv_fetchpv("b", TRUE, SVt_PV);
+ }
+
+ New(0, buf, alloc = items, SV*);
+
+ ENTER;
+ for (i = 1, j = 0, d = 0; i < items; i += 2) {
+ int nret;
+
+ GvSV(PL_firstgv) = ST(i);
+ GvSV(PL_secondgv) = ST(i+1);
+
+ PUSHMARK(SP);
+ PUTBACK;
+ nret = call_sv(code, G_EVAL|G_ARRAY);
+ if (SvTRUE(ERRSV)) {
+ Safefree(buf);
+ croak("%s", SvPV_nolen(ERRSV));
+ }
+ SPAGAIN;
+ nitems += nret;
+
+ if (nitems > alloc) {
+ alloc <<= 2;
+ Renew(buf, alloc, SV*);
+ }
+ for (j = nret-1; j >= 0; j--) {
+ /* POPs would return elements in reverse order */
+ buf[d] = sp[-j];
+ SvREFCNT_inc(buf[d]);
+ d++;
+ }
+ sp -= nret;
+ }
+ LEAVE;
+ EXTEND(SP, nitems);
+
+ for (i = 0, p = buf; i < nitems; i++)
+ ST(i) = *p++;
+
+ Safefree(buf);
+
+ XSRETURN(nitems);
+}
+
#if 0
void
_pairwise (code, ...)
@@ -1618,4 +1743,3 @@ DESTROY(sv)
CvXSUBANY(code).any_ptr = NULL;
}
}
-
diff --git a/lib/List/MoreUtils.pm b/lib/List/MoreUtils.pm
index a380138..82e04dd 100644
--- a/lib/List/MoreUtils.pm
+++ b/lib/List/MoreUtils.pm
@@ -14,7 +14,8 @@ use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS);
all => [ qw(any all none notall true false firstidx first_index lastidx
last_index insert_after insert_after_string apply after after_incl before
before_incl indexes firstval first_value lastval last_value each_array
- each_arrayref pairwise natatime mesh zip uniq minmax part bsearch) ],
+ each_arrayref grep_pairs map_pairs pairwise natatime mesh zip uniq minmax
+ part bsearch) ],
);
@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
@@ -350,6 +351,56 @@ sub bsearch(&@) {
return;
}
+sub grep_pairs(&@) {
+ my $code = shift;
+
+ if (@_ % 2 != 0) {
+ require Carp;
+ Carp::croak "grep_pairs: odd number of elements in the list\n";
+ }
+
+ # get references to $a, $b in the caller's package
+ my ($caller_a, $caller_b) = do {
+ my $pkg = caller;
+ no strict 'refs';
+ \*{"${pkg}::a"}, \*{"${pkg}::b"};
+ };
+ local (*$caller_a, *$caller_b);
+
+ my @ret;
+ while (my ($a, $b) = splice @_, 0, 2) {
+ (*$caller_a, *$caller_b) = \($a, $b);
+ push @ret, $code->() ? ($a, $b) : ();
+ }
+
+ return @ret;
+}
+
+sub map_pairs(&@) {
+ my $code = shift;
+
+ if (@_ % 2 != 0) {
+ require Carp;
+ Carp::croak "map_pairs: odd number of elements in the list\n";
+ }
+
+ # get references to $a, $b in the caller's package
+ my ($caller_a, $caller_b) = do {
+ my $pkg = caller;
+ no strict 'refs';
+ \*{"${pkg}::a"}, \*{"${pkg}::b"};
+ };
+ local (*$caller_a, *$caller_b);
+
+ my @ret;
+ while (my ($a, $b) = splice @_, 0, 2) {
+ (*$caller_a, *$caller_b) = \($a, $b);
+ push @ret, $code->();
+ }
+
+ return @ret;
+}
+
sub _XScompiled {
return 0;
}
@@ -603,6 +654,31 @@ the index of the last fetched set of values, as a scalar.
Like each_array, but the arguments are references to arrays, not the
plain arrays.
+=item grep_pairs BLOCK LIST
+
+Evaluates BLOCK for each element pair (ie. first and second element, then third
+and fourth and so on) in LIST and returns a new list consisting of those
+element pairs for which BLOCK returns a true value. The two elements of the
+pair are set to C<$a> and C<$b>. Note that those two are aliases to the
+original value so changing them will modify the input array.
+
+Useful for filtering hashes (in list form).
+
+ my %having_uppercase_key = grep_pairs { $a =~ /^[A-Z]+$/ } %h;
+ my %having_odd_value = grep_pairs { $b % 2 != 0 } %h;
+
+=item map_pairs BLOCK LIST
+
+Evaluates BLOCK for each element pair in LIST and returns a new list consisting
+of BLOCK's return values. The two elements are set to C<$a> and C<$b>. Note
+that those two are aliases to the original value so changing them will modify
+the input array.
+
+Useful for transforming hashes (in list form).
+
+ my %with_uppercase_key = map_pairs { (uc $a => $b) } %h;
+ my %with_doubled_value = map_pairs { ( $a => 2 * $b) } %h;
+
=item natatime BLOCK LIST
Creates an array iterator, for looping over an array in chunks of
diff --git a/t/List-MoreUtils-pp.t b/t/List-MoreUtils-pp.t
index 93a0e0c..ecc8d7c 100644
--- a/t/List-MoreUtils-pp.t
+++ b/t/List-MoreUtils-pp.t
@@ -325,6 +325,31 @@ BEGIN { $TESTS += 5 }
ok(arrayeq(\@b, [2,4,6]));
}
+BEGIN { $TESTS += 2 }
+{
+ my @a = (zero => 0, three => 3, one => 1, two => 2, four => 4, five => 5);
+
+ my @contains_e = grep_pairs { $a =~ /e/ } @a;
+ my @even = grep_pairs { $b % 2 == 0 } @a;
+
+ ok(arrayeq(\@contains_e, [ zero => 0, three => 3, one => 1, five => 5 ]));
+ ok(arrayeq(\@even, [ zero => 0, two => 2, four => 4 ]));
+}
+
+BEGIN { $TESTS += 2 }
+{
+ my @a = (zero => 0, three => 3, one => 1, two => 2, four => 4, five => 5);
+
+ my @with_uppercase_keys = map_pairs { (uc $a => $b) } @a;
+ my @with_doubled_values = map_pairs { ( $a => 2 * $b) } @a;
+
+ ok(arrayeq(\@with_uppercase_keys, [
+ ZERO => 0, THREE => 3, ONE => 1, TWO => 2, FOUR => 4, FIVE => 5,
+ ]));
+ ok(arrayeq(\@with_doubled_values, [
+ zero => 0, three => 6, one => 2, two => 4, four => 8, five => 10,
+ ]));
+}
BEGIN { $TESTS += 10 }
diff --git a/t/List-MoreUtils.t b/t/List-MoreUtils.t
index 6289db6..5ae1f30 100644
--- a/t/List-MoreUtils.t
+++ b/t/List-MoreUtils.t
@@ -354,6 +354,32 @@ BEGIN { $TESTS += 5 }
}
+BEGIN { $TESTS += 2 }
+{
+ my @a = (zero => 0, three => 3, one => 1, two => 2, four => 4, five => 5);
+
+ my @contains_e = grep_pairs { $a =~ /e/ } @a;
+ my @even = grep_pairs { $b % 2 == 0 } @a;
+
+ ok(arrayeq(\@contains_e, [ zero => 0, three => 3, one => 1, five => 5 ]));
+ ok(arrayeq(\@even, [ zero => 0, two => 2, four => 4 ]));
+}
+
+BEGIN { $TESTS += 2 }
+{
+ my @a = (zero => 0, three => 3, one => 1, two => 2, four => 4, five => 5);
+
+ my @with_uppercase_keys = map_pairs { (uc $a => $b) } @a;
+ my @with_doubled_values = map_pairs { ( $a => 2 * $b) } @a;
+
+ ok(arrayeq(\@with_uppercase_keys, [
+ ZERO => 0, THREE => 3, ONE => 1, TWO => 2, FOUR => 4, FIVE => 5,
+ ]));
+ ok(arrayeq(\@with_doubled_values, [
+ zero => 0, three => 6, one => 2, two => 4, four => 8, five => 10,
+ ]));
+}
+
BEGIN { $TESTS += 11 }
{
my @a = (1, 2, 3, 4, 5);