BrowserUk posted a Pure Perl alternative "lcss" subroutine which fixes this bug
on PerlMonks:
http://perlmonks.org/?node_id=819919
I adapted the sub to match the existing user interface for String::LCSS.
This is available in the diff-pm.txt patch.
I also modified the 01strings.t test to prove that the new code fixes this
bug. Refer to the diff-test.txt patch.
The new private sub (_lcss) was designed to allow for expanding the
functionality of the module. This would allow the user to choose a minimum
longest substring length (different from the hard-coded length of 2).
This new feature is currently disabled.
--- ../String-LCSS-0.12.orig/lib/String/LCSS.pm 2003-04-10 12:08:21.000000000 -0400
+++ lib/String/LCSS.pm 2015-11-11 20:53:37.195640000 -0500
@@ -11,71 +11,32 @@
@EXPORT_OK = qw( lcss );
}
+sub lcss {
+ _lcss($_[0], $_[1], 2);
+}
-sub lcss
-{
-my ($a, $b) = @_;
-my (@x, @y);
-my ($maxLength, $maxXi, $maxXk, $switch) = (0,0,0,0);
-my $returnString;
-
-
- my $m = length ( $a );
- my $n = length ( $b );
-
- if ( $m > $n ) {
- ( $m, $n ) = ( $n, $m );
- ( $a, $b ) = ( $b, $a );
- $switch = 1;
- }
-
- @x = split ( //, $a );
- @y = split ( //, $b );
-
- #
- # declare varialbes outside of loops for a hair more speed.
- #
- my ( $i, $ii, $j, $k, $length, $xi, $xj );
-
- for ( $k = 0; $k < $n; $k++ ) {
- #
- # abort if the remainder of the string to check is
- # less than the common substring length already found
- #
- last if ( $maxLength >= ( $m - $k ) );
-
- ( $xi, $length ) = ( 0, 0 );
-
- for ( $i = 0; $i < $m; $i++ ) {
- $j = $k;
- $length = 0;
- for ( $ii = 0; $ii < ($m-$i); $ii++ ) {
- if ( $x[$i+$ii] eq $y[$j] ) {
- $xi = $i+$ii unless ( $length );
- $xj = $j unless ( $length );
- $length++;
- $j++;
- }
- elsif ( $length ) {
- if ( $length > $maxLength ) {
- $maxLength = $length;
- $maxXi = $xi;
- $maxXk = $xj;
- }
- last;
- }
- }
- }
- }
-
- if ( $maxLength > 1 ) {
- for ($i = $maxXi; $i < $maxXi+$maxLength; $i++ ) {
- $returnString .= $x[$i];
- }
- ($maxXi, $maxXk) = ($maxXk, $maxXi) if ( $switch );
- }
-
- ( wantarray ) ? ( $returnString, $maxXi, $maxXk ) : $returnString;
+sub _lcss {
+ my( $ref1, $ref2, $min ) = @_;
+ my( $swapped, $l1, $l2 ) = ( 0, map length( $_ ), $ref1, $ref2 );
+ ( $l2, $ref2, $l1, $ref1, $swapped ) = ( $l1, $ref1, $l2, $ref2, 1 ) if $l1 > $l2;
+ $min = 1 unless defined $min;
+
+ my $mask = $ref1 x ( int( $l2 / $l1 ) + 1 );
+
+ my @match = '';
+ for my $start ( 0 .. $l1-1 ) {
+ my $masked = substr( $mask, $start, $l2 ) ^ $ref2;
+ while( $masked =~ m[\0{$min,}]go ) {
+ @match = (
+ substr( $ref2, $-[ 0 ], $+[ 0 ] - $-[ 0 ] ),
+ ( $-[ 0 ]+$start ) % $l1,
+ $-[ 0 ]
+ ) if ( $+[ 0 ] - $-[ 0 ] ) > length $match[ 0 ];
+ }
+ }
+ @match[ 2, 1 ] = @match[ 1, 2 ] if $swapped;
+ return unless $match[ 0 ];
+ return wantarray ? @match : $match[ 0 ];
}
--- ../String-LCSS-0.12.orig/t/01strings.t 2003-04-10 11:58:33.000000000 -0400
+++ t/01strings.t 2015-11-11 20:56:55.745450000 -0500
@@ -31,3 +31,22 @@
$haystack = "why did the quick brown fox jumps over the lazy dog";
$longest = String::LCSS::lcss ( $needle, $haystack );
is ( $longest, " the quick brown fox ", "why did the quick brown fox" );
+
+$needle = '1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 2 18 19 20 21 22 23 7 24';
+$haystack = '1 2 3 4 5 7 8 9 11 12 13 10 14 15 16 17 2 18 19 20 21 22 23 7 24';
+$longest = String::LCSS::lcss ( $needle, $haystack );
+is ( $longest, ' 14 15 16 17 2 18 19 20 21 22 23 7 24', 'rt32036 bug');
+
+$needle = 'the quick brown fox jumped over the lazy dog';
+$haystack = 'I saw a quick brown fox and jumped over the lazy dog';
+$longest = String::LCSS::lcss ( $needle, $haystack );
+is ( $longest, ' jumped over the lazy dog', 'rt62175 bug');
+
+my @results;
+
+@results = String::LCSS::lcss(qw(123456 456789));
+is_deeply(\@results, [qw(456 3 0)], 'array');
+
+@results = String::LCSS::lcss(qw(AbCdefg AbCDef));
+is_deeply(\@results, [qw(AbC 0 0)], 'array2');
+