Subject: | Segmentation fault |
The function ends on segmentation fault, when strings are longer than
13k characters.
It is also using too much memory.
My improvement also speed ups this implementation - from 52s to 2s on my
test sample.
Subject: | LevenshteinXS.xs |
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
/****************************************************/
/* Levenshtein Distance Algorithm */
/* C Implementation by Lorenzo Seidenari */
/* http://www.merriampark.com/ldc.htm */
/* modified by dree */
/****************************************************/
#include <stdlib.h>
#include <string.h>
int levenshtein_distance(char *s,char*t);
int minimum(int a,int b,int c);
int levenshtein_distance(char *s,char*t)
/*Compute levenshtein distance between s and t*/
{
//Step 1
int c, r,n,m,cost,*lastRow, *actRow, *tmpRow, distance, left;
if (strcmp(s,t) == 0) {return 0;}
n=strlen(s);
m=strlen(t);
if(n==0) {return m;}
if(m==0) {return n;}
lastRow = malloc((sizeof(int))*(n+1));
actRow = malloc((sizeof(int))*(n+1));
m++;
n++;
//Step 2
for(c=0;c<n;c++) {
lastRow[c]=c;
}
//Step 3 and 4
left = 0;
for(r=1;r<m;r++) {
left = r;
actRow[0] = left;
for(c=1;c<n;c++)
{
//Step 5
if(s[c-1]==t[r-1]) {
actRow[c] = lastRow[c - 1];
} else {
left = minimum(
lastRow[c - 1] + 1,
lastRow[c] + 1,
left + 1);
actRow[c] = left;
}
}
tmpRow = lastRow;
lastRow = actRow;
actRow = tmpRow;
}
free(lastRow);
free(actRow);
return left;
}
int minimum(int a,int b,int c)
/*Gets the minimum of three values*/
{
int min=a;
if(b<min)
min=b;
if(c<min)
min=c;
return min;
}
MODULE = Text::LevenshteinXS PACKAGE = Text::LevenshteinXS
int
distance(s,t)
char * s
char * t
CODE:
RETVAL = levenshtein_distance(s,t);
OUTPUT:
RETVAL