 # string match accuracy percentage for two name columns

I have two columns name and ename from two tables and I want to find the string match probability percentage for these columns as both contain employee name. For eg. If values are

``````name                ename
-----              --------
Kim Gutierrez   Kimberly Gutierrez
``````

then the probability percentage is around 0.3 as the percentage value will be between 0 and 1. Now I have find the % for around 500k names in the table.

i have tried this query but didn't get the required output.

``````CREATE TEMPORARY FUNCTION similarity(name STRING, ename STRING)
RETURNS FLOAT64
LANGUAGE js AS """
var _extend = function(dst) {
var sources = Array.prototype.slice.call(arguments, 1);
for (var i=0; i<sources.length; ++i) {
var src = sources[i];
for (var p in src) {
if (src.hasOwnProperty(p)) dst[p] = src[p];
}
}
return dst;
};

var Levenshtein = {
get: function(str1, str2) {
// base cases
if (str1 === str2) return 0;
if (str1.length === 0) return str2.length;
if (str2.length === 0) return str1.length;

// two rows
var prevRow  = new Array(str2.length + 1),
curCol, nextCol, i, j, tmp;

// initialise previous row
for (i=0; i<prevRow.length; ++i) {
prevRow[i] = i;
}

// calculate current row distance from previous row
for (i=0; i<str1.length; ++i) {
nextCol = i + 1;

for (j=0; j<str2.length; ++j) {
curCol = nextCol;

// substution
nextCol = prevRow[j] + ( (str1.charAt(i) === str2.charAt(j)) ? 0 : 1 );
// insertion
tmp = curCol + 1;
if (nextCol > tmp) {
nextCol = tmp;
}
// deletion
tmp = prevRow[j + 1] + 1;
if (nextCol > tmp) {
nextCol = tmp;
}

// copy current col value into previous (in preparation for next iteration)
prevRow[j] = curCol;
}

// copy last col value into previous (in preparation for next iteration)
prevRow[j] = nextCol;
}

return nextCol;
}

};

var the_Name1;

try {
the_Name1 = decodeURI(name).toLowerCase();
} catch (ex) {
the_Name1 = Name1.toLowerCase();
}

try {
the_Name2 = decodeURI(ename).toLowerCase();
} catch (ex) {
the_Name2 = Name2.toLowerCase();
}

return 1 - Levenshtein.get(the_Name1, the_Name2) / the_Name1.length;

""";
SELECT
t1.name,
t2.ename,
similarity(t1.name, t2.ename) similarity_percentge,
FROM ratings t1
JOIN reviews t2 on t1.location_id=t2.location_id`````` 