MySQL 5 Levenshtein stored function
Par KrisDevParis le samedi, janvier 29 2011, 01:20 - Lien permanent
Levenshtein can be used to create a fuzzy search engine. It can be eventually used in complement of soundex, soundex2, metaphone or phonex. I personnally use a customized version of phonex, which is more adapted to the French language, but this is not the purpose of this article.
As you might have tried, the code at http://codejanitor.com/wp/2007/02/10/levenshtein-distance-as-a-mysql-stored-function/comment-page-1/ doesn't work for PHP5.
However there's a MySQL 5 implementation by Riadh in the comments, but it doesn't work since the website comments removes HTML tags...
So here is this same function with my interpretation of what is missing. It is usefull for those that don't want (or can't) recompile MySQL to use a faster UDF function.
DELIMITER ;; CREATE FUNCTION LEVENSHTEIN (s1 VARCHAR(255), s2 VARCHAR(255)) RETURNS INT DETERMINISTIC BEGIN DECLARE s1_len INT DEFAULT CHAR_LENGTH(s1); DECLARE s2_len INT DEFAULT CHAR_LENGTH(s2); DECLARE i INT DEFAULT 1; DECLARE j INT DEFAULT 1; DECLARE c INT DEFAULT 0; DECLARE c_temp INT DEFAULT 0; DECLARE cost INT DEFAULT 0; DECLARE s1_char CHAR DEFAULT ``; DECLARE cv0 VARBINARY(256) DEFAULT 0×00; DECLARE cv1 VARBINARY(256) DEFAULT 0×00; IF s1 = s2 THEN RETURN 0; ELSEIF s1_len = 0 THEN RETURN s2_len; ELSEIF s2_len = 0 THEN RETURN s1_len; ELSE WHILE j <= s2_len DO SELECT CONCAT(cv1, UNHEX(HEX(j))) INTO cv1; SELECT j + 1 INTO j; END WHILE; WHILE i <= s1_len DO SELECT SUBSTRING(s1, i, 1) INTO s1_char; SELECT i INTO c; SELECT UNHEX(HEX(i)) INTO cv0; SELECT 1 INTO j; WHILE j <= s2_len DO SELECT C+1 INTO C; IF s1_char = SUBSTRING(s2, j, 1) THEN SELECT 0 INTO cost; ELSE SELECT 1 INTO cost; END IF; SELECT CONV(HEX(SUBSTRING(cv1, j, 1)), 16, 10) + cost INTO c_temp; IF c > c_temp THEN SELECT c_temp INTO c; END IF; SELECT (CONV(HEX(SUBSTRING(cv1, j+1, 1)), 16, 10) + 1) INTO c_temp; IF c > c_temp THEN SELECT c_temp INTO c; END IF; SELECT CONCAT(cv0, UNHEX(HEX(c))) INTO cv0; SELECT j + 1 INTO j; END WHILE; SELECT cv0 INTO cv1; SELECT i + 1 INTO i; END WHILE; END IF; RETURN c; END;
And:
DELIMITER ;; CREATE FUNCTION LEVENSHTEIN_RATIO (s1 VARCHAR(255), s2 VARCHAR(255)) RETURNS INT DETERMINISTIC BEGIN DECLARE s1_len INT DEFAULT LENGTH(s1); DECLARE s2_len INT DEFAULT LENGTH(s2); DECLARE max_len INT DEFAULT 0; IF s1_len > s2_len THEN SELECT s1_len INTO max_len; ELSE SELECT s2_len INTO max_len; END IF; RETURN ROUND((1 - LEVENSHTEIN(s1, s2) / max_len) * 100); END;