hi I am working on one of the custom function (essentially a Jaro Winkler Function with different implementation ). However this function is being called from the other procedure to calculate the jaro winkler distance between two strings. Oracle provided function UTL_MATC.JARO_WINKLER is producing the results in 3 secs where as this is taking 5 times more 15-20 seconds to produce the same result. Please help me in tuning and ways to perform it better.
Also can anyone provide me an insight on context switching of this function vs in-built UTL_MATCH.JARO_WINKLER.
CREATE OR REPLACE FUNCTION JW_S (str1_in IN VARCHAR2
, str2_in IN VARCHAR2)
RETURN float
AS
str1 VARCHAR2 (4000);
str2 VARCHAR2 (4000);
min_str VARCHAR2 (4000);
max_str VARCHAR2 (4000);
len_str1 PLS_INTEGER;
len_str2 PLS_INTEGER;
max_len PLS_INTEGER;
min_len PLS_INTEGER;
si PLS_INTEGER := 0;
matches PLS_INTEGER := 0;
jaro_constant NUMBER := 1 / 3;
jaro_threshold NUMBER := 0.7;
jaro_coefficeint NUMBER := 0.7;
jaro_score NUMBER;
jw_booster NUMBER;
jaro_value NUMBER := 0;
transpositions PLS_INTEGER := 0;
prefixes PLS_INTEGER ;
range PLS_INTEGER := 0;
match_char_count PLS_INTEGER := 0;
TYPE bool_list_t IS TABLE OF BOOLEAN
INDEX BY PLS_INTEGER;
MatchFlags bool_list_t;
TYPE number_list_t IS TABLE OF NUMBER
INDEX BY PLS_INTEGER;
MatchIndexes number_list_t;
TYPE match_chars_t IS TABLE OF VARCHAR2 (3000)
INDEX BY PLS_INTEGER;
ms1 match_chars_t;
ms2 match_chars_t;
-- MAIN
BEGIN
str1 := str1_in ;
str2 := str2_in ;
--DBMS_OUTPUT.put_line ('Jarov2');
len_str1 := LENGTH (str1);
len_str2 := LENGTH (str2);
IF len_str1 > len_str2
THEN
max_str := str1_in;
min_str := str2_in ;
max_len := len_str1 ;
min_len := len_str2;
ELSE
max_len := len_str2;
min_len := len_str1;
max_str := str2_in ;
min_str := str1_in;
END IF;
range := FLOOR ( (max_len / 2) - 1 ) ;
-- DBMS_OUTPUT.PUT_LINE (range);
FOR i IN 1 .. min_len
LOOP
MatchIndexes (i) := -1;
END LOOP;
FOR j IN 1 .. max_len
LOOP
MatchFlags (j) := FALSE;
END LOOP;
matches := 0;
FOR mi IN 1 .. min_len
LOOP
FOR xi IN GREATEST (mi - range, 1) .. LEAST (mi + range , max_len)
LOOP
IF (SUBSTR (max_str, xi, 1) = SUBSTR (min_str, mi, 1) ) AND (MatchFlags (xi) = FALSE)
THEN
matches := matches + 1;
MatchIndexes (mi) := xi;
MatchFlags (xi) := TRUE;
GOTO linea_exit;
END IF;
END LOOP;
<<linea_exit>>
NULL;
END LOOP;
si := 1;
FOR mi IN 1 .. min_len
LOOP
IF (MatchIndexes (mi) != -1)
THEN
ms1 (si) := SUBSTR (min_str, mi, 1);
si := si + 1;
match_char_count := match_char_count + 1;
END IF;
END LOOP;
si := 1;
FOR mi IN 1 .. max_len
LOOP
IF (MatchFlags (Mi) = TRUE)
THEN
ms2 (si) := SUBSTR (max_str, mi, 1);
si := si + 1;
END IF;
END LOOP;
/* Transpositions */
transpositions := 0;
FOR mi IN 1 .. match_char_count
LOOP
IF ms1 (mi) != ms2 (mi)
THEN
transpositions := transpositions + 1;
END IF;
END LOOP;
transpositions := FLOOR (transpositions * 1 / 2);
/* Prefixes */
prefixes := 0;
FOR r IN 1 .. min_len
LOOP
IF SUBSTR (str1, r, 1) = SUBSTR (str2, r, 1)
THEN
prefixes := prefixes + 1;
ELSE
GOTO end_prefix;
END IF;
END LOOP;
<<end_prefix>>
IF matches = 0
THEN
jaro_score := 0;
ELSE
jaro_value :=
( (matches / len_str1)
+ (matches / len_str2)
+ ( (matches - transpositions) / matches))
* jaro_constant;
IF (jaro_value > jaro_threshold)
THEN
jw_booster := ( LEAST (jaro_coefficeint, 1 / max_len) * prefixes * (1 - jaro_value));
jaro_score := jaro_value + jw_booster;
ELSE
jaro_score := jaro_value;
END IF;
END IF;
RETURN jaro_score;
EXCEPTION
WHEN OTHERS
THEN
RETURN NULL;
END;
/