Skip to Main Content

SQL & PL/SQL

Announcement

For appeals, questions and feedback about Oracle Forums, please email oracle-forums-moderators_us@oracle.com. Technical questions should be asked in the appropriate category. Thank you!

Tunning function

reddy482May 18 2017 — edited May 22 2017

hi I am working on one of the custom function (essentially a Jaro Winkler Function with different implementation ). However this function is being called from the other procedure to calculate the jaro winkler distance between two strings. Oracle provided function UTL_MATC.JARO_WINKLER is producing the results in 3 secs where as this is taking 5 times more 15-20 seconds to produce the same result. Please help me in tuning and ways to perform it better.

Also can anyone provide me an insight on context switching of this function vs in-built UTL_MATCH.JARO_WINKLER.

Thanks in advance

CREATE OR REPLACE FUNCTION  JW_S (str1_in   IN VARCHAR2

                                          , str2_in   IN VARCHAR2)

   RETURN float

   AS

   str1               VARCHAR2 (4000);

   str2               VARCHAR2 (4000);

   min_str            VARCHAR2 (4000);

   max_str            VARCHAR2 (4000);

   len_str1           PLS_INTEGER;

   len_str2           PLS_INTEGER;

  

   max_len            PLS_INTEGER;

   min_len            PLS_INTEGER;

   si                 PLS_INTEGER := 0; 

   matches            PLS_INTEGER := 0;

   jaro_constant      NUMBER := 1 / 3;

   jaro_threshold     NUMBER := 0.7;

   jaro_coefficeint   NUMBER := 0.7;

   jaro_score         NUMBER;

   jw_booster         NUMBER;

   jaro_value         NUMBER := 0;

   transpositions     PLS_INTEGER := 0;

   prefixes           PLS_INTEGER ;

   range              PLS_INTEGER := 0;

  

   match_char_count   PLS_INTEGER := 0;

  

   TYPE bool_list_t IS TABLE OF BOOLEAN

      INDEX BY PLS_INTEGER;

   MatchFlags        bool_list_t;

   TYPE number_list_t IS TABLE OF NUMBER

      INDEX BY PLS_INTEGER;

   MatchIndexes        number_list_t;

   TYPE match_chars_t IS TABLE OF VARCHAR2 (3000)

      INDEX BY PLS_INTEGER;

   ms1                match_chars_t;

   ms2                match_chars_t;

-- MAIN

BEGIN

   str1             :=   str1_in ;

   str2             :=  str2_in ;

   --DBMS_OUTPUT.put_line ('Jarov2');

   len_str1         := LENGTH (str1);

   len_str2         := LENGTH (str2);

   IF len_str1 > len_str2

   THEN

      max_str   := str1_in;

      min_str   := str2_in ;

      max_len   :=  len_str1 ;

      min_len   :=  len_str2;

   ELSE

      max_len   := len_str2;

      min_len   := len_str1;

      max_str   := str2_in ;

      min_str   := str1_in;

   END IF;

   range     := FLOOR ( (max_len / 2) - 1 ) ;

  

  -- DBMS_OUTPUT.PUT_LINE (range);

  

   FOR i IN 1 .. min_len

   LOOP

      MatchIndexes (i)   := -1;

   END LOOP;

   FOR j IN 1 .. max_len

   LOOP

      MatchFlags (j)   := FALSE;

   END LOOP;

  

    matches          := 0;

   FOR mi IN 1 .. min_len

   LOOP

    

      FOR xi IN GREATEST (mi - range, 1) .. LEAST (mi + range  , max_len)

      LOOP

        

         IF (SUBSTR (max_str, xi, 1) = SUBSTR (min_str, mi, 1) ) AND (MatchFlags (xi) = FALSE)

         THEN

            matches            := matches + 1;

            MatchIndexes (mi)   := xi;

            MatchFlags (xi)   := TRUE;

            GOTO linea_exit;

         END IF;

      END LOOP;  

     <<linea_exit>>

      NULL;                             

   END LOOP;                                            

   si               := 1;

   FOR mi IN 1 .. min_len

   LOOP

      IF (MatchIndexes (mi) != -1)

      THEN

         ms1 (si)           := SUBSTR (min_str, mi, 1);

         si                 := si + 1;

         match_char_count   := match_char_count + 1;

      

      END IF;

   END LOOP;

   si               := 1;

  

   FOR mi IN 1 .. max_len

   LOOP

      IF (MatchFlags (Mi) = TRUE)

      THEN

         ms2 (si)   := SUBSTR (max_str, mi, 1);

         si         := si + 1;

        

      END IF;

   END LOOP;

   /*  Transpositions */

   transpositions   := 0;

   FOR mi IN 1 .. match_char_count

   LOOP

     

      IF   ms1 (mi)   != ms2 (mi)

      THEN

         transpositions   := transpositions + 1;

      END IF;

   END LOOP;

   transpositions   := FLOOR (transpositions * 1 / 2);

  

   /*  Prefixes */

   prefixes         := 0;

   FOR r IN 1 .. min_len

   LOOP

    

      IF SUBSTR (str1, r, 1) = SUBSTR (str2, r, 1)

      THEN

         prefixes   := prefixes + 1;

      ELSE

         GOTO end_prefix;

      END IF;

                                           

   END LOOP;

<<end_prefix>>

   IF matches = 0

   THEN

      jaro_score   := 0;

   ELSE

      jaro_value      :=

           (  (matches / len_str1)

            + (matches / len_str2)

            + ( (matches - transpositions) / matches))

         * jaro_constant;

      IF (jaro_value > jaro_threshold)

      THEN

         jw_booster      := (  LEAST (jaro_coefficeint, 1 / max_len) * prefixes * (1 - jaro_value));

         jaro_score   := jaro_value + jw_booster;

      ELSE

         jaro_score   := jaro_value;

      END IF;

   END IF;

   RETURN jaro_score;

EXCEPTION

   WHEN OTHERS

   THEN 

      RETURN NULL;

END;

/

Comments
Locked Post
New comments cannot be posted to this locked post.
Post Details
Locked on Jun 19 2017
Added on May 18 2017
11 comments
726 views