Hi,
I have a table with 800k records that has one lat-long positions per record . I am looking for the best approach to find related records that meet the following conditions:
within 28 days of each other (start new <= end old + 28 days)
within the same category (there are 110 categories)
within 20 meters distance of each other
So far I've tried spatial joins but Oracle wants to do the spatial join before the other conditions are checked, resulting in a runawy query.
My best guess is using the lat-long positions and what's called the haversin formula. But it still takes too long but maybe I'm missing something. My query and explain below:
Any tips, solutions?
Regards,
Martin
WITH X AS
( SELECT M.MELDINGSNUMMER
, M.CATEGORIE_VID
, M.ADWH_DT_MELDING_GEMAAKT
, M.ADWH_DT_1E_AFGEHANDELD
, LOC.LATITUDE_RADIALEN LATRAD
, LOC.LONGITUDE_RADIALEN LONRAD
FROM MORA_MELDING M
JOIN MORA_DIM_LOCATIE LOC
ON M.LOCATIE_VID = LOC.ADWH_VERSIE_ID
WHERE 1=1
)
SELECT A.MELDINGSNUMMER MELDINGSNUMMER_VERLEDEN
, B.MELDINGSNUMMER MELDINGSNUMMER_HEDEN
, DECODE(A.MELDINGSNUMMER, NULL, 0, 1) IND_HERHAAL
, ROUND( 1000 * 6367 * 2 *
ATAN2(
SQRT(
POWER(SIN( ( B.LATRAD - A.LATRAD ) /2) ,2) +
COS( A.LATRAD ) * COS( B.LATRAD ) * POWER(SIN( ( B.LONRAD - A.LONRAD ) /2) ,2)
)
, SQRT(1-
POWER(SIN( ( B.LATRAD - A.LATRAD ) /2) ,2) +
COS( A.LATRAD ) * COS( B.LATRAD ) * POWER(SIN( ( B.LONRAD - A.LONRAD ) /2) ,2)
)
)
) AFSTAND
FROM X A -- VERLEDEN
JOIN X B -- HEDEN
ON A.CATEGORIE_VID = B.CATEGORIE_VID
AND B.ADWH_DT_MELDING_GEMAAKT BETWEEN A.ADWH_DT_1E_AFGEHANDELD AND (A.ADWH_DT_1E_AFGEHANDELD + 28)
AND A.MELDINGSNUMMER < B.MELDINGSNUMMER
AND ROUND( 1000 * 6367 * 2 *
ATAN2(
SQRT(
POWER(SIN( ( B.LATRAD - A.LATRAD ) /2) ,2) +
COS( A.LATRAD ) * COS( B.LATRAD ) * POWER(SIN( ( B.LONRAD - A.LONRAD ) /2) ,2)
)
, SQRT(1-
POWER(SIN( ( B.LATRAD - A.LATRAD ) /2) ,2) +
COS( A.LATRAD ) * COS( B.LATRAD ) * POWER(SIN( ( B.LONRAD - A.LONRAD ) /2) ,2)
)
)
) <= 20
WHERE 1=1
;
This query results in the following explain plan:
Plan hash value: 477534478
----------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 122 | 36730 (1)| 00:00:02 |
| 1 | TEMP TABLE TRANSFORMATION | | | | | |
| 2 | LOAD AS SELECT | SYS_TEMP_0FD9D6651_28B8758F | | | | |
|* 3 | HASH JOIN | | 36416 | 3947K| 36503 (1)| 00:00:02 |
|* 4 | TABLE ACCESS FULL | MORA_MELDING | 36416 | 1244K| 21840 (1)| 00:00:01 |
|* 5 | TABLE ACCESS FULL | MORA_DIM_LOCATIE | 879K| 63M| 14660 (1)| 00:00:01 |
|* 6 | HASH JOIN | | 1 | 122 | 227 (1)| 00:00:01 |
| 7 | VIEW | | 27275 | 1624K| 113 (0)| 00:00:01 |
| 8 | TABLE ACCESS FULL | SYS_TEMP_0FD9D6651_28B8758F | 27275 | 1864K| 113 (0)| 00:00:01 |
| 9 | VIEW | | 27275 | 1624K| 113 (0)| 00:00:01 |
| 10 | TABLE ACCESS FULL | SYS_TEMP_0FD9D6651_28B8758F | 27275 | 1864K| 113 (0)| 00:00:01 |
----------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$5BF935F8
2 - SEL$F1D6E378
4 - SEL$F1D6E378 / M@SEL$1
5 - SEL$F1D6E378 / LOC@SEL$1
7 - SEL$ECCE8CA4 / A@SEL$2
8 - SEL$ECCE8CA4 / T1@SEL$ECCE8CA4
9 - SEL$ECCE8CA5 / B@SEL$2
10 - SEL$ECCE8CA5 / T1@SEL$ECCE8CA5
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
USE_HASH(@"SEL$F1D6E378" "LOC"@"SEL$1")
LEADING(@"SEL$F1D6E378" "M"@"SEL$1" "LOC"@"SEL$1")
FULL(@"SEL$F1D6E378" "LOC"@"SEL$1")
FULL(@"SEL$F1D6E378" "M"@"SEL$1")
FULL(@"SEL$ECCE8CA4" "T1"@"SEL$ECCE8CA4")
FULL(@"SEL$ECCE8CA5" "T1"@"SEL$ECCE8CA5")
USE_HASH(@"SEL$5BF935F8" "B"@"SEL$2")
LEADING(@"SEL$5BF935F8" "A"@"SEL$2" "B"@"SEL$2")
NO_ACCESS(@"SEL$5BF935F8" "B"@"SEL$2")
NO_ACCESS(@"SEL$5BF935F8" "A"@"SEL$2")
OUTLINE(@"SEL$2")
OUTLINE(@"SEL$4")
MERGE(@"SEL$1")
OUTLINE(@"SEL$F1D6E378")
OUTLINE(@"SEL$1")
OUTLINE(@"SEL$3")
MERGE(@"SEL$2")
OUTLINE_LEAF(@"SEL$5BF935F8")
OUTLINE_LEAF(@"SEL$ECCE8CA5")
OUTLINE_LEAF(@"SEL$ECCE8CA4")
MERGE(@"SEL$1")
OUTLINE_LEAF(@"SEL$F1D6E378")
ALL_ROWS
DB_VERSION('12.1.0.2')
OPTIMIZER_FEATURES_ENABLE('12.1.0.2')
IGNORE_OPTIM_EMBEDDED_HINTS
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("M"."LOCATIE_VID"="LOC"."ADWH_VERSIE_ID")
4 - filter("M"."ADWH_DT_MELDING_GEMAAKT">=TO_DATE(' 2020-12-01 00:00:00', 'syyyy-mm-dd
hh24:mi:ss') AND "M"."ADWH_DT_MELDING_GEMAAKT"<=TO_DATE(' 2020-12-31 23:59:59', 'syyyy-mm-dd
hh24:mi:ss'))
5 - filter("LOC"."LATITUDE" IS NOT NULL AND "LOC"."LONGITUDE" IS NOT NULL)
6 - access("A"."CATEGORIE_VID"="B"."CATEGORIE_VID")
filter("B"."ADWH_DT_MELDING_GEMAAKT">="A"."ADWH_DT_1E_AFGEHANDELD" AND
"B"."ADWH_DT_MELDING_GEMAAKT"<=INTERNAL_FUNCTION("A"."ADWH_DT_1E_AFGEHANDELD")+28 AND
"A"."MELDINGSNUMMER"<"B"."MELDINGSNUMMER" AND ROUND(12734000*ATAN2(SQRT(POWER(SIN(("B"."LATRAD"-"A
"."LATRAD")/2),2)+COS("A"."LATRAD")*COS("B"."LATRAD")*POWER(SIN(("B"."LONRAD"-"A"."LONRAD")/2),2))
,SQRT(1-POWER(SIN(("B"."LATRAD"-"A"."LATRAD")/2),2)+COS("A"."LATRAD")*COS("B"."LATRAD")*POWER(SIN(
("B"."LONRAD"-"A"."LONRAD")/2),2))))<=20)
Column Projection Information (identified by operation id):
-----------------------------------------------------------
1 - "A"."CATEGORIE_VID"[NUMBER,22], "B"."CATEGORIE_VID"[NUMBER,22],
"A"."MELDINGSNUMMER"[NUMBER,22], "A"."LONRAD"[NUMBER,22], "A"."ADWH_DT_1E_AFGEHANDELD"[DATE,7],
"A"."LATRAD"[NUMBER,22], "B"."MELDINGSNUMMER"[NUMBER,22], "B"."LONRAD"[NUMBER,22],
"B"."ADWH_DT_MELDING_GEMAAKT"[DATE,7], "B"."LATRAD"[NUMBER,22]
2 - SYSDEF[4], SYSDEF[0], SYSDEF[1], SYSDEF[112], SYSDEF[0]
3 - (#keys=1) "M"."LOCATIE_VID"[NUMBER,22], "LOC"."ADWH_VERSIE_ID"[NUMBER,22],
"M"."MELDINGSNUMMER"[NUMBER,22], "M"."CATEGORIE_VID"[NUMBER,22],
"M"."ADWH_DT_MELDING_GEMAAKT"[DATE,7], "M"."ADWH_DT_1E_AFGEHANDELD"[DATE,7],
"LOC"."LONGITUDE_RADIALEN"[NUMBER,22], "LOC"."LATITUDE"[VARCHAR2,32],
"LOC"."LONGITUDE"[VARCHAR2,32], "LOC"."ADWH_STRAAT"[VARCHAR2,64],
"LOC"."ADWH_HUISNUMMER"[NUMBER,22], "LOC"."LATITUDE_RADIALEN"[NUMBER,22]
4 - (rowset=200) "M"."MELDINGSNUMMER"[NUMBER,22], "M"."CATEGORIE_VID"[NUMBER,22],
"M"."ADWH_DT_MELDING_GEMAAKT"[DATE,7], "M"."ADWH_DT_1E_AFGEHANDELD"[DATE,7],
"M"."LOCATIE_VID"[NUMBER,22]
5 - (rowset=200) "LOC"."ADWH_VERSIE_ID"[NUMBER,22], "LOC"."LATITUDE"[VARCHAR2,32],
"LOC"."LONGITUDE"[VARCHAR2,32], "LOC"."ADWH_STRAAT"[VARCHAR2,64],
"LOC"."ADWH_HUISNUMMER"[NUMBER,22], "LOC"."LATITUDE_RADIALEN"[NUMBER,22],
"LOC"."LONGITUDE_RADIALEN"[NUMBER,22]
6 - (#keys=1) "A"."CATEGORIE_VID"[NUMBER,22], "B"."CATEGORIE_VID"[NUMBER,22],
"A"."MELDINGSNUMMER"[NUMBER,22], "A"."LONRAD"[NUMBER,22], "A"."ADWH_DT_1E_AFGEHANDELD"[DATE,7],
"A"."LATRAD"[NUMBER,22], "B"."MELDINGSNUMMER"[NUMBER,22], "B"."LONRAD"[NUMBER,22],
"B"."ADWH_DT_MELDING_GEMAAKT"[DATE,7], "B"."LATRAD"[NUMBER,22]
7 - "A"."MELDINGSNUMMER"[NUMBER,22], "A"."CATEGORIE_VID"[NUMBER,22],
"A"."ADWH_DT_1E_AFGEHANDELD"[DATE,7], "A"."LATRAD"[NUMBER,22], "A"."LONRAD"[NUMBER,22]
8 - "C0"[NUMBER,22], "C1"[NUMBER,22], "C2"[DATE,7], "C3"[DATE,7], "C4"[VARCHAR2,64],
"C5"[NUMBER,22], "C6"[NUMBER,22], "C7"[NUMBER,22]
9 - "B"."MELDINGSNUMMER"[NUMBER,22], "B"."CATEGORIE_VID"[NUMBER,22],
"B"."ADWH_DT_MELDING_GEMAAKT"[DATE,7], "B"."LATRAD"[NUMBER,22], "B"."LONRAD"[NUMBER,22]
10 - "C0"[NUMBER,22], "C1"[NUMBER,22], "C2"[DATE,7], "C3"[DATE,7], "C4"[VARCHAR2,64],
"C5"[NUMBER,22], "C6"[NUMBER,22], "C7"[NUMBER,22]
Note
-----
- dynamic statistics used: dynamic sampling (level=2)
- 1 Sql Plan Directive used for this statement