Hi, I have a big table want to calculate Z-score of grouped data, it may very slow use my method, do you have more efficient way,
some example data
create table NOAA_OLR_Z_TEST
(
longitude NUMBER,
latitude NUMBER,
date_ DATE,
olr_d NUMBER
)
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('01-05-2006', 'dd-mm-yyyy'), 212);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('02-05-2006', 'dd-mm-yyyy'), 248);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('03-05-2006', 'dd-mm-yyyy'), 249);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('04-05-2006', 'dd-mm-yyyy'), -9999);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('05-05-2006', 'dd-mm-yyyy'), 217);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('01-06-2006', 'dd-mm-yyyy'), 252);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('02-06-2006', 'dd-mm-yyyy'), 250);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('03-06-2006', 'dd-mm-yyyy'), 262);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('04-06-2006', 'dd-mm-yyyy'), 181);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (45.5, 84.5, to_date('05-06-2006', 'dd-mm-yyyy'), -9999);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('01-05-2006', 'dd-mm-yyyy'), 199);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('02-05-2006', 'dd-mm-yyyy'), 244);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('03-05-2006', 'dd-mm-yyyy'), 248);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('04-05-2006', 'dd-mm-yyyy'), 227);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('05-05-2006', 'dd-mm-yyyy'), 219);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('01-06-2006', 'dd-mm-yyyy'), 255);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('02-06-2006', 'dd-mm-yyyy'), 258);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('03-06-2006', 'dd-mm-yyyy'), 252);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('04-06-2006', 'dd-mm-yyyy'), 170);
insert into NOAA_OLR_Z_TEST (LONGITUDE, LATITUDE, DATE_, OLR_D)
values (46.5, 84.5, to_date('05-06-2006', 'dd-mm-yyyy'), 238);
my sql
With TBL_MEAN_STD As
(Select Distinct LONGITUDE,
LATITUDE,
TRUNC(DATE_, 'mm') DATE_,
Avg(OLR_D) OVER(Partition By LONGITUDE, LATITUDE, TRUNC(DATE_, 'mm')) M,
Stddev(OLR_D) OVER(Partition By LONGITUDE, LATITUDE, TRUNC(DATE_, 'mm')) STD
From NOAA_OLR_Z_TEST
Where OLR_D <> -9999)
Select A.LONGITUDE,
A.LATITUDE,
A.DATE_,
A.OLR_D,
(A.OLR_D - B.M) / B.STD As Z_SCORE,
B.M,
B.STD
From NOAA_OLR_Z_TEST A, TBL_MEAN_STD B
Where A.LONGITUDE = B.LONGITUDE
And A.LATITUDE = B.LATITUDE
And TRUNC(A.DATE_, 'mm') = B.DATE_
And A.OLR_D <> -9999