69 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.quantile_big(table_name TEXT, col_name TEXT,
quantile FLOAT) RETURNS FLOAT AS $$
83 rows_removed BIGINT := 0;
94 EXECUTE
'SELECT array[MIN('||col_name||
'), AVG('||col_name||
'), MAX('||col_name||
')], COUNT(*) FROM '||table_name||
' ' INTO size, count;
95 quantile_size = (count*
quantile)::BIGINT;
98 -- check
for bad input
100 RAISE EXCEPTION 'Quantile should be between 0 and 0.99';
105 -- create some temp tables to use as swap
106 DROP TABLE IF EXISTS temptable0;
107 CREATE TEMP TABLE temptable0(val FLOAT);
109 DROP TABLE IF EXISTS temptable1;
110 CREATE TEMP TABLE temptable1(val FLOAT);
125 IF(increment = 0) THEN
126 EXECUTE 'SELECT ARRAY[MIN('||col_name||'),AVG('||col_name||'),MAX('||col_name||')],COUNT(*) FROM '||table_name||' WHERE '||col_name||' <= '||size[2]||';' INTO last_values, last_count;
128 EXECUTE 'SELECT ARRAY[MIN(val),AVG(val),MAX(val)],COUNT(*) FROM temptable'||increment%2||' WHERE val <= '||size[2]||';' INTO last_values, last_count;
130 last_count = last_count + rows_removed;
133 IF(last_count=rows_removed) THEN
139 ELSIF((increment > 0)AND(curr_old = last_count)) THEN
145 ELSIF((last_count - quantile_size) > 1) THEN
150 size[2] = (last_values[3]+size[1])/2.0;
151 size[3] = last_values[3];
153 --remove all rows that are larger than new max
154 IF(increment = 0) THEN
155 EXECUTE 'INSERT INTO temptable'||(increment+1)%2||' SELECT '||col_name||' FROM '||table_name||' WHERE '||col_name||' <= '||size[3]||';';
157 EXECUTE 'INSERT INTO temptable'||(increment+1)%2||' SELECT val FROM temptable'||increment%2||' WHERE val <= '||size[3]||';';
158 EXECUTE 'TRUNCATE temptable'||increment%2||';';
162 ELSIF((quantile_size - last_count) > 1) THEN
167 size[1] = last_values[3];
168 size[2] = (last_values[3]+size[3])/2.0;
170 --remove all rows that are smaller than new min
171 IF(increment = 0) THEN
172 --add a small offset to ensure the value that is equal to size[1] is NOT kept
173 EXECUTE 'INSERT INTO temptable'||(increment+1)%2||' SELECT '||col_name||' FROM '||table_name||' WHERE '||col_name||' > '||size[1]||'+1e-10;';
175 EXECUTE 'INSERT INTO temptable'||(increment+1)%2||' SELECT val FROM temptable'||increment%2||' WHERE val > '||size[1]||'+1e-10;';
176 EXECUTE 'TRUNCATE temptable'||increment%2||';';
178 rows_removed = last_count;
184 IF((quantile_size - last_count) < 0)THEN
185 size[2] = last_values[3];
189 increment = increment+1;
190 curr_old = last_count;
197 EXECUTE 'SELECT MAX('||col_name||'),COUNT(*) FROM '||table_name||' WHERE '||col_name||' < '||size[2]||';' INTO last_value1, last_count1;
199 IF(last_count1 >= quantile_size) THEN
203 EXECUTE 'SELECT MIN('||col_name||'),'||full_size||'-COUNT(*)+1 FROM '||table_name||' WHERE '||col_name||' > '||size[2]||';' INTO last_value2, last_count2;
206 IF(last_count >= quantile_size) THEN
207 --If the difference is greater than 1 element away, then there are probably many repeated values
208 IF(last_count-quantile_size >= 1) THEN
209 RETURN last_values[3];
211 RETURN last_values[3]*(quantile_size-last_count1)/(last_count-last_count1)+last_value1*(last_count-quantile_size)/(last_count-last_count1);
213 --If the difference is greater than 1 element away, then there are probably many repeated values
214 IF(quantile_size-last_count > 1) THEN
217 RETURN last_value2*(quantile_size-last_count)/(last_count2-last_count)+last_values[3]*(last_count2-quantile_size)/(last_count2-last_count);
221 DROP TABLE IF EXISTS temptable0;
222 DROP TABLE IF EXISTS temptable1;
239 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.
quantile(table_name TEXT, col_name TEXT,
quantile FLOAT) RETURNS FLOAT AS $$
245 -- check for bad input
247 RAISE EXCEPTION 'Quantile should be between 0 and 0.99';
250 EXECUTE 'SELECT COUNT(*)*'||
quantile||' FROM '||table_name||' ' INTO size;
251 EXECUTE 'SELECT ARRAY(SELECT '||col_name||' FROM (SELECT '||col_name||' FROM '||table_name||' ORDER BY '||col_name||' OFFSET '||floor(size)||'-1 LIMIT 2) AS g)' INTO result;
252 EXECUTE 'SELECT '||result[2]||'*('||size||'%1)+'||result[1]||'*(1-'||size||'%1)' INTO res;