14 m4_include(`SQLCommon.m4
') --'
182 ------------------------------------------------------------------------
207 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cross_validation_general(
208 modelling_func VARCHAR, --
function for setting up the model
209 modelling_params VARCHAR[], -- parameters
for modelling
210 modelling_params_type VARCHAR[], -- parameter types
for modelling
212 param_explored VARCHAR, -- which parameter will be studied
using validation
213 explore_values VARCHAR[], -- values that will be explored
for this parameter
215 predict_func VARCHAR, --
function for predicting
using the model
216 predict_params VARCHAR[], -- parameters
for prediction
217 predict_params_type VARCHAR[], -- parameter types
for prediction
219 metric_func VARCHAR, --
function that computes the error metric
220 metric_params VARCHAR[], -- parameters
for metric
221 metric_params_type VARCHAR[], -- parameter types
for metric
223 data_tbl VARCHAR, -- table containing the data, which will be split into training and validation parts
224 data_id VARCHAR, -- user provide a unique ID
for each row
225 id_is_random BOOLEAN, -- the ID provided by user is random
227 validation_result VARCHAR, -- store the result: param values, error, +/-
229 data_cols VARCHAR[], -- names of data columns that are going to be used
230 fold_num INTEGER -- how many fold validation,
default: 10
232 PythonFunction(validation, cross_validation, cross_validation_general)
233 $$ LANGUAGE plpythonu;
235 ------------------------------------------------------------------------
237 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cross_validation_general(
238 modelling_func VARCHAR, --
function for setting up the model
239 modelling_params VARCHAR[], -- parameters
for modelling
240 modelling_params_type VARCHAR[], -- parameter types
for modelling
242 param_explored VARCHAR, -- which parameter will be studied
using validation
243 explore_values VARCHAR[], -- values that will be explored
for this parameter
245 predict_func VARCHAR, --
function for predicting
using the model
246 predict_params VARCHAR[], -- parameters
for prediction
247 predict_params_type VARCHAR[], -- parameter types
for prediction
249 metric_func VARCHAR, --
function that computes the error metric
250 metric_params VARCHAR[], -- parameters
for prediction
251 metric_params_type VARCHAR[], -- parameter types
for prediction
253 data_tbl VARCHAR, -- table containing the data, which will be split into training and validation parts
254 data_id VARCHAR, -- user provide a unique ID
for each row
255 id_is_random BOOLEAN, -- the ID provided by user is random
257 validation_result VARCHAR, -- store the result: param values, error, +/-
259 data_cols VARCHAR[] -- names of data columns that are going to be used
262 PERFORM MADLIB_SCHEMA.cross_validation_general($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,10);
264 $$ LANGUAGE plpgsql VOLATILE;
266 ------------------------------------------------------------------------
267 ------------------------------------------------------------------------
268 ------------------------------------------------------------------------
296 -- ------------------------------------------------------------------------
312 -- ------------------------------------------------------------------------
323 -- ------------------------------------------------------------------------
339 ------------------------------------------------------------------------
344 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cv_linregr_train(
351 $$ LANGUAGE plpythonu;
353 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linregr_predict(
354 coef DOUBLE PRECISION[],
355 col_ind DOUBLE PRECISION[]
356 ) RETURNS DOUBLE PRECISION AS $$
357 PythonFunction(validation, cross_validation, linregr_predict)
358 $$ LANGUAGE plpythonu;
363 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cv_linregr_predict(
367 col_id VARCHAR, -- ID column
371 $$ LANGUAGE plpythonu;
373 -- compare the prediction and actual values
374 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mse_error(
375 tbl_prediction VARCHAR, -- predicted values
378 values_actual VARCHAR,
382 error DOUBLE PRECISION;
383 old_messages VARCHAR;
385 old_messages := (SELECT setting FROM pg_settings WHERE name =
'client_min_messages');
386 EXECUTE
'SET client_min_messages TO warning';
389 CREATE TABLE '|| tbl_error ||
' AS
391 avg(('|| tbl_prediction ||
'.prediction - '|| tbl_actual ||
'.'|| values_actual ||
')^2) as mean_squared_error
393 '|| tbl_prediction ||
',
396 '|| tbl_prediction ||
'.id = '|| tbl_actual ||
'.'|| id_actual;
398 EXECUTE
'SET client_min_messages TO ' || old_messages;
400 $$ LANGUAGE plpgsql VOLATILE;
402 ------------------------------------------------------------------------
412 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_predict(
413 coef DOUBLE PRECISION[],
414 col_ind DOUBLE PRECISION[]
415 ) RETURNS BOOLEAN AS $$
417 $$ LANGUAGE plpythonu;
425 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cv_logregr_predict(
433 $$ LANGUAGE plpythonu;
445 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_accuracy(
446 coef DOUBLE PRECISION[],
447 col_ind DOUBLE PRECISION[],
449 ) RETURNS INTEGER AS $$
451 $$ LANGUAGE plpythonu;
459 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cv_logregr_accuracy(
467 $$ LANGUAGE plpythonu;