13 m4_include(`SQLCommon.m4
')
223 DROP TYPE IF EXISTS MADLIB_SCHEMA.intermediate_cox_prop_hazards_result;
224 CREATE TYPE MADLIB_SCHEMA.intermediate_cox_prop_hazards_result AS (
225 x DOUBLE PRECISION[],
227 exp_coef_x DOUBLE PRECISION,
228 x_exp_coef_x DOUBLE PRECISION[],
229 x_xTrans_exp_coef_x DOUBLE PRECISION[]
233 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.intermediate_cox_prop_hazards(
234 /*+ x */ DOUBLE PRECISION[],
235 /*+ status */ BOOLEAN,
236 /*+ coef */ DOUBLE PRECISION[])
237 RETURNS MADLIB_SCHEMA.intermediate_cox_prop_hazards_result AS
239 LANGUAGE c IMMUTABLE;
243 DROP TYPE IF EXISTS MADLIB_SCHEMA.cox_prop_hazards_result;
244 CREATE TYPE MADLIB_SCHEMA.cox_prop_hazards_result AS (
245 coef DOUBLE PRECISION[],
246 logLikelihood DOUBLE PRECISION,
247 std_err DOUBLE PRECISION[],
248 z_stats DOUBLE PRECISION[],
249 p_values DOUBLE PRECISION[],
250 num_iterations INTEGER
256 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_step_final(
257 state DOUBLE PRECISION[])
258 RETURNS DOUBLE PRECISION[]
260 LANGUAGE C IMMUTABLE STRICT;
262 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_step_transition(
263 /*+ state */ DOUBLE PRECISION[],
264 /*+ x */ DOUBLE PRECISION[],
265 /*+ y */ DOUBLE PRECISION,
266 /*+ status */ BOOLEAN,
267 /*+ exp_coef_x */ DOUBLE PRECISION,
268 /*+ xexp_coef_x */ DOUBLE PRECISION[],
269 /*+ x_xTrans_exp_coef_x */ DOUBLE PRECISION[],
270 /*+ previous_state */ DOUBLE PRECISION[])
271 RETURNS DOUBLE PRECISION[] AS
273 LANGUAGE C IMMUTABLE;
281 m4_ifdef(`__GREENPLUM__',m4_ifdef(`__HAS_ORDERED_AGGREGATES__
',`ORDERED'))
282 AGGREGATE MADLIB_SCHEMA.cox_prop_hazards_step(
290 DOUBLE PRECISION[]) (
291 STYPE=DOUBLE PRECISION[],
292 SFUNC=MADLIB_SCHEMA.cox_prop_hazards_step_transition,
293 FINALFUNC=MADLIB_SCHEMA.cox_prop_hazards_step_final,
294 INITCOND='{0,0,0,0,0,0,0}
'
299 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_cox_prop_hazards_step_distance(
300 /*+ state1 */ DOUBLE PRECISION[],
301 /*+ state2 */ DOUBLE PRECISION[])
302 RETURNS DOUBLE PRECISION AS
304 LANGUAGE c IMMUTABLE STRICT;
306 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_cox_prop_hazards_result(
307 /*+ state */ DOUBLE PRECISION[])
308 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
310 LANGUAGE c IMMUTABLE STRICT;
313 -- We only need to document the last one (unfortunately, in Greenplum we have to
314 -- use function overloading instead of default arguments).
315 CREATE FUNCTION MADLIB_SCHEMA.compute_cox_prop_hazards_regr(
317 "indepColumn" VARCHAR,
320 "maxNumIterations" INTEGER,
322 "precision" DOUBLE PRECISION)
324 AS $$PythonFunction(stats, cox_prop_hazards, compute_cox_prop_hazards)$$
325 LANGUAGE plpythonu VOLATILE;
328 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
377 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
379 "indepColumn" VARCHAR,
382 "maxNumIterations" INTEGER /*+ DEFAULT 20 */,
383 "optimizer" VARCHAR /*+ DEFAULT 'newton
' */,
384 "precision" DOUBLE PRECISION /*+ DEFAULT 0.0001 */)
385 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS $$
387 theIteration INTEGER;
389 theResult MADLIB_SCHEMA.cox_prop_hazards_result;
392 SELECT MADLIB_SCHEMA.compute_cox_prop_hazards_regr($1, $2, $3, $4, $5, $6, $7)
394 IF optimizer = 'newton
' THEN
395 fnName := 'internal_cox_prop_hazards_result
';
397 RAISE EXCEPTION 'Unknown optimizer (
''%
'')
', optimizer;
404 MADLIB_SCHEMA.$sql$ || fnName || $sql$(_madlib_state) AS result
405 FROM _madlib_iterative_alg
406 WHERE _madlib_iteration = $sql$ || theIteration || $sql$
411 -- The number of iterations are not updated in the C++ code. We do it here.
412 IF NOT (theResult IS NULL) THEN
413 theResult.num_iterations = theIteration;
417 $$ LANGUAGE plpgsql VOLATILE;
420 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
421 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
423 "indepColumn" VARCHAR,
426 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
427 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, 20, 'newton
', 0.0001);$$
428 LANGUAGE sql VOLATILE;
430 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
431 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
433 "indepColumn" VARCHAR,
436 "maxNumIterations" INTEGER)
437 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
438 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, $5, 'newton
', 0.0001);$$
439 LANGUAGE sql VOLATILE;
441 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
442 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
444 "indepColumn" VARCHAR,
447 "maxNumIterations" INTEGER,
449 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
450 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, $5, $6, 0.0001);$$
451 LANGUAGE sql VOLATILE;
457 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
458 source_table VARCHAR -- name of input table
459 , out_table VARCHAR -- name of output table
460 , dependent_varname VARCHAR -- name of dependent variable
461 , independent_varname VARCHAR -- name of independent variable
462 , status VARCHAR -- censoring status
466 insert_string VARCHAR;
467 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result;
470 EXECUTE 'SELECT setting FROM pg_settings WHERE name=
''client_min_messages
''' INTO old_msg_level;
471 EXECUTE
'SET client_min_messages TO warning';
472 IF (source_table IS NULL OR source_table =
'') THEN
473 RAISE EXCEPTION 'Invalid input table name given.';
475 IF (NOT MADLIB_SCHEMA.__table_exists(source_table)) THEN
476 RAISE EXCEPTION 'Input table name does not exist.';
478 IF (MADLIB_SCHEMA.__table_exists(out_table)) THEN
479 RAISE EXCEPTION 'Output table name already exists. Drop the table before calling the function.';
481 IF (out_table IS NULL OR out_table = '') THEN
482 RAISE EXCEPTION 'Invalid output table name given.';
484 IF (dependent_varname IS NULL OR dependent_varname = '') THEN
485 RAISE EXCEPTION 'Invalid dependent variable name given.';
487 IF (independent_varname IS NULL OR independent_varname = '') THEN
488 RAISE EXCEPTION 'Invalid independent variable name given.';
490 IF (status IS NULL OR independent_varname = '') THEN
491 RAISE EXCEPTION 'Invalid status name given.';
493 -- create output table with appropriate column names
494 EXECUTE 'DROP TABLE IF EXISTS ' || out_table;
496 CREATE TABLE ' || out_table || ' (
497 coef DOUBLE PRECISION[],
498 std_err DOUBLE PRECISION[],
499 z_stats DOUBLE PRECISION[],
500 p_values DOUBLE PRECISION[])';
501 -- compute cox regression results
502 cox_rst := MADLIB_SCHEMA.__internal_get_cox_prop_hazards_result(
503 source_table, dependent_varname, independent_varname, status);
504 insert_string := MADLIB_SCHEMA.__internal_get_cox_prop_hazards_insert_string(
506 -- Ensure Infinity and NaN are cast properly
507 insert_string := REGEXP_REPLACE(insert_string, 'Infinity',
508 '''Infinity''::
double precision', 'gi');
509 insert_string := REGEXP_REPLACE(insert_string, 'NaN',
510 '''NaN''::
double precision', 'gi');
511 EXECUTE insert_string;
512 EXECUTE 'SET client_min_messages TO '|| old_msg_level;
514 $$ LANGUAGE plpgsql VOLATILE;
522 CREATE FUNCTION MADLIB_SCHEMA.__internal_get_cox_prop_hazards_result(
523 source_table VARCHAR -- name of input table
524 , dependent_varname VARCHAR -- name of dependent variable
525 , independent_varname VARCHAR -- name of independent variable
526 , status VARCHAR -- name of status variable
528 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS $$
530 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result;
534 || '''' || source_table || ''', '
535 || '''' || independent_varname || ''', '
536 || '''' || dependent_varname || ''', '
537 || '''' || status || ''')).* '
541 $$ LANGUAGE plpgsql VOLATILE;
548 CREATE FUNCTION MADLIB_SCHEMA.__internal_get_cox_prop_hazards_insert_string(
549 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result,
552 RETURNS VARCHAR AS $$
554 insert_string VARCHAR;
556 insert_string := 'INSERT INTO ' || out_table || ' VALUES (';
557 insert_string := insert_string ||
559 WHEN (cox_rst).coef is NULL
561 ELSE 'ARRAY[
' || array_to_string((cox_rst).coef, ',
') || '],
'
564 WHEN (cox_rst).std_err is NULL
566 ELSE 'ARRAY[
' || array_to_string((cox_rst).std_err, ',
') || '],
'
569 WHEN (cox_rst).z_stats is NULL
571 ELSE 'ARRAY[
' || array_to_string((cox_rst).z_stats, ',
') || '],
'
574 WHEN (cox_rst).p_values is NULL
576 ELSE 'ARRAY[
' || array_to_string((cox_rst).p_values, ',
') || '])
'
578 RETURN insert_string;
580 $$ LANGUAGE plpgsql VOLATILE;
586 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
587 source_table VARCHAR -- name of input table
588 , out_table VARCHAR -- name of output table
589 , dependent_variable VARCHAR -- name of dependent variable
590 , independent_variable VARCHAR -- name of independent variable
594 EXECUTE 'SELECT MADLIB_SCHEMA.cox_prop_hazards(
' ||
595 ' ''' || source_table ||
''' ' ||
596 ' ,
''' || out_table ||
''' ' ||
597 ' ,
''' || dependent_variable ||
''' ' ||
598 ' ,
''' || independent_variable ||
''' ' ||
601 $$ LANGUAGE plpgsql VOLATILE;
604 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards()
605 RETURNS VARCHAR AS $$
607 RETURN MADLIB_SCHEMA.cox_prop_hazards('');
609 $$ LANGUAGE plpgsql VOLATILE;
611 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
612 usage_string VARCHAR -- usage string
614 RETURNS VARCHAR AS $$
616 insert_string VARCHAR;
618 IF (usage_string = '' OR usage_string = 'help
' OR usage_string = '?
') THEN
619 insert_string := '' ||
621 E'-----------------------------------------------------------------------------------------\n
' ||
622 E' Functionality: Cox proprtional hazards regression (Breslow method)\n
' ||
623 E' SELECT {schema_madlib}.cox_prop_hazards(
''source_table
'' \n
' ||
624 E' ,
''output_table
'' \n
' ||
625 E' ,
''dependent_variable
'' \n
' ||
626 E' ,
''independent_variable
'' \n
' ||
627 E' ,
''right_censoring_status
'' \n
' ||
629 E'For more details on
function usage: \n
' ||
630 E'SELECT {schema_madlib}.cox_prop_hazards(
''usage
'') \n
' ||
632 ElSIF (usage_string = 'usage
') THEN
633 insert_string := '' ||
635 E'-----------------------------------------------------------------------------------------\n
' ||
636 E' SELECT {schema_madlib}.cox_prop_hazards( \n
' ||
637 E' ''source_table
'', -- Name of data table \n
' ||
638 E' ''output_table
'', -- Name of result table (overwrites
if exists) \n
' ||
639 E' ''dependent_variable
'', -- Name of column
for dependent variables\n
' ||
640 E' ''independent_variable
'', -- Name of column
for independent variables\n
' ||
641 E' (can be any SQL expression Eg:
''*
'') \n
' ||
642 E' [
''right_censoring_status
'', -- Name of the column containing censoring status \n
' ||
643 E' 0/
false : If the observation is censored\n
' ||
644 E' 1/
true : otherwise\n
' ||
645 E' Default is 1/
true for all observations\n
' ||
646 E' Can also be an SQL expression:
''dependent_variable < 10
'') \n
' ||
650 E'-----------------------------------------------------------------------------------------\n
' ||
651 E' The output table (
''output_table
'' above) has the following columns\n
' ||
652 E' ''coef
'' DOUBLE PRECISION[], -- Coefficients of regression \n
' ||
653 E' ''std_err
'' DOUBLE PRECISION[], -- Standard errors\n
' ||
654 E' ''z_stats
'' DOUBLE PRECISION[], -- z-stats of the standard errors\n
' ||
655 E' ''p_values
'' DOUBLE PRECISION[], -- p-values of the standard errors\n
' ||
659 insert_string := 'No such option. Run SELECT {schema_madlib}.cox_prop_hazards()
';
661 RETURN insert_string;
663 $$ LANGUAGE plpgsql VOLATILE;