13 m4_include(`SQLCommon.m4
')
223 DROP TYPE IF EXISTS MADLIB_SCHEMA.intermediate_cox_prop_hazards_result;
224 CREATE TYPE MADLIB_SCHEMA.intermediate_cox_prop_hazards_result AS (
225 x DOUBLE PRECISION[],
227 exp_coef_x DOUBLE PRECISION,
228 x_exp_coef_x DOUBLE PRECISION[],
229 x_xTrans_exp_coef_x DOUBLE PRECISION[]
233 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.intermediate_cox_prop_hazards(
234 /*+ x */ DOUBLE PRECISION[],
235 /*+ status */ BOOLEAN,
236 /*+ coef */ DOUBLE PRECISION[])
237 RETURNS MADLIB_SCHEMA.intermediate_cox_prop_hazards_result AS
239 LANGUAGE c IMMUTABLE;
243 DROP TYPE IF EXISTS MADLIB_SCHEMA.cox_prop_hazards_result;
244 CREATE TYPE MADLIB_SCHEMA.cox_prop_hazards_result AS (
245 coef DOUBLE PRECISION[],
246 logLikelihood DOUBLE PRECISION,
247 std_err DOUBLE PRECISION[],
248 z_stats DOUBLE PRECISION[],
249 p_values DOUBLE PRECISION[],
250 num_iterations INTEGER
256 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_step_final(
257 state DOUBLE PRECISION[])
258 RETURNS DOUBLE PRECISION[]
260 LANGUAGE C IMMUTABLE STRICT;
262 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_step_transition(
263 /*+ state */ DOUBLE PRECISION[],
264 /*+ x */ DOUBLE PRECISION[],
265 /*+ y */ DOUBLE PRECISION,
266 /*+ status */ BOOLEAN,
267 /*+ exp_coef_x */ DOUBLE PRECISION,
268 /*+ xexp_coef_x */ DOUBLE PRECISION[],
269 /*+ x_xTrans_exp_coef_x */ DOUBLE PRECISION[],
270 /*+ previous_state */ DOUBLE PRECISION[])
271 RETURNS DOUBLE PRECISION[] AS
273 LANGUAGE C IMMUTABLE;
281 m4_ifdef(`__GREENPLUM__',m4_ifdef(`__HAS_ORDERED_AGGREGATES__
',`ORDERED'))
282 AGGREGATE MADLIB_SCHEMA.cox_prop_hazards_step(
290 DOUBLE PRECISION[]) (
291 STYPE=DOUBLE PRECISION[],
292 SFUNC=MADLIB_SCHEMA.cox_prop_hazards_step_transition,
293 FINALFUNC=MADLIB_SCHEMA.cox_prop_hazards_step_final,
294 INITCOND='{0,0,0,0,0,0,0}
'
299 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_cox_prop_hazards_step_distance(
300 /*+ state1 */ DOUBLE PRECISION[],
301 /*+ state2 */ DOUBLE PRECISION[])
302 RETURNS DOUBLE PRECISION AS
304 LANGUAGE c IMMUTABLE STRICT;
306 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_cox_prop_hazards_result(
307 /*+ state */ DOUBLE PRECISION[])
308 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
310 LANGUAGE c IMMUTABLE STRICT;
313 -- We only need to document the last one (unfortunately, in Greenplum we have to
314 -- use function overloading instead of default arguments).
315 CREATE FUNCTION MADLIB_SCHEMA.compute_cox_prop_hazards_regr(
317 "indepColumn" VARCHAR,
320 "maxNumIterations" INTEGER,
322 "precision" DOUBLE PRECISION)
324 AS $$PythonFunction(stats, cox_prop_hazards, compute_cox_prop_hazards)$$
325 LANGUAGE plpythonu VOLATILE;
328 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
376 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
378 "indepColumn" VARCHAR,
381 "maxNumIterations" INTEGER /*+ DEFAULT 20 */,
382 "optimizer" VARCHAR /*+ DEFAULT 'newton
' */,
383 "precision" DOUBLE PRECISION /*+ DEFAULT 0.0001 */)
384 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS $$
386 theIteration INTEGER;
388 theResult MADLIB_SCHEMA.cox_prop_hazards_result;
391 SELECT MADLIB_SCHEMA.compute_cox_prop_hazards_regr($1, $2, $3, $4, $5, $6, $7)
393 IF optimizer = 'newton
' THEN
394 fnName := 'internal_cox_prop_hazards_result
';
396 RAISE EXCEPTION 'Unknown optimizer (
''%
'')
', optimizer;
403 MADLIB_SCHEMA.$sql$ || fnName || $sql$(_madlib_state) AS result
404 FROM _madlib_iterative_alg
405 WHERE _madlib_iteration = $sql$ || theIteration || $sql$
410 -- The number of iterations are not updated in the C++ code. We do it here.
411 IF NOT (theResult IS NULL) THEN
412 theResult.num_iterations = theIteration;
416 $$ LANGUAGE plpgsql VOLATILE;
419 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
420 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
422 "indepColumn" VARCHAR,
425 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
426 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, 20, 'newton
', 0.0001);$$
427 LANGUAGE sql VOLATILE;
429 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
430 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
432 "indepColumn" VARCHAR,
435 "maxNumIterations" INTEGER)
436 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
437 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, $5, 'newton
', 0.0001);$$
438 LANGUAGE sql VOLATILE;
440 -- Note: The function cox_prop_hazards_regr has been deprecated but maintained
441 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards_regr(
443 "indepColumn" VARCHAR,
446 "maxNumIterations" INTEGER,
448 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS
449 $$SELECT MADLIB_SCHEMA.cox_prop_hazards_regr($1, $2, $3, $4, $5, $6, 0.0001);$$
450 LANGUAGE sql VOLATILE;
456 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
457 source_table VARCHAR -- name of input table
458 , out_table VARCHAR -- name of output table
459 , dependent_varname VARCHAR -- name of dependent variable
460 , independent_varname VARCHAR -- name of independent variable
461 , status VARCHAR -- censoring status
465 insert_string VARCHAR;
466 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result;
469 EXECUTE 'SELECT setting FROM pg_settings WHERE name=
''client_min_messages
''' INTO old_msg_level;
470 EXECUTE
'SET client_min_messages TO warning';
471 IF (source_table IS NULL OR source_table =
'') THEN
472 RAISE EXCEPTION 'Invalid input table name given.';
474 IF (NOT MADLIB_SCHEMA.__table_exists(source_table)) THEN
475 RAISE EXCEPTION 'Input table name does not exist.';
477 IF (MADLIB_SCHEMA.__table_exists(out_table)) THEN
478 RAISE EXCEPTION 'Output table name already exists. Drop the table before calling the function.';
480 IF (out_table IS NULL OR out_table = '') THEN
481 RAISE EXCEPTION 'Invalid output table name given.';
483 IF (dependent_varname IS NULL OR dependent_varname = '') THEN
484 RAISE EXCEPTION 'Invalid dependent variable name given.';
486 IF (independent_varname IS NULL OR independent_varname = '') THEN
487 RAISE EXCEPTION 'Invalid independent variable name given.';
489 IF (status IS NULL OR independent_varname = '') THEN
490 RAISE EXCEPTION 'Invalid status name given.';
492 -- create output table with appropriate column names
493 EXECUTE 'DROP TABLE IF EXISTS ' || out_table;
495 CREATE TABLE ' || out_table || ' (
496 coef DOUBLE PRECISION[],
497 std_err DOUBLE PRECISION[],
498 z_stats DOUBLE PRECISION[],
499 p_values DOUBLE PRECISION[])';
500 -- compute cox regression results
501 cox_rst := MADLIB_SCHEMA.__internal_get_cox_prop_hazards_result(
502 source_table, dependent_varname, independent_varname, status);
503 insert_string := MADLIB_SCHEMA.__internal_get_cox_prop_hazards_insert_string(
505 -- Ensure Infinity and NaN are cast properly
506 insert_string := REGEXP_REPLACE(insert_string, 'Infinity',
507 '''Infinity''::
double precision', 'gi');
508 insert_string := REGEXP_REPLACE(insert_string, 'NaN',
509 '''NaN''::
double precision', 'gi');
510 EXECUTE insert_string;
511 EXECUTE 'SET client_min_messages TO '|| old_msg_level;
513 $$ LANGUAGE plpgsql VOLATILE;
521 CREATE FUNCTION MADLIB_SCHEMA.__internal_get_cox_prop_hazards_result(
522 source_table VARCHAR -- name of input table
523 , dependent_varname VARCHAR -- name of dependent variable
524 , independent_varname VARCHAR -- name of independent variable
525 , status VARCHAR -- name of status variable
527 RETURNS MADLIB_SCHEMA.cox_prop_hazards_result AS $$
529 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result;
533 || '''' || source_table || ''', '
534 || '''' || independent_varname || ''', '
535 || '''' || dependent_varname || ''', '
536 || '''' || status || ''')).* '
540 $$ LANGUAGE plpgsql VOLATILE;
547 CREATE FUNCTION MADLIB_SCHEMA.__internal_get_cox_prop_hazards_insert_string(
548 cox_rst MADLIB_SCHEMA.cox_prop_hazards_result,
551 RETURNS VARCHAR AS $$
553 insert_string VARCHAR;
555 insert_string := 'INSERT INTO ' || out_table || ' VALUES (';
556 insert_string := insert_string ||
558 WHEN (cox_rst).coef is NULL
560 ELSE 'ARRAY[
' || array_to_string((cox_rst).coef, ',
') || '],
'
563 WHEN (cox_rst).std_err is NULL
565 ELSE 'ARRAY[
' || array_to_string((cox_rst).std_err, ',
') || '],
'
568 WHEN (cox_rst).z_stats is NULL
570 ELSE 'ARRAY[
' || array_to_string((cox_rst).z_stats, ',
') || '],
'
573 WHEN (cox_rst).p_values is NULL
575 ELSE 'ARRAY[
' || array_to_string((cox_rst).p_values, ',
') || '])
'
577 RETURN insert_string;
579 $$ LANGUAGE plpgsql VOLATILE;
585 CREATE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
586 source_table VARCHAR -- name of input table
587 , out_table VARCHAR -- name of output table
588 , dependent_variable VARCHAR -- name of dependent variable
589 , independent_variable VARCHAR -- name of independent variable
593 EXECUTE 'SELECT MADLIB_SCHEMA.cox_prop_hazards(
' ||
594 ' ''' || source_table ||
''' ' ||
595 ' ,
''' || out_table ||
''' ' ||
596 ' ,
''' || dependent_variable ||
''' ' ||
597 ' ,
''' || independent_variable ||
''' ' ||
600 $$ LANGUAGE plpgsql VOLATILE;
603 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards()
604 RETURNS VARCHAR AS $$
606 RETURN MADLIB_SCHEMA.cox_prop_hazards('');
608 $$ LANGUAGE plpgsql VOLATILE;
610 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_prop_hazards(
611 usage_string VARCHAR -- usage string
613 RETURNS VARCHAR AS $$
615 insert_string VARCHAR;
617 IF (usage_string = '' OR usage_string = 'help
' OR usage_string = '?
') THEN
618 insert_string := '' ||
620 E'-----------------------------------------------------------------------------------------\n
' ||
621 E' Functionality: Cox proprtional hazards regression (Breslow method)\n
' ||
622 E' SELECT {schema_madlib}.cox_prop_hazards(
''source_table
'' \n
' ||
623 E' ,
''output_table
'' \n
' ||
624 E' ,
''dependent_variable
'' \n
' ||
625 E' ,
''independent_variable
'' \n
' ||
626 E' ,
''right_censoring_status
'' \n
' ||
628 E'For more details on
function usage: \n
' ||
629 E'SELECT {schema_madlib}.cox_prop_hazards(
''usage
'') \n
' ||
631 ElSIF (usage_string = 'usage
') THEN
632 insert_string := '' ||
634 E'-----------------------------------------------------------------------------------------\n
' ||
635 E' SELECT {schema_madlib}.cox_prop_hazards( \n
' ||
636 E' ''source_table
'', -- Name of data table \n
' ||
637 E' ''output_table
'', -- Name of result table (overwrites
if exists) \n
' ||
638 E' ''dependent_variable
'', -- Name of column
for dependent variables\n
' ||
639 E' ''independent_variable
'', -- Name of column
for independent variables\n
' ||
640 E' (can be any SQL expression Eg:
''*
'') \n
' ||
641 E' [
''right_censoring_status
'', -- Name of the column containing censoring status \n
' ||
642 E' 0/
false : If the observation is censored\n
' ||
643 E' 1/
true : otherwise\n
' ||
644 E' Default is 1/
true for all observations\n
' ||
645 E' Can also be an SQL expression:
''dependent_variable < 10
'') \n
' ||
649 E'-----------------------------------------------------------------------------------------\n
' ||
650 E' The output table (
''output_table
'' above) has the following columns\n
' ||
651 E' ''coef
'' DOUBLE PRECISION[], -- Coefficients of regression \n
' ||
652 E' ''std_err
'' DOUBLE PRECISION[], -- Standard errors\n
' ||
653 E' ''z_stats
'' DOUBLE PRECISION[], -- z-stats of the standard errors\n
' ||
654 E' ''p_values
'' DOUBLE PRECISION[], -- p-values of the standard errors\n
' ||
658 insert_string := 'No such option. Run SELECT {schema_madlib}.cox_prop_hazards()
';
660 RETURN insert_string;
662 $$ LANGUAGE plpgsql VOLATILE;