12 m4_include(`SQLCommon.m4
')
283 ------------------ Marginal Logistic Regression ------------------------------
285 CREATE TYPE MADLIB_SCHEMA.marginal_logregr_result AS (
286 margins DOUBLE PRECISION[],
287 coef DOUBLE PRECISION[],
288 std_err DOUBLE PRECISION[],
289 t_stats DOUBLE PRECISION[],
290 p_values DOUBLE PRECISION[]
293 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.marginal_logregr_step_transition(
294 state DOUBLE PRECISION[],
296 x DOUBLE PRECISION[],
297 coef DOUBLE PRECISION[])
298 RETURNS DOUBLE PRECISION[]
300 LANGUAGE C IMMUTABLE;
302 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.marginal_logregr_step_merge_states(
303 state1 DOUBLE PRECISION[],
304 state2 DOUBLE PRECISION[])
305 RETURNS DOUBLE PRECISION[]
307 LANGUAGE C IMMUTABLE STRICT;
310 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.marginal_logregr_step_final(
311 state DOUBLE PRECISION[])
312 RETURNS MADLIB_SCHEMA.marginal_logregr_result
314 LANGUAGE C IMMUTABLE STRICT;
344 CREATE AGGREGATE MADLIB_SCHEMA.marginal_logregr(
345 /*+ "dependentVariable" */ BOOLEAN,
346 /*+ "independentVariables" */ DOUBLE PRECISION[],
347 /*+ "coef" */ DOUBLE PRECISION[] )(
348 STYPE=DOUBLE PRECISION[],
349 SFUNC=MADLIB_SCHEMA.marginal_logregr_step_transition,
350 m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.marginal_logregr_step_merge_states,
')
351 FINALFUNC=MADLIB_SCHEMA.marginal_logregr_step_final,
352 INITCOND='{0,0,0,0,0,0}
'
356 --------------------------- Interface ----------------------------------
358 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
359 usage_string VARCHAR -- usage string
361 RETURNS VARCHAR AS $$
363 insert_string VARCHAR;
365 IF (usage_string = '' OR usage_string = 'help
' OR usage_string = '?
') THEN
366 insert_string := '' ||
368 E'-----------------------------------------------------------------------------------------\n
' ||
369 E' Functionality: Calculate marginal effects
for logistic regression\n
' ||
370 E' The funciton first runs the regression and then calculates the \n
' ||
371 E' coefficients and uses them to calculate the marginal effects \n
' ||
372 E' SELECT {schema_madlib}.margins_logregr(
''source_table
'' \n
' ||
373 E' ,
''output_table
'' \n
' ||
374 E' ,
''dependent_variable
'' \n
' ||
375 E' ,
''independent_variable
'' \n
' ||
376 E' [,
''grouping_cols
'' \n
' ||
377 E' , marginal_variables \n
' ||
379 E' ,
''optimizer
'' \n
' ||
380 E' , tolerance \n
' ||
381 E' , verbose_mode \n
' ||
384 E'For more details on
function usage: \n
' ||
385 E'SELECT {schema_madlib}.margins_logregr(
''usage
'') \n
' ||
387 ElSIF (usage_string = 'usage
') THEN
388 insert_string := '' ||
389 E'-----------------------------------------------------------------------------------------\n
' ||
391 E'-----------------------------------------------------------------------------------------\n
' ||
392 E' SELECT {schema_madlib}.margins_logregr( \n
' ||
393 E' ''source_table
'', -- Name of data table \n
' ||
394 E' ''output_table
'', -- Name of result table (overwrites
if exists) \n
' ||
395 E' ''dependent_variable
'', -- Name of column
for dependent variables\n
' ||
396 E' ''independent_variable
'', -- Name of column
for independent variables\n
' ||
397 E' (can be any SQL expression that evaluates to an array)\n
' ||
398 E'[
''group_cols
'', -- Comma separated
string with columns to group by\n
' ||
399 E' (Optional : Default NULL => no grouping) \n
' ||
400 E' ''margins_vars
'', -- Index list (base 1) representing the independent variables to compute marginal effects on \n
' ||
401 E' (Optional : Default NULL which computes marginal effects on all variables) \n
' ||
402 E' max_iter, -- Maximum number of iterations
for the
logistic regression\n
' ||
403 E' (Optional : Default 20) \n
' ||
404 E' ''optimizer
'', -- Optimizer
for logistic regression (newton/irls, cg or igd)\n
' ||
405 E' (Optional : Default irls) \n
' ||
406 E' tolerance, -- Termination criterion
for logistic regression (relative)\n
' ||
407 E' (Optional : Default 1e-4) \n
' ||
408 E' verbose_mode, -- Verbose mode \n
' ||
409 E' (Optional : Default False) \n
' ||
413 E'-----------------------------------------------------------------------------------------\n
' ||
415 E'-----------------------------------------------------------------------------------------\n
' ||
416 E' The output table (
''output_table
'' above) has the following columns\n
' ||
417 E' ''margins
'' DOUBLE PRECISION[], -- Marginal effects \n
' ||
418 E' ''std_err
'' DOUBLE PRECISION[], -- Standard errors
using the delta method\n
' ||
419 E' ''t_stats
'' DOUBLE PRECISION[], -- T-stats of the marginal effects\n
' ||
420 E' ''p_values
'' DOUBLE PRECISION[], -- p-values of the marginal effects\n
' ||
424 insert_string := 'No such option. Run SELECT {schema_madlib}.margins_logregr()
';
426 RETURN insert_string;
428 $$ LANGUAGE plpgsql VOLATILE;
430 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr()
431 RETURNS VARCHAR AS $$
433 RETURN MADLIB_SCHEMA.margins_logregr('');
435 $$ LANGUAGE plpgsql VOLATILE;
474 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
475 source_table VARCHAR -- name of input table
476 , out_table VARCHAR -- name of output table
477 , dependent_varname VARCHAR -- name of dependent variable
478 , input_independent_varname VARCHAR -- name of independent variable
479 , input_group_cols VARCHAR -- names of columns to group by
480 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
481 , max_iter INTEGER -- Max iterations for the logstic regression inner call
482 , optimizer VARCHAR -- Logistic regression optimizer
483 , tolerance DOUBLE PRECISION -- Tolerance
484 , verbose_mode BOOLEAN -- Verbose mode
487 PythonFunction(regress, marginal, margins_logregr)
488 $$ LANGUAGE plpythonu;
492 -- Default Variable calls for margins_logregr
493 ------------------------------------------------------------------------------
498 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
499 source_table VARCHAR -- name of input table
500 , out_table VARCHAR -- name of output table
501 , dependent_variable VARCHAR -- name of dependent variable
502 , independent_variable VARCHAR -- name of independent variable
506 PERFORM MADLIB_SCHEMA.margins_logregr(
510 independent_variable,
518 $$ LANGUAGE plpgsql VOLATILE;
523 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
524 source_table VARCHAR -- name of input table
525 , out_table VARCHAR -- name of output table
526 , dependent_variable VARCHAR -- name of dependent variable
527 , independent_variable VARCHAR -- name of independent variable
528 , grouping_cols VARCHAR -- name of grouping cols
532 PERFORM MADLIB_SCHEMA.margins_logregr(
536 independent_variable,
544 $$ LANGUAGE plpgsql VOLATILE;
549 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
550 source_table VARCHAR -- name of input table
551 , out_table VARCHAR -- name of output table
552 , dependent_variable VARCHAR -- name of dependent variable
553 , independent_variable VARCHAR -- name of independent variable
554 , grouping_cols VARCHAR -- name of grouping cols
555 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
559 PERFORM MADLIB_SCHEMA.margins_logregr(
563 independent_variable,
571 $$ LANGUAGE plpgsql VOLATILE;
576 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
577 source_table VARCHAR -- name of input table
578 , out_table VARCHAR -- name of output table
579 , dependent_variable VARCHAR -- name of dependent variable
580 , independent_variable VARCHAR -- name of independent variable
581 , grouping_cols VARCHAR -- name of grouping cols
582 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
583 , max_iter INTEGER -- Max iterations for the logstic regression inner call
587 PERFORM MADLIB_SCHEMA.margins_logregr(
591 independent_variable,
599 $$ LANGUAGE plpgsql VOLATILE;
605 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
606 source_table VARCHAR -- name of input table
607 , out_table VARCHAR -- name of output table
608 , dependent_variable VARCHAR -- name of dependent variable
609 , independent_variable VARCHAR -- name of independent variable
610 , grouping_cols VARCHAR -- name of grouping cols
611 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
612 , max_iter INTEGER -- Max iterations for the logstic regression inner call
613 , optimizer VARCHAR -- Logistic regression optimizer
617 PERFORM MADLIB_SCHEMA.margins_logregr(
621 independent_variable,
629 $$ LANGUAGE plpgsql VOLATILE;
634 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_logregr(
635 source_table VARCHAR -- name of input table
636 , out_table VARCHAR -- name of output table
637 , dependent_variable VARCHAR -- name of dependent variable
638 , independent_variable VARCHAR -- name of independent variable
639 , grouping_cols VARCHAR -- name of grouping cols
640 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
641 , max_iter INTEGER -- Max iterations for the logstic regression inner call
642 , optimizer VARCHAR -- Logistic regression optimizer
643 , tolerance DOUBLE PRECISION -- Tolerance
647 PERFORM MADLIB_SCHEMA.margins_logregr(
651 independent_variable,
659 $$ LANGUAGE plpgsql VOLATILE;
660 -- End of Default Variable calls for margins_logregr
661 ------------------------------------------------------------------------------
665 ------------------ Marginal Multi-Logistic Regression ------------------------------
667 CREATE TYPE MADLIB_SCHEMA.marginal_mlogregr_result AS (
668 margins DOUBLE PRECISION[],
669 coef DOUBLE PRECISION[],
670 std_err DOUBLE PRECISION[],
671 t_stats DOUBLE PRECISION[],
672 p_values DOUBLE PRECISION[]
675 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_marginal_step_transition(
676 state DOUBLE PRECISION[],
678 num_categories INTEGER,
679 ref_category INTEGER,
680 x DOUBLE PRECISION[],
681 coef DOUBLE PRECISION[])
682 RETURNS DOUBLE PRECISION[]
684 LANGUAGE C IMMUTABLE;
686 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_marginal_step_merge_states(
687 state1 DOUBLE PRECISION[],
688 state2 DOUBLE PRECISION[])
689 RETURNS DOUBLE PRECISION[]
691 LANGUAGE C IMMUTABLE STRICT;
694 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlogregr_marginal_step_final(
695 state DOUBLE PRECISION[])
696 RETURNS MADLIB_SCHEMA.marginal_mlogregr_result
698 LANGUAGE C IMMUTABLE STRICT;
730 CREATE AGGREGATE MADLIB_SCHEMA.marginal_mlogregr(
731 /*+ "dependentVariable" */ INTEGER,
732 /*+ "num_categories" */ INTEGER,
733 /*+ "ref_category" */ INTEGER,
734 /*+ "independentVariables" */ DOUBLE PRECISION[],
735 /*+ "coef" */ DOUBLE PRECISION[] )(
736 STYPE=DOUBLE PRECISION[],
737 SFUNC=MADLIB_SCHEMA.mlogregr_marginal_step_transition,
738 m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.mlogregr_marginal_step_merge_states,
')
739 FINALFUNC=MADLIB_SCHEMA.mlogregr_marginal_step_final,
740 INITCOND='{0,0,0,0,0,0}
'
744 --------------------------- Interface ----------------------------------
746 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
747 usage_string VARCHAR -- usage string
749 RETURNS VARCHAR AS $$
751 insert_string VARCHAR;
753 IF (usage_string = '' OR usage_string = 'help
' OR usage_string = '?
') THEN
754 insert_string := '' ||
756 E'-----------------------------------------------------------------------------------------\n
' ||
757 E' Functionality: Calculate marginal effects
for logistic regression\n
' ||
758 E' The funciton first runs the regression and then calculates the \n
' ||
759 E' coefficients and uses them to calculate the marginal effects \n
' ||
760 E' SELECT {schema_madlib}.margins_mlogregr(
''source_table
'' \n
' ||
761 E' ,
''output_table
'' \n
' ||
762 E' ,
''dependent_variable
'' \n
' ||
763 E' ,
''independent_variable
'' \n
' ||
764 E' [, ref_category \n
' ||
765 E' ,
''grouping_cols
'' \n
' ||
766 E' ,
''marginal_variables
'' \n
' ||
768 E' ,
''optimizer
'' \n
' ||
769 E' , tolerance \n
' ||
770 E' , verbose_mode \n
' ||
773 E'For more details on
function usage: \n
' ||
774 E'SELECT {schema_madlib}.margins_mlogregr(
''usage
'') \n
' ||
776 ElSIF (usage_string = 'usage
') THEN
777 insert_string := '' ||
778 E'-----------------------------------------------------------------------------------------\n
' ||
780 E'-----------------------------------------------------------------------------------------\n
' ||
781 E' SELECT {schema_madlib}.margins_mlogregr( \n
' ||
782 E' ''source_table
'', -- Name of data table \n
' ||
783 E' ''output_table
'', -- Name of result table (overwrites
if exists) \n
' ||
784 E' ''dependent_variable
'', -- Name of column
for dependent variables\n
' ||
785 E' ''independent_variable
'', -- Name of column
for independent variables\n
' ||
786 E' (can be any SQL expression that evaluates to an array)\n
' ||
787 E'[ ref_category, -- Reference category
for the multinomial logisitic regressionby\n
' ||
788 E' (Optional : Default 0) \n
' ||
789 E' ''group_cols
'', -- Comma separated
string with columns to group by\n
' ||
790 E' (Optional : Default NULL => no grouping) \n
' ||
791 E' margins_vars, -- Index list (base 1) representing the independent variables to compute marginal effects on \n
' ||
792 E' (Optional : Default NULL which computes marginal effects on all variables) \n
' ||
793 E' max_iter, -- Maximum number of iterations
for the
logistic regression\n
' ||
794 E' (Optional : Default 20) \n
' ||
795 E' ''optimizer
'', -- Optimizer
for logistic regression (newton/irls, cg or igd)\n
' ||
796 E' (Optional : Default irls) \n
' ||
797 E' tolerance, -- Termination criterion
for logistic regression (relative)\n
' ||
798 E' (Optional : Default 1e-4) \n
' ||
799 E' verbose_mode, -- Verbose mode \n
' ||
800 E' (Optional : Default False) \n
' ||
804 E'-----------------------------------------------------------------------------------------\n
' ||
806 E'-----------------------------------------------------------------------------------------\n
' ||
807 E' The output table (
''output_table
'' above) has the following columns\n
' ||
808 E' ''margins
'' DOUBLE PRECISION[], -- Marginal effects \n
' ||
809 E' ''std_err
'' DOUBLE PRECISION[], -- Standard errors
using the delta method\n
' ||
810 E' ''t_stats
'' DOUBLE PRECISION[], -- T-stats of the standard errors\n
' ||
811 E' ''p_values
'' DOUBLE PRECISION[], -- p-values of the standard errors\n
' ||
815 insert_string := 'No such option. Run SELECT {schema_madlib}.margins_mlogregr()
';
817 RETURN insert_string;
819 $$ LANGUAGE plpgsql VOLATILE;
821 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr()
822 RETURNS VARCHAR AS $$
824 RETURN MADLIB_SCHEMA.margins_mlogregr('');
826 $$ LANGUAGE plpgsql VOLATILE;
862 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
863 source_table VARCHAR -- name of input table
864 , out_table VARCHAR -- name of output table
865 , dependent_varname VARCHAR -- name of dependent variable
866 , input_independent_varname VARCHAR -- name of independent variable
867 , ref_category INTEGER -- reference category
868 , input_group_cols VARCHAR -- names of columns to group by
869 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
870 , max_iter INTEGER -- Max iterations for the logstic regression inner call
871 , optimizer VARCHAR -- Logistic regression optimizer
872 , tolerance DOUBLE PRECISION -- Tolerance
873 , verbose_mode BOOLEAN -- Verbose mode
876 PythonFunction(regress, marginal, margins_mlogregr)
877 $$ LANGUAGE plpythonu;
881 -- Default Variable calls for margins_mlogregr
882 ------------------------------------------------------------------------------
887 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
888 source_table VARCHAR -- name of input table
889 , out_table VARCHAR -- name of output table
890 , dependent_variable VARCHAR -- name of dependent variable
891 , independent_variable VARCHAR -- name of independent variable
895 PERFORM MADLIB_SCHEMA.margins_mlogregr(
899 independent_variable,
908 $$ LANGUAGE plpgsql VOLATILE;
913 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
914 source_table VARCHAR -- name of input table
915 , out_table VARCHAR -- name of output table
916 , dependent_variable VARCHAR -- name of dependent variable
917 , independent_variable VARCHAR -- name of independent variable
918 , ref_category INTEGER -- reference category
922 PERFORM MADLIB_SCHEMA.margins_mlogregr(
926 independent_variable,
935 $$ LANGUAGE plpgsql VOLATILE;
940 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
941 source_table VARCHAR -- name of input table
942 , out_table VARCHAR -- name of output table
943 , dependent_variable VARCHAR -- name of dependent variable
944 , independent_variable VARCHAR -- name of independent variable
945 , ref_category INTEGER -- reference category
946 , grouping_cols VARCHAR -- name of grouping cols
950 PERFORM MADLIB_SCHEMA.margins_mlogregr(
954 independent_variable,
963 $$ LANGUAGE plpgsql VOLATILE;
968 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
969 source_table VARCHAR -- name of input table
970 , out_table VARCHAR -- name of output table
971 , dependent_variable VARCHAR -- name of dependent variable
972 , independent_variable VARCHAR -- name of independent variable
973 , ref_category INTEGER -- reference category
974 , grouping_cols VARCHAR -- name of grouping cols
975 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
979 PERFORM MADLIB_SCHEMA.margins_mlogregr(
983 independent_variable,
992 $$ LANGUAGE plpgsql VOLATILE;
997 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
998 source_table VARCHAR -- name of input table
999 , out_table VARCHAR -- name of output table
1000 , dependent_variable VARCHAR -- name of dependent variable
1001 , independent_variable VARCHAR -- name of independent variable
1002 , ref_category INTEGER -- reference category
1003 , grouping_cols VARCHAR -- name of grouping cols
1004 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
1005 , max_iter INTEGER -- Max iterations for the logstic regression inner call
1009 PERFORM MADLIB_SCHEMA.margins_mlogregr(
1013 independent_variable,
1022 $$ LANGUAGE plpgsql VOLATILE;
1028 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
1029 source_table VARCHAR -- name of input table
1030 , out_table VARCHAR -- name of output table
1031 , dependent_variable VARCHAR -- name of dependent variable
1032 , independent_variable VARCHAR -- name of independent variable
1033 , ref_category INTEGER -- reference category
1034 , grouping_cols VARCHAR -- name of grouping cols
1035 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
1036 , max_iter INTEGER -- Max iterations for the logstic regression inner call
1037 , optimizer VARCHAR -- Logistic regression optimizer
1041 PERFORM MADLIB_SCHEMA.margins_mlogregr(
1045 independent_variable,
1054 $$ LANGUAGE plpgsql VOLATILE;
1059 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.margins_mlogregr(
1060 source_table VARCHAR -- name of input table
1061 , out_table VARCHAR -- name of output table
1062 , dependent_variable VARCHAR -- name of dependent variable
1063 , independent_variable VARCHAR -- name of independent variable
1064 , ref_category INTEGER -- reference category
1065 , grouping_cols VARCHAR -- name of grouping cols
1066 , marginal_vars INTEGER[] -- indices of variables to calculate marginal effects on
1067 , max_iter INTEGER -- Max iterations for the logstic regression inner call
1068 , optimizer VARCHAR -- Logistic regression optimizer
1069 , tolerance DOUBLE PRECISION -- Tolerance
1073 PERFORM MADLIB_SCHEMA.margins_mlogregr(
1077 independent_variable,
1086 $$ LANGUAGE plpgsql VOLATILE;
1087 -- End of Default Variable calls for margins_mlogregr
1088 ------------------------------------------------------------------------------