13 m4_include(`SQLCommon.m4
')
-- Composite result type for the mlogregr functions.
-- It is the declared return type of __internal_mlogregr_irls_result and of
-- every mlogregr overload in this file.
-- The type is dropped first (IF EXISTS) so that the script can be re-run.
-- NOTE(review): the meaning of each field is not defined in this file. The
-- names suggest the usual regression outputs (coefficients, log-likelihood,
-- standard errors, z statistics, p-values, odds ratios, condition number).
-- Confirm against the C implementation that fills the record.
-- num_iterations is assigned by the PL/pgSQL mlogregr wrapper after the
-- record has been built, not by the function that builds it.
212 DROP TYPE IF EXISTS MADLIB_SCHEMA.mlogregr_result;
213 CREATE TYPE MADLIB_SCHEMA.mlogregr_result AS
215 ref_category INTEGER,
216 coef DOUBLE PRECISION[],
217 log_likelihood DOUBLE PRECISION,
218 std_err DOUBLE PRECISION[],
219 z_stats DOUBLE PRECISION[],
220 p_values DOUBLE PRECISION[],
221 odds_ratios DOUBLE PRECISION[],
222 condition_no DOUBLE PRECISION,
223 num_iterations INTEGER
-- Transition function of the __mlogregr_irls_step aggregate, which names this
-- function as its SFUNC.
-- Takes the aggregate state followed by the per-row arguments and returns the
-- updated state as DOUBLE PRECISION[].
-- Implemented in C and declared IMMUTABLE. Unlike the merge and final
-- functions it is not STRICT, so it is also called when an argument is NULL.
-- NOTE(review): the layout of the state array is defined by the C code and is
-- not visible here.
227 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__mlogregr_irls_step_transition
229 state DOUBLE PRECISION[],
231 num_categories INTEGER,
232 ref_category INTEGER,
233 x DOUBLE PRECISION[],
234 prev_state DOUBLE PRECISION[]
236 RETURNS DOUBLE PRECISION[]
238 LANGUAGE C IMMUTABLE;
-- Takes two state arrays and returns one state array.
-- The __mlogregr_irls_step aggregate registers this function as its prefunc,
-- but only in Greenplum builds.
-- NOTE(review): presumably it combines partial states computed on different
-- segments. Confirm against the C implementation.
-- Implemented in C, IMMUTABLE and STRICT (a NULL argument yields NULL without
-- calling the C code).
241 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__mlogregr_irls_step_merge_states
243 state1 DOUBLE PRECISION[],
244 state2 DOUBLE PRECISION[]
246 RETURNS DOUBLE PRECISION[]
248 LANGUAGE C IMMUTABLE STRICT;
-- Final function of the __mlogregr_irls_step aggregate, which names this
-- function as its FINALFUNC. It maps the accumulated transition state to the
-- DOUBLE PRECISION[] value that the aggregate returns.
-- Implemented in C, IMMUTABLE and STRICT (a NULL state yields NULL without
-- calling the C code).
251 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__mlogregr_irls_step_final
253 state DOUBLE PRECISION[]
255 RETURNS DOUBLE PRECISION[]
257 LANGUAGE C IMMUTABLE STRICT;
-- Aggregate built from the three __mlogregr_irls_step_* functions:
--   SFUNC     = __mlogregr_irls_step_transition
--   prefunc   = __mlogregr_irls_step_merge_states (Greenplum builds only)
--   FINALFUNC = __mlogregr_irls_step_final
-- The state type is DOUBLE PRECISION[] and the initial state is a short array
-- of zeros (see INITCOND).
-- The inline name comments label the arguments as the number of categories,
-- the reference category, the row vector x and the previous state.
-- NOTE(review): presumably one call performs a single IRLS iteration over all
-- rows, starting from the state of the previous iteration. Confirm against
-- the Python driver that issues the query.
265 CREATE AGGREGATE MADLIB_SCHEMA.__mlogregr_irls_step(
267 /*+ numCategories */ INTEGER,
268 /*+ ref_category */ INTEGER,
269 /*+ x */ DOUBLE PRECISION[],
270 /*+ previous_state */ DOUBLE PRECISION[]) (
272 STYPE=DOUBLE PRECISION[],
273 SFUNC=MADLIB_SCHEMA.__mlogregr_irls_step_transition,
274 m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.__mlogregr_irls_step_merge_states,
')
275 FINALFUNC=MADLIB_SCHEMA.__mlogregr_irls_step_final,
276 INITCOND='{0,0,0,0,0}
'
-- Takes two state arrays and returns a single DOUBLE PRECISION value.
-- NOTE(review): presumably the distance between the states of two successive
-- iterations, used as the convergence measure. Confirm against the C code
-- and the Python driver, neither of which is visible here.
-- Implemented in C, IMMUTABLE and STRICT.
280 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_mlogregr_irls_step_distance(
281 /*+ state1 */ DOUBLE PRECISION[],
282 /*+ state2 */ DOUBLE PRECISION[])
283 RETURNS DOUBLE PRECISION AS
285 LANGUAGE c IMMUTABLE STRICT;
-- Converts a state array into a mlogregr_result record.
-- mlogregr selects this function by name for both the irls and the newton
-- optimizer and applies it to the _madlib_state column of the row in
-- _madlib_iterative_alg whose _madlib_iteration equals the iteration number
-- it holds.
-- Implemented in C, IMMUTABLE and STRICT (a NULL state yields a NULL result).
287 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_mlogregr_irls_result(
288 /*+ state */ DOUBLE PRECISION[])
289 RETURNS MADLIB_SCHEMA.mlogregr_result AS
291 LANGUAGE c IMMUTABLE STRICT;
294 -- We only need to document the last one (unfortunately, in Greenplum we have to
295 -- use function overloading instead of default arguments).
-- Internal driver written in PL/Python. The body is produced by a macro call
-- whose arguments name regress, multilogistic and compute_mlogregr.
-- mlogregr calls it with its own first three arguments, then the computed
-- numcategories, then its remaining four arguments in their original order.
-- Declared VOLATILE and STRICT: any NULL argument makes the call return NULL
-- without running the Python code.
-- NOTE(review): presumably it runs the iterations and returns the number of
-- the last one, leaving the per-iteration states in _madlib_iterative_alg.
-- Confirm against the Python module.
296 CREATE FUNCTION MADLIB_SCHEMA.__compute_mlogregr
301 numcategories INTEGER,
302 maxnumiterations INTEGER,
304 "precision" DOUBLE PRECISION,
309 PythonFunction(regress, multilogistic, compute_mlogregr)
310 $$ LANGUAGE plpythonu VOLATILE STRICT;
-- Main entry point (PL/pgSQL, VOLATILE). Validates its arguments, runs the
-- regression through __compute_mlogregr and returns a mlogregr_result.
--
-- Arguments used by the body: source (table name), depvar (name of the
-- dependent variable column), indepvar, maxnumiterations, optimizer,
-- precision and ref_category. The inline DEFAULT comments record the values
-- that the shorter overloads pass: 20 iterations, optimizer irls, precision
-- 0.0001.
--
-- Steps, in order:
--   1. Reject NULL or blank source, depvar, indepvar and optimizer, a
--      maxnumiterations below 1, a NULL precision, and a NULL or negative
--      ref_category. Each failure raises its own exception.
--   2. Look up the type of the depvar column of source and raise unless it
--      is INTEGER.
--   3. Run three dynamic queries on source: count of distinct depvar values
--      (stored as numcategories), maximum and minimum of depvar.
--   4. Raise unless the minimum is 0 and the maximum is numcategories - 1.
--      Together with the distinct count this means the categories must be
--      exactly 0 .. numcategories - 1.
--   5. Raise unless ref_category lies in 0 .. numcategories - 1.
--   6. Map the optimizer to a result function. irls and newton both map to
--      __internal_mlogregr_irls_result. Any other value raises.
--   7. Call __compute_mlogregr, then read the result with dynamic SQL from
--      _madlib_iterative_alg for the row matching theIteration.
--   8. If a result was produced, store theIteration in num_iterations,
--      because the C++ code does not fill that field.
--
-- NOTE(review): depvar is concatenated into the dynamic SQL without quoting,
-- while source is passed through a regclass round trip. If depvar can come
-- from an untrusted caller, wrap it with quote_ident.
-- NOTE(review): the ref_category < 0 test in step 5 can never be true
-- because step 1 has already rejected negative values.
371 CREATE FUNCTION MADLIB_SCHEMA.mlogregr
376 maxnumiterations INTEGER /*+ DEFAULT 20 */,
377 optimizer VARCHAR /*+ DEFAULT 'irls
' */,
378 "precision" DOUBLE PRECISION /*+ DEFAULT 0.0001 */,
381 RETURNS MADLIB_SCHEMA.mlogregr_result AS $$
383 observed_count INTEGER;
384 theIteration INTEGER;
386 theResult MADLIB_SCHEMA.mlogregr_result;
387 numcategories INTEGER;
388 min_category INTEGER;
389 max_category INTEGER;
391 IF (source IS NULL OR trim(source) = '') THEN
392 RAISE EXCEPTION 'Invalid source table given
';
395 IF (depvar IS NULL OR trim(depvar) = '') THEN
396 RAISE EXCEPTION 'Invalid depvar given
';
398 IF (indepvar IS NULL OR trim(indepvar) = '') THEN
399 RAISE EXCEPTION 'Invalid indepvar given
';
401 IF (maxnumiterations IS NULL OR maxnumiterations < 1) THEN
402 RAISE EXCEPTION 'Number of max iterations must be positive
';
404 IF (optimizer IS NULL OR trim(optimizer) = '') THEN
405 RAISE EXCEPTION 'Invalid optimizer given
';
407 IF (precision IS NULL) THEN
408 RAISE EXCEPTION 'Invalid precision given.
';
410 IF (ref_category IS NULL OR ref_category < 0) THEN
411 RAISE EXCEPTION 'Invalid ref_category given.
';
414 IF (SELECT atttypid::regtype <> 'INTEGER
'::regtype
416 WHERE attrelid = source::regclass AND attname = depvar) THEN
417 RAISE EXCEPTION 'The dependent variable column should be of type INTEGER
';
420 EXECUTE $sql$ SELECT count(DISTINCT $sql$ || depvar || $sql$ )
421 FROM $sql$ || textin(regclassout(source))
423 numcategories := observed_count;
425 EXECUTE $sql$ SELECT max($sql$ || depvar || $sql$ )
426 FROM $sql$ || textin(regclassout(source))
428 EXECUTE $sql$ SELECT min($sql$ || depvar || $sql$ )
429 FROM $sql$ || textin(regclassout(source))
432 IF max_category != numcategories - 1 OR min_category != 0 THEN
433 RAISE EXCEPTION 'The value of the dependent variable should be in the
434 range of [0, %)
', numcategories;
437 IF ref_category > numcategories -1 OR ref_category < 0 THEN
438 RAISE EXCEPTION 'The value of the reference category should be in the
439 range of [0,
"%")
', numcategories;
442 IF optimizer = 'irls
' OR optimizer = 'newton
' THEN
443 fnName := '__internal_mlogregr_irls_result
';
445 RAISE EXCEPTION 'Unknown optimizer (
''%
''). Must be
"newton" or
"irls"', optimizer;
449 SELECT MADLIB_SCHEMA.__compute_mlogregr(
450 $1, $2, $3, numcategories, $4, $5, $6, $7)
453 -- Because of Greenplum bug MPP-10050, we have to use dynamic SQL (using
454 -- EXECUTE) in the following
455 -- Because of Greenplum bug MPP-6731, we have to hide the tuple-returning
456 -- function in a subquery
462 MADLIB_SCHEMA.$sql$ || fnName || $sql$(_madlib_state) AS result
463 FROM _madlib_iterative_alg
464 WHERE _madlib_iteration = $sql$ || theIteration || $sql$
468 -- The number of iterations are not updated in the C++ code. We do it here.
469 IF NOT (theResult IS NULL) THEN
470 theResult.num_iterations = theIteration;
474 $$ LANGUAGE plpgsql VOLATILE;
-- Convenience overload taking three arguments. It forwards them to the
-- seven-argument mlogregr together with the fixed values 20 (maximum number
-- of iterations), irls (optimizer), 0.0001 (precision) and 0 (reference
-- category). Plain SQL wrapper, VOLATILE like the function it calls.
477 CREATE FUNCTION MADLIB_SCHEMA.mlogregr
483 RETURNS MADLIB_SCHEMA.mlogregr_result AS
485 SELECT MADLIB_SCHEMA.mlogregr($1, $2, $3, 20, 'irls
', 0.0001, 0);
486 $$ LANGUAGE sql VOLATILE;
-- Convenience overload taking four arguments, the last being
-- maxnumiterations. It forwards them to the seven-argument mlogregr together
-- with the fixed values irls (optimizer), 0.0001 (precision) and 0
-- (reference category). Plain SQL wrapper, VOLATILE.
488 CREATE FUNCTION MADLIB_SCHEMA.mlogregr(
492 maxnumiterations INTEGER
494 RETURNS MADLIB_SCHEMA.mlogregr_result AS
496 SELECT MADLIB_SCHEMA.mlogregr($1, $2, $3, $4, 'irls
', 0.0001, 0);
497 $$ LANGUAGE sql VOLATILE;
-- Convenience overload taking five arguments. It forwards them to the
-- seven-argument mlogregr together with the fixed values 0.0001 (precision)
-- and 0 (reference category). Plain SQL wrapper, VOLATILE.
-- NOTE(review): the parameter name maxbumiterations looks like a typo for
-- maxnumiterations, the spelling used by the other overloads. The body only
-- uses positional references, so the call itself is unaffected. Renaming
-- would change the declared parameter name.
499 CREATE FUNCTION MADLIB_SCHEMA.mlogregr(
503 maxbumiterations INTEGER,
506 RETURNS MADLIB_SCHEMA.mlogregr_result AS
508 SELECT MADLIB_SCHEMA.mlogregr($1, $2, $3, $4, $5, 0.0001, 0);
509 $$ LANGUAGE sql VOLATILE;