m4_include(`SQLCommon.m4')
-- -----------------------------------------------------------------------
--  PCA for Dense matrices
-- -----------------------------------------------------------------------
/**
 * @brief Compute principal components for a dense matrix stored in a
 *        database table.
 *
 * Full-signature entry point; the shorter overloads of this function
 * delegate here. The function body is generated by the macro call between
 * the dollar quotes, which is invoked with the arguments (pca, pca, pca).
 *
 * NOTE(review): the RETURNS clause was absent from the reviewed copy of this
 * file; VOID is assumed here. Confirm against the deployed signature.
 */
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_train(
    source_table            TEXT,       -- Source table name (dense matrix)
    pc_table                TEXT,       -- Output table name for the principal components
    row_id                  TEXT,       -- Column name for the ID for each row
    k                       INTEGER,    -- Number of principal components to compute
    grouping_cols           TEXT,       -- Comma-separated list of grouping columns (Default: NULL)
    lanczos_iter            INTEGER,    -- The number of Lanczos iterations for the SVD calculation (Default: min(k+40, smallest Matrix dimension))
    use_correlation         BOOLEAN,    -- If True correlation matrix is used for principal components (Default: False)
    result_summary_table    TEXT        -- Table name to store summary of results (Default: NULL)
)
RETURNS VOID AS $$
PythonFunction(pca, pca, pca)
$$ LANGUAGE plpythonu;
-- -----------------------------------------------------------------------
--  Overloaded functions for optional parameters
-- -----------------------------------------------------------------------

-- Overload without result_summary_table: forwards the seven supplied
-- arguments unchanged and passes NULL for the summary table.
-- NOTE(review): return type assumed VOID; it must match the 8-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_train(
    source_table        TEXT,       -- Source table name (dense matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Column name for the ID for each row
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT,       -- Comma-separated list of grouping columns
    lanczos_iter        INTEGER,    -- The number of Lanczos iterations for the SVD calculation
    use_correlation     BOOLEAN     -- If True correlation matrix is used for principal components
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_train($1, $2, $3, $4, $5, $6, $7, NULL)
$$ LANGUAGE SQL;
-- Overload without use_correlation and result_summary_table: forwards the
-- six supplied arguments and passes FALSE (no correlation matrix) and NULL
-- (no summary table) for the omitted ones.
-- NOTE(review): return type assumed VOID; it must match the 8-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_train(
    source_table        TEXT,       -- Source table name (dense matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Column name for the ID for each row
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT,       -- Comma-separated list of grouping columns
    lanczos_iter        INTEGER     -- The number of Lanczos iterations for the SVD calculation
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_train($1, $2, $3, $4, $5, $6, FALSE, NULL)
$$ LANGUAGE SQL;
-- Overload taking only the first five arguments. Passes 0 for lanczos_iter
-- (presumably interpreted by the implementation as "use the default" --
-- confirm), FALSE for use_correlation and NULL for result_summary_table.
-- NOTE(review): return type assumed VOID; it must match the 8-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_train(
    source_table        TEXT,       -- Source table name (dense matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Column name for the ID for each row
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT        -- Comma-separated list of grouping columns
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_train($1, $2, $3, $4, $5, 0, FALSE, NULL)
$$ LANGUAGE SQL;
-- Minimal overload: only the four required arguments. Passes NULL for
-- grouping_cols, 0 for lanczos_iter (presumably interpreted by the
-- implementation as "use the default" -- confirm), FALSE for
-- use_correlation and NULL for result_summary_table.
-- NOTE(review): return type assumed VOID; it must match the 8-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_train(
    source_table        TEXT,       -- Source table name (dense matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Column name for the ID for each row
    k                   INTEGER     -- Number of principal components to compute
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_train($1, $2, $3, $4, NULL, 0, FALSE, NULL)
$$ LANGUAGE SQL;
-- -----------------------------------------------------------------------
--  Information Functions
-- -----------------------------------------------------------------------

-- Help function for the dense PCA trainer: returns whatever
-- pca.pca_help_message produces for the given usage_string.
-- NOTE(review): the RETURNS clause was absent from the reviewed copy;
-- VARCHAR is assumed because the zero-argument overload, which returns this
-- function's result, is declared RETURNS VARCHAR.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_train(
    usage_string    VARCHAR     -- usage string
)
RETURNS VARCHAR AS $$
PythonFunctionBodyOnly(`pca', `pca')
    # NOTE(review): indentation restored to four spaces; it has to line up
    # with the Python code emitted by the macro call above -- confirm.
    return pca.pca_help_message(schema_madlib, usage_string)
$$ LANGUAGE plpythonu;
-- Zero-argument help function: calls the one-argument help overload with an
-- empty usage string and returns its result.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_train()
RETURNS VARCHAR AS $$
BEGIN
    RETURN MADLIB_SCHEMA.pca_train('');
END;
$$ LANGUAGE plpgsql VOLATILE;
-- -----------------------------------------------------------------------
--  PCA for Sparse matrices
-- -----------------------------------------------------------------------
/**
 * @brief Compute principal components for a sparse matrix stored in a
 *        database table.
 *
 * Full-signature entry point; the shorter overloads of this function
 * delegate here. The function body is generated by the macro call between
 * the dollar quotes, which is invoked with the arguments
 * (pca, pca, pca_sparse).
 *
 * NOTE(review): the RETURNS clause was absent from the reviewed copy of this
 * file; VOID is assumed here. Confirm against the deployed signature.
 */
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_sparse_train(
    source_table            TEXT,       -- Source table name (sparse matrix)
    pc_table                TEXT,       -- Output table name for the principal components
    row_id                  TEXT,       -- Name of 'row_id' column in sparse matrix representation
    col_id                  TEXT,       -- Name of 'col_id' column in sparse matrix representation
    val_id                  TEXT,       -- Name of 'val_id' column in sparse matrix representation
    row_dim                 INTEGER,    -- Number of rows in the sparse matrix
    col_dim                 INTEGER,    -- Number of columns in the sparse matrix
    k                       INTEGER,    -- Number of eigenvectors with dominant eigenvalues, sorted decreasingly
    grouping_cols           TEXT,       -- Comma-separated list of grouping columns (Default: NULL)
    lanczos_iter            INTEGER,    -- The number of Lanczos iterations for the SVD calculation (Default: min(k+40, smallest Matrix dimension))
    use_correlation         BOOLEAN,    -- If True correlation matrix is used for principal components (Default: False)
    result_summary_table    TEXT        -- Table name to store summary of results (Default: NULL)
)
RETURNS VOID AS $$
PythonFunction(pca, pca, pca_sparse)
$$ LANGUAGE plpythonu;
-- -----------------------------------------------------------------------
--  Overloaded functions for optional parameters
-- -----------------------------------------------------------------------

-- Overload without result_summary_table: forwards the eleven supplied
-- arguments unchanged and passes NULL for the summary table.
-- NOTE(review): return type assumed VOID; it must match the 12-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_sparse_train(
    source_table        TEXT,       -- Source table name (sparse matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Name of 'row_id' column in sparse matrix representation
    col_id              TEXT,       -- Name of 'col_id' column in sparse matrix representation
    val_id              TEXT,       -- Name of 'val_id' column in sparse matrix representation
    row_dim             INTEGER,    -- Number of rows in the sparse matrix
    col_dim             INTEGER,    -- Number of columns in the sparse matrix
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT,       -- Comma-separated list of grouping columns
    lanczos_iter        INTEGER,    -- The number of Lanczos iterations for the SVD calculation
    use_correlation     BOOLEAN     -- If True correlation matrix is used for principal components
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_sparse_train($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL)
$$ LANGUAGE SQL;
-- Overload without use_correlation and result_summary_table: forwards the
-- ten supplied arguments and passes FALSE (no correlation matrix) and NULL
-- (no summary table) for the omitted ones.
-- NOTE(review): return type assumed VOID; it must match the 12-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_sparse_train(
    source_table        TEXT,       -- Source table name (sparse matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Name of 'row_id' column in sparse matrix representation
    col_id              TEXT,       -- Name of 'col_id' column in sparse matrix representation
    val_id              TEXT,       -- Name of 'val_id' column in sparse matrix representation
    row_dim             INTEGER,    -- Number of rows in the sparse matrix
    col_dim             INTEGER,    -- Number of columns in the sparse matrix
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT,       -- Comma-separated list of grouping columns
    lanczos_iter        INTEGER     -- The number of Lanczos iterations for the SVD calculation
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_sparse_train($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, FALSE, NULL)
$$ LANGUAGE SQL;
-- Overload taking only the first nine arguments. Passes 0 for lanczos_iter
-- (presumably interpreted by the implementation as "use the default" --
-- confirm), FALSE for use_correlation and NULL for result_summary_table.
-- NOTE(review): return type assumed VOID; it must match the 12-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_sparse_train(
    source_table        TEXT,       -- Source table name (sparse matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Name of 'row_id' column in sparse matrix representation
    col_id              TEXT,       -- Name of 'col_id' column in sparse matrix representation
    val_id              TEXT,       -- Name of 'val_id' column in sparse matrix representation
    row_dim             INTEGER,    -- Number of rows in the sparse matrix
    col_dim             INTEGER,    -- Number of columns in the sparse matrix
    k                   INTEGER,    -- Number of principal components to compute
    grouping_cols       TEXT        -- Comma-separated list of grouping columns
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_sparse_train($1, $2, $3, $4, $5, $6, $7, $8, $9, 0, FALSE, NULL)
$$ LANGUAGE SQL;
-- Minimal overload: only the eight required arguments. Passes NULL for
-- grouping_cols, 0 for lanczos_iter (presumably interpreted by the
-- implementation as "use the default" -- confirm), FALSE for
-- use_correlation and NULL for result_summary_table.
-- NOTE(review): return type assumed VOID; it must match the 12-argument overload.
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.pca_sparse_train(
    source_table        TEXT,       -- Source table name (sparse matrix)
    pc_table            TEXT,       -- Output table name for the principal components
    row_id              TEXT,       -- Name of 'row_id' column in sparse matrix representation
    col_id              TEXT,       -- Name of 'col_id' column in sparse matrix representation
    val_id              TEXT,       -- Name of 'val_id' column in sparse matrix representation
    row_dim             INTEGER,    -- Number of rows in the sparse matrix
    col_dim             INTEGER,    -- Number of columns in the sparse matrix
    k                   INTEGER     -- Number of principal components to compute
)
RETURNS VOID AS $$
    SELECT MADLIB_SCHEMA.pca_sparse_train($1, $2, $3, $4, $5, $6, $7, $8, NULL, 0, FALSE, NULL)
$$ LANGUAGE SQL;
-- -----------------------------------------------------------------------
--  Information Functions
-- -----------------------------------------------------------------------

-- Help function for the sparse PCA trainer: returns whatever
-- pca.pca_sparse_help_message produces for the given usage_string.
-- NOTE(review): the RETURNS clause was absent from the reviewed copy;
-- VARCHAR is assumed, as declared on the dense zero-argument help
-- function -- confirm.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_sparse_train(
    usage_string    VARCHAR     -- usage string
)
RETURNS VARCHAR AS $$
PythonFunctionBodyOnly(`pca', `pca')
    # NOTE(review): indentation restored to four spaces; it has to line up
    # with the Python code emitted by the macro call above -- confirm.
    return pca.pca_sparse_help_message(schema_madlib, usage_string)
$$ LANGUAGE plpythonu;
-- Zero-argument help function: calls the one-argument help overload with an
-- empty usage string and returns its result.
-- NOTE(review): the RETURNS clause was absent from the reviewed copy;
-- VARCHAR is assumed, as declared on the dense zero-argument help
-- function -- confirm.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_sparse_train()
RETURNS VARCHAR AS $$
BEGIN
    RETURN MADLIB_SCHEMA.pca_sparse_train('');
END;
$$ LANGUAGE plpgsql VOLATILE;