User Documentation
 All Files Functions Groups
svd.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file svd.sql_in
4  *
5  * @brief Singular Value Decomposition
6  *
7  * @sa For a brief introduction to singular value decomposition, see the module
8  * description \ref grp_linalg.
9  *
10  *//* ----------------------------------------------------------------------- */
11 
12 m4_include(`SQLCommon.m4')
13 
14 /**
15 * @addtogroup grp_linalg
16 *
17 * @brief
18 * In linear algebra, the singular value decomposition (SVD) is a factorization of
19 * a real or complex matrix, with many useful applications in signal processing and
20 * statistics.
21 *
22 * Let \f$A\f$ be a \f$mxn\f$ matrix, where \f$m \ge n\f$. Then \f$A\f$ can be
23 * decomposed as follows:
24 * \f[
25 * A = U \Sigma V^T,
26 * \f]
27 * where \f$U\f$ is a \f$m \times n\f$ orthonormal matrix,
28 * \f$\Sigma\f$ is a \f$n \times n\f$ diagonal matrix, and \f$V\f$ is an
29 * \f$n \times n\f$ orthonormal matrix. The diagonal elements of \f$\Sigma\f$ are
30 * called the \emph{singular values}.
31 *
32 *
33 * @input
34 * The input table can be two forms:
35  - Dense matrix representation
36  The table contains a 'row_id' column that identifies each row.
37  Further, the other columns are assumed to be the data for the matrix
38  represented in two possible forms, illustrated in a the 2x2 matrix example below:
39  -# <pre>
40  row_id col1 col2
41  row1 1 1 0
42  row2 2 0 1
43  </pre>
44  -# <pre>
45  row_id row_vec
46  row1 1 {1, 0}
47  row2 2 {0, 1}
48  </pre>
49  - Sparse matrix representation
50  An example representation is given below:
51  <pre>
52  row_id col_id value
53  row1 1 1 1
54  row2 2 2 1
55  </pre>
56  The 'row_id' represents the row number, 'col_id' represents the column
57  id and the 'value' represents the matrix value at ['row_id', 'col_id']
58 
59 * @output
60 * The output and summary table
61 * -------------------------------------------------------------------------
62 * OUTPUT TABLE
63 * -------------------------------------------------------------------------
64 * Ouptut for eigen vectors/values will be in the following 3 tables:
65 * - Left singular matrix: Table named <output_table_prefix>_left (e.g. ‘netflix_u’)
66 * - Right singular matrix: Table named <output_table_prefix>_right (e.g. ‘netflix_v’)
67 * - Singular values: Table named <output_table_prefix>_s (e.g. ‘netflix_s’)
68 * The singular vector tables are of the format:
69 \code
70  row_id INTEGER, -- ID corresponding to each eigen value (in decreasing order)
71  row_vec FLOAT8[] -- Singular vector elements for this row_id
72 \endcode Each array is of size k
73 * The singular values table is in a sparse table format:
74 \code
75  row_id INTEGER, -- i for ith eigen value
76  col_id INTEGER, -- i for ith eigen value (same as row_id)
77  value FLOAT8 -- Eigen Value
78 \endcode
79 
80 * -------------------------------------------------------------------------
81 * RESULT SUMMARY TABLE
82 * -------------------------------------------------------------------------
83 * Result summary table will be in the following format:
84 \code
85 * rows_used INTEGER, -- Number of rows used for SVD calculation
86 * exec_time FLOAT8, -- Total time for executing SVD
87 * iter INTEGER, -- Total number of iterations run
88 * recon_error FLOAT8 -- Total quality score (i.e. approximation quality) for this set of
89 * orthonormal basis
90  relative_recon_error FLOAT8 -- relative quality score
91 \endcode
92 
93 In the result summary table, the reconstruction error is computed as \f$ \sqrt{mean((X - USV^T)_{ij}^2)} \f$, where the average is over all elements of the matrices. The relative reconstruction error is then computed as ratio of the reconstruction error and \f$ \sqrt{mean(X_{ij}^2)} \f$.
94 
95 * @usage
96  Dense matrices:
97  \code
98  SELECT {schema_madlib}.svd(
99  source_table, -- TEXT, Source table name (dense matrix)
100  output_table_prefix, -- TEXT, Prefix for output tables
101  row_id, -- TEXT, ID for each row
102  k, -- INTEGER, Number of singular vectors to compute
103  nIterations, -- INTEGER, Number of iterations to run (OPTIONAL)
104  result_summary_table, -- TEXT Table name to store result summary (OPTIONAL)
105  );
106  \endcode
107 
108  Sparse matrices:
109  \code
110  SELECT {schema_madlib}.svd_sparse(
111  source_table, -- TEXT, Source table name (sparse matrix)
112  output_table_prefix, -- TEXT, Prefix for output tables
113  row_id, -- TEXT, ID for each row
114  col_id, -- TEXT, ID for each column
115  value, -- TEXT, Non-zero values of the sparse matrix
116  row_dim, -- INTEGER, Row dimension of sparse matrix
117  col_dim, -- INTEGER, Col dimension of sparse matrix
118  k, -- INTEGER, Number of singular vectors to compute
119  nIterations, -- INTEGER, Number of iterations to run (OPTIONAL)
120  result_summary_table -- TEXT Table name to store result summary (OPTIONAL)
121  );
122  \endcode
123 
124  Block matrices:
125  \code
126  SELECT {schema_madlib}.svd_block(
127  source_table, -- TEXT, Source table name (block matrix)
128  output_table_prefix, -- TEXT, Prefix for output tables
129  k, -- INTEGER, Number of singular vectors to compute
130  nIterations, -- INTEGER, Number of iterations to run (OPTIONAL)
131  result_summary_table -- TEXT Table name to store result summary (OPTIONAL)
132  );
133  \endcode
134 
135  Native implementation for sparse matrix:
136  \code
137  SELECT {schema_madlib}.svd_sparse_native(
138  source_table, -- TEXT, Source table name (sparse matrix)
139  output_table_prefix, -- TEXT, Prefix for output tables
140  row_id, -- TEXT, ID for each row
141  col_id, -- TEXT, ID for each column
142  value, -- TEXT, Non-zero values of the sparse matrix
143  row_dim, -- INTEGER, Row dimension of sparse matrix
144  col_dim, -- INTEGER, Col dimension of sparse matrix
145  k, -- INTEGER, Number of singular vectors to compute
146  lanczos_iter, -- INTEGER, Number of iterations to run, optional
147  result_summary_table -- TEXT Table name to store result summary, optional
148  );
149  \endcode
150 
151 * @examples
152 * \code
153  CREATE TABLE mat (
154  row_id integer,
155  row_vec double precision[]
156  );
157 
158  -- sample input data
159  COPY mat (row_id, row_vec) FROM stdin;
160  1 {691,58,899,163,159,533,604,582,269,390}
161  0 {396,840,353,446,318,886,15,584,159,383}
162  3 {462,532,787,265,982,306,600,608,212,885}
163  2 {293,742,298,75,404,857,941,662,846,2}
164  5 {327,946,368,943,7,516,272,24,591,204}
165  4 {304,151,337,387,643,753,603,531,459,652}
166  7 {458,959,774,376,228,354,300,669,718,565}
167  6 {877,59,260,302,891,498,710,286,864,675}
168  9 {882,761,398,688,761,405,125,484,222,873}
169  8 {824,390,818,844,180,943,424,520,65,913}
170  11 {492,220,576,289,321,261,173,1,44,241}
171  10 {528,1,860,18,814,242,314,965,935,809}
172  13 {350,192,211,633,53,783,30,444,176,932}
173  12 {415,701,221,503,67,393,479,218,219,916}
174  15 {739,651,678,577,273,935,661,47,373,618}
175  14 {909,472,871,695,930,455,398,893,693,838}
176  \.
177 
178  DROP TABLE if exists svd_u;
179  DROP TABLE if exists svd_v;
180  DROP TABLE if exists svd_s;
181  -- SVD for dense matrices
182  SELECT {schema_madlib}.svd('mat', 'svd', 'row_id', 10);
183  ----------------------------------------------------------------
184  DROP TABLE if exists mat_sparse;
185  SELECT {schema_madlib}.matrix_sparsify('mat', 'mat_sparse', False);
186 
187  DROP TABLE if exists svd_u;
188  DROP TABLE if exists svd_v;
189  DROP TABLE if exists svd_s;
190  -- SVD for sparse matrices
191  SELECT {schema_madlib}.svd_sparse('mat_sparse', 'svd', 'row_id', 'col_id', 'value', 10);*
192 * \endcode
193 
194 * @par Technical Background
195 * In linear algebra, the singular value decomposition (SVD) is a factorization of
196 * a real or complex matrix, with many useful applications in signal processing and
197 * statistics.
198 *
199 * Let \f$A\f$ be a \f$m \times n\f$ matrix, where \f$m \ge n\f$. Then \f$A\f$ can be
200 * decomposed as follows:
201 * \f[
202 * A = U \Sigma V^T,
203 * \f]
204 * where \f$U\f$ is a \f$m \times n\f$ orthonormal matrix,
205 * \f$\Sigma\f$ is a \f$n \times n\f$ diagonal matrix, and \f$V\f$ is an
206 * \f$n \times n\f$ orthonormal matrix. The diagonal elements of \f$\Sigma\f$ are
207 * called the \emph{singular values}.
208 *
209 * It is possible to formulate the problem of computing the singular triplets
210 * (\f$\sigma_i, u_i, v_i\f$) of \f$A\f$ as an eigenvalue problem involving a Hermitian
211 * matrix related to \f$A\f$. There are two possible ways of achieving this:
212 *
213 * -# With the cross product matrix, \f$A^TA\f$ and \f$AA^T\f$
214 * -# With the cyclic matrix
215 * \f[
216 * H(A) =
217 * \begin{bmatrix}
218 * 0 & A\\
219 * A^* & 0
220 * \end{bmatrix}
221 * \f]
222 *
223 * The singular values are the nonnegative square roots of the eigenvalues of the
224 * cross product matrix. This approach may imply a severe loss of accuracy in the
225 * smallest singular values. The cyclic matrix approach is an alternative that
226 * avoids this problem, but at the expense of significantly increasing the cost of
227 * the computation.
228 *
229 * Computing the cross product matrix explicitly is not recommended, especially in
230 * the case of sparse A. Bidiagonalization was proposed by Golub and Kahan
231 * [citation?] as a way of tridiagonalizing the cross product matrix without
232 * forming it explicitly.
233 *
234 * Consider the following decomposition
235 * \f[ A = P B Q^T, \f]
236 * where \f$P\f$ and \f$Q\f$ are unitary matrices and \f$B\f$ is an \f$m \times n\f$
237 * upper bidiagonal matrix. Then the tridiagonal matrix \f$B*B\f$ is unitarily
238 * similar to \f$A*A\f$. Additionally, specific methods exist that compute the
239 * singular values of \f$B\f$ without forming \f$B*B\f$. Therefore, after computing the SVD of B,
240 * \f[
241 * B = X\Sigma Y^T,
242 * \f]
243 * it only remains to compute the SVD of the original matrix with \f$U = PX\f$ and \f$V = QY\f$.
244 *
245 **/
246 
247 -- -----------------------------------------------------------------------
248 -- Main function for SVD (Dense format)
249 -- -----------------------------------------------------------------------
250 /*
251 @brief Compute an singular value decomposition for a dense matrix stored in a
252  database table
253 */
254 CREATE OR REPLACE FUNCTION
255 MADLIB_SCHEMA.svd(
256  source_table TEXT, -- Source table name (dense array-format matrix)
257  output_table_prefix TEXT, -- Prefix for output tables
258  row_id TEXT, -- ID for each row
259  k INTEGER, -- Number of singular vectors to compute
260  lanczos_iter INTEGER, -- Iteration number of Lanczos
261  result_summary_table TEXT -- Table name to store result summary
262 )
263 RETURNS VOID AS $$
264  PythonFunctionBodyOnly(`linalg', `svd')
265  return svd.svd(
266  schema_madlib, source_table, output_table_prefix,
267  row_id, k, lanczos_iter, result_summary_table)
268 $$ LANGUAGE plpythonu;
269 
270 -- -----------------------------------------------------------------------
271 -- Main function for SVD (Block format)
272 -- Each row in the input table is a triple: <row_id, col_id, block>
273 -- -----------------------------------------------------------------------
274 CREATE OR REPLACE FUNCTION
275 MADLIB_SCHEMA.svd_block(
276  source_table TEXT, -- Source table name (dense block-format matrix)
277  output_table_prefix TEXT, -- Prefix for output tables
278  k INTEGER, -- Number of singular vectors to compute
279  lanczos_iter INTEGER, -- Iteration number of Lanczos
280  result_summary_table TEXT -- Table name to store result summary
281 )
282 RETURNS VOID AS $$
283  PythonFunctionBodyOnly(`linalg', `svd')
284  return svd.svd_block(
285  schema_madlib, source_table, output_table_prefix, k, lanczos_iter, result_summary_table)
286 $$ LANGUAGE plpythonu;
287 
288 CREATE OR REPLACE FUNCTION
289 MADLIB_SCHEMA.svd_block(
290  source_table TEXT, -- Source table name (dense block-format matrix)
291  output_table_prefix TEXT, -- Prefix for output tables
292  k INTEGER, -- Number of singular vectors to compute
293  lanczos_iter INTEGER -- Iteration number of Lanczos
294 )
295 RETURNS VOID AS $$
296  SELECT MADLIB_SCHEMA.svd_block($1, $2, $3, $4, NULL)
297 $$ LANGUAGE SQL;
298 
299 CREATE OR REPLACE FUNCTION
300 MADLIB_SCHEMA.svd_block(
301  source_table TEXT, -- Source table name (dense block-format matrix)
302  output_table_prefix TEXT, -- Prefix for output tables
303  k INTEGER -- Number of singular vectors to compute
304 )
305 RETURNS VOID AS $$
306  SELECT MADLIB_SCHEMA.svd_block($1, $2, $3, NULL, NULL)
307 $$ LANGUAGE SQL;
308 
309 -- -----------------------------------------------------------------------
310 -- Main Function for SVD (sparse format)
311 -- -----------------------------------------------------------------------
312 /*
313 @brief Compute an singular value decomposition for a sparse matrix stored in a
314  database table
315  ('input_table', 'output_table_prefix', ’row_id', ’col_id', 'value', row_dim, col_dim, k, 'result_summary_table')
316 */
317 CREATE OR REPLACE FUNCTION
318 MADLIB_SCHEMA.svd_sparse(
319  source_table TEXT, -- Source table name (dense matrix)
320  output_table_prefix TEXT, -- Prefix for output tables
321  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
322  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
323  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
324  row_dim INTEGER, -- row dimension of sparse matrix
325  col_dim INTEGER, -- col dimension of sparse matrix
326  k INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
327  lanczos_iter INTEGER, -- Iteration number of Lanczos
328  result_summary_table TEXT -- Table name to store result summary
329 )
330 RETURNS VOID AS $$
331  PythonFunctionBodyOnly(`linalg', `svd')
332  return svd.svd_sparse(
333  schema_madlib, source_table, output_table_prefix,
334  row_id, col_id, val_id, row_dim, col_dim, k,
335  lanczos_iter, result_summary_table)
336 $$ LANGUAGE plpythonu;
337 
338 CREATE OR REPLACE FUNCTION
339 MADLIB_SCHEMA.svd_sparse_native(
340  source_table TEXT, -- Source table name (dense matrix)
341  output_table_prefix TEXT, -- Prefix for output tables
342  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
343  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
344  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
345  row_dim INTEGER, -- row dimension of sparse matrix
346  col_dim INTEGER, -- col dimension of sparse matrix
347  k INTEGER, -- Number of singular vectors with dominant singular values,
348  -- sorted decreasingly
349  lanczos_iter INTEGER, -- Iteration number of Lanczos
350  result_summary_table TEXT -- Table name to store result summary
351 )
352 RETURNS VOID AS $$
353  PythonFunctionBodyOnly(`linalg', `svd')
354  return svd.svd_sparse_native(
355  schema_madlib, source_table, output_table_prefix,
356  row_id, col_id, val_id, row_dim, col_dim, k, lanczos_iter, result_summary_table)
357 $$ LANGUAGE plpythonu;
358 
359 -- -----------------------------------------------------------------------
360 -- Overloaded functions for optional parameters
361 -- -----------------------------------------------------------------------
362 CREATE OR REPLACE FUNCTION
363 MADLIB_SCHEMA.svd(
364  source_table TEXT, -- Source table name (dense matrix)
365  output_table_prefix TEXT, -- Prefix for output tables
366  row_id TEXT, -- ID for each row
367  k INTEGER -- Number of singular vectors to compute
368 )
369 RETURNS VOID AS $$
370  SELECT MADLIB_SCHEMA.svd($1, $2, $3, $4, NULL, NULL)
371 $$ LANGUAGE SQL;
372 
373 
374 CREATE OR REPLACE FUNCTION
375 MADLIB_SCHEMA.svd_sparse(
376  source_table TEXT, -- Source table name (dense matrix)
377  output_table_prefix TEXT, -- Prefix for output tables
378  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
379  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
380  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
381  row_dim INTEGER, -- row dimension of sparse matrix
382  col_dim INTEGER, -- col dimension of sparse matrix
383  k INTEGER -- Number of singular vectors with dominant singular values, sorted decreasingly
384 )
385 RETURNS VOID AS $$
386  SELECT MADLIB_SCHEMA.svd_sparse($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL)
387 $$ LANGUAGE SQL;
388 
389 CREATE OR REPLACE FUNCTION
390 MADLIB_SCHEMA.svd(
391  source_table TEXT, -- Source table name (dense matrix)
392  output_table_prefix TEXT, -- Prefix for output tables
393  row_id TEXT, -- ID for each row
394  k INTEGER, -- Number of singular vectors to compute
395  lanczos_iter INTEGER -- Iteration number of Lanczos
396 )
397 RETURNS VOID AS $$
398  SELECT MADLIB_SCHEMA.svd($1, $2, $3, $4, $5, NULL)
399 $$ LANGUAGE SQL;
400 
401 
402 CREATE OR REPLACE FUNCTION
403 MADLIB_SCHEMA.svd_sparse(
404  source_table TEXT, -- Source table name (dense matrix)
405  output_table_prefix TEXT, -- Prefix for output tables
406  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
407  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
408  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
409  row_dim INTEGER, -- row dimension of sparse matrix
410  col_dim INTEGER, -- col dimension of sparse matrix
411  k INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
412  lanczos_iter INTEGER -- Iteration number of Lanczos
413 )
414 RETURNS VOID AS $$
415  SELECT MADLIB_SCHEMA.svd_sparse($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
416 $$ LANGUAGE SQL;
417 
418 CREATE OR REPLACE FUNCTION
419 MADLIB_SCHEMA.svd_sparse_native(
420  source_table TEXT, -- Source table name (dense matrix)
421  output_table_prefix TEXT, -- Prefix for output tables
422  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
423  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
424  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
425  row_dim INTEGER, -- row dimension of sparse matrix
426  col_dim INTEGER, -- col dimension of sparse matrix
427  k INTEGER -- Number of singular vectors with dominant singular
428  -- values, sorted decreasingly
429 )
430 RETURNS VOID AS $$
431  SELECT MADLIB_SCHEMA.svd_sparse_native($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL)
432 $$ LANGUAGE SQL;
433 
434 CREATE OR REPLACE FUNCTION
435 MADLIB_SCHEMA.svd_sparse_native(
436  source_table TEXT, -- Source table name (dense matrix)
437  output_table_prefix TEXT, -- Prefix for output tables
438  row_id TEXT, -- Name of ‘row_id’ column in sparse matrix representation
439  col_id TEXT, -- Name of 'col_id' column in sparse matrix representation
440  val_id TEXT, -- Name of 'val_id' column in sparse matrix representation
441  row_dim INTEGER, -- row dimension of sparse matrix
442  col_dim INTEGER, -- col dimension of sparse matrix
443  k INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
444  lanczos_iter INTEGER -- Iteration number of Lanczos
445 )
446 RETURNS VOID AS $$
447  SELECT MADLIB_SCHEMA.svd_sparse_native($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
448 $$ LANGUAGE SQL;
449 
450 ---------------------------------------------------------------------
451 ------------------------Internal Functions---------------------------
452 ---------------------------------------------------------------------
453 
454 CREATE OR REPLACE FUNCTION
455 MADLIB_SCHEMA.__svd_unit_vector
456 (
457  dim INT -- Dimension of the vector
458 )
459 -- RETURNS MADLIB_SCHEMA.__svd_unit_vector_result
460 RETURNS DOUBLE PRECISION[]
461 AS 'MODULE_PATHNAME', 'svd_unit_vector'
462 LANGUAGE C STRICT;
463 
464 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_lanczos_result;
465 CREATE TYPE MADLIB_SCHEMA.__svd_lanczos_result AS
466 (
467  scalar FLOAT8, -- alpha/beta
468  vec FLOAT8[] -- pvec/qvec
469 );
470 
471 ---------------------------------------------------------------------
472 -------Common Aggregator for Computing Lanzcos P/Q Vectors-----------
473 ---------------------------------------------------------------------
474 
475 ---------------------------------------------------------------------
476 ---------------------For Array-Format Matrix-------------------------
477 ---------------------------------------------------------------------
478 CREATE OR REPLACE FUNCTION
479 MADLIB_SCHEMA.__svd_lanczos_sfunc
480 (
481  state FLOAT8[], -- A * q_j OR A_Trans * p_(j-1)
482  row_id INT, -- Matrix row id
483  row_array FLOAT8[], -- Matrix row array
484  vector FLOAT8[], -- q_j OR p_(j-1)
485  dimension INT -- row_dim OR col_dim
486 )
487 RETURNS FLOAT8[]
488 AS 'MODULE_PATHNAME', 'svd_lanczos_sfunc'
489 LANGUAGE C;
490 
491 ---------------------------------------------------------------------
492 ---------------------For Block-Format Matrix-------------------------
493 ---------------------------------------------------------------------
494 CREATE OR REPLACE FUNCTION
495 MADLIB_SCHEMA.__svd_block_lanczos_sfunc
496 (
497  state FLOAT8[], -- A * q_j OR A_Trans * p_(j-1)
498  row_id INT4, -- Matrix block row id
499  col_id INT4, -- Matrix block col id
500  block FLOAT8[], -- Matrix block
501  vector FLOAT8[], -- q_j OR p_(j-1)
502  dimension INT4 -- row_dim OR col_dim
503 )
504 RETURNS FLOAT8[]
505 AS 'MODULE_PATHNAME', 'svd_block_lanczos_sfunc'
506 LANGUAGE C;
507 
508 ---------------------------------------------------------------------
509 ---------------------For Sparse-Format Matrix-------------------------
510 ---------------------------------------------------------------------
511 CREATE OR REPLACE FUNCTION
512 MADLIB_SCHEMA.__svd_sparse_lanczos_sfunc
513 (
514  state FLOAT8[], -- A * q_j OR A_Trans * p_(j-1)
515  row_id INT4, -- row id
516  col_id INT4, -- col id
517  val FLOAT8, -- value
518  vector FLOAT8[], -- q_j OR p_(j-1)
519  dimension INT4 -- row_dim OR col_dim
520 )
521 RETURNS FLOAT8[]
522 AS 'MODULE_PATHNAME', 'svd_sparse_lanczos_sfunc'
523 LANGUAGE C;
524 
525 CREATE OR REPLACE FUNCTION
526 MADLIB_SCHEMA.__svd_lanczos_prefunc
527 (
528  state1 FLOAT8[],
529  state2 FLOAT8[]
530 )
531 RETURNS FLOAT8[]
532 AS 'MODULE_PATHNAME', 'svd_lanczos_prefunc'
533 LANGUAGE C STRICT;
534 
535 ---------------------------------------------------------------------
536 ---------------------For Array-Format Matrix-------------------------
537 ---------------------------------------------------------------------
538 DROP AGGREGATE IF EXISTS
539 MADLIB_SCHEMA.__svd_lanczos_agg
540 (
541  INT, -- Matrix row id
542  FLOAT8[], -- Matrix row array
543  FLOAT8[], -- q_j OR p_(j-1)
544  INT -- row_dim OR col_dim
545 );
546 
547 CREATE AGGREGATE
548 MADLIB_SCHEMA.__svd_lanczos_agg
549 (
550  INT, -- Matrix row id
551  FLOAT8[], -- Matrix row array
552  FLOAT8[], -- q_j OR p_(j-1)
553  INT -- row_dim OR col_dim
554 )
555 (
556  stype = FLOAT8[],
557  sfunc = MADLIB_SCHEMA.__svd_lanczos_sfunc
558  m4_ifdef(
559  `__GREENPLUM__',
560  `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
561  )
562 );
563 
564 ---------------------------------------------------------------------
565 ---------------------For Block-Format Matrix-------------------------
566 ---------------------------------------------------------------------
567 DROP AGGREGATE IF EXISTS
568 MADLIB_SCHEMA.__svd_block_lanczos_agg
569 (
570  INT4, -- Matrix block row id
571  INT4, -- Matrix block col id
572  FLOAT8[], -- Matrix block
573  FLOAT8[], -- q_j OR p_(j-1)
574  INT4 -- row_dim OR col_dim
575 );
576 
577 CREATE AGGREGATE
578 MADLIB_SCHEMA.__svd_block_lanczos_agg
579 (
580  INT, -- Matrix block row id
581  INT4, -- Matrix block col id
582  FLOAT8[], -- Matrix row array
583  FLOAT8[], -- q_j OR p_(j-1)
584  INT -- row_dim OR col_dim
585 )
586 (
587  stype = FLOAT8[],
588  --Note that only the sfunc is different
589  sfunc = MADLIB_SCHEMA.__svd_block_lanczos_sfunc
590  m4_ifdef(
591  `__GREENPLUM__',
592  `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
593  )
594 );
595 
596 ---------------------------------------------------------------------
597 ---------------------For Sparse-Format Matrix-------------------------
598 ---------------------------------------------------------------------
599 DROP AGGREGATE IF EXISTS
600 MADLIB_SCHEMA.__svd_sparse_lanczos_agg
601 (
602  INT4, -- Row ID
603  INT4, -- Column ID
604  FLOAT8, -- Value
605  FLOAT8[], -- q_j OR p_(j-1)
606  INT4 -- row_dim OR col_dim
607 );
608 
609 CREATE AGGREGATE
610 MADLIB_SCHEMA.__svd_sparse_lanczos_agg
611 (
612  INT4, -- Row ID
613  INT4, -- Column ID
614  FLOAT8, -- Value
615  FLOAT8[], -- q_j OR p_(j-1)
616  INT4 -- row_dim OR col_dim
617 )
618 (
619  stype = FLOAT8[],
620  --Note that only the sfunc is different
621  sfunc = MADLIB_SCHEMA.__svd_sparse_lanczos_sfunc
622  m4_ifdef(
623  `__GREENPLUM__',
624  `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
625  )
626 );
627 
628 ---------------------------------------------------------------------
629 --------Postproce Function for Computing Lanzcos P/V Vectors---------
630 ---------------------------------------------------------------------
631 CREATE OR REPLACE FUNCTION
632 MADLIB_SCHEMA.__svd_lanczos_pvec
633 (
634  vector FLOAT8[], -- Partial result from the aggregator
635  pvec FLOAT8[], -- Previous P vector
636  beta FLOAT8 -- Previous beta
637 )
638 RETURNS MADLIB_SCHEMA.__svd_lanczos_result
639 AS 'MODULE_PATHNAME', 'svd_lanczos_pvec'
640 LANGUAGE C;
641 
642 CREATE OR REPLACE FUNCTION
643 MADLIB_SCHEMA.__svd_lanczos_qvec
644 (
645  vector FLOAT8[], -- Partial result from the aggregator
646  qvec FLOAT8[], -- Previous P vector
647  alpha FLOAT8 -- Previous beta
648 )
649 RETURNS FLOAT8[]
650 AS 'MODULE_PATHNAME', 'svd_lanczos_qvec'
651 LANGUAGE C;
652 ---------------------------------------------------------------------
653 -----------------Gram-Schmidt Orthogonilization----------------------
654 ---------------------------------------------------------------------
655 CREATE OR REPLACE FUNCTION
656 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_sfunc
657 (
658  state FLOAT8[],
659  vec_unorthogonalized FLOAT8[],
660  vec_orthogonalized FLOAT8[]
661 )
662 RETURNS FLOAT8[]
663 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_sfunc'
664 LANGUAGE C;
665 
666 CREATE OR REPLACE FUNCTION
667 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_prefunc
668 (
669  state1 FLOAT8[],
670  state2 FLOAT8[]
671 )
672 RETURNS FLOAT8[]
673 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_prefunc'
674 LANGUAGE C STRICT;
675 
676 -- This function will also do the normalization
677 CREATE OR REPLACE FUNCTION
678 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_ffunc
679 (
680  state FLOAT8[]
681 )
682 RETURNS MADLIB_SCHEMA.__svd_lanczos_result
683 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_ffunc'
684 LANGUAGE C STRICT;
685 
686 DROP AGGREGATE IF EXISTS
687 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_agg
688 (
689  FLOAT8[], -- Unorthogonalized vector
690  FLOAT8[] -- Orthogonalized vector
691 );
692 
693 CREATE AGGREGATE
694 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_agg
695 (
696  FLOAT8[], -- Unorthogonalized vector
697  FLOAT8[] -- Orthogonalized vector
698 )
699 (
700  stype = FLOAT8[],
701  sfunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_sfunc,
702  finalfunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_ffunc
703  m4_ifdef(
704  `__GREENPLUM__',
705  `, prefunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_prefunc'
706  )
707 );
708 
709 ---------------------------------------------------------------------
710 --------------------Lanczos Bidiagonalization------------------------
711 ---------------------------------------------------------------------
712 CREATE OR REPLACE FUNCTION
713 MADLIB_SCHEMA.__svd_lanczos_bidiagonalize
714 (
715  source_table TEXT,
716  output_table_prefix TEXT,
717  iter_num INT,
718  k INT,
719  is_block BOOLEAN
720 )
721 RETURNS VOID AS $$
722  PythonFunctionBodyOnly(`linalg', `svd')
723  return svd.lanczos_bidiagonalize(
724  schema_madlib, source_table,
725  output_table_prefix, iter_num, k, is_block)
726 $$ LANGUAGE plpythonu;
727 
728 CREATE OR REPLACE FUNCTION
729 MADLIB_SCHEMA.__svd_lanczos_bidiagonalize_sparse
730 (
731  source_table TEXT,
732  row_id TEXT,
733  col_id TEXT,
734  val TEXT,
735  output_table_prefix TEXT,
736  iter_num INT,
737  k INT
738 )
739 RETURNS VOID AS $$
740  PythonFunctionBodyOnly(`linalg', `svd')
741  return svd.lanczos_bidiagonalize_sparse(
742  schema_madlib, source_table, row_id, col_id,
743  val, output_table_prefix, iter_num, k)
744 $$ LANGUAGE plpythonu;
745 
746 ---------------------------------------------------------------------
747 -------------------- SVD of Bidiagonal matrix ------------------------
748 ---------------------------------------------------------------------
749 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_bidiagonal_matrix_result;
750 CREATE TYPE MADLIB_SCHEMA.__svd_bidiagonal_matrix_result AS
751 (
752  left_matrix FLOAT8[][],
753  right_matrix FLOAT8[][],
754  singular_values FLOAT8[]
755 );
756 
757 ------------------------------------------------------------------------------
758 --The bidiagonal matrix is represented as a tripe: <row_ids, col_ids, vals>
759 --where:
760 -- row_ids is an array of integers representing row_ids of non-zero elements
761 -- col_ids is an array of integers representing col_ids of non-zero elements
762 -- vals is an array of doubles representing values of non-zero elements
763 --Note that we don't need the aggregator to convert the sparse bidiagonal
764 --matrix into a dense matrix
765 ------------------------------------------------------------------------------
766 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_decompose_bidiag(
767  row_ids INT[],
768  col_ids INT[],
769  vals FLOAT8[]
770 )
771 RETURNS MADLIB_SCHEMA.__svd_bidiagonal_matrix_result
772 AS 'MODULE_PATHNAME', 'svd_decompose_bidiag'
773 LANGUAGE C STRICT;
774 
775 -----------------------------------------------------------------------
776 -- Special Vector-Matrix Multiplication Functions
777 -----------------------------------------------------------------------
778 /**
779  * Multiplication of a row vector with an in-memory matrix
780  * vector: 1 x l
781  * matrix: l x l
782  * k: Sub-matrix of the size l x k
783  **/
784 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_mult_matrix(
785  vector FLOAT8[],
786  matrix FLOAT8[][],
787  k INT4
788 )
789 RETURNS FLOAT8[]
790 AS 'MODULE_PATHNAME', 'svd_vec_mult_matrix'
791 LANGUAGE C STRICT;
792 
793 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_vec_mat_mult_result;
794 CREATE TYPE MADLIB_SCHEMA.__svd_vec_mat_mult_result AS
795 (
796  row_id INT4,
797  row_vec FLOAT8[]
798 );
799 
800 --Multiplication of a column vector with an in-memory matrix
801 --Return a set of m row vector of the size 1 * k
802 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_trans_mult_matrix_internal(
803  vector FLOAT8[], -- m * l row vector
804  matrix FLOAT8[][], -- l * l in-memory matrix
805  col_id INT4, -- Column ID
806  k INT4 -- Specify the size of submatrix l * k
807 )
808 RETURNS SETOF MADLIB_SCHEMA.__svd_vec_mat_mult_result
809 AS 'MODULE_PATHNAME', 'svd_vec_trans_mult_matrix'
810 LANGUAGE C STRICT;
811 
812 --Multiplication of P/Q vectors with Left/Right Singular Matrix of B
813 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_trans_mult_matrix(
814  vec_table TEXT, -- P/Q column vectors
815  mat_table TEXT, -- (svd_output).left_matrix/right_matrix
816  k INT4, -- Number of singular values
817  is_left BOOLEAN, -- Left matrix?
818  res_table TEXT -- Result
819 )
820 RETURNS VOID AS $$
821  PythonFunctionBodyOnly(`linalg', `svd')
822  return svd.svd_vec_trans_mult_matrix(
823  schema_madlib, vec_table, mat_table, k, res_table, is_left)
824 $$ LANGUAGE plpythonu;
825 
826 -----------------------------------------------------------------------
827 -- Help functions
828 -----------------------------------------------------------------------
829 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svd(
830  input_message TEXT
831 )
832 RETURNS TEXT AS $$
833 PythonFunctionBodyOnly(`linalg', `svd')
834  return svd.svd_help_message(schema_madlib, input_message)
835 $$ LANGUAGE plpythonu;
836 
837 
838 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svd()
839 RETURNS TEXT AS $$
840 PythonFunctionBodyOnly(`linalg', `svd')
841  return svd.svd_help_message(schema_madlib, None)
842 $$ LANGUAGE plpythonu;