User Documentation
 All Files Functions Groups
dense_linear_systems.sql_in
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file dense_linear_sytems.sql_in
4  *
5  * @brief SQL functions for linear systems
6  * @date January 2011
7  *
8  * @sa Computes the solution of a consistent linear system
9  *
10  *//* ----------------------------------------------------------------------- */
11 
12 m4_include(`SQLCommon.m4')
13 
14 
15 /**
16 @addtogroup grp_dense_linear_solver
17 
18 
19 <div class ="toc"><b>Contents</b>
20 <ul>
21 <li class="level1"><a href="#dls_about">About</a></li>
22 <li class="level1"><a href="#dls_online_help">Online Help</a></li>
23 <li class="level1"><a href="#dls_function">Function Syntax</a></li>
24 <li class="level1"><a href="#dls_args">Arguments</a></li>
25 <li class="level1"><a href="#dls_opt_params">Optimizer Parameters</a></li>
26 <li class="level1"><a href="#dls_output">Output Tables</a></li>
27 <li class="level1"><a href="#dls_examples">Examples</a></li>
28 </ul>
29 </div>
30 
31 @anchor dls_about
32 @about
33 
34 The linear systems module implements solution methods for systems of consistent
35 linear equations. Systems of linear equations take the form:
36 \f[
37  Ax = b
38 \f]
39 
40 where \f$x \in \mathbb{R}^{n}\f$, \f$A \in \mathbb{R}^{m \times n} \f$ and \f$b \in \mathbb{R}^{m}\f$.
41 We assume that there are no rows of \f$A\f$ where all elements are zero.
42 The algorithms implemented in this module can handle large dense
43 linear systems. Currently, the algorithms implemented in this module
44 solve the linear system by a direct decomposition. Hence, these methods are
45 known as <em>direct method</em>.
46 
47 @anchor dls_online_help
48 @par Online Help
49 
50 View short help messages using the following statements:
51 @verbatim
52 -- Summary of dense linear systems
53 SELECT madlib.linear_solver_dense();
54 
55 -- Function syntax and output table format
56 SELECT madlib.linear_solver_dense('usage');
57 
58 -- Syntax for direct methods
59 SELECT madlib.linear_solver_dense('direct');
60 @endverbatim
61 
62 @anchor dls_function
63 @par Function Syntax
64 
65 <pre>
66 SELECT linear_solver_dense(tbl_source, tbl_result, row_id, LHS,
67  RHS, grouping_col := NULL, optimizer := 'direct',
68  optimizer_params := 'algorithm = householderqr');
69 </pre>
70 
71 @anchor dls_args
72 @par Arguments
73 
74 <DL class="arglist">
75 <DT>tbl_source</DT>
76 <DD>Text value. The name of the table containing the training data.
77 The input data is expected to be of the following form:
78 <pre>{TABLE|VIEW} <em>sourceName</em> (
79  ...
80  <em>row_id</em> FLOAT8,
81  <em>left_hand_side</em> FLOAT8[],
82  <em>right_hand_side</em> FLOAT8,
83  ...
84 )</pre>
85 
86 Here, each row represents a single equation using. The <em> right_hand_side
87 </em> refers
88 to the right hand side of the equations while the <em> left_hand_side </em>
89 refers to the multipliers on the variables on the left hand side of the same
90 equations.
91 </DD>
92 
93 <DT>tbl_result</DT>
94 <DD>Text value. The name of the table where the output is saved.</DD>
95 
96 <DT>row_id</DT>
97 <DD>Text value. The name of the column storing the 'row id' of the equations.</DD>
98 
99 \note For a system with N equations, the row_id's must be a continuous
100 range of integers from \f$ 0 \ldots n-1 \f$.
101 
102 <DT>LHS</DT>
103 <DD>Text value. The name of the column storing the 'left hand side' of the
104 equations stored as an array.</DD>
105 
106 <DT>RHS</DT>
107 <DD>Text value. The name of the column storing the 'right hand side' of the
108 equations.</DD>
109 
110 <DT>grouping_col (optional) </DT>
111 <DD>Text value. Group by columns. Default: NULL.</DD>
112 \note The grouing columns is a placeholder in MADlib V1.1
113 
114 <DT>optimizer (optional) </DT>
115 <DD>Text value. Type of optimizer. Default: 'direct'.</DD>
116 
117 <DT>optimizer_params (optional) </DT>
118 <DD>Text value. Optimizer specific parameters. Default: NULL.</DD>
119 </DL>
120 
121 @anchor dls_opt_params
122 @par Optimizer Parameters
123 
124 For each optimizer, there are specific parameters that can be tuned
125 for better performance.
126 \par
127 <DL class="arglist">
128 <DT>algorithm (default: householderqr)</dT>
129 <DD>
130 
131  There are several algorithms that can be classified as 'direct' methods
132  of solving linear systems. MADlib dense linear system solvers provide
133  various algorithmic options for users.
134 
135  The following table provides a guideline on the choice of algorithm based
136  on conditions on the A matrix, speed of the algorithms and numerical stability.
137 
138  Algorithm | Contitions on A | Speed | Accuracy
139  ----------------------------------------------------------
140  householderqr | None | ++ | +
141  partialpivlu | Invertable | ++ | +
142  fullpivlu | None | - | +++
143  colpivhouseholderqr | None | + | ++
144  fullpivhouseholderqr | None | - | +++
145  llt | Pos. Definite | +++ | +
146  ldlt | Pos. or Neg Def | +++ | ++
147 
148  For speed '++' is faster than '+', which is faster than '-'.
149  For accuracy '+++' is better than '++'.
150 
151  More details about the individual algorithms can be found on the <a href="http://eigen.tuxfamily.org/dox-devel/group__TutorialLinearAlgebra.html"> Eigen documentation</a>. Eigen is an open source library for linear algebra.
152 
153 
154 </DD>
155 </DL>
156 
157 @anchor dls_output
158 @par Output statistics
159 Output is stored in the <em>tbl_result</em> table.
160 <DL class="arglist">
161 <DT>solution</dT>
162 <DD>
163 The solution is an array (of double precision) with the variables in the same
164 order as that provided as input in the 'left_hand_side' column name of the
165 'source_table'
166 </DD>
167 
168 <DT>residual_norm</dT>
169 <DD>
170 Computes the scaled residual norm, defined as \f$ \frac{|Ax - b|}{|b|} \f$.
171 This value is an indication of the accuracy of the solution.
172 </DD>
173 
174 <DT>iters</dT>
175 <DD>`
176 The number of iterations required by the algorithm (only applicable for
177  iterative algorithms). The output is NULL for
178 'direct' methods.
179 </DD>
180 
181 </DL>
182 
183 
184 
185 
186 
187 @anchor dls_examples
188 @examp
189 
190 -# Create the sample data set:
191 \verbatim
192 sql> CREATE TABLE linear_systems_test_data (id INTEGER NOT NULL,
193  lhs DOUBLE PRECISION[],
194  rhs DOUBLE PRECISION);
195 sql> INSERT INTO linear_systems_test_data(id, lhs, rhs) VaLUES
196  (0, ARRAY[1,0,0], 20),
197  (1, ARRAY[0,1,0], 15),
198  (2, ARRAY[0,0,1], 20);
199 \endverbatim
200 
201 -# Solve the linear systems with default parameters
202 \verbatim
203 sql> SELECT madlib.linear_solver_dense('linear_systems_test_data',
204  'output_table',
205  'id',
206  'lhs',
207  'rhs');
208 \endverbatim
209 
210 -# Obtain the output from the output table
211 \verbatim
212 sql> SELECT * FROM output_table;
213 --------------------+-------------------------------------
214 solution | {20,15,20}
215 residual_norm | 0
216 iters | NULL
217 \endverbatim
218 
219 
220 -# Chose a different algorithm than the default algorithm
221 \verbatim
222 drop table if exists result_table;
223 select madlib.linear_solver_dense(
224  'linear_systems_test_data',
225  'result_table',
226  'id',
227  'lhs',
228  'rhs',
229  NULL,
230  'direct',
231  'algorithm=llt'
232  );
233 \endverbatim
234 
235 
236 @sa File dense_linear_sytems.sql_in documenting the SQL functions.
237 
238 @internal
239 @sa Namespace \ref madlib::modules::linear_systems documenting the implementation in C++
240 @endinternal
241 */
242 
243 ------------------ Linear Systems ------------------------------
244 
245 CREATE TYPE MADLIB_SCHEMA.dense_linear_solver_result AS (
246  solution DOUBLE PRECISION[],
247  residual_norm DOUBLE PRECISION,
248  iters INTEGER
249 );
250 
251 CREATE TYPE MADLIB_SCHEMA.residual_norm_result AS (
252  residual_norm DOUBLE PRECISION
253 );
254 
255 
256 ------------------------ Compute the residuals ------------------------------
257 
258 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_transition(
259  state MADLIB_SCHEMA.bytea8,
260  a DOUBLE PRECISION[],
261  b DOUBLE PRECISION,
262  x DOUBLE PRECISION[])
263 RETURNS MADLIB_SCHEMA.bytea8
264 AS 'MODULE_PATHNAME'
265 LANGUAGE C IMMUTABLE STRICT;
266 
267 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_merge_states(
268  state1 MADLIB_SCHEMA.bytea8,
269  state2 MADLIB_SCHEMA.bytea8)
270 RETURNS MADLIB_SCHEMA.bytea8
271 AS 'MODULE_PATHNAME'
272 LANGUAGE C IMMUTABLE STRICT;
273 
274 
275 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_final(
276  state MADLIB_SCHEMA.bytea8)
277 RETURNS MADLIB_SCHEMA.residual_norm_result
278 AS 'MODULE_PATHNAME'
279 LANGUAGE C IMMUTABLE STRICT;
280 
281 /**
282  * @brief Compute the residual after solving the dense linear systems
283  *
284  * @param left_hand_side Column containing the left hand side of the system
285  * @param right_hand_side Column containing the right hand side of the system
286  * @param solution Solution of the linear system
287  *
288  *
289  * @return residual_norm FLOAT8:
290  *
291  * @usage
292  * - Get all the diagnostic statistics:\n
293  *
294  * <pre> SELECT dense_residual_norm(<em>row_id</em>,
295  * <em>left_hand_side</em>,
296  * <em> right_hand_side </em>,
297  * <em> solution </em>)
298  * FROM <em>dataTable</em>;
299  * </pre>
300  */
301 
302 CREATE AGGREGATE MADLIB_SCHEMA.dense_residual_norm(
303  /*+ "left_hand_side" */ DOUBLE PRECISION[],
304  /*+ "right_hand_side" */ DOUBLE PRECISION,
305  /*+ "solution" */ DOUBLE PRECISION[])(
306  STYPE=MADLIB_SCHEMA.bytea8,
307  SFUNC=MADLIB_SCHEMA.dense_residual_norm_transition,
308  m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.dense_residual_norm_merge_states,')
309  FINALFUNC=MADLIB_SCHEMA.dense_residual_norm_final,
310  INITCOND=''
311 );
312 
313 ------------------ Direct Method ------------------------------
314 
315 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_transition(
316  state DOUBLE PRECISION[],
317  row_id INTEGER,
318  a DOUBLE PRECISION[],
319  b DOUBLE PRECISION,
320  num_rows INTEGER,
321  algorithm INTEGER)
322 RETURNS DOUBLE PRECISION[]
323 AS 'MODULE_PATHNAME'
324 LANGUAGE C IMMUTABLE STRICT;
325 
326 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_merge_states(
327  state1 DOUBLE PRECISION[],
328  state2 DOUBLE PRECISION[])
329 RETURNS DOUBLE PRECISION[]
330 AS 'MODULE_PATHNAME'
331 LANGUAGE C IMMUTABLE STRICT;
332 
333 
334 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_final(
335  state DOUBLE PRECISION[])
336 RETURNS MADLIB_SCHEMA.dense_linear_solver_result
337 AS 'MODULE_PATHNAME'
338 LANGUAGE C IMMUTABLE STRICT;
339 
340 
341 /**
342  * @brief Solve a system of linear equations using the direct method
343  *
344  * @param row_id Column containing the row_id
345  * @param left_hand_side Column containing the left hand side of the system
346  * @param right_hand_side Column containing the right hand side of the system
347  * @param numEquations Number of equations
348  * @param algorithm Algorithm used for the dense linear solver
349  *
350  *
351  * @return A composite value:
352  * - <tt>solution FLOAT8[] </tt> - Array of marginal effects
353  * - <tt>residual_norm FLOAT8</tt> - Norm of the residual
354  * - <tt>iters INTEGER</tt> - Iterations taken
355  *
356  * @usage
357  * - Get all the diagnostic statistics:\n
358  *
359  * <pre> SELECT linear_system_dense(<em>row_id</em>,
360  * <em>left_hand_side</em>,
361  * <em> right_hand_side </em>,
362  * <em> numEquations </em>)
363  * FROM <em>dataTable</em>;
364  * </pre>
365  */
366 
367 CREATE AGGREGATE MADLIB_SCHEMA.dense_direct_linear_system(
368  /*+ "row_id" */ INTEGER,
369  /*+ "left_hand_side" */ DOUBLE PRECISION[],
370  /*+ "right_hand_side" */ DOUBLE PRECISION,
371  /*+ "numEquations" */ INTEGER,
372  /*+ "algorithm" */ INTEGER)(
373  STYPE=DOUBLE PRECISION[],
374  SFUNC=MADLIB_SCHEMA.dense_direct_linear_system_transition,
375  m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.dense_direct_linear_system_merge_states,')
376  FINALFUNC=MADLIB_SCHEMA.dense_direct_linear_system_final,
377  INITCOND='{0,0,0,0,0,0}'
378 );
379 
380 
381 --------------------------- Interface ----------------------------------
382 
383 /**
384  * @brief Help function, to print out the supported families
385  */
386 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
387  input_string VARCHAR
388 )
389 RETURNS VARCHAR AS $$
390 PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense_help)
391 $$ LANGUAGE plpythonu;
392 
393 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense()
394 RETURNS VARCHAR AS $$
395 BEGIN
396  RETURN MADLIB_SCHEMA.linear_solver_dense('');
397 END;
398 $$ LANGUAGE plpgsql VOLATILE;
399 
400 
401 /**
402  @brief A wrapper function for the various marginal linear_systemsion analyzes.
403  *
404  * @param source_table String identifying the input table
405  * @param out_table String identifying the output table to be created
406  * @param row_id Column containing the row_id
407  * @param left_hand_side Column containing the left hand side of the system
408  * @param right_hand_side Column containing the right hand side of the system
409  * @param grouping_cols Columns to group by
410  * @param optimzer Optimizer to be used
411  * @param optimzer_options Optimal parameters for the algorithms
412  *
413  *
414  * @return void
415  *
416  * @usage
417  * For function summary information. Run
418  * sql> select linear_solver_dense('help');
419  * OR
420  * sql> select linear_solver_dense();
421  * OR
422  * sql> select linear_solver_dense('?');
423  * For function usage information. Run
424  * sql> select linear_solver_dense('usage');
425  *
426  */
427 
428 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
429  source_table VARCHAR -- name of input table
430  , out_table VARCHAR -- name of output table
431  , row_id VARCHAR -- name of the column containing row_id
432  , left_hand_side VARCHAR -- name of columns with lhs
433  , right_hand_side VARCHAR -- name of columns with rhs
434  , grouping_cols VARCHAR -- name of columns to group by
435  , optimizer VARCHAR -- Name of the optimizer
436  , optimizer_options VARCHAR -- Optimal parameters of the optimizer
437  )
438 RETURNS VOID AS $$
439 PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense)
440 $$ LANGUAGE plpythonu;
441 
442 
443 
444 -- Default Variable calls for linear_solver_dense
445 ------------------------------------------------------------------------------
446 
447 /**
448  * @brief Marginal effects with default variables
449  **/
450 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
451  source_table VARCHAR -- name of input table
452  , out_table VARCHAR -- name of output table
453  , row_id VARCHAR -- name of the column containing row_id
454  , left_hand_side VARCHAR -- name of columns with lhs
455  , right_hand_side VARCHAR -- name of columns with rhs
456  )
457 RETURNS VOID AS $$
458 BEGIN
459  PERFORM MADLIB_SCHEMA.linear_solver_dense(
460  source_table,
461  out_table,
462  row_id,
463  left_hand_side,
464  right_hand_side,
465  NULL,
466  'direct',
467  NULL);
468 
469 END;
470 $$ LANGUAGE plpgsql VOLATILE;
471 
472 
473 /**
474  * @brief Marginal effects with default variables
475  **/
476 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
477  source_table VARCHAR -- name of input table
478  , out_table VARCHAR -- name of output table
479  , row_id VARCHAR -- name of the column containing row_id
480  , left_hand_side VARCHAR -- name of columns with lhs
481  , right_hand_side VARCHAR -- name of columns with rhs
482  , grouping_cols VARCHAR -- name of columns to group by
483  )
484 RETURNS VOID AS $$
485 BEGIN
486  PERFORM MADLIB_SCHEMA.linear_solver_dense(
487  source_table,
488  out_table,
489  row_id,
490  left_hand_side,
491  right_hand_side,
492  grouping_cols,
493  'direct',
494  NULL);
495 END;
496 $$ LANGUAGE plpgsql VOLATILE;
497 
498 /**
499  * @brief Marginal effects with default variables
500  **/
501 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
502  source_table VARCHAR -- name of input table
503  , out_table VARCHAR -- name of output table
504  , row_id VARCHAR -- name of the column containing row_id
505  , left_hand_side VARCHAR -- name of columns with lhs
506  , right_hand_side VARCHAR -- name of columns with rhs
507  , grouping_cols VARCHAR -- name of columns to group by
508  , optimizer VARCHAR -- Name of the optimizer
509  )
510 RETURNS VOID AS $$
511 BEGIN
512  PERFORM MADLIB_SCHEMA.linear_solver_dense(
513  source_table,
514  out_table,
515  row_id,
516  left_hand_side,
517  right_hand_side,
518  grouping_cols,
519  optimizer,
520  NULL);
521 END;
522 $$ LANGUAGE plpgsql VOLATILE;