User Documentation
 All Files Functions Groups
elastic_net.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file elastic_net.sql_in
4  *
5  * @brief SQL functions for elastic net regularization
6  * @date July 2012
7  *
8  * @sa For a brief introduction to elastic net, see the module
9  * description \ref grp_lasso.
10  *
11  *//* ----------------------------------------------------------------------- */
12 
13 m4_include(`SQLCommon.m4') --'
14 
15 /**
16 @addtogroup grp_elasticnet
17 
18 <div class="toc"><b>Contents</b><ul>
19 <li class="level1"><a href="#about">About</a></li>
20 <li class="level1"><a href="#help">Online Help</a></li>
21 <li class="level1"><a href="#train">Usage</a></li>
22 <li class="level2"><a href="#train">Training Function</a></li>
23 <li class="level3"><a href="#optimizer">Optimizer Parameters</a></li>
24 <li class="level2"><a href="#output">Output Table</a></li>
25 <li class="level2"><a href="#predict">Prediction Function</a></li>
26 <li class="level1"><a href="#examples">Examples</a></li>
27 <li class="level1"><a href="#seealso">See Also</a></li>
28 <li class="level1"><a href="#background">Technical Background</a></li>
29 <li class="level1"><a href="#literature">Literature</a></li>
30 </ul></div>
31 
32 @anchor about
33 @about
34 
35 This module implements elastic net regularization for linear and logistic regression problems.
36 
37 @anchor help
38 @par Online Help
39 
40 View short help messages using the following statements:
41 @verbatim
42 -- Summary of Elastic Net Regularization
43 madlib.elastic_net_train()
44 
45 -- Training function syntax and output table format
46 madlib.elastic_net_train('usage')
47 
48 -- Prediction function syntax
49 madlib.elastic_net_train('predict')
50 
51 -- Syntax for gaussian/linear model
52 madlib.elastic_net_train('gaussian')
53 madlib.elastic_net_train('linear')
54 
55 -- Syntax for binomial/logistic model
56 madlib.elastic_net_train('binomial')
57 madlib.elastic_net_train('logistic')
58 
59 -- Parameter formats for optimizers
60 madlib.elastic_net_train('fista')
61 madlib.elastic_net_train('igd')
62 @endverbatim
63 
64 @anchor train
65 @par Training Function
66 The training function has the following format:
67 @verbatim
68 madlib.elastic_net_train(
69  tbl_source, tbl_result, col_dep_var, col_ind_var,
70  regress_family, alpha, lambda_value, standardize,
71  grouping_col, optimizer := NULL,
72  optimizer_params := NULL, excluded := NULL,
73  max_iter := 10000, tolerance := 1e-6)
74 @endverbatim
75 
76 \note It is \b strongly \b recommended that you run
77 \c elastic_net_train() on a subset of the data with a limited
78 \e max_iter before applying it to the full data set with a large
79 \e max_iter. In the pre-run, you can adjust the parameters to get the
80 best performance and then apply the best set of parameters to the whole data
81 set.
82 
83 <DL class="arglist">
84 <DT>tbl_source</DT>
85 <DD>Text value. The name of the table containing the training data.</DD>
86 
87 <DT>tbl_result</DT>
88 <DD>Text value. Name of the generated table containing the output model.</DD>
89 
90 <DT>col_dep_var</DT>
91 <DD>Text value. An expression for the dependent variable.</DD>
92 <DD>Both \e col_dep_var and \e col_ind_var can be valid Postgres
93 expressions. For example, <tt>col_dep_var = 'log(y+1)'</tt>, and <tt>col_ind_var
94 = 'array[exp(x[1]), x[2], 1/(1+x[3])]'</tt>. In the binomial case, you can
95 use a Boolean expression, for example, <tt>col_dep_var = 'y < 0'</tt>.<DD>
96 
97 <DT>col_ind_var</DT>
98 <DD>Text value. An expression for the independent variables. Use \c '*' to
99 specify all columns of <em>tbl_source</em> except those listed in the
100 <em>excluded</em> string. If \e col_dep_var is a column name, it is
101 automatically excluded from the independent variables. However, if
102 \e col_dep_var is a valid Postgres expression, any column names used
103 within the expression are only excluded if they are explicitly included in the
104 \e excluded argument. It is a good idea to add all column names involved in
105 the dependent variable expression to the <em>excluded</em> string.</DD>
106 
107 <DT>regress_family</DT>
108 <DD>Text value. The regression type, either 'gaussian' ('linear') or 'binomial' ('logistic').</DD>
109 
110 <DT>alpha</DT>
111 <DD>Float8 value. Elastic net control parameter, value in [0, 1].</DD>
112 
113 <DT>lambda_value</DT>
114 <DD>Float8 value. Regularization parameter, positive.</DD>
115 
116 <DT>standardize</DT>
117 <DD>Boolean value. Whether to normalize the data. Setting this to True usually yields better results and faster convergence. Default: True.
118 </DD>
119 
120 <DT>grouping_col</DT>
121 <DD>Text value. <em>Not currently implemented. Any non-NULL value is ignored.</em> An expression list used to group the input dataset into discrete groups, running one regression per group. Similar to the SQL <tt>GROUP BY</tt> clause. When this value is null, no grouping is used and a single result model is generated. Default value: NULL.</DD>
122 
123 <DT>optimizer</DT>
124 <DD>Text value. Name of optimizer, either 'fista' or 'igd'. Default: 'fista'.</DD>
125 
126 <DT>optimizer_params</DT>
127 <DD>Text value. Optimizer parameters, delimited with commas. The parameters differ depending on the value of \e optimizer. See the descriptions below for details. Default: NULL.</DD>
128 
129 <DT>excluded</DT>
130 <DD>Text value. A comma-delimited list of column names excluded from features.
131 For example, <tt>'col1, col2'</tt>. If the \e col_ind_var is an array, \e excluded is a list of the integer array positions to exclude, for example <tt>'1,2'</tt>. If this argument is NULL or an empty string <tt>''</tt>, no columns are excluded.</DD>
132 
133 <DT>max_iter</DT>
134 <DD>Integer value. The maximum number of iterations that are allowed. Default: 10000.</DD>
135 
136 <DT>tolerance</DT>
137 <DD>Float value. The criteria to end iterations. Both the
138 'fista' and 'igd' optimizers compute the average difference between the
139 coefficients of two consecutive iterations, and when the difference is smaller
140 than \e tolerance or the iteration number is larger than \e max_iter, the
141 computation stops. The default is 1e-6.</DD>
142 </DL>
143 
144 @anchor optimizer
145 @par Optimizer Parameters
146 Optimizer parameters are supplied in a string containing a comma-delimited
147 list of name-value pairs. All of these named parameters are optional, and
148 their order does not matter. You must use the format "<param_name> = <value>"
149 to specify the value of a parameter, otherwise the parameter is ignored.
150 
151 When the \ref elastic_net_train() \e optimizer argument value is \b 'fista', the \e optimizer_params argument has the following format:
152 @verbatim
153  'max_stepsize = ..., eta = ..., warmup = ..., warmup_lambdas = ...,
154  warmup_lambda_no = ..., warmup_tolerance = ..., use_active_set = ...,
155  activeset_tolerance = ..., random_stepsize = ...'
156 @endverbatim
157 
158 <DL class="arglist">
159 <DT>max_stepsize</dt>
160 <DD>Initial backtracking step size. At each iteration, the algorithm first tries
161 <em>stepsize = max_stepsize</em>, and if it does not work out, it then tries a
162 smaller step size, <em>stepsize = stepsize/eta</em>, where \e eta must
163 be larger than 1. At first glance, this seems to perform repeated iterations for even one step, but using a larger step size actually greatly increases the computation speed and minimizes the total number of iterations. A careful choice of \e max_stepsize can decrease the computation time by more than 10 times.
164 The default is 4.0.</DD>
165 <DT>eta</DT>
166 <DD>If the current stepsize does not work, \e stepsize / \e eta is tried. Must be greater than 1. The default is 2.</DD>
167 
168 <DT>warmup</DT>
169 <DD>If \e warmup is True, a series of lambda values, which is
170 strictly descent and ends at the lambda value that the user wants to calculate,
171 is used. The larger lambda gives very sparse solution, and the sparse
172 solution again is used as the initial guess for the next lambda's solution,
173 which speeds up the computation for the next lambda. For larger data sets,
174 this can sometimes accelerate the whole computation and may be faster than
175 computation on only one lambda value. The default is False.</DD>
176 
177 <DT>warmup_lambdas</DT>
178 <DD>The lambda value series to use when \e warmup is True. The default is NULL, which means that lambda values will be automatically generated.</DD>
179 
180 <DT>warmup_lambda_no</DT>
181 <DD>How many lambdas are used in warm-up. If \e warmup_lambdas is not NULL, this value is overridden by the number of provided lambda values. The default is 15. </DD>
182 
183 <DT>warmup_tolerance</DT>
184 <DD>The value of tolerance used during warmup. The default is the same as the \e tolerance argument. </DD>
185 
186 <DT>use_active_set</DT>
187 <DD>If \e use_active_set is True, an active-set method is used to
188 speed up the computation. Considerable speedup is obtained by organizing the
189 iterations around the active set of features&mdash;those with nonzero coefficients.
190 After a complete cycle through all the variables, we iterate on only the active
191 set until convergence. If another complete cycle does not change the active set,
192 we are done, otherwise the process is repeated. The default is False. </DD>
193 
194 <DT>activeset_tolerance</DT>
195 <DD>The value of tolerance used during active set
196 calculation. The default is the same as \c tolerance.</DD>
197 
198 <DT>random_stepsize</DT>
199 <DD>Whether to add some randomness to the step size. Sometimes, this can speed
200 up the calculation. The default is False.</DD>
201 </DL>
202 
203 When the \ref elastic_net_train() \e optimizer argument value is \b 'igd', the \e optimizer_params argument has the following format:
204 @verbatim
205 'stepsize = ..., step_decay = ..., threshold = ..., warmup = ...,
206  warmup_lambdas = ..., warmup_lambda_no = ..., warmup_tolerance = ...,
207  parallel = ...'
208 @endverbatim
209 
210 <DL class="arglist">
211 <DT>stepsize</DT>
212 <DD>The default is 0.01.</DD>
213 <DT>step_decay</DT>
214 <DD>The actual stepsize used for the current step is (previous stepsize) / exp(step_decay). The default value is 0, which means that a constant stepsize is used in IGD.</DD>
215 <DT>threshold</DT>
216 <DD>When a coefficient is really small, set this coefficient to be 0. The default is 1e-10.</DD>
217 <DD>Due to the stochastic nature of SGD, we can only obtain very small values for
218 the fitting coefficients. Therefore, \e threshold is needed at the end of
219 the computation to screen out tiny values and hard-set them to
220 zeros. This is accomplished as follows: (1) multiply each coefficient with the
221 standard deviation of the corresponding feature; (2) compute the average of
222 absolute values of re-scaled coefficients; (3) divide each rescaled coefficient
223 with the average, and if the resulting absolute value is smaller than
224 \e threshold, set the original coefficient to zero.</DD>
225 <DT>warmup</DT>
226 <DD>If \e warmup is True, a series of lambda values, which is
227 strictly descent and ends at the lambda value that the user wants to calculate,
228 is used. The larger lambda gives very sparse solution, and the sparse
229 solution again is used as the initial guess for the next lambda's solution,
230 which speeds up the computation for the next lambda. For larger data sets,
231 this can sometimes accelerate the whole computation and may be faster than
232 computation on only one lambda value. The default is False.</DD>
233 <DT>warmup_lambdas</DT>
234 <DD>An array of lambda values to use for warmup. The default is NULL.</DD>
235 <DT>warmup_lambda_no</DT>
236 <DD>The number of lambdas used in
237 warm-up. The default is 15. If \e warmup_lambdas is not NULL, this argument is overridden by the size of the \e warmup_lambdas array.</DD>
238 <DT>warmup_tolerance</DT>
239 <DD>The value of tolerance used during warmup. The default is the same as \c tolerance.</DD>
240 <DT>parallel</DT>
241 <DD>Whether to run the computation on multiple segments. The default is True.</DD>
242 <DD>SGD is a sequential algorithm in nature. When running in a distributed
243 manner, each segment of the data runs its own SGD model and then the models
244 are averaged to get a model for each iteration. This averaging might slow
245 down the convergence speed, although we also acquire the ability to process
246 large datasets on multiple machines. This algorithm, therefore, provides the
247 \e parallel option to allow you to choose whether to do parallel computation.
248 </DD>
249 </DL>
250 
251 @anchor output
252 @par Output Table
253 The output table produced by the elastic_net_train() function has the following columns:
254 
255 <DL class="arglist">
256 <DT>family</DT>
257 <DD>The regression type: 'gaussian' or 'binomial'.</DD>
258 <DT>features</DT>
259 <DD>An array of the features (independent variables) passed into the analysis.</DD>
260 <DT>features_selected</DT>
261 <DD>An array of the features selected by the analysis.</DD>
262 <DT>coef_nonzero</DT>
263 <DD>Fitting coefficients for the selected features.</DD>
264 <DT>coef_all</DT>
265 <DD>Coefficients for all selected and unselected features</DD>
266 <DT>intercept</DT>
267 <DD>Fitting intercept for the model.</DD>
268 <DT>log_likelihood</DT>
269 <DD>The negative value of the first equation above (up to a constant depending on the data set).</DD>
270 <DT>standardize</DT>
271 <DD>Boolean value. Whether the data was normalized (\e standardize argument was True).</DD>
272 <DT>iteration_run</DT>
273 <DD>The number of iterations executed.</DD>
274 </DL>
275 
276 @anchor predict
277 @par Prediction Function
278 The prediction function has the following format:
279 @verbatim
280 madlib.elastic_net_predict(
281  '<regress_family>',
282  coefficients,
283  intercept,
284  ind_var
285  ) FROM tbl_result, tbl_new_source
286 @endverbatim
287 The above function returns a double value for each data point. When predicting with binomial models, the return value is 1 if the predicted result is True, and 0 if the prediction is False.
288 
289 <DL class="arglist">
290 <DT>regress_family</DT>
291 <DD>The type of regression, either 'gaussian' ('linear') or 'binomial' ('logistic').</DD>
292 <DT>coefficients</DT>
293 <DD>Fitting coefficients, as a DOUBLE array.</DD>
294 <DT>intercept</DT>
295 <DD>The intercept for the model.</DD>
296 <DT>ind_var</DT>
297 <DD>Independent variables, as a DOUBLE array.</DD>
298 <DT>tbl_result</DT>
299 <DD>The name of the output table from the training function.</DD>
300 <DT>tbl_new_source</DT>
301 <DD>The name of the table containing new data to predict.</DD>
302 </DL>
303 
304 There are several different formats of the prediction function:
305 
306 -#
307 @code
308 SELECT madlib.elastic_net_gaussian_predict (
309  coefficients, intercept, ind_var
310 ) FROM tbl_result, tbl_new_source LIMIT 10;
311 @endcode
312 
313 -#
314 @code
315 SELECT madlib.elastic_net_binomial_predict (
316  coefficients, intercept, ind_var
317 ) FROM tbl_result, tbl_new_source LIMIT 10;
318 @endcode
319 \n
320 This returns 10 BOOLEAN values.
321 
322 -#
323 @code
324 SELECT madlib.elastic_net_binomial_prob (
325  coefficients, intercept, ind_var
326 ) FROM tbl_result, tbl_new_source LIMIT 10;
327 @endcode
328 \n
329 This returns 10 probability values for True class.
330 
331 Alternatively, you can use another prediction function that stores the prediction
332 result in a table. This is useful if you want to use elastic net together with the
333 general cross validation function.
334 @code
335 SELECT madlib.elastic_net_predict(
336  'tbl_train_result',
337  'tbl_data',
338  'col_id', -- ID associated with each row
339  'tbl_predict' -- Prediction result
340 );
341 @endcode
342 You do not need to specify whether the model is "linear" or "logistic" because this information is already included in the result table.
343 
344 @anchor examples
345 @examp
346 -# Create an input data set.
347 @verbatim
348 sql> DROP TABLE IF EXISTS houses;
349 sql> CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
350  size INT, lot INT);
351 sql> COPY houses FROM STDIN WITH DELIMITER '|';
352  1 | 590 | 2 | 1 | 50000 | 770 | 22100
353  2 | 1050 | 3 | 2 | 85000 | 1410 | 12000
354  3 | 20 | 3 | 1 | 22500 | 1060 | 3500
355  4 | 870 | 2 | 2 | 90000 | 1300 | 17500
356  5 | 1320 | 3 | 2 | 133000 | 1500 | 30000
357  6 | 1350 | 2 | 1 | 90500 | 820 | 25700
358  7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000
359  8 | 680 | 2 | 1 | 142500 | 1170 | 22000
360  9 | 1840 | 3 | 2 | 160000 | 1500 | 19000
361  10 | 3680 | 4 | 2 | 240000 | 2790 | 20000
362  11 | 1660 | 3 | 1 | 87000 | 1030 | 17500
363  12 | 1620 | 3 | 2 | 118600 | 1250 | 20000
364  13 | 3100 | 3 | 2 | 140000 | 1760 | 38000
365  14 | 2070 | 2 | 3 | 148000 | 1550 | 14000
366  15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000
367 \.
368 @endverbatim
369 -# Train the model.
370 @verbatim
371 sql> DROP TABLE IF EXISTS houses_en;
372 sql> SELECT madlib.elastic_net_train(
373  'houses', 'houses_en', 'price', 'array[tax, bath, size]',
374  'gaussian', 0.5, 0.1, true, null, 'fista',
375  '',
376  null, 10000, 1e-6);
377 @endverbatim
378 -# View the resulting model.
379 @verbatim
380 -- Turn on expanded display to make it easier to read results.
381 sql> \x on
382 sql> SELECT * from houses_en;
383 @endverbatim
384 -# Use the prediction function to evaluate residuals.
385 @verbatim
386 sql> SELECT *, price - predict as residual FROM (
387  SELECT houses.*,
388  madlib.elastic_net_predict(
389  'gaussian', m.coef_nonzero, m.intercept, array[tax,bath,size]
390  ) as predict
391  FROM houses, houses_en m) s;
392 @endverbatim
393 
394 @anchor seealso
395 @sa File elastic_net.sql_in documenting the SQL functions.
396 @sa grp_validation
397 
398 @anchor background
399 @par Technical Background
400 
401 Elastic net regularization seeks to find a weight vector that, for any given training example set, minimizes:
402 \f[\min_{w \in R^N} L(w) + \lambda \left(\frac{(1-\alpha)}{2} \|w\|_2^2 + \alpha \|w\|_1 \right)\f]
403 where \f$L\f$ is the metric function that the user wants to minimize. Here \f$ \alpha \in [0,1] \f$
404 and \f$ \lambda \geq 0 \f$. If \f$\alpha = 0\f$, we have the ridge regularization (known also as Tikhonov regularization), and if \f$\alpha = 1\f$, we have the LASSO regularization.
405 
406 For the Gaussian response family (or linear model), we have
407 \f[L(\vec{w}) = \frac{1}{2}\left[\frac{1}{M} \sum_{m=1}^M (w^{t} x_m + w_{0} - y_m)^2 \right]
408 \f]
409 
410 For the Binomial response family (or logistic model), we have
411 \f[
412 L(\vec{w}) = \sum_{m=1}^M\left[y_m \log\left(1 + e^{-(w_0 +
413  \vec{w}\cdot\vec{x}_m)}\right) + (1-y_m) \log\left(1 + e^{w_0 +
414  \vec{w}\cdot\vec{x}_m}\right)\right]\ ,
415 \f]
416 where \f$y_m \in {0,1}\f$.
417 
418 To get better convergence, one can rescale the value of each element of x
419 \f[ x' \leftarrow \frac{x - \bar{x}}{\sigma_x} \f]
420 and for Gaussian case we also let
421 \f[y' \leftarrow y - \bar{y} \f]
422 and then minimize with the regularization terms.
423 At the end of the calculation, the original scales will be restored and an
424 intercept term will be obtained at the same time as a by-product.
425 
426 Note that fitting after scaling is not equivalent to directly fitting.
427 
428 @anchor literature
429 @literature
430 
431 [1] Elastic net regularization. http://en.wikipedia.org/wiki/Elastic_net_regularization
432 
433 [2] Beck, A. and M. Teboulle (2009), A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM J. on Imaging Sciences 2(1), 183-202.
434 
435 [3] Shai Shalev-Shwartz and Ambuj Tewari, Stochastic Methods for l1 Regularized Loss Minimization. Proceedings of the 26th International Conference on Machine Learning, Montreal, Canada, 2009.
436 
437 */
438 
439 ------------------------------------------------------------------------
440 
441 /**
442  * @brief Interface for elastic net
443  *
444  * @param tbl_source Name of data source table
445  * @param tbl_result Name of the table to store the results
446  * @param col_ind_var Name of independent variable column, independent variable is an array
447  * @param col_dep_var Name of dependent variable column
448  * @param regress_family Response type (gaussian or binomial)
449  * @param alpha The elastic net parameter, [0, 1]
450  * @param lambda_value The regularization parameter
451  * @param standardize Whether to normalize the variables (default True)
452  * @param grouping_col List of columns on which to apply grouping
453  * (currently only a placeholder)
454  * @param optimizer The optimization algorithm, 'fista' or 'igd'. Default is 'fista'
455  * @param optimizer_params Parameters of the above optimizer,
456  * the format is 'arg = value, ...'. Default is NULL
457  * @param exclude Which columns to exclude? Default is NULL
458  * (applicable only if col_ind_var is set as * or a column of array,
459  * column names as 'col1, col2, ...' if col_ind_var is '*';
460  * element indices as '1,2,3, ...' if col_ind_var is a column of array)
461  * @param max_iter Maximum number of iterations to run the algorithm
462  * (default value of 10000)
463  * @param tolerance Iteration stopping criteria. Default is 1e-6
464  */
465 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
466  tbl_source TEXT,
467  tbl_result TEXT,
468  col_dep_var TEXT,
469  col_ind_var TEXT,
470  regress_family TEXT,
471  alpha DOUBLE PRECISION,
472  lambda_value DOUBLE PRECISION,
473  standardize BOOLEAN,
474  grouping_col TEXT,
475  optimizer TEXT,
476  optimizer_params TEXT,
477  excluded TEXT,
478  max_iter INTEGER,
479  tolerance DOUBLE PRECISION
480 ) RETURNS VOID AS $$
481 PythonFunction(elastic_net, elastic_net, elastic_net_train)
482 $$ LANGUAGE plpythonu;
483 
484 ------------------------------------------------------------------------
485 -- Overloaded functions
486 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
487  tbl_source TEXT,
488  tbl_result TEXT,
489  col_ind_var TEXT,
490  col_dep_var TEXT,
491  regress_family TEXT,
492  alpha DOUBLE PRECISION,
493  lambda_value DOUBLE PRECISION,
494  standardization BOOLEAN,
495  grouping_columns TEXT,
496  optimizer TEXT,
497  optimizer_params TEXT,
498  excluded TEXT,
499  max_iter INTEGER
500 ) RETURNS VOID AS $$
501 BEGIN
502  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
503  $9, $10, $11, $12, $13, 1e-6);
504 END;
505 $$ LANGUAGE plpgsql VOLATILE;
506 
507 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
508  tbl_source TEXT,
509  tbl_result TEXT,
510  col_ind_var TEXT,
511  col_dep_var TEXT,
512  regress_family TEXT,
513  alpha DOUBLE PRECISION,
514  lambda_value DOUBLE PRECISION,
515  standardization BOOLEAN,
516  grouping_columns TEXT,
517  optimizer TEXT,
518  optimizer_params TEXT,
519  excluded TEXT
520 ) RETURNS VOID AS $$
521 BEGIN
522  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
523  $9, $10, $11, $12, 10000);
524 END;
525 $$ LANGUAGE plpgsql VOLATILE;
526 
527 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
528  tbl_source TEXT,
529  tbl_result TEXT,
530  col_ind_var TEXT,
531  col_dep_var TEXT,
532  regress_family TEXT,
533  alpha DOUBLE PRECISION,
534  lambda_value DOUBLE PRECISION,
535  standardization BOOLEAN,
536  grouping_columns TEXT,
537  optimizer TEXT,
538  optimizer_params TEXT
539 ) RETURNS VOID AS $$
540 BEGIN
541  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
542  $9, $10, $11, NULL);
543 END;
544 $$ LANGUAGE plpgsql VOLATILE;
545 
546 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
547  tbl_source TEXT,
548  tbl_result TEXT,
549  col_ind_var TEXT,
550  col_dep_var TEXT,
551  regress_family TEXT,
552  alpha DOUBLE PRECISION,
553  lambda_value DOUBLE PRECISION,
554  standardization BOOLEAN,
555  grouping_columns TEXT,
556  optimizer TEXT
557 ) RETURNS VOID AS $$
558 BEGIN
559  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
560  $9, $10, NULL::TEXT);
561 END;
562 $$ LANGUAGE plpgsql VOLATILE;
563 
564 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
565  tbl_source TEXT,
566  tbl_result TEXT,
567  col_ind_var TEXT,
568  col_dep_var TEXT,
569  regress_family TEXT,
570  alpha DOUBLE PRECISION,
571  lambda_value DOUBLE PRECISION,
572  standardization BOOLEAN,
573  grouping_columns TEXT
574 ) RETURNS VOID AS $$
575 BEGIN
576  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
577  $9, 'FISTA');
578 END;
579 $$ LANGUAGE plpgsql VOLATILE;
580 
581 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
582  tbl_source TEXT,
583  tbl_result TEXT,
584  col_ind_var TEXT,
585  col_dep_var TEXT,
586  regress_family TEXT,
587  alpha DOUBLE PRECISION,
588  lambda_value DOUBLE PRECISION,
589  standardization BOOLEAN
590 ) RETURNS VOID AS $$
591 BEGIN
592  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8,
593  NULL);
594 END;
595 $$ LANGUAGE plpgsql VOLATILE;
596 
597 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
598  tbl_source TEXT,
599  tbl_result TEXT,
600  col_ind_var TEXT,
601  col_dep_var TEXT,
602  regress_family TEXT,
603  alpha DOUBLE PRECISION,
604  lambda_value DOUBLE PRECISION
605 ) RETURNS VOID AS $$
606 BEGIN
607  PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, True);
608 END;
609 $$ LANGUAGE plpgsql VOLATILE;
610 
611 ------------------------------------------------------------------------
612 
613 /**
614  * @brief Help function, to print out the supported families
615  */
616 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ()
617 RETURNS TEXT AS $$
618 PythonFunction(elastic_net, elastic_net, elastic_net_help)
619 $$ LANGUAGE plpythonu;
620 
621 ------------------------------------------------------------------------
622 
623 /**
624  * @brief Help function, to print out the supported optimizer for a family
625  * or print out the parameter list for an optimizer
626  *
627  * @param family_or_optimizer Response type, 'gaussian' or 'binomial', or
628  * optimizer type
629  */
630 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train (
631  family_or_optimizer TEXT
632 ) RETURNS TEXT AS $$
633 PythonFunction(elastic_net, elastic_net, elastic_net_help)
634 $$ LANGUAGE plpythonu;
635 
636 ------------------------------------------------------------------------
637 ------------------------------------------------------------------------
638 ------------------------------------------------------------------------
639 
640 /**
641  * @brief Prediction and put the result in a table
642  * can be used together with General-CV
643  * @param tbl_model The result from elastic_net_train
644  * @param tbl_new_source Data table
645  * @param col_id Unique ID associated with each row
646  * @param tbl_predict Prediction result
647  */
648 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_predict (
649  tbl_model TEXT,
650  tbl_new_source TEXT,
651  col_id TEXT,
652  tbl_predict TEXT
653 ) RETURNS VOID AS $$
654 PythonFunction(elastic_net, elastic_net, elastic_net_predict_all)
655 $$ LANGUAGE plpythonu;
656 
657 ------------------------------------------------------------------------
658 
659 /**
660  * @brief Prediction use learned coefficients for a given example
661  *
662  * @param regress_family model family
663  * @param coefficients The fitting coefficients
664  * @param intercept The fitting intercept
665  * @param ind_var Features (independent variables)
666  *
667  * returns a double value. When regress_family is 'binomial' or 'logistic',
668  * this function returns 1 for True and 0 for False
669  */
670 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_predict (
671  regress_family TEXT,
672  coefficients DOUBLE PRECISION[],
673  intercept DOUBLE PRECISION,
674  ind_var DOUBLE PRECISION[]
675 ) RETURNS DOUBLE PRECISION AS $$
676 DECLARE
677  family_name TEXT;
678  binomial_result BOOLEAN;
679 BEGIN
680  family_name := lower(regress_family);
681 
682  IF family_name = 'gaussian' OR family_name = 'linear' THEN
683  RETURN MADLIB_SCHEMA.elastic_net_gaussian_predict(coefficients, intercept, ind_var);
684  END IF;
685 
686  IF family_name = 'binomial' OR family_name = 'logistic' THEN
687  binomial_result := MADLIB_SCHEMA.elastic_net_binomial_predict(coefficients, intercept, ind_var);
688  IF binomial_result THEN
689  return 1;
690  ELSE
691  return 0;
692  END IF;
693  END IF;
694 
695  RAISE EXCEPTION 'This regression family is not supported!';
696 END;
697 $$ LANGUAGE plpgsql IMMUTABLE STRICT;
698 
699 ------------------------------------------------------------------------
700 
701  /**
702  * @brief Prediction for linear models use learned coefficients for a given example
703  *
704  * @param coefficients Linear fitting coefficients
705  * @param intercept Linear fitting intercept
706  * @param ind_var Features (independent variables)
707  *
708  * returns a double value
709  */
710 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_gaussian_predict (
711  coefficients DOUBLE PRECISION[],
712  intercept DOUBLE PRECISION,
713  ind_var DOUBLE PRECISION[]
714 ) RETURNS DOUBLE PRECISION AS
715 'MODULE_PATHNAME', '__elastic_net_gaussian_predict'
716 LANGUAGE C IMMUTABLE STRICT;
717 
718 ------------------------------------------------------------------------
719 /**
720  * @brief Prediction for logistic models use learned coefficients for a given example
721  *
722  * @param coefficients Logistic fitting coefficients
723  * @param intercept Logistic fitting intercept
724  * @param ind_var Features (independent variables)
725  *
726  * returns a boolean value
727  */
728 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_binomial_predict (
729  coefficients DOUBLE PRECISION[],
730  intercept DOUBLE PRECISION,
731  ind_var DOUBLE PRECISION[]
732 ) RETURNS BOOLEAN AS
733 'MODULE_PATHNAME', '__elastic_net_binomial_predict'
734 LANGUAGE C IMMUTABLE STRICT;
735 
736 ------------------------------------------------------------------------
737 /**
738  * @brief Compute the probability of belonging to the True class for a given observation
739  *
740  * @param coefficients Logistic fitting coefficients
741  * @param intercept Logistic fitting intercept
742  * @param ind_var Features (independent variables)
743  *
744  * returns a double value, which is the probability of this data point being True class
745  */
746 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_binomial_prob (
747  coefficients DOUBLE PRECISION[],
748  intercept DOUBLE PRECISION,
749  ind_var DOUBLE PRECISION[]
750 ) RETURNS DOUBLE PRECISION AS
751 'MODULE_PATHNAME', '__elastic_net_binomial_prob'
752 LANGUAGE C IMMUTABLE STRICT;
753 
754 ------------------------------------------------------------------------
755 /* Compute the log-likelihood for one data point */
756 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__elastic_net_binomial_loglikelihood (
757  coefficients DOUBLE PRECISION[],
758  intercept DOUBLE PRECISION,
759  dep_var BOOLEAN,
760  ind_var DOUBLE PRECISION[]
761 ) RETURNS DOUBLE PRECISION AS
762 'MODULE_PATHNAME', '__elastic_net_binomial_loglikelihood'
763 LANGUAGE C IMMUTABLE STRICT;
764 
765 ------------------------------------------------------------------------
766 -- Compute the solution for just one step ------------------------------
767 ------------------------------------------------------------------------
769 CREATE TYPE MADLIB_SCHEMA.__elastic_net_result AS (
770  intercept DOUBLE PRECISION,
771  coefficients DOUBLE PRECISION[],
772  lambda_value DOUBLE PRECISION
773 );
774 
775 ------------------------------------------------------------------------
776 
777 /* IGD */
778 
/*
 * IGD (incremental gradient descent) transition function for linear
 * (gaussian) models: folds one data row into the running aggregate
 * state. The state layout is defined by the C implementation.
 *
 * Deliberately not STRICT -- presumably so the C code can see NULL
 * arguments (e.g. an uninitialized state) and handle them itself;
 * confirm against gaussian_igd_transition in the C module.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_transition (
    state DOUBLE PRECISION[],
    ind_var DOUBLE PRECISION[],
    dep_var DOUBLE PRECISION,
    pre_state DOUBLE PRECISION[],
    lambda DOUBLE PRECISION,
    alpha DOUBLE PRECISION,
    dimension INTEGER,
    stepsize DOUBLE PRECISION,
    total_rows INTEGER,
    xmean DOUBLE PRECISION[],
    ymean DOUBLE PRECISION,
    step_decay DOUBLE PRECISION
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'gaussian_igd_transition'
LANGUAGE C IMMUTABLE;
795 
796 --
797 
/*
 * Merge two partial IGD aggregate states; used as the Greenplum
 * prefunc of __gaussian_igd_step for parallel (per-segment)
 * aggregation.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_merge (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'gaussian_igd_merge'
LANGUAGE C IMMUTABLE STRICT;
804 
805 --
806 
/*
 * Final function of the gaussian IGD aggregate: post-processes the
 * accumulated state into the per-iteration state array.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_final (
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'gaussian_igd_final'
LANGUAGE C IMMUTABLE STRICT;
812 
/*
 * Perform one iteration step of IGD for linear (gaussian) models.
 * InitCond is the 16-element zero-initialized state expected by the
 * C transition function.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_igd_step(
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ DOUBLE PRECISION,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* stepsize */ DOUBLE PRECISION,
    /* total_rows */ INTEGER,
    /* xmeans */ DOUBLE PRECISION[],
    /* ymean */ DOUBLE PRECISION,
    /* step_decay */ DOUBLE PRECISION
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__gaussian_igd_transition,
    -- merge function only applies on Greenplum, where partial states
    -- are combined across segments
    m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__gaussian_igd_merge,')
    FinalFunc = MADLIB_SCHEMA.__gaussian_igd_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
835 
/*
 * Variant of __gaussian_igd_step with the Greenplum merge function
 * deliberately disabled (prefunc line commented out), so the aggregate
 * state is never combined across segments -- presumably to force
 * single-segment, sequential IGD execution; confirm against the
 * calling Python driver.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_igd_step_single_seg (
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ DOUBLE PRECISION,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* stepsize */ DOUBLE PRECISION,
    /* total_rows */ INTEGER,
    /* xmeans */ DOUBLE PRECISION[],
    /* ymean */ DOUBLE PRECISION,
    /* step_decay */ DOUBLE PRECISION
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__gaussian_igd_transition,
    -- m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__gaussian_igd_merge,')
    FinalFunc = MADLIB_SCHEMA.__gaussian_igd_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
855 
856 --
857 
/*
 * Compute a scalar difference between two successive IGD states; used
 * by the driver to test convergence between iterations. The exact
 * metric is defined in the C implementation.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_state_diff (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION AS
'MODULE_PATHNAME', '__gaussian_igd_state_diff'
LANGUAGE C IMMUTABLE STRICT;
864 
865 --
866 
/*
 * Extract the final fit (intercept, coefficients, lambda) from an IGD
 * state. feature_sq, threshold and tolerance are passed through to the
 * C implementation -- presumably to zero out near-zero coefficients;
 * confirm against __gaussian_igd_result in the C module.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_result (
    in_state DOUBLE PRECISION[],
    feature_sq DOUBLE PRECISION[],
    threshold DOUBLE PRECISION,
    tolerance DOUBLE PRECISION
) RETURNS MADLIB_SCHEMA.__elastic_net_result AS
'MODULE_PATHNAME', '__gaussian_igd_result'
LANGUAGE C IMMUTABLE STRICT;
875 
876 ------------------------------------------------------------------------
877 
878 /* FISTA */
879 
/*
 * FISTA transition function for linear (gaussian) models: folds one
 * data row into the running aggregate state. The state layout is
 * defined by the C implementation.
 *
 * Deliberately not STRICT -- presumably so the C code can see NULL
 * arguments (e.g. an uninitialized state) and handle them itself;
 * confirm against gaussian_fista_transition in the C module.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_transition (
    state DOUBLE PRECISION[],
    ind_var DOUBLE PRECISION[],
    dep_var DOUBLE PRECISION,
    pre_state DOUBLE PRECISION[],
    lambda DOUBLE PRECISION,
    alpha DOUBLE PRECISION,
    dimension INTEGER,
    total_rows INTEGER,
    max_stepsize DOUBLE PRECISION,
    eta DOUBLE PRECISION,
    use_active_set INTEGER,
    is_active INTEGER,
    random_stepsize INTEGER
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'gaussian_fista_transition'
LANGUAGE C IMMUTABLE;
897 
898 --
899 
/*
 * Merge two partial FISTA aggregate states; used as the Greenplum
 * prefunc of __gaussian_fista_step for parallel (per-segment)
 * aggregation.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_merge (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'gaussian_fista_merge'
LANGUAGE C IMMUTABLE STRICT;
906 
907 --
908 
/*
 * Final function of the gaussian FISTA aggregate: post-processes the
 * accumulated state into the per-iteration state array.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_final (
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'gaussian_fista_final'
LANGUAGE C IMMUTABLE STRICT;
914 
/*
 * Perform one iteration step of FISTA for linear (gaussian) models.
 * InitCond is the 25-element zero-initialized state expected by the
 * C transition function.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_fista_step(
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ DOUBLE PRECISION,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* total_rows */ INTEGER,
    /* max_stepsize */ DOUBLE PRECISION,
    /* eta */ DOUBLE PRECISION,
    /* use_active_set */ INTEGER,
    /* is_active */ INTEGER,
    /* random_stepsize */ INTEGER
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__gaussian_fista_transition,
    -- merge function only applies on Greenplum, where partial states
    -- are combined across segments
    m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__gaussian_fista_merge,')
    FinalFunc = MADLIB_SCHEMA.__gaussian_fista_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
938 
939 --
940 
/*
 * Compute a scalar difference between two successive FISTA states;
 * used by the driver to test convergence between iterations. The exact
 * metric is defined in the C implementation.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_state_diff (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION AS
'MODULE_PATHNAME', '__gaussian_fista_state_diff'
LANGUAGE C IMMUTABLE STRICT;
947 
948 --
949 
/*
 * Extract the final fit (intercept, coefficients, lambda) from a FISTA
 * state.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_result (
    in_state DOUBLE PRECISION[]
) RETURNS MADLIB_SCHEMA.__elastic_net_result AS
'MODULE_PATHNAME', '__gaussian_fista_result'
LANGUAGE C IMMUTABLE STRICT;
955 
956 ------------------------------------------------------------------------
957 ------------------------------------------------------------------------
958 ------------------------------------------------------------------------
959 
960 /* Binomial IGD */
961 
/*
 * IGD transition function for binomial (logistic) models: folds one
 * data row into the running aggregate state. Mirrors
 * __gaussian_igd_transition except that dep_var is BOOLEAN.
 *
 * Deliberately not STRICT -- presumably so the C code can see NULL
 * arguments (e.g. an uninitialized state) and handle them itself;
 * confirm against binomial_igd_transition in the C module.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_transition (
    state DOUBLE PRECISION[],
    ind_var DOUBLE PRECISION[],
    dep_var BOOLEAN,
    pre_state DOUBLE PRECISION[],
    lambda DOUBLE PRECISION,
    alpha DOUBLE PRECISION,
    dimension INTEGER,
    stepsize DOUBLE PRECISION,
    total_rows INTEGER,
    xmean DOUBLE PRECISION[],
    ymean DOUBLE PRECISION,
    step_decay DOUBLE PRECISION
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'binomial_igd_transition'
LANGUAGE C IMMUTABLE;
978 
979 --
980 
/*
 * Merge two partial binomial IGD aggregate states; used as the
 * Greenplum prefunc of __binomial_igd_step for parallel (per-segment)
 * aggregation.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_merge (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'binomial_igd_merge'
LANGUAGE C IMMUTABLE STRICT;
987 
988 --
989 
/*
 * Final function of the binomial IGD aggregate: post-processes the
 * accumulated state into the per-iteration state array.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_final (
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'binomial_igd_final'
LANGUAGE C IMMUTABLE STRICT;
995 
/*
 * Perform one iteration step of IGD for binomial (logistic) models.
 * InitCond is the 16-element zero-initialized state expected by the
 * C transition function.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__binomial_igd_step(
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ BOOLEAN,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* stepsize */ DOUBLE PRECISION,
    /* total_rows */ INTEGER,
    /* xmeans */ DOUBLE PRECISION[],
    /* ymean */ DOUBLE PRECISION,
    /* step_decay */ DOUBLE PRECISION
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__binomial_igd_transition,
    -- merge function only applies on Greenplum, where partial states
    -- are combined across segments
    m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__binomial_igd_merge,')
    FinalFunc = MADLIB_SCHEMA.__binomial_igd_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
1018 
/*
 * Variant of __binomial_igd_step with the Greenplum merge function
 * deliberately disabled (prefunc line commented out), so the aggregate
 * state is never combined across segments -- presumably to force
 * single-segment, sequential IGD execution; confirm against the
 * calling Python driver.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__binomial_igd_step_single_seg (
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ BOOLEAN,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* stepsize */ DOUBLE PRECISION,
    /* total_rows */ INTEGER,
    /* xmeans */ DOUBLE PRECISION[],
    /* ymean */ DOUBLE PRECISION,
    /* step_decay */ DOUBLE PRECISION
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__binomial_igd_transition,
    -- m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__binomial_igd_merge,')
    FinalFunc = MADLIB_SCHEMA.__binomial_igd_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
1038 
1039 --
1040 
/*
 * Compute a scalar difference between two successive binomial IGD
 * states; used by the driver to test convergence between iterations.
 * The exact metric is defined in the C implementation.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_state_diff (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION AS
'MODULE_PATHNAME', '__binomial_igd_state_diff'
LANGUAGE C IMMUTABLE STRICT;
1047 
1048 --
1049 
/*
 * Extract the final fit (intercept, coefficients, lambda) from a
 * binomial IGD state. feature_sq, threshold and tolerance are passed
 * through to the C implementation -- presumably to zero out near-zero
 * coefficients; confirm against __binomial_igd_result in the C module.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_result (
    in_state DOUBLE PRECISION[],
    feature_sq DOUBLE PRECISION[],
    threshold DOUBLE PRECISION,
    tolerance DOUBLE PRECISION
) RETURNS MADLIB_SCHEMA.__elastic_net_result AS
'MODULE_PATHNAME', '__binomial_igd_result'
LANGUAGE C IMMUTABLE STRICT;
1058 
1059 ------------------------------------------------------------------------
1060 
1061 /* Binomial FISTA */
1062 
/*
 * FISTA transition function for binomial (logistic) models: folds one
 * data row into the running aggregate state. Mirrors
 * __gaussian_fista_transition except that dep_var is BOOLEAN.
 *
 * Deliberately not STRICT -- presumably so the C code can see NULL
 * arguments (e.g. an uninitialized state) and handle them itself;
 * confirm against binomial_fista_transition in the C module.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_transition (
    state DOUBLE PRECISION[],
    ind_var DOUBLE PRECISION[],
    dep_var BOOLEAN,
    pre_state DOUBLE PRECISION[],
    lambda DOUBLE PRECISION,
    alpha DOUBLE PRECISION,
    dimension INTEGER,
    total_rows INTEGER,
    max_stepsize DOUBLE PRECISION,
    eta DOUBLE PRECISION,
    use_active_set INTEGER,
    is_active INTEGER,
    random_stepsize INTEGER
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'binomial_fista_transition'
LANGUAGE C IMMUTABLE;
1080 
1081 --
1082 
/*
 * Merge two partial binomial FISTA aggregate states; used as the
 * Greenplum prefunc of __binomial_fista_step for parallel
 * (per-segment) aggregation.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_merge (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'binomial_fista_merge'
LANGUAGE C IMMUTABLE STRICT;
1089 
1090 --
1091 
/*
 * Final function of the binomial FISTA aggregate: post-processes the
 * accumulated state into the per-iteration state array.
 *
 * OR REPLACE added for consistency with the other functions in this
 * module and so that re-running the install script is idempotent.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_final (
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'binomial_fista_final'
LANGUAGE C IMMUTABLE STRICT;
1097 
/*
 * Perform one iteration step of FISTA for binomial (logistic) models.
 * InitCond is the 25-element zero-initialized state expected by the
 * C transition function.
 */
CREATE AGGREGATE MADLIB_SCHEMA.__binomial_fista_step(
    /* ind_var */ DOUBLE PRECISION[],
    /* dep_var */ BOOLEAN,
    /* pre_state */ DOUBLE PRECISION[],
    /* lambda */ DOUBLE PRECISION,
    /* alpha */ DOUBLE PRECISION,
    /* dimension */ INTEGER,
    /* total_rows */ INTEGER,
    /* max_stepsize */ DOUBLE PRECISION,
    /* eta */ DOUBLE PRECISION,
    /* use_active_set */ INTEGER,
    /* is_active */ INTEGER,
    /* random_stepsize */ INTEGER
) (
    SType = DOUBLE PRECISION[],
    SFunc = MADLIB_SCHEMA.__binomial_fista_transition,
    -- merge function only applies on Greenplum, where partial states
    -- are combined across segments
    m4_ifdef(`GREENPLUM', `prefunc = MADLIB_SCHEMA.__binomial_fista_merge,')
    FinalFunc = MADLIB_SCHEMA.__binomial_fista_final,
    InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
1121 
1122 --
1123 
/*
 * Compute a scalar difference between two successive binomial FISTA
 * states; used by the driver to test convergence between iterations.
 * The exact metric is defined in the C implementation.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_state_diff (
    state1 DOUBLE PRECISION[],
    state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION AS
'MODULE_PATHNAME', '__binomial_fista_state_diff'
LANGUAGE C IMMUTABLE STRICT;
1130 
1131 --
1132 
/*
 * Extract the final fit (intercept, coefficients, lambda) from a
 * binomial FISTA state.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_result (
    in_state DOUBLE PRECISION[]
) RETURNS MADLIB_SCHEMA.__elastic_net_result AS
'MODULE_PATHNAME', '__binomial_fista_result'
LANGUAGE C IMMUTABLE STRICT;
1138 
1139