User Documentation
linalg.sql_in
Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------- *//**
00002  *
00003  * @file linalg.sql_in
00004  *
00005  * @brief SQL functions for linear algebra
00006  *
00007  * @sa For an overview of linear-algebra functions, see the module
00008  *     description \ref grp_linalg.
00009  *
00010  *//* ----------------------------------------------------------------------- */
00011 
00012 m4_include(`SQLCommon.m4')
00013 
00014 /**
00015 @internal
00016 @addtogroup grp_linalg
00017 
00018 @about
00019 
00020 Linear-algebra functions.
00021 
00022 @sa File linalg.sql_in documenting the SQL functions.
00023 */
00024 
00025 /**
00026  * @brief 1-norm of a vector
00027  *
00028  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00029  * @return \f$ \| x \|_1 = \sum_{i=1}^n |x_i| \f$
00030  */
00031 CREATE FUNCTION MADLIB_SCHEMA.norm1(
00032     x DOUBLE PRECISION[]          -- vector whose 1-norm is requested
00033 ) RETURNS DOUBLE PRECISION
00034 LANGUAGE C
00035 STRICT                            -- a NULL argument yields NULL
00036 IMMUTABLE
00037 AS 'MODULE_PATHNAME';
00038 
00039 /**
00040  * @brief 2-norm of a vector
00041  *
00042  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00043  * @return \f$ \| x \|_2 = \sqrt{\sum_{i=1}^n x_i^2} \f$
00044  */
00045 CREATE FUNCTION MADLIB_SCHEMA.norm2(
00046     x DOUBLE PRECISION[]          -- vector whose 2-norm is requested
00047 ) RETURNS DOUBLE PRECISION
00048 LANGUAGE C
00049 STRICT                            -- a NULL argument yields NULL
00050 IMMUTABLE
00051 AS 'MODULE_PATHNAME';
00052 
00053 /**
00054  * @brief 1-norm of the difference between two vectors
00055  *
00056  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00057  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
00058  * @return \f$ \| x - y \|_1 = \sum_{i=1}^n |x_i - y_i| \f$
00059  */
00060 CREATE FUNCTION MADLIB_SCHEMA.dist_norm1(
00061     x DOUBLE PRECISION[],         -- first vector
00062     y DOUBLE PRECISION[]          -- second vector
00063 ) RETURNS DOUBLE PRECISION
00064 LANGUAGE C
00065 STRICT                            -- NULL in either argument yields NULL
00066 IMMUTABLE
00067 AS 'MODULE_PATHNAME';
00068 
00069 /**
00070  * @brief 2-norm of the difference between two vectors
00071  *
00072  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00073  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
00074  * @return \f$ \| x - y \|_2 = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \f$
00075  */
00076 CREATE FUNCTION MADLIB_SCHEMA.dist_norm2(
00077     x DOUBLE PRECISION[],         -- first vector
00078     y DOUBLE PRECISION[]          -- second vector
00079 ) RETURNS DOUBLE PRECISION
00080 LANGUAGE C
00081 STRICT                            -- NULL in either argument yields NULL
00082 IMMUTABLE
00083 AS 'MODULE_PATHNAME';
00084 
00085 /**
00086  * @brief Squared 2-norm of the difference between two vectors
00087  *
00088  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00089  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
00090  * @return \f$ \| x - y \|_2^2 = \sum_{i=1}^n (x_i - y_i)^2 \f$
00091  */
00092 CREATE FUNCTION MADLIB_SCHEMA.squared_dist_norm2(
00093     x DOUBLE PRECISION[],         -- first vector
00094     y DOUBLE PRECISION[]          -- second vector
00095 ) RETURNS DOUBLE PRECISION
00096 LANGUAGE C
00097 STRICT                            -- NULL in either argument yields NULL
00098 IMMUTABLE
00099 AS 'MODULE_PATHNAME';
00100 
00101 /**
00102  * @brief Angle between two vectors
00103  *
00104  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00105  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
00106  * @return \f$ \arccos\left(\frac{\langle \vec x, \vec y \rangle}
00107  *                               {\| \vec x \| \cdot \| \vec y \|}\right) \f$
00108  */
00109 CREATE FUNCTION MADLIB_SCHEMA.dist_angle(
00110     x DOUBLE PRECISION[],         -- first vector
00111     y DOUBLE PRECISION[]          -- second vector
00112 ) RETURNS DOUBLE PRECISION
00113 LANGUAGE C
00114 STRICT                            -- NULL in either argument yields NULL
00115 IMMUTABLE
00116 AS 'MODULE_PATHNAME';
00117 
00118 /**
00119  * @brief Tanimoto distance between two vectors
00120  *
00121  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
00122  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
00123  * @return \f$ 1 - \frac{\langle \vec x, \vec y \rangle}
00124  *                            {\| \vec x \|^2 + \| \vec y \|^2
00125  *                                - \langle \vec x, \vec y \rangle} \f$
00126  */
00127 CREATE FUNCTION MADLIB_SCHEMA.dist_tanimoto(
00128     x DOUBLE PRECISION[],         -- first vector
00129     y DOUBLE PRECISION[]          -- second vector
00130 ) RETURNS DOUBLE PRECISION
00131 LANGUAGE C
00132 STRICT                            -- NULL in either argument yields NULL
00133 IMMUTABLE
00134 AS 'MODULE_PATHNAME';
00135 
00136 /*
00137  * @brief closest_column return type
00138  */
00139 CREATE TYPE MADLIB_SCHEMA.closest_column_result AS (
00140     column_id INTEGER,            -- 0-based index of the closest column
00141     distance DOUBLE PRECISION     -- distance between that column and x
00142 );
00143 
00144 /**
00145  * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the column
00146  *     of \f$ M \f$ that is closest to \f$ \vec x \f$
00147  *
00148  * @param M Matrix \f$ M = (\vec{m_0} \dots \vec{m_{l-1}})
00149  *     \in \mathbb{R}^{k \times l} \f$
00150  * @param x Vector \f$ \vec x \in \mathbb R^k \f$
00151  * @param dist The metric \f$ \operatorname{dist} \f$. This needs to be a
00152  *     function with signature
00153  *     <tt>DOUBLE PRECISION[] x DOUBLE PRECISION[] -> DOUBLE PRECISION</tt>.
00154  *
00155  * @returns A composite value:
00156  *  - <tt>column_id INTEGER</tt> - The 0-based index of the column of \f$ M \f$
00157  *     that is closest to \f$ x \f$. In case of ties, the first such index is
00158  *     returned. That is, \c column_id is the minimum element in the set
00159  *     \f$ \arg\min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
00160  *  - <tt>distance DOUBLE PRECISION</tt> - The minimum distance between any
00161  *     column of \f$ M \f$ and \f$ x \f$. That is,
00162  *     \f$ \min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
00163  */
00164 CREATE FUNCTION MADLIB_SCHEMA.closest_column(
00165     M DOUBLE PRECISION[][],
00166     x DOUBLE PRECISION[],
00167     dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */
00168 ) RETURNS MADLIB_SCHEMA.closest_column_result
00169 AS 'MODULE_PATHNAME'
00170 LANGUAGE C
00171 STRICT                            -- NULL in any argument yields NULL
00172 IMMUTABLE;
00173 
00174 CREATE FUNCTION MADLIB_SCHEMA.closest_column(
00175     M DOUBLE PRECISION[][],
00176     x DOUBLE PRECISION[]
00177 ) RETURNS MADLIB_SCHEMA.closest_column_result
00178 LANGUAGE sql                      -- two-argument overload; forwards to the three-argument form
00179 STRICT
00180 IMMUTABLE
00181 AS $$
00182     SELECT MADLIB_SCHEMA.closest_column($1, $2,
00183         'MADLIB_SCHEMA.squared_dist_norm2')
00184 $$;
00185 
00186 /*
00187  * @brief closest_columns return type
00188  */
00189 CREATE TYPE MADLIB_SCHEMA.closest_columns_result AS (
00190     column_ids INTEGER[],           -- 0-based indices of the closest columns
00191     distances DOUBLE PRECISION[]    -- distances[i] is the distance for column_ids[i]
00192 );
00193 
00194 /**
00195  * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the columns
00196  *     of \f$ M \f$ that are closest to \f$ \vec x \f$
00197  *
00198  * This function does essentially the same as \ref closest_column(), except that
00199  * it allows specifying the number of closest columns to return. The return
00200  * value is a composite value:
00201  *  - <tt>column_ids INTEGER[]</tt> - The 0-based indices of the \c num columns
00202  *     of \f$ M \f$ that are closest to \f$ x \f$. In case of ties, the first
00203  *     such indices are returned.
00204  *  - <tt>distances DOUBLE PRECISION[]</tt> - The distances between the columns
00205  *     of \f$ M \f$ with indices in \c column_ids and \f$ x \f$. That is,
00206  *     <tt>distances[i]</tt> contains
00207  *     \f$ \operatorname{dist}(\vec{m_j}, \vec x) \f$, where \f$ j = \f$
00208  *     <tt>column_ids[i]</tt>.
00209  */
00210 CREATE FUNCTION MADLIB_SCHEMA.closest_columns(
00211     M DOUBLE PRECISION[][],
00212     x DOUBLE PRECISION[],
00213     num INTEGER,
00214     dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */
00215 ) RETURNS MADLIB_SCHEMA.closest_columns_result
00216 AS 'MODULE_PATHNAME'
00217 LANGUAGE C
00218 STRICT                            -- NULL in any argument yields NULL
00219 IMMUTABLE;
00220 
00221 CREATE FUNCTION MADLIB_SCHEMA.closest_columns(
00222     M DOUBLE PRECISION[][],
00223     x DOUBLE PRECISION[],
00224     num INTEGER
00225 ) RETURNS MADLIB_SCHEMA.closest_columns_result
00226 LANGUAGE sql                      -- three-argument overload; forwards to the four-argument form
00227 STRICT
00228 IMMUTABLE
00229 AS $$
00230     SELECT MADLIB_SCHEMA.closest_columns($1, $2, $3,
00231         'MADLIB_SCHEMA.squared_dist_norm2')
00232 $$;
00233 
00234 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_transition(  -- SFUNC of the avg aggregate
00235     state DOUBLE PRECISION[],
00236     x DOUBLE PRECISION[]
00237 ) RETURNS DOUBLE PRECISION[]
00238 AS 'MODULE_PATHNAME'
00239 LANGUAGE C
00240 CALLED ON NULL INPUT              -- not STRICT: invoked even when an argument is NULL
00241 IMMUTABLE;
00242 
00243 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_merge(  -- PREFUNC of avg and normalized_avg
00244     state_left DOUBLE PRECISION[],
00245     state_right DOUBLE PRECISION[]
00246 ) RETURNS DOUBLE PRECISION[]
00247 AS 'MODULE_PATHNAME'
00248 LANGUAGE C
00249 STRICT
00250 IMMUTABLE;
00251 
00252 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_final(  -- FINALFUNC of the avg aggregate
00253     state DOUBLE PRECISION[]
00254 ) RETURNS DOUBLE PRECISION[]
00255 AS 'MODULE_PATHNAME'
00256 LANGUAGE C
00257 STRICT
00258 IMMUTABLE;
00259 
00260 /**
00261  * @brief Compute the average of vectors
00262  *
00263  * Given vectors \f$ x_1, \dots, x_n \f$, compute the average
00264  * \f$ \frac 1n \sum_{i=1}^n x_i \f$.
00265  *
00266  * @param x Point \f$ x_i \f$
00267  * @returns Average \f$ \frac 1n \sum_{i=1}^n x_i \f$
00268  */
00269 CREATE AGGREGATE MADLIB_SCHEMA.avg(
00270     /*+ x */ DOUBLE PRECISION[]
00271 ) (
00272     SFUNC=MADLIB_SCHEMA.avg_vector_transition,
00273     STYPE=DOUBLE PRECISION[],
00274     INITCOND='{0,0,0}',
00275     m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,')
00276     FINALFUNC=MADLIB_SCHEMA.avg_vector_final
00277 );
00278 
00279 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_transition(  -- SFUNC of normalized_avg
00280     state DOUBLE PRECISION[],
00281     x DOUBLE PRECISION[]
00282 ) RETURNS DOUBLE PRECISION[]
00283 AS 'MODULE_PATHNAME'
00284 LANGUAGE C
00285 CALLED ON NULL INPUT              -- not STRICT: invoked even when an argument is NULL
00286 IMMUTABLE;
00287 
00288 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_final(  -- FINALFUNC of normalized_avg
00289     state DOUBLE PRECISION[]
00290 ) RETURNS DOUBLE PRECISION[]
00291 AS 'MODULE_PATHNAME'
00292 LANGUAGE C
00293 STRICT
00294 IMMUTABLE;
00295 
00296 /**
00297  * @brief Compute the normalized average of vectors
00298  *
00299  * Given vectors \f$ x_1, \dots, x_n \f$, define
00300  * \f$ \widetilde{x} := \frac 1n \sum_{i=1}^n \frac{x_i}{\| x_i \|} \f$, and
00301  * compute the normalized average
00302  * \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$.
00303  *
00304  * @param x Point \f$ x_i \f$
00305  * @returns Normalized average \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$
00306  */
00307 CREATE AGGREGATE MADLIB_SCHEMA.normalized_avg(
00308     /*+ x */ DOUBLE PRECISION[]
00309 ) (
00310     SFUNC=MADLIB_SCHEMA.normalized_avg_vector_transition,
00311     STYPE=DOUBLE PRECISION[],
00312     INITCOND='{0,0,0}',
00313     m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,')
00314     FINALFUNC=MADLIB_SCHEMA.normalized_avg_vector_final
00315 );
00316 
00317 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_transition(  -- SFUNC of the matrix_agg aggregate
00318     state DOUBLE PRECISION[],
00319     x DOUBLE PRECISION[]
00320 ) RETURNS DOUBLE PRECISION[]
00321 AS 'MODULE_PATHNAME'
00322 LANGUAGE C
00323 STRICT
00324 IMMUTABLE;
00325 
00326 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_final(  -- FINALFUNC of the matrix_agg aggregate
00327     state DOUBLE PRECISION[]
00328 ) RETURNS DOUBLE PRECISION[]
00329 AS 'MODULE_PATHNAME'
00330 LANGUAGE C
00331 STRICT
00332 IMMUTABLE;
00333 
00334 /**
00335  * @brief Combine vectors to a matrix
00336  *
00337  * Given vectors \f$ \vec x_1, \dots, \vec x_n \in \mathbb R^m \f$,
00338  * return matrix \f$ ( \vec x_1 \dots \vec x_n ) \in \mathbb R^{m \times n}\f$.
00339  *
00340  * @param x Vector \f$ x_i \f$
00341  * @returns Matrix with columns \f$ x_1, \dots, x_n \f$
00342  */
00343 CREATE  -- the next line adds the ORDERED keyword only when both build macros are defined
00344 m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED'))
00345 AGGREGATE MADLIB_SCHEMA.matrix_agg(
00346     /*+ x */ DOUBLE PRECISION[]
00347 ) (
00348     SFUNC=MADLIB_SCHEMA.matrix_agg_transition,
00349     STYPE=DOUBLE PRECISION[],
00350     INITCOND='{0,0,0}',
00351     FINALFUNC=MADLIB_SCHEMA.matrix_agg_final
00352 );
00353 
00354 /**
00355  * @brief Return the column of a matrix
00356  *
00357  * @param matrix Two-dimensional matrix
00358  * @param col Column of the matrix to return (0-based index)
00359  */
00360 CREATE FUNCTION MADLIB_SCHEMA.matrix_column(
00361     matrix DOUBLE PRECISION[][],  -- two-dimensional matrix
00362     col INTEGER                   -- 0-based index of the column to return
00363 ) RETURNS DOUBLE PRECISION[]
00364 AS 'MODULE_PATHNAME'
00365 LANGUAGE C
00366 STRICT
00367 IMMUTABLE;