MADlib
0.7 A newer version is available
User Documentation
|
00001 /* ----------------------------------------------------------------------- *//** 00002 * 00003 * @file linalg.sql_in 00004 * 00005 * @brief SQL functions for linear algebra 00006 * 00007 * @sa For an overview of linear-algebra functions, see the module 00008 * description \ref grp_linalg. 00009 * 00010 *//* ----------------------------------------------------------------------- */ 00011 00012 m4_include(`SQLCommon.m4') 00013 00014 /** 00015 @internal 00016 @addtogroup grp_linalg 00017 00018 @about 00019 00020 Linear-algebra functions. 00021 00022 @sa File linalg.sql_in documenting the SQL functions. 00023 */ 00024 00025 /** 00026 * @brief 1-norm of a vector 00027 * 00028 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00029 * @return \f$ \| x \|_1 = \sum_{i=1}^n |x_i| \f$ 00030 */ 00031 CREATE FUNCTION MADLIB_SCHEMA.norm1( 00032 x DOUBLE PRECISION[] 00033 ) RETURNS DOUBLE PRECISION 00034 AS 'MODULE_PATHNAME' 00035 LANGUAGE C 00036 IMMUTABLE 00037 STRICT; 00038 00039 /** 00040 * @brief 2-norm of a vector 00041 * 00042 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00043 * @return \f$ \| x \|_2 = \sqrt{\sum_{i=1}^n x_i^2} \f$ 00044 */ 00045 CREATE FUNCTION MADLIB_SCHEMA.norm2( 00046 x DOUBLE PRECISION[] 00047 ) RETURNS DOUBLE PRECISION 00048 AS 'MODULE_PATHNAME' 00049 LANGUAGE C 00050 IMMUTABLE 00051 STRICT; 00052 00053 /** 00054 * @brief 1-norm of the difference between two vectors 00055 * 00056 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00057 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ 00058 * @return \f$ \| x - y \|_1 = \sum_{i=1}^n |x_i - y_i| \f$ 00059 */ 00060 CREATE FUNCTION MADLIB_SCHEMA.dist_norm1( 00061 x DOUBLE PRECISION[], 00062 y DOUBLE PRECISION[] 00063 ) RETURNS DOUBLE PRECISION 00064 AS 'MODULE_PATHNAME' 00065 LANGUAGE C 00066 IMMUTABLE 00067 STRICT; 00068 00069 /** 00070 * @brief 2-norm of the difference between two vectors 00071 * 00072 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00073 * @param y Vector 
\f$ \vec y = (y_1, \dots, y_n) \f$ 00074 * @return \f$ \| x - y \|_2 = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \f$ 00075 */ 00076 CREATE FUNCTION MADLIB_SCHEMA.dist_norm2( 00077 x DOUBLE PRECISION[], 00078 y DOUBLE PRECISION[] 00079 ) RETURNS DOUBLE PRECISION 00080 AS 'MODULE_PATHNAME' 00081 LANGUAGE C 00082 IMMUTABLE 00083 STRICT; 00084 00085 /** 00086 * @brief Squared 2-norm of the difference between two vectors 00087 * 00088 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00089 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ 00090 * @return \f$ \| x - y \|_2^2 = \sum_{i=1}^n (x_i - y_i)^2 \f$ 00091 */ 00092 CREATE FUNCTION MADLIB_SCHEMA.squared_dist_norm2( 00093 x DOUBLE PRECISION[], 00094 y DOUBLE PRECISION[] 00095 ) RETURNS DOUBLE PRECISION 00096 AS 'MODULE_PATHNAME' 00097 LANGUAGE C 00098 IMMUTABLE 00099 STRICT; 00100 00101 /** 00102 * @brief Angle between two vectors 00103 * 00104 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00105 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ 00106 * @return \f$ \arccos\left(\frac{\langle \vec x, \vec y \rangle} 00107 * {\| \vec x \| \cdot \| \vec y \|}\right) \f$ 00108 */ 00109 CREATE FUNCTION MADLIB_SCHEMA.dist_angle( 00110 x DOUBLE PRECISION[], 00111 y DOUBLE PRECISION[] 00112 ) RETURNS DOUBLE PRECISION 00113 AS 'MODULE_PATHNAME' 00114 LANGUAGE C 00115 IMMUTABLE 00116 STRICT; 00117 00118 /** 00119 * @brief Tanimoto distance between two vectors 00120 * 00121 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ 00122 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ 00123 * @return \f$ 1 - \frac{\langle \vec x, \vec y \rangle} 00124 * {\| \vec x \|^2 + \| \vec y \|^2 00125 * - \langle \vec x, \vec y \rangle} \f$ 00126 */ 00127 CREATE FUNCTION MADLIB_SCHEMA.dist_tanimoto( 00128 x DOUBLE PRECISION[], 00129 y DOUBLE PRECISION[] 00130 ) RETURNS DOUBLE PRECISION 00131 AS 'MODULE_PATHNAME' 00132 LANGUAGE C 00133 IMMUTABLE 00134 STRICT; 00135 00136 /* 00137 * @brief closest_column return type 00138 */ 
00139 CREATE TYPE MADLIB_SCHEMA.closest_column_result AS ( 00140 column_id INTEGER, 00141 distance DOUBLE PRECISION 00142 ); 00143 00144 /** 00145 * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the column 00146 * of \f$ M \f$ that is closest to \f$ \vec x \f$ 00147 * 00148 * @param M Matrix \f$ M = (\vec{m_0} \dots \vec{m_{l-1}}) 00149 * \in \mathbb{R}^{k \times l} \f$ 00150 * @param x Vector \f$ \vec x \in \mathbb R^k \f$ 00151 * @param dist The metric \f$ \operatorname{dist} \f$. This needs to be a 00152 * function with signature 00153 * <tt>DOUBLE PRECISION[] x DOUBLE PRECISION[] -> DOUBLE PRECISION</tt>. 00154 * 00155 * @returns A composite value: 00156 * - <tt>column_id INTEGER</tt> - The 0-based index of the column of \f$ M \f$ 00157 * that is closest to \f$ x \f$. In case of ties, the first such index is 00158 * returned. That is, \c column_id is the minimum element in the set 00159 * \f$ \arg\min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$. 00160 * - <tt>distance DOUBLE PRECISION</tt> - The minimum distance between any 00161 * column of \f$ M \f$ and \f$ x \f$. That is, 00162 * \f$ \min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$. 
00163 */ 00164 CREATE FUNCTION MADLIB_SCHEMA.closest_column( 00165 M DOUBLE PRECISION[][], 00166 x DOUBLE PRECISION[], 00167 dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */ 00168 ) RETURNS MADLIB_SCHEMA.closest_column_result 00169 IMMUTABLE 00170 STRICT 00171 LANGUAGE C 00172 AS 'MODULE_PATHNAME'; 00173 00174 CREATE FUNCTION MADLIB_SCHEMA.closest_column( 00175 M DOUBLE PRECISION[][], 00176 x DOUBLE PRECISION[] 00177 ) RETURNS MADLIB_SCHEMA.closest_column_result 00178 IMMUTABLE 00179 STRICT 00180 LANGUAGE sql 00181 AS $$ 00182 SELECT MADLIB_SCHEMA.closest_column($1, $2, 00183 'MADLIB_SCHEMA.squared_dist_norm2') 00184 $$; 00185 00186 /* 00187 * @brief closest_columns return type 00188 */ 00189 CREATE TYPE MADLIB_SCHEMA.closest_columns_result AS ( 00190 column_ids INTEGER[], 00191 distances DOUBLE PRECISION[] 00192 ); 00193 00194 /** 00195 * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the columns 00196 * of \f$ M \f$ that are closest to \f$ \vec x \f$ 00197 * 00198 * This function does essentially the same as \ref closest_column(), except that 00199 * it allows specifying the number of closest columns to return. The return 00200 * value is a composite value: 00201 * - <tt>column_ids INTEGER[]</tt> - The 0-based indices of the \c num columns 00202 * of \f$ M \f$ that are closest to \f$ x \f$. In case of ties, the first 00203 * such indices are returned. 00204 * - <tt>distances DOUBLE PRECISION[]</tt> - The distances between the columns 00205 * of \f$ M \f$ with indices in \c column_ids and \f$ x \f$. That is, 00206 * <tt>distances[i]</tt> contains 00207 * \f$ \operatorname{dist}(\vec{m_j}, \vec x) \f$, where \f$ j = \f$ 00208 * <tt>column_ids[i]</tt>. 
00209 */ 00210 CREATE FUNCTION MADLIB_SCHEMA.closest_columns( 00211 M DOUBLE PRECISION[][], 00212 x DOUBLE PRECISION[], 00213 num INTEGER, 00214 dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */ 00215 ) RETURNS MADLIB_SCHEMA.closest_columns_result 00216 IMMUTABLE 00217 STRICT 00218 LANGUAGE C 00219 AS 'MODULE_PATHNAME'; 00220 00221 CREATE FUNCTION MADLIB_SCHEMA.closest_columns( 00222 M DOUBLE PRECISION[][], 00223 x DOUBLE PRECISION[], 00224 num INTEGER 00225 ) RETURNS MADLIB_SCHEMA.closest_columns_result 00226 IMMUTABLE 00227 STRICT 00228 LANGUAGE sql 00229 AS $$ 00230 SELECT MADLIB_SCHEMA.closest_columns($1, $2, $3, 00231 'MADLIB_SCHEMA.squared_dist_norm2') 00232 $$; 00233 00234 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_transition( 00235 state DOUBLE PRECISION[], 00236 x DOUBLE PRECISION[] 00237 ) RETURNS DOUBLE PRECISION[] 00238 LANGUAGE c 00239 IMMUTABLE 00240 CALLED ON NULL INPUT 00241 AS 'MODULE_PATHNAME'; 00242 00243 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_merge( 00244 state_left DOUBLE PRECISION[], 00245 state_right DOUBLE PRECISION[] 00246 ) RETURNS DOUBLE PRECISION[] 00247 LANGUAGE c 00248 IMMUTABLE 00249 STRICT 00250 AS 'MODULE_PATHNAME'; 00251 00252 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_final( 00253 state DOUBLE PRECISION[] 00254 ) RETURNS DOUBLE PRECISION[] 00255 LANGUAGE c 00256 IMMUTABLE 00257 STRICT 00258 AS 'MODULE_PATHNAME'; 00259 00260 /** 00261 * @brief Compute the average of vectors 00262 * 00263 * Given vectors \f$ x_1, \dots, x_n \f$, compute the average 00264 * \f$ \frac 1n \sum_{i=1}^n x_i \f$. 
00265 * 00266 * @param x Point \f$ x_i \f$ 00267 * @returns Average \f$ \frac 1n \sum_{i=1}^n x_i \f$ 00268 */ 00269 CREATE AGGREGATE MADLIB_SCHEMA.avg( 00270 /*+ x */ DOUBLE PRECISION[] 00271 ) ( 00272 STYPE=DOUBLE PRECISION[], 00273 SFUNC=MADLIB_SCHEMA.avg_vector_transition, 00274 m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,') 00275 FINALFUNC=MADLIB_SCHEMA.avg_vector_final, 00276 INITCOND='{0,0,0}' 00277 ); 00278 00279 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_transition( 00280 state DOUBLE PRECISION[], 00281 x DOUBLE PRECISION[] 00282 ) RETURNS DOUBLE PRECISION[] 00283 LANGUAGE c 00284 IMMUTABLE 00285 CALLED ON NULL INPUT 00286 AS 'MODULE_PATHNAME'; 00287 00288 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_final( 00289 state DOUBLE PRECISION[] 00290 ) RETURNS DOUBLE PRECISION[] 00291 LANGUAGE c 00292 IMMUTABLE 00293 STRICT 00294 AS 'MODULE_PATHNAME'; 00295 00296 /** 00297 * @brief Compute the normalized average of vectors 00298 * 00299 * Given vectors \f$ x_1, \dots, x_n \f$, define 00300 * \f$ \widetilde{x} := \frac 1n \sum_{i=1}^n \frac{x_i}{\| x_i \|} \f$, and 00301 * compute the normalized average 00302 * \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$. 
00303 * 00304 * @param x Point \f$ x_i \f$ 00305 * @returns Normalized average \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$ 00306 */ 00307 CREATE AGGREGATE MADLIB_SCHEMA.normalized_avg( 00308 /*+ x */ DOUBLE PRECISION[] 00309 ) ( 00310 STYPE=DOUBLE PRECISION[], 00311 SFUNC=MADLIB_SCHEMA.normalized_avg_vector_transition, 00312 m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,') 00313 FINALFUNC=MADLIB_SCHEMA.normalized_avg_vector_final, 00314 INITCOND='{0,0,0}' 00315 ); 00316 00317 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_transition( 00318 state DOUBLE PRECISION[], 00319 x DOUBLE PRECISION[] 00320 ) RETURNS DOUBLE PRECISION[] 00321 LANGUAGE c 00322 IMMUTABLE 00323 STRICT 00324 AS 'MODULE_PATHNAME'; 00325 00326 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_final( 00327 state DOUBLE PRECISION[] 00328 ) RETURNS DOUBLE PRECISION[] 00329 LANGUAGE c 00330 IMMUTABLE 00331 STRICT 00332 AS 'MODULE_PATHNAME'; 00333 00334 /** 00335 * @brief Combine vectors to a matrix 00336 * 00337 * Given vectors \f$ \vec x_1, \dots, \vec x_n \in \mathbb R^m \f$, 00338 * return matrix \f$ ( \vec x_1 \dots \vec x_n ) \in \mathbb R^{m \times n}\f$. 
00339 * 00340 * @param x Vector \f$ x_i \f$ 00341 * @returns Matrix with columns \f$ x_1, \dots, x_n \f$ 00342 */ 00343 CREATE 00344 m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED')) 00345 AGGREGATE MADLIB_SCHEMA.matrix_agg( 00346 /*+ x */ DOUBLE PRECISION[] 00347 ) ( 00348 STYPE=DOUBLE PRECISION[], 00349 SFUNC=MADLIB_SCHEMA.matrix_agg_transition, 00350 FINALFUNC=MADLIB_SCHEMA.matrix_agg_final, 00351 INITCOND='{0,0,0}' 00352 ); 00353 00354 /** 00355 * @brief Return the column of a matrix 00356 * 00357 * @param matrix Two-dimensional matrix 00358 * @param col Column of the matrix to return (0-based index) 00359 */ 00360 CREATE FUNCTION MADLIB_SCHEMA.matrix_column( 00361 matrix DOUBLE PRECISION[][], 00362 col INTEGER 00363 ) RETURNS DOUBLE PRECISION[] 00364 LANGUAGE c 00365 IMMUTABLE 00366 STRICT 00367 AS 'MODULE_PATHNAME';