User Documentation
 All Files Functions Groups
linalg.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file linalg.sql_in
4  *
5  * @brief SQL functions for linear algebra
6  *
7  * @sa For an overview of linear-algebra functions, see the module
8  * description \ref grp_linalg.
9  *
10  *//* ----------------------------------------------------------------------- */
11 
12 m4_include(`SQLCommon.m4')
13 
14 /**
15 @addtogroup grp_linalg
16 
17 \warning <em> This MADlib method is still in early stage development. There may be some
18 issues that will be addressed in a future version. Interface and implementation
19 is subject to change. </em>
20 
21 @about
22 
23 The linalg module consists of useful utility functions for basic linear
24 algebra operations. Several of these functions can be used
25 while implementing new algorithms.
26 
27 Refer to the file for documentation on each of the utlity functions.
28 
29 
30 Linear-algebra functions.
31 
32 @sa File linalg.sql_in documenting the SQL functions.
33 */
34 
35 /**
36  * @brief 1-norm of a vector
37  *
38  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
39  * @return \f$ \| x \|_1 = \sum_{i=1}^n |x_i| \f$
40  */
41 CREATE FUNCTION MADLIB_SCHEMA.norm1(
42  x DOUBLE PRECISION[]
43 ) RETURNS DOUBLE PRECISION
44 AS 'MODULE_PATHNAME'
45 LANGUAGE C
46 IMMUTABLE
47 STRICT;
48 
49 /**
50  * @brief 2-norm of a vector
51  *
52  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
53  * @return \f$ \| x \|_2 = \sqrt{\sum_{i=1}^n x_i^2} \f$
54  */
55 CREATE FUNCTION MADLIB_SCHEMA.norm2(
56  x DOUBLE PRECISION[]
57 ) RETURNS DOUBLE PRECISION
58 AS 'MODULE_PATHNAME'
59 LANGUAGE C
60 IMMUTABLE
61 STRICT;
62 
63 /**
64  * @brief 1-norm of the difference between two vectors
65  *
66  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
67  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
68  * @return \f$ \| x - y \|_1 = \sum_{i=1}^n |x_i - y_i| \f$
69  */
70 CREATE FUNCTION MADLIB_SCHEMA.dist_norm1(
71  x DOUBLE PRECISION[],
72  y DOUBLE PRECISION[]
73 ) RETURNS DOUBLE PRECISION
74 AS 'MODULE_PATHNAME'
75 LANGUAGE C
76 IMMUTABLE
77 STRICT;
78 
79 /**
80  * @brief 2-norm of the difference between two vectors
81  *
82  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
83  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
84  * @return \f$ \| x - y \|_2 = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \f$
85  */
86 CREATE FUNCTION MADLIB_SCHEMA.dist_norm2(
87  x DOUBLE PRECISION[],
88  y DOUBLE PRECISION[]
89 ) RETURNS DOUBLE PRECISION
90 AS 'MODULE_PATHNAME'
91 LANGUAGE C
92 IMMUTABLE
93 STRICT;
94 
95 /**
96  * @brief Squared 2-norm of the difference between two vectors
97  *
98  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
99  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
100  * @return \f$ \| x - y \|_2^2 = \sum_{i=1}^n (x_i - y_i)^2 \f$
101  */
102 CREATE FUNCTION MADLIB_SCHEMA.squared_dist_norm2(
103  x DOUBLE PRECISION[],
104  y DOUBLE PRECISION[]
105 ) RETURNS DOUBLE PRECISION
106 AS 'MODULE_PATHNAME'
107 LANGUAGE C
108 IMMUTABLE
109 STRICT;
110 
111 /**
112  * @brief Angle between two vectors
113  *
114  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
115  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
116  * @return \f$ \arccos\left(\frac{\langle \vec x, \vec y \rangle}
117  * {\| \vec x \| \cdot \| \vec y \|}\right) \f$
118  */
119 CREATE FUNCTION MADLIB_SCHEMA.dist_angle(
120  x DOUBLE PRECISION[],
121  y DOUBLE PRECISION[]
122 ) RETURNS DOUBLE PRECISION
123 AS 'MODULE_PATHNAME'
124 LANGUAGE C
125 IMMUTABLE
126 STRICT;
127 
128 /**
129  * @brief Tanimoto distance between two vectors
130  *
131  * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
132  * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
133  * @return \f$ 1 - \frac{\langle \vec x, \vec y \rangle}
134  * {\| \vec x \|^2 \cdot \| \vec y \|^2
135  * - \langle \vec x, \vec y \rangle} \f$
136  */
137 CREATE FUNCTION MADLIB_SCHEMA.dist_tanimoto(
138  x DOUBLE PRECISION[],
139  y DOUBLE PRECISION[]
140 ) RETURNS DOUBLE PRECISION
141 AS 'MODULE_PATHNAME'
142 LANGUAGE C
143 IMMUTABLE
144 STRICT;
145 
146 /*
147  * @brief closest_column return type
148  */
149 CREATE TYPE MADLIB_SCHEMA.closest_column_result AS (
150  column_id INTEGER,
151  distance DOUBLE PRECISION
152 );
153 
154 /**
155  * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the column
156  * of \f$ M \f$ that is closest to \f$ \vec x \f$
157  *
158  * @param M Matrix \f$ M = (\vec{m_0} \dots \vec{m_{l-1}})
159  * \in \mathbb{R}^{k \times l} \f$
160  * @param x Vector \f$ \vec x \in \mathbb R^k \f$
161  * @param dist The metric \f$ \operatorname{dist} \f$. This needs to be a
162  * function with signature
163  * <tt>DOUBLE PRECISION[] x DOUBLE PRECISION[] -> DOUBLE PRECISION</tt>.
164  *
165  * @returns A composite value:
166  * - <tt>columns_id INTEGER</tt> - The 0-based index of the column of \f$ M \f$
167  * that is closest to \f$ x \f$. In case of ties, the first such index is
168  * returned. That is, \c columns_id is the minimum element in the set
169  * \f$ \arg\min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
170  * - <tt>distance DOUBLE PRECISION</tt> - The minimum distance between any
171  * column of \f$ M \f$ and \f$ x \f$. That is,
172  * \f$ \min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
173  */
174 CREATE FUNCTION MADLIB_SCHEMA.closest_column(
175  M DOUBLE PRECISION[][],
176  x DOUBLE PRECISION[],
177  dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */
178 ) RETURNS MADLIB_SCHEMA.closest_column_result
179 IMMUTABLE
180 STRICT
181 LANGUAGE C
182 AS 'MODULE_PATHNAME';
183 
184 CREATE FUNCTION MADLIB_SCHEMA.closest_column(
185  M DOUBLE PRECISION[][],
186  x DOUBLE PRECISION[]
187 ) RETURNS MADLIB_SCHEMA.closest_column_result
188 IMMUTABLE
189 STRICT
190 LANGUAGE sql
191 AS $$
192  SELECT MADLIB_SCHEMA.closest_column($1, $2,
193  'MADLIB_SCHEMA.squared_dist_norm2')
194 $$;
195 
196 /*
197  * @brief closest_columns return type
198  */
199 CREATE TYPE MADLIB_SCHEMA.closest_columns_result AS (
200  column_ids INTEGER[],
201  distances DOUBLE PRECISION[]
202 );
203 
204 /**
205  * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the columns
206  * of \f$ M \f$ that are closest to \f$ \vec x \f$
207  *
208  * This function does essentially the same as \ref closest_column(), except that
209  * it allows to specify the number of closest columns to return. The return
210  * value is a composite value:
211  * - <tt>columns_ids INTEGER[]</tt> - The 0-based indices of the \c num columns
212  * of \f$ M \f$ that are closest to \f$ x \f$. In case of ties, the first
213  * such indices are returned.
214  * - <tt>distances DOUBLE PRECISION[]</tt> - The distances between the columns
215  * of \f$ M \f$ with indices in \c columns_ids and \f$ x \f$. That is,
216  * <tt>distances[i]</tt> contains
217  * \f$ \operatorname{dist}(\vec{m_j}, \vec x) \f$, where \f$ j = \f$
218  * <tt>columns_ids[i]</tt>.
219  */
220 CREATE FUNCTION MADLIB_SCHEMA.closest_columns(
221  M DOUBLE PRECISION[][],
222  x DOUBLE PRECISION[],
223  num INTEGER,
224  dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */
225 ) RETURNS MADLIB_SCHEMA.closest_columns_result
226 IMMUTABLE
227 STRICT
228 LANGUAGE C
229 AS 'MODULE_PATHNAME';
230 
231 CREATE FUNCTION MADLIB_SCHEMA.closest_columns(
232  M DOUBLE PRECISION[][],
233  x DOUBLE PRECISION[],
234  num INTEGER
235 ) RETURNS MADLIB_SCHEMA.closest_columns_result
236 IMMUTABLE
237 STRICT
238 LANGUAGE sql
239 AS $$
240  SELECT MADLIB_SCHEMA.closest_columns($1, $2, $3,
241  'MADLIB_SCHEMA.squared_dist_norm2')
242 $$;
243 
244 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_transition(
245  state DOUBLE PRECISION[],
246  x DOUBLE PRECISION[]
247 ) RETURNS DOUBLE PRECISION[]
248 LANGUAGE c
249 IMMUTABLE
250 CALLED ON NULL INPUT
251 AS 'MODULE_PATHNAME';
252 
253 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_merge(
254  state_left DOUBLE PRECISION[],
255  state_right DOUBLE PRECISION[]
256 ) RETURNS DOUBLE PRECISION[]
257 LANGUAGE c
258 IMMUTABLE
259 STRICT
260 AS 'MODULE_PATHNAME';
261 
262 CREATE FUNCTION MADLIB_SCHEMA.avg_vector_final(
263  state DOUBLE PRECISION[]
264 ) RETURNS DOUBLE PRECISION[]
265 LANGUAGE c
266 IMMUTABLE
267 STRICT
268 AS 'MODULE_PATHNAME';
269 
270 /**
271  * @brief Compute the average of vectors
272  *
273  * Given vectors \f$ x_1, \dots, x_n \f$, compute the average
274  * \f$ \frac 1n \sum_{i=1}^n x_i \f$.
275  *
276  * @param x Point \f$ x_i \f$
277  * @returns Average \f$ \frac 1n \sum_{i=1}^n x_i \f$
278  */
279 CREATE AGGREGATE MADLIB_SCHEMA.avg(
280  /*+ x */ DOUBLE PRECISION[]
281 ) (
282  STYPE=DOUBLE PRECISION[],
283  SFUNC=MADLIB_SCHEMA.avg_vector_transition,
284  m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,')
285  FINALFUNC=MADLIB_SCHEMA.avg_vector_final,
286  INITCOND='{0,0,0}'
287 );
288 
289 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_transition(
290  state DOUBLE PRECISION[],
291  x DOUBLE PRECISION[]
292 ) RETURNS DOUBLE PRECISION[]
293 LANGUAGE c
294 IMMUTABLE
295 CALLED ON NULL INPUT
296 AS 'MODULE_PATHNAME';
297 
298 CREATE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_final(
299  state DOUBLE PRECISION[]
300 ) RETURNS DOUBLE PRECISION[]
301 LANGUAGE c
302 IMMUTABLE
303 STRICT
304 AS 'MODULE_PATHNAME';
305 
306 /**
307  * @brief Compute the normalized average of vectors
308  *
309  * Given vectors \f$ x_1, \dots, x_n \f$, define
310  * \f$ \widetilde{x} := \frac 1n \sum_{i=1}^n \frac{x_i}{\| x_i \|} \f$, and
311  * compute the normalized average
312  * \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$.
313  *
314  * @param x Point \f$ x_i \f$
315  * @returns Normalized average \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$
316  */
317 CREATE AGGREGATE MADLIB_SCHEMA.normalized_avg(
318  /*+ x */ DOUBLE PRECISION[]
319 ) (
320  STYPE=DOUBLE PRECISION[],
321  SFUNC=MADLIB_SCHEMA.normalized_avg_vector_transition,
322  m4_ifdef(`__GREENPLUM__', `PREFUNC=MADLIB_SCHEMA.avg_vector_merge,')
323  FINALFUNC=MADLIB_SCHEMA.normalized_avg_vector_final,
324  INITCOND='{0,0,0}'
325 );
326 
327 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_transition(
328  state DOUBLE PRECISION[],
329  x DOUBLE PRECISION[]
330 ) RETURNS DOUBLE PRECISION[]
331 LANGUAGE c
332 IMMUTABLE
333 STRICT
334 AS 'MODULE_PATHNAME';
335 
336 CREATE FUNCTION MADLIB_SCHEMA.matrix_agg_final(
337  state DOUBLE PRECISION[]
338 ) RETURNS DOUBLE PRECISION[]
339 LANGUAGE c
340 IMMUTABLE
341 STRICT
342 AS 'MODULE_PATHNAME';
343 
344 /**
345  * @brief Combine vectors to a matrix
346  *
347  * Given vectors \f$ \vec x_1, \dots, \vec x_n \in \mathbb R^m \f$,
348  * return matrix \f$ ( \vec x_1 \dots \vec x_n ) \in \mathbb R^{m \times n}\f$.
349  *
350  * @param x Vector \f$ x_i \f$
351  * @returns Matrix with columns \f$ x_1, \dots, x_n \f$
352  */
353 CREATE
354 m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED'))
355 AGGREGATE MADLIB_SCHEMA.matrix_agg(
356  /*+ x */ DOUBLE PRECISION[]
357 ) (
358  STYPE=DOUBLE PRECISION[],
359  SFUNC=MADLIB_SCHEMA.matrix_agg_transition,
360  FINALFUNC=MADLIB_SCHEMA.matrix_agg_final,
361  INITCOND='{0,0,0}'
362 );
363 
364 /**
365  * @brief Return the column of a matrix
366  *
367  * @param matrix Two-dimensional matrix
368  * @param col Column of the matrix to return (0-based index)
369  */
370 CREATE FUNCTION MADLIB_SCHEMA.matrix_column(
371  matrix DOUBLE PRECISION[][],
372  col INTEGER
373 ) RETURNS DOUBLE PRECISION[]
374 LANGUAGE c
375 IMMUTABLE
376 STRICT
377 AS 'MODULE_PATHNAME';