User Documentation
 All Files Functions Groups
svec.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file svec.sql_in
4  *
5  * @brief SQL type definitions and functions for sparse vector data type
6  * <tt>svec</tt>
7  *
8  * @sa For an introduction to the sparse vector implementation, see the module
9  * description \ref grp_svec.
10  *
11  *//* ----------------------------------------------------------------------- */
12 
13 m4_include(`SQLCommon.m4')
14 
15 /**
16 @addtogroup grp_svec
17 
18 @about
19 
20 This module implements a sparse vector data type named "svec", which
21 gives compressed storage of sparse vectors with many duplicate elements.
22 
23 When we use arrays of floating point numbers for various calculations,
24  we will sometimes have long runs of zeros (or some other default value).
25  This is common in applications like scientific computing,
26  retail optimization, and text processing. Each floating point number takes
27  8 bytes of storage in memory and/or disk, so saving those zeros is often
28  worthwhile. There are also many computations that can benefit from skipping
29  over the zeros.
30 
31  To focus the discussion, consider, for example, the following
32  array of doubles stored as a Postgres/GP "float8[]" data type:
33 
34 \code
35 '{0, 33,...40,000 zeros..., 12, 22 }'::float8[].
36 \endcode
37 
38  This array would occupy slightly more than 320KB of memory/disk, most of
39  it zeros. Even if we were to exploit the null bitmap and store the zeros
40  as nulls, we would still end up with a 5KB null bitmap, which is still
41  not nearly as memory efficient as we'd like. Also, as we perform various
42  operations on the array, we'll often be doing work on 40,000 fields that
43  would turn out not to be important.
44 
45  To solve the problems associated with the processing of sparse vectors
46  discussed above, we adopt a simple Run Length Encoding (RLE) scheme to
47  represent sparse vectors as pairs of count-value arrays. So, for example,
48  the array above would be represented as follows
49 
50 \code
51 '{1,1,40000,1,1}:{0,33,0,12,22}'::MADLIB_SCHEMA.svec,
52 \endcode
53 
54  which says there is 1 occurrence of 0, followed by 1 occurrence of 33,
55  followed by 40,000 occurrences of 0, etc. In contrast to the naive
56  representations, we only need 5 integers and 5 floating point numbers
57  to store the array. Further, it is easy to implement vector operations
58  that can take advantage of the RLE representation to make computations
59  faster. The module provides a library of such functions.
60 
61  The current version only supports sparse vectors of float8
62  values. Future versions will support other base types.
63 
64 @usage
65 
66  SVEC's can be constructed directly as follows:
67  <pre>
68  SELECT '{n1,n2,...,nk}:{v1,v2,...vk}'::MADLIB_SCHEMA.svec;
69  </pre>
70  where <tt>n1,n2,...,nk</tt> specify the counts for the values <tt>v1,v2,...,vk</tt>.
71 
72  Or, SVEC's can also be cast from a float array:
73  <pre>
74  SELECT ('{v1,v2,...vk}'::float[])::MADLIB_SCHEMA.svec;
75  </pre>
76 
77  Syntax reference can be found in svec.sql_in.
78 
79  Users need to add MADLIB_SCHEMA to their search_path to use the svec operators
80  defined in the module.
81 
82 @examp
83 
84  We can use operations with svec type like <, >, *, **, /, =, +, SUM, etc,
85  and they have meanings associated with typical vector operations. For
86  example, the plus (+) operator adds each of the terms of two vectors having
87  the same dimension together.
88 \code
89 sql> SELECT ('{0,1,5}'::float8[]::MADLIB_SCHEMA.svec + '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec)::float8[];
90  float8
91 ---------
92  {4,4,7}
93 \endcode
94 
95  Without the casting into float8[] at the end, we get:
96 \code
97 sql> SELECT '{0,1,5}'::float8[]::MADLIB_SCHEMA.svec + '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec;
98  ?column?
99 ----------
100 {2,1}:{4,7}
101 \endcode
102 
103  A dot product (%*%) between the two vectors will result in a scalar
104  result of type float8. The dot product should be (0*4 + 1*3 + 5*2) = 13,
105  like this:
106 \code
107 sql> SELECT '{0,1,5}'::float8[]::MADLIB_SCHEMA.svec %*% '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec;
108  ?column?
109 ----------
110  13
111 \endcode
112 
113  Special vector aggregate functions are also available. SUM is self
114  explanatory. SVEC_COUNT_NONZERO evaluates the count of non-zero terms
115  in each column found in a set of n-dimensional svecs and returns an
116  svec with the counts. For instance, if we have the vectors {0,1,5},
117  {10,0,3},{0,0,3},{0,1,0}, then executing the SVEC_COUNT_NONZERO() aggregate
118  function would result in {1,2,3}:
119 
120 \code
121 sql> create table list (a MADLIB_SCHEMA.svec);
122 sql> insert into list values ('{0,1,5}'::float8[]), ('{10,0,3}'::float8[]), ('{0,0,3}'::float8[]),('{0,1,0}'::float8[]);
123 
124 sql> SELECT MADLIB_SCHEMA.svec_count_nonzero(a)::float8[] FROM list;
125 svec_count_nonzero
126 -----------------
127  {1,2,3}
128 \endcode
129 
130  We do not use null bitmaps in the svec data type. A null value in an svec
131  is represented explicitly as an NVP (No Value Present) value. For example,
132  we have:
133 \code
134 sql> SELECT '{1,2,3}:{4,null,5}'::MADLIB_SCHEMA.svec;
135  svec
136 -------------------
137  {1,2,3}:{4,NVP,5}
138 
139 sql> SELECT '{1,2,3}:{4,null,5}'::MADLIB_SCHEMA.svec + '{2,2,2}:{8,9,10}'::MADLIB_SCHEMA.svec;
140  ?column?
141  --------------------------
142  {1,2,1,2}:{12,NVP,14,15}
143 \endcode
144 
145  An element of an svec can be accessed using the svec_proj() function,
146  which takes an svec and the index of the element desired.
147 \code
148 sql> SELECT MADLIB_SCHEMA.svec_proj('{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec, 1) + MADLIB_SCHEMA.svec_proj('{4,5,6}:{1,2,3}'::MADLIB_SCHEMA.svec, 15);
149  ?column?
150 ----------
151  7
152 \endcode
153 
154  A subvector of an svec can be accessed using the svec_subvec() function,
155  which takes an svec and the start and end index of the subvector desired.
156 \code
157 sql> SELECT MADLIB_SCHEMA.svec_subvec('{2,4,6}:{1,3,5}'::MADLIB_SCHEMA.svec, 2, 11);
158  svec_subvec
159 -----------------
160  {1,4,5}:{1,3,5}
161 \endcode
162 
163  The elements/subvector of an svec can be changed using the function
164  svec_change(). It takes three arguments: an m-dimensional svec sv1, a
165  start index j, and an n-dimensional svec sv2 such that j + n - 1 <= m,
166  and returns an svec like sv1 but with the subvector sv1[j:j+n-1]
167  replaced by sv2. An example follows:
168 \code
169 sql> SELECT MADLIB_SCHEMA.svec_change('{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec,3,'{2}:{3}'::MADLIB_SCHEMA.svec);
170  svec_change
171 ---------------------
172  {1,1,2,2}:{4,5,3,6}
173 \endcode
174 
175  There are also higher-order functions for processing svecs. For example,
176  the following is the corresponding function for lapply() in R.
177 \code
178 sql> SELECT MADLIB_SCHEMA.svec_lapply('sqrt', '{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec);
179  svec_lapply
180 -----------------------------------------------
181  {1,2,3}:{2,2.23606797749979,2.44948974278318}
182 \endcode
183 
184  The full list of functions available for operating on svecs can be found
185  in svec.sql.
186 
187 <b> A More Extensive Example</b>
188 
189  For a text classification example, let's assume we have a dictionary
190  composed of words in a sorted text array:
191 \code
192 sql> create table features (a text[]);
193 sql> insert into features values
194  ('{am,before,being,bothered,corpus,document,i,in,is,me,
195  never,now,one,really,second,the,third,this,until}');
196 \endcode
197  We have a set of documents, each represented as an array of words:
198 \code
199 sql> create table documents(a int,b text[]);
200 sql> insert into documents values
201  (1,'{this,is,one,document,in,the,corpus}'),
202  (2,'{i,am,the,second,document,in,the,corpus}'),
203  (3,'{being,third,never,really,bothered,me,until,now}'),
204  (4,'{the,document,before,me,is,the,third,document}');
205 \endcode
206 
207  Now that we have a dictionary and some documents, we would like to do some
208  document categorization using vector arithmetic on word counts and
209  proportions of dictionary words in each document.
210 
211  To start this process, we'll need to find the dictionary words in each
212  document. We'll prepare what is called a Sparse Feature Vector or SFV
213  for each document. An SFV is a vector of dimension N, where N is the
214  number of dictionary words, and in each cell of an SFV is a count of
215  each dictionary word in the document.
216 
217  Inside the sparse vector library, we have a function that will create
218  an SFV from a document, so we can just do this:
219 \code
220 sql> SELECT MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b)::float8[]
221  FROM documents;
222 
223  svec_sfv
224 -----------------------------------------
225  {0,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0}
226  {0,0,1,1,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1}
227  {1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,2,0,0,0}
228  {0,1,0,0,0,2,0,0,1,1,0,0,0,0,0,2,1,0,0}
229 \endcode
230  Note that the output of MADLIB_SCHEMA.svec_sfv() is an svec for each
231  document containing the count of each of the dictionary words in the
232  ordinal positions of the dictionary. This can more easily be understood
233  by lining up the feature vector and text like this:
234 \code
235 sql> SELECT MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b)::float8[]
236  , b
237  FROM documents;
238 
239  svec_sfv | b
240 -----------------------------------------+--------------------------------------------------
241  {1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,2,0,0,0} | {i,am,the,second,document,in,the,corpus}
242  {0,1,0,0,0,2,0,0,1,1,0,0,0,0,0,2,1,0,0} | {the,document,before,me,is,the,third,document}
243  {0,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0} | {this,is,one,document,in,the,corpus}
244  {0,0,1,1,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1} | {being,third,never,really,bothered,me,until,now}
245 
246 sql> SELECT * FROM features;
247  a
248 --------------------------------------------------------------------------------------------------------
249 {am,before,being,bothered,corpus,document,i,in,is,me,never,now,one,really,second,the,third,this,until}
250 \endcode
251 
252  Now when we look at the document "i am the second document in the corpus",
253  its SFV is {1,3*0,1,1,1,1,6*0,1,2,3*0}. The word "am" is the first ordinate in
254  the dictionary and there is 1 instance of it in the SFV. The word "before"
255  has no instances in the document, so its value is "0" and so on.
256 
257  The function MADLIB_SCHEMA.svec_sfv() can process large
258  numbers of documents into their SFVs in parallel at high speed.
259 
260  The rest of the categorization process is all vector math. The actual
261  count is hardly ever used. Instead, it's turned into a weight. The most
262  common weight is called tf/idf for Term Frequency / Inverse Document
263  Frequency. The calculation for a given term in a given document is
264 \code
265 {#Times in document} * log {#Documents / #Documents the term appears in}.
266 \endcode
267  For instance, the term "document" in document 1 would have weight
268  1 * log (4/3). In document 4, it would have weight 2 * log (4/3).
269  Terms that appear in every document would have tf/idf weight 0, since
270  log (4/4) = log(1) = 0. (Our example has no term like that.) That
271  usually sends a lot of values to 0.
272 
273  For this part of the processing, we'll need to have a sparse vector of
274  the dictionary dimension (19) with the values
275 \code
276 log(#documents/#Documents each term appears in).
277 \endcode
278  There will be one such vector for the whole list of documents (aka the
279  "corpus"). The #documents is just a count of all of the documents, in
280  this case 4, but there is one divisor for each dictionary word and its
281  value is the number of documents in which that word appears.
282  This single vector for the whole corpus can then be multiplied, element by
283  element, with each document SFV to produce the Term Frequency/Inverse
284  Document Frequency weights.
285 
286  This can be done as follows:
287 \code
288 sql> create table corpus as
289  (SELECT a, MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b) sfv
290  FROM documents);
291 sql> create table weights as
292  (SELECT a docnum, MADLIB_SCHEMA.svec_mult(sfv, logidf) tf_idf
293  FROM (SELECT MADLIB_SCHEMA.svec_log(MADLIB_SCHEMA.svec_div(count(sfv)::MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec_count_nonzero(sfv))) logidf
294  FROM corpus) foo, corpus ORDER BY docnum);
295 sql> SELECT * FROM weights;
296 
297 docnum | tf_idf
298 -------+----------------------------------------------------------------------
299  1 | {4,1,1,1,2,3,1,2,1,1,1,1}:{0,0.69,0.28,0,0.69,0,1.38,0,0.28,0,1.38,0}
300  2 | {1,3,1,1,1,1,6,1,1,3}:{1.38,0,0.69,0.28,1.38,0.69,0,1.38,0.57,0}
301  3 | {2,2,5,1,2,1,1,2,1,1,1}:{0,1.38,0,0.69,1.38,0,1.38,0,0.69,0,1.38}
302  4 | {1,1,3,1,2,2,5,1,1,2}:{0,1.38,0,0.57,0,0.69,0,0.57,0.69,0}
303 \endcode
304 
305  We can now get the "angular distance" between one document and the rest
306  of the documents using the ACOS of the normalized dot product of the document vectors.
307  The following calculates the angular distance between the first document
308  and each of the other documents:
309 \code
310 sql> SELECT docnum,
311  180. * ( ACOS( MADLIB_SCHEMA.svec_dmin( 1., MADLIB_SCHEMA.svec_dot(tf_idf, testdoc)
312  / (MADLIB_SCHEMA.svec_l2norm(tf_idf)*MADLIB_SCHEMA.svec_l2norm(testdoc))))/3.141592654) angular_distance
313  FROM weights,(SELECT tf_idf testdoc FROM weights WHERE docnum = 1 LIMIT 1) foo
314  ORDER BY 1;
315 
316 docnum | angular_distance
317 --------+------------------
318  1 | 0
319  2 | 78.8235846096986
320  3 | 89.9999999882484
321  4 | 80.0232034288617
322 \endcode
323  We can see that the angular distance between document 1 and itself
324  is 0 degrees and between document 1 and 3 is 90 degrees because they
325  share no features at all. The angular distance can now be plugged into
326  machine learning algorithms that rely on a distance measure between
327  data points.
328 
329  SVEC also provides functionality for declaring an array given an array of positions and an array of values; intermediate values between those
330  are set to a base value that the user provides in the same function call. In the example below, the first array of integers represents the
331  positions for the second array (the array of floats). Positions do not need to come in sorted order.
332  The third value represents the desired maximum size of the array. This ensures that the array is of that size
333  even if the last position is not. If max size < 1, that value is ignored and the array will end at the last position in the position vector. The final value is a float representing the base value to be used between the declared ones (0 would be a common candidate):
334 \code
335 sql> SELECT MADLIB_SCHEMA.svec_cast_positions_float8arr(ARRAY[1,2,7,5,87],ARRAY[.1,.2,.7,.5,.87],90,0.0);
336 
337  svec_cast_positions_float8arr
338 -----------------------------------------------------
339 {1,1,2,1,1,1,79,1,3}:{0.1,0.2,0,0.5,0,0.7,0,0.87,0}
340 (1 row)
341 \endcode
342 
343  Other examples of svecs usage can be found in the k-means module.
344 
345 @sa File svec.sql_in documenting the SQL functions.
346 
347 @internal
348 @sa File sparse_vector.c documenting the implementation in C.
349 @endinternal
350 */
351 
352 
353 --! @file svec.sql_in
354 --!
355 
356 -- DROP SCHEMA MADLIB_SCHEMA CASCADE;
357 -- CREATE SCHEMA MADLIB_SCHEMA;
358 
359 -- DROP TYPE IF EXISTS MADLIB_SCHEMA.svec CASCADE;
--! Shell declaration of the svec type. It lets the I/O functions that
--! follow name the type in their signatures before the complete
--! CREATE TYPE statement further down fills in the definition.
CREATE TYPE MADLIB_SCHEMA.svec;
361 
--! Input function of the svec type: constructs an SVEC from a CSTRING.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_in(
    cstring
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME'
IMMUTABLE
STRICT
LANGUAGE C;
368 
--! Output function of the svec type: converts an SVEC to a CSTRING.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_out(
    MADLIB_SCHEMA.svec
)
RETURNS cstring
AS 'MODULE_PATHNAME'
IMMUTABLE
STRICT
LANGUAGE C;
375 
--! Receive function of the svec type: converts the internal
--! representation of an SVEC into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_recv(
    internal
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME'
IMMUTABLE
STRICT
LANGUAGE C;
382 
--! Send function of the svec type: converts an SVEC to BYTEA.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_send(
    MADLIB_SCHEMA.svec
)
RETURNS bytea
AS 'MODULE_PATHNAME'
IMMUTABLE
STRICT
LANGUAGE C;
389 
--! Complete definition of the svec base type.
--!
--! The type is variable-length, double-aligned and uses EXTENDED storage.
--! svec_in / svec_out are its input and output functions, and
--! svec_recv / svec_send are its receive and send functions.
CREATE TYPE MADLIB_SCHEMA.svec (
    INTERNALLENGTH = VARIABLE,
    INPUT          = MADLIB_SCHEMA.svec_in,
    OUTPUT         = MADLIB_SCHEMA.svec_out,
    RECEIVE        = MADLIB_SCHEMA.svec_recv,
    SEND           = MADLIB_SCHEMA.svec_send,
    STORAGE        = EXTENDED,
    ALIGNMENT      = double
);
399 
--! Basic floating point scalar operator: MIN of two float8 values.
--!
--! Bound to the C symbol float8_min. Not declared STRICT, so the C code
--! is invoked even when an argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmin(
    float8,
    float8
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8_min'
LANGUAGE C
IMMUTABLE;
403 
--! Basic floating point scalar operator: MAX of two float8 values.
--!
--! Bound to the C symbol float8_max. Not declared STRICT, so the C code
--! is invoked even when an argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmax(
    float8,
    float8
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8_max'
LANGUAGE C
IMMUTABLE;
--! Counts the number of non-zero entries in the input vector: the second
--! argument is capped at 1 and then added to the first.
--!
--! Used as the sfunc in the svec_count_nonzero() aggregate below.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_count(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_count'
LANGUAGE C
IMMUTABLE
STRICT;
412 
--! Adds two SVECs together, element by element.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_plus'
LANGUAGE C
IMMUTABLE
STRICT;
416 
--! Subtracts the second SVEC from the first, element by element.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_minus'
LANGUAGE C
IMMUTABLE
STRICT;
420 
--! Computes the logarithm of each element of the input SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_log(
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_log'
LANGUAGE C
IMMUTABLE
STRICT;
--! Divides the first SVEC by the second, element by element.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_div'
LANGUAGE C
IMMUTABLE
STRICT;
--! Multiplies two SVECs together, element by element.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_mult'
LANGUAGE C
IMMUTABLE
STRICT;
--! Raises each element of the first SVEC to the power given by the second
--! SVEC, which must have dimension 1 (a scalar).
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pow(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_pow'
LANGUAGE C
IMMUTABLE
STRICT;
436 
--! Returns true if two SVECs are equal. SVECs of different size are
--! never equal: the function returns false for them.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS boolean
AS 'MODULE_PATHNAME', 'svec_eq'
LANGUAGE C
IMMUTABLE
STRICT;
440 
--! Returns true if two SVECs are equal, not counting zeros (zero equals
--! anything). If the two SVECs are of different size, the function
--! essentially zero-pads the shorter one and performs the comparison.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq_non_zero(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS boolean
AS 'MODULE_PATHNAME', 'svec_eq_non_zero'
LANGUAGE C
IMMUTABLE
STRICT;
444 
--! Returns true if the left SVEC contains the right one, meaning that
--! every non-zero value in the right SVEC equals the value in the left one.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_contains(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS boolean
AS 'MODULE_PATHNAME', 'svec_contains'
LANGUAGE C
IMMUTABLE
STRICT;
448 
--! Returns true if two float8 arrays are equal.
--!
--! Bound to the C symbol float8arr_equals. Not declared STRICT, so the
--! C code is invoked even when an argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_eq(
    float8[],
    float8[]
)
RETURNS boolean
AS 'MODULE_PATHNAME', 'float8arr_equals'
LANGUAGE C
IMMUTABLE;
452 
--! Subtracts the second array from the first array, element by element.
--! The result is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_float8arr(
    float8[],
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_minus_float8arr'
LANGUAGE C
IMMUTABLE;
456 
--! Subtracts the second argument (an SVEC) from the first (a float8
--! array), element by element. The result is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_svec(
    float8[],
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_minus_svec'
LANGUAGE C
IMMUTABLE;
460 
--! Subtracts the second argument (a float8 array) from the first (an
--! SVEC), element by element. The result is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus_float8arr(
    MADLIB_SCHEMA.svec,
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_minus_float8arr'
LANGUAGE C
IMMUTABLE;
464 
--! Adds two float8 arrays together, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_float8arr(
    float8[],
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_plus_float8arr'
LANGUAGE C
IMMUTABLE;
468 
--! Adds a float8 array and an SVEC, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_svec(
    float8[],
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_plus_svec'
LANGUAGE C
IMMUTABLE;
472 
--! Adds an SVEC and a float8 array, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus_float8arr(
    MADLIB_SCHEMA.svec,
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_plus_float8arr'
LANGUAGE C
IMMUTABLE;
476 
--! Multiplies two float8 arrays, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_float8arr(
    float8[],
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_mult_float8arr'
LANGUAGE C
IMMUTABLE;
480 
--! Multiplies a float8 array and an SVEC, element by element. The result
--! is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_svec(
    float8[],
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_mult_svec'
LANGUAGE C
IMMUTABLE;
484 
--! Multiplies an SVEC and a float8 array, element by element. The result
--! is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult_float8arr(
    MADLIB_SCHEMA.svec,
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_mult_float8arr'
LANGUAGE C
IMMUTABLE;
488 
--! Divides the first float8 array by the second, element by element.
--! The result is returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_float8arr(
    float8[],
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_div_float8arr'
LANGUAGE C
IMMUTABLE;
492 
--! Divides a float8 array by an SVEC, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_svec(
    float8[],
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'float8arr_div_svec'
LANGUAGE C
IMMUTABLE;
496 
--! Divides an SVEC by a float8 array, element by element. The result is
--! returned as an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div_float8arr(
    MADLIB_SCHEMA.svec,
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_div_float8arr'
LANGUAGE C
IMMUTABLE;
500 
--! Computes the dot product of two SVECs.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_dot'
LANGUAGE C
IMMUTABLE
STRICT;
504 
--! Computes the dot product of two float8 arrays.
--!
--! Bound to the C symbol float8arr_dot.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(
    float8[],
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_dot'
LANGUAGE C
IMMUTABLE
STRICT;
508 
--! Computes the dot product of an SVEC and a float8 array.
--!
--! Bound to the C symbol svec_dot_float8arr.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(
    MADLIB_SCHEMA.svec,
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_dot_float8arr'
LANGUAGE C
IMMUTABLE
STRICT;
512 
--! Computes the dot product of a float8 array and an SVEC.
--!
--! Bound to the C symbol float8arr_dot_svec.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(
    float8[],
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_dot_svec'
LANGUAGE C
IMMUTABLE
STRICT;
516 
--! Computes the l2norm of an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_l2norm'
LANGUAGE C
IMMUTABLE
STRICT;
520 
--! Computes the l2norm of a float8 array.
--!
--! Bound to the C symbol float8arr_l2norm.
--! Declared STRICT, matching svec_l2norm for SVECs and both svec_l1norm
--! overloads: a NULL array yields NULL instead of being handed to the
--! C code.
--! NOTE(review): confirm no caller relied on the C function being invoked
--! with a NULL argument.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_l2norm'
LANGUAGE C
IMMUTABLE
STRICT;
524 
--! Computes the l2norm distance between two SVECs.
--!
--! Bound to the C symbol svec_svec_l2norm.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_svec_l2norm'
IMMUTABLE
STRICT
LANGUAGE C;
--! Computes the l1norm distance between two SVECs.
--!
--! Bound to the C symbol svec_svec_l1norm.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_svec_l1norm'
IMMUTABLE
STRICT
LANGUAGE C;
534 
--! Computes the l1norm of an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_l1norm'
LANGUAGE C
IMMUTABLE
STRICT;
538 
--! Computes the l1norm of a float8 array.
--!
--! Bound to the C symbol float8arr_l1norm.
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_l1norm'
LANGUAGE C
IMMUTABLE
STRICT;
542 
--! Computes the angle between two SVECs in radians.
--!
--! Bound to the C symbol svec_svec_angle.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.angle(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_svec_angle'
IMMUTABLE
STRICT
LANGUAGE C;
547 
--! Computes the Tanimoto distance between two SVECs.
--!
--! Bound to the C symbol svec_svec_tanimoto_distance.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tanimoto_distance(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_svec_tanimoto_distance'
IMMUTABLE
STRICT
LANGUAGE C;
552 
--! Unnests an SVEC into a table of uncompressed values: a set of float8
--! rows is returned.
--!
--! Not declared STRICT, so the C code is invoked even when the argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_unnest(
    MADLIB_SCHEMA.svec
)
RETURNS SETOF float8
AS 'MODULE_PATHNAME', 'svec_unnest'
LANGUAGE C
IMMUTABLE;
556 
--! Appends an element (the float8 argument) to the back of an SVEC.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pivot(
    MADLIB_SCHEMA.svec,
    float8
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_pivot'
LANGUAGE C
IMMUTABLE;
560 
--! Sums the elements of an SVEC.
--!
--! Bound to the C symbol svec_summate.
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_summate'
LANGUAGE C
IMMUTABLE
STRICT;
564 
--! Sums the elements of a float8 array.
--!
--! Bound to the C symbol float8arr_summate.
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_summate'
LANGUAGE C
IMMUTABLE
STRICT;
568 
--! Computes the median element of a float8 array.
--!
--! Bound to the C symbol float8arr_median.
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(
    float8[]
)
RETURNS float8
AS 'MODULE_PATHNAME', 'float8arr_median'
LANGUAGE C
IMMUTABLE
STRICT;
--! Computes the median element of an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(
    MADLIB_SCHEMA.svec
)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_median'
LANGUAGE C
IMMUTABLE
STRICT;
576 
--! Compares an SVEC to a float8 and returns, as an int8 array, the
--! positions of all elements not equal to that float.
--! Element index here starts at 0.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_positions(
    MADLIB_SCHEMA.svec,
    float8
)
RETURNS int8[]
AS 'MODULE_PATHNAME', 'svec_nonbase_positions'
LANGUAGE C
IMMUTABLE
STRICT;
580 
--! Compares an SVEC to a float8 and returns, as a float8 array, the
--! values of all elements not equal to that float.
--!
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_values(
    MADLIB_SCHEMA.svec,
    float8
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'svec_nonbase_values'
LANGUAGE C
IMMUTABLE
STRICT;
584 
--! Casts an int2 into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int2(
    int2
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_int2'
LANGUAGE C
IMMUTABLE
STRICT;
--! Casts an int4 into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int4(
    int4
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_int4'
LANGUAGE C
IMMUTABLE
STRICT;
--! Casts an int8 (bigint) into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int8(
    bigint
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_int8'
LANGUAGE C
IMMUTABLE
STRICT;
--! Casts a float4 into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float4(
    float4
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_float4'
LANGUAGE C
IMMUTABLE
STRICT;
--! Casts a float8 into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8(
    float8
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_float8'
LANGUAGE C
IMMUTABLE
STRICT;
--! Casts a numeric into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_numeric(
    numeric
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_numeric'
LANGUAGE C
IMMUTABLE
STRICT;
609 
--! Casts an int2 into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(
    int2
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_int2'
LANGUAGE C
IMMUTABLE
STRICT;
613 
--! Casts an int4 into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(
    int4
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_int4'
LANGUAGE C
IMMUTABLE
STRICT;
617 
--! Casts an int8 (bigint) into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(
    bigint
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_int8'
LANGUAGE C
IMMUTABLE
STRICT;
621 
--! Casts a float4 into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(
    float4
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_float4'
LANGUAGE C
IMMUTABLE
STRICT;
625 
--! Casts a float8 into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(
    float8
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_float8'
LANGUAGE C
IMMUTABLE
STRICT;
629 
--! Casts a numeric into a float8 array.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(
    numeric
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'float8arr_cast_numeric'
LANGUAGE C
IMMUTABLE
STRICT;
633 
--! Casts a float8 array into an SVEC.
--!
--! STRICT: returns NULL when the argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(
    float8[]
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_float8arr'
LANGUAGE C
IMMUTABLE
STRICT;
637 
--! Casts an array of int8 positions and an array of float8 values into
--! an SVEC.
--!
--! Per the module documentation, the arguments are, in order: the
--! positions, the values, the desired maximum size of the result
--! (ignored when < 1) and the base value used between declared positions.
--! STRICT: returns NULL when any argument is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_positions_float8arr(
    int8[],
    float8[],
    int8,
    float8
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_cast_positions_float8arr'
LANGUAGE C
IMMUTABLE
STRICT;
641 
--! Casts an SVEC into a float8 array.
--!
--! Not declared STRICT, so the C code is invoked even when the argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_return_array(
    MADLIB_SCHEMA.svec
)
RETURNS float8[]
AS 'MODULE_PATHNAME', 'svec_return_array'
LANGUAGE C
IMMUTABLE;
645 
--! Concatenates two SVECs.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_concat'
LANGUAGE C
IMMUTABLE;
649 
--! Replicates n copies of an SVEC (n being the int4 argument) and
--! concatenates them together.
--!
--! Not declared STRICT, so the C code is invoked even when an argument
--! is NULL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat_replicate(
    int4,
    MADLIB_SCHEMA.svec
)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_concat_replicate'
LANGUAGE C
IMMUTABLE;
653 
654 --! Returns the dimension of an SVEC.
655 --!
656 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dimension(MADLIB_SCHEMA.svec) RETURNS integer AS 'MODULE_PATHNAME', 'svec_dimension' LANGUAGE C IMMUTABLE;
657 
--! Applies a given function to each element of an SVEC.
--!
--! The text argument identifies the function to apply.
--! NOTE(review): the accepted form of that text (plain name, signature, ...)
--! is decided by the C implementation; confirm there.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_lapply(text,MADLIB_SCHEMA.svec)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_lapply'
LANGUAGE C IMMUTABLE;

--! Appends a run-length block to the back of an SVEC.
--!
--! NOTE(review): the float8 argument is presumably the value of the block and
--! the int8 argument its run length; confirm against the C implementation.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_append(MADLIB_SCHEMA.svec,float8,int8)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_append'
LANGUAGE C IMMUTABLE;

--! Projects onto an element of an SVEC, returning it as a float8.
--!
--! The int4 argument selects the element.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_proj(MADLIB_SCHEMA.svec,int4)
RETURNS float8
AS 'MODULE_PATHNAME', 'svec_proj'
LANGUAGE C IMMUTABLE;
669 
--! Extracts a subvector of an SVEC, given the start and end indices of the subvector.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_subvec(MADLIB_SCHEMA.svec,int4,int4)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_subvec'
LANGUAGE C IMMUTABLE;

--! Reverses the elements of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_reverse'
LANGUAGE C IMMUTABLE;

--! Replaces the subvector of a given SVEC, beginning at a given start index, with another SVEC.
--!
--! Element indices start at 1.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_change(MADLIB_SCHEMA.svec,int4,MADLIB_SCHEMA.svec)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_change'
LANGUAGE C IMMUTABLE;
681 
--! Computes the hash of an SVEC.
--!
--! Declared STRICT: a NULL argument yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_hash(MADLIB_SCHEMA.svec)
RETURNS int4
AS 'MODULE_PATHNAME', 'svec_hash'
LANGUAGE C IMMUTABLE STRICT;

--! Computes the word-occurrence vector of a document.
--!
--! Bound to the C symbol gp_extract_feature_histogram.
--! NOTE(review): which text[] argument is the dictionary and which is the
--! document is not visible from this declaration; confirm before relying on
--! the argument order.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sfv(text[], text[])
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'gp_extract_feature_histogram'
LANGUAGE C IMMUTABLE;
--! Sorts an array of texts. This function should be in MADlib common.
--!
--! The array is expanded into rows, the rows are ordered, and the ordered
--! rows are collected back into an array.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sort(text[])
RETURNS text[]
LANGUAGE SQL
AS $$
    SELECT ARRAY(
        SELECT unnest($1) AS word
        ORDER BY word
    );
$$;
696 
--! Converts an SVEC to its text string form.
--!
--! Declared STRICT: a NULL argument yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_to_string(MADLIB_SCHEMA.svec)
RETURNS text
AS 'MODULE_PATHNAME', 'svec_to_string'
LANGUAGE C IMMUTABLE STRICT;

--! Converts a text string to an SVEC.
--!
--! Declared STRICT: a NULL argument yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_from_string(text)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_from_string'
LANGUAGE C IMMUTABLE STRICT;
707 
708 /*
709 DROP OPERATOR IF EXISTS || ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
710 DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
711 DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
712 DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
713 DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
714 DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
715 DROP OPERATOR IF EXISTS ^ ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
716 */
717 
-- Binary operators taking an svec on both sides. Each one simply binds an
-- operator symbol to the function named in its PROCEDURE clause.

-- svec || svec : concatenation (svec_concat)
CREATE OPERATOR MADLIB_SCHEMA.|| (
    PROCEDURE = MADLIB_SCHEMA.svec_concat,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec - svec : bound to svec_minus
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.svec_minus,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec + svec : bound to svec_plus
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.svec_plus,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec / svec : bound to svec_div
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.svec_div,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec %*% svec : bound to svec_dot
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec * svec : bound to svec_mult
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.svec_mult,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec ^ svec : bound to svec_pow
CREATE OPERATOR MADLIB_SCHEMA.^ (
    PROCEDURE = MADLIB_SCHEMA.svec_pow,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);
754 
755 -- float8[] operators
756 -- DROP OPERATOR IF EXISTS = ( float8[], float8[]);
757 /*
758 DROP OPERATOR IF EXISTS %*% ( float8[], MADLIB_SCHEMA.svec);
759 DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, float8[]);
760 DROP OPERATOR IF EXISTS %*% ( float8[], float8[]);
761 DROP OPERATOR IF EXISTS - ( float8[], float8[]);
762 DROP OPERATOR IF EXISTS + ( float8[], float8[]);
763 DROP OPERATOR IF EXISTS * ( float8[], float8[]);
764 DROP OPERATOR IF EXISTS / ( float8[], float8[]);
765 DROP OPERATOR IF EXISTS - ( float8[], MADLIB_SCHEMA.svec);
766 DROP OPERATOR IF EXISTS + ( float8[], MADLIB_SCHEMA.svec);
767 DROP OPERATOR IF EXISTS * ( float8[], MADLIB_SCHEMA.svec);
768 DROP OPERATOR IF EXISTS / ( float8[], MADLIB_SCHEMA.svec);
769 DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, float8[]);
770 DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, float8[]);
771 DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, float8[]);
772 DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, float8[]);
773 */
774 
775 /*
776 CREATE OPERATOR MADLIB_SCHEMA.= (
777  leftarg = float8[],
778  rightarg = float8[],
779  procedure = MADLIB_SCHEMA.float8arr_eq,
780  commutator = operator(MADLIB_SCHEMA.=) ,
781 -- negator = operator(MADLIB_SCHEMA.<>) ,
782  restrict = eqsel, join = eqjoinsel
783 );
784 */
785 
-- %*% overloads that involve a float8[] operand. All three are bound to
-- svec_dot; the overload is selected by the declared argument types.

-- float8[] %*% float8[]
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] %*% svec
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec %*% float8[]
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);
-- Arithmetic operators taking a float8[] on both sides.

-- float8[] - float8[]
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.float8arr_minus_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] + float8[]
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_plus_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] * float8[]
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.float8arr_mult_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] / float8[]
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_div_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);
821 
-- Arithmetic operators with a float8[] on the left and an svec on the right.

-- float8[] - svec
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.float8arr_minus_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] + svec
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_plus_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] * svec
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.float8arr_mult_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] / svec
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_div_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);
842 
-- Arithmetic operators with an svec on the left and a float8[] on the right.

-- svec - float8[]
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.svec_minus_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec + float8[]
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.svec_plus_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec * float8[]
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.svec_mult_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec / float8[]
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.svec_div_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);
863 
864 /*
865 DROP CAST IF EXISTS (int2 AS MADLIB_SCHEMA.svec) ;
866 DROP CAST IF EXISTS (integer AS MADLIB_SCHEMA.svec) ;
867 DROP CAST IF EXISTS (bigint AS MADLIB_SCHEMA.svec) ;
868 DROP CAST IF EXISTS (float4 AS MADLIB_SCHEMA.svec) ;
869 DROP CAST IF EXISTS (float8 AS MADLIB_SCHEMA.svec) ;
870 DROP CAST IF EXISTS (numeric AS MADLIB_SCHEMA.svec) ;
871 */
872 
-- Casts from scalar numeric types to svec. The AS IMPLICIT clause is left
-- off (it was commented out on every statement), so these casts only apply
-- when requested explicitly.
CREATE CAST (int2 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int2(int2);      -- AS IMPLICIT
CREATE CAST (integer AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int4(integer);   -- AS IMPLICIT
CREATE CAST (bigint AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int8(bigint);    -- AS IMPLICIT
CREATE CAST (float4 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float4(float4);  -- AS IMPLICIT
CREATE CAST (float8 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8(float8);  -- AS IMPLICIT
CREATE CAST (numeric AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_numeric(numeric); -- AS IMPLICIT
879 
880 /*
881 DROP CAST IF EXISTS (int2 AS float8[]) ;
882 DROP CAST IF EXISTS (integer AS float8[]) ;
883 DROP CAST IF EXISTS (bigint AS float8[]) ;
884 DROP CAST IF EXISTS (float4 AS float8[]) ;
885 DROP CAST IF EXISTS (float8 AS float8[]) ;
886 DROP CAST IF EXISTS (numeric AS float8[]) ;
887 */
888 
889 -- CREATE CAST (int2 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(int2) ; -- AS IMPLICIT;
890 -- CREATE CAST (integer AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(integer) ; -- AS IMPLICIT;
891 -- CREATE CAST (bigint AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(bigint) ; -- AS IMPLICIT;
892 -- CREATE CAST (float4 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(float4) ; -- AS IMPLICIT;
893 -- CREATE CAST (float8 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(float8) ; -- AS IMPLICIT;
894 -- CREATE CAST (numeric AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(numeric) ; -- AS IMPLICIT;
895 
896 -- DROP CAST IF EXISTS (MADLIB_SCHEMA.svec AS float8[]) ;
897 -- DROP CAST IF EXISTS (float8[] AS MADLIB_SCHEMA.svec) ;
898 
-- Casts between svec and float8[] in both directions. AS IMPLICIT is left
-- off, so both casts must be requested explicitly.
CREATE CAST (MADLIB_SCHEMA.svec AS float8[])
    WITH FUNCTION MADLIB_SCHEMA.svec_return_array(MADLIB_SCHEMA.svec); -- AS IMPLICIT
CREATE CAST (float8[] AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(float8[]);         -- AS IMPLICIT
901 
902 -- DROP OPERATOR IF EXISTS = (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
903 
904 
-- svec = svec, bound to svec_eq. The operator is its own commutator and uses
-- the standard equality selectivity estimators.
-- NOTE(review): the negator clause is disabled; presumably because the <>
-- operator defined for svec is an L2-norm comparison rather than the inverse
-- of svec_eq. Confirm before re-enabling.
CREATE OPERATOR MADLIB_SCHEMA.= (
    procedure = MADLIB_SCHEMA.svec_eq,
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    commutator = operator(MADLIB_SCHEMA.=),
--  negator = operator(MADLIB_SCHEMA.<>),
    restrict = eqsel,
    join = eqjoinsel
);
911 
--! Transition function for the mean(svec) aggregate.
--!
--! Takes the running FLOAT[] state and the next SVEC, and returns the new
--! state. No link symbol is given, so the C symbol defaults to the SQL
--! function name.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_transition(FLOAT[], MADLIB_SCHEMA.svec)
RETURNS FLOAT[]
AS 'MODULE_PATHNAME'
IMMUTABLE LANGUAGE C;

--! Preliminary merge function for the mean(svec) aggregate.
--!
--! Combines two FLOAT[] states into one.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_prefunc(FLOAT[], FLOAT[])
RETURNS FLOAT[]
AS 'MODULE_PATHNAME'
IMMUTABLE LANGUAGE C;

--! Final function for the mean(svec) aggregate.
--!
--! Turns the accumulated FLOAT[] state into the resulting SVEC.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_final(FLOAT[])
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME'
IMMUTABLE LANGUAGE C;
929 
--! Aggregate that computes the element-wise mean of a list of vectors.
--!
--! The state is a FLOAT[]; the prefunc clause is only emitted when building
--! for Greenplum.
CREATE AGGREGATE MADLIB_SCHEMA.mean(MADLIB_SCHEMA.svec) (
    STYPE = FLOAT[],
    SFUNC = MADLIB_SCHEMA.svec_mean_transition,
    m4_ifdef(`__GREENPLUM__',`prefunc = MADLIB_SCHEMA.svec_mean_prefunc,')
    FINALFUNC = MADLIB_SCHEMA.svec_mean_final
);
938 
--! Aggregate that provides the element-wise sum of a list of vectors.
--!
--! svec_plus serves as the transition function and, on Greenplum, also as
--! the preliminary merge function.
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec) (
    STYPE = MADLIB_SCHEMA.svec,
    INITCOND = '{1}:{0.}', -- Zero
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_plus,')
    SFUNC = MADLIB_SCHEMA.svec_plus
);
--! Aggregate that provides a tally of nonzero entries in a list of vectors.
--!
--! svec_count is the transition function; on Greenplum, partial states are
--! merged with svec_plus.
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec) (
    STYPE = MADLIB_SCHEMA.svec,
    INITCOND = '{1}:{0.}', -- Zero
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_plus,')
    SFUNC = MADLIB_SCHEMA.svec_count
);
958 
--! Aggregate that turns a list of float8 values into an SVEC.
--!
--! On Greenplum builds that have ordered aggregates this is created as an
--! ORDERED aggregate without a prefunc; on other Greenplum builds
--! svec_concat is used as the prefunc instead.
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_agg(float8);
CREATE
m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED'))
AGGREGATE MADLIB_SCHEMA.svec_agg (float8) (
    STYPE = MADLIB_SCHEMA.svec,
    -- The optional prefunc clause carries its own trailing comma, so it must
    -- stay ahead of the final clause.
    m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `', ``prefunc=MADLIB_SCHEMA.svec_concat,''))
    SFUNC = MADLIB_SCHEMA.svec_pivot
);
969 
--! Aggregate that computes the median element of a list of float8 values.
--!
--! The values are accumulated into an SVEC state with svec_pivot, and
--! svec_median is applied to that state as the final function.
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory(float8);
CREATE AGGREGATE MADLIB_SCHEMA.svec_median_inmemory(float8) (
    STYPE = MADLIB_SCHEMA.svec,
    SFUNC = MADLIB_SCHEMA.svec_pivot,
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_concat,')
    FINALFUNC = MADLIB_SCHEMA.svec_median
);
979 
-- Comparisons based on L2 Norm
--! Returns true if the l2 norm of the first SVEC is less than that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_lt(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_lt'
IMMUTABLE LANGUAGE C;

--! Returns true if the l2 norm of the first SVEC is less than or equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_le(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_le'
IMMUTABLE LANGUAGE C;

--! Returns true if the l2 norm of the first SVEC is equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_eq(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_eq'
IMMUTABLE LANGUAGE C;

--! Returns true if the l2 norm of the first SVEC is not equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ne(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_ne'
IMMUTABLE LANGUAGE C;

--! Returns true if the l2 norm of the first SVEC is greater than that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_gt(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_gt'
IMMUTABLE LANGUAGE C;

--! Returns true if the l2 norm of the first SVEC is greater than or equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ge(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS bool
AS 'MODULE_PATHNAME', 'svec_l2_ge'
IMMUTABLE LANGUAGE C;

--! Returns an integer indicating how the l2 norms of two SVECs compare.
--!
--! Registered as the btree support function of the svec_l2_ops operator class.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_cmp(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
RETURNS integer
AS 'MODULE_PATHNAME', 'svec_l2_cmp'
IMMUTABLE LANGUAGE C;
1008 
--! Normalizes an SVEC, that is, divides all of its elements by its norm/magnitude.
--!
--! Bound to the C symbol svec_normalize. Declared STRICT: a NULL argument
--! yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalize(MADLIB_SCHEMA.svec)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_normalize'
STRICT IMMUTABLE LANGUAGE C;
1014 /*
1015 DROP OPERATOR IF EXISTS < (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1016 DROP OPERATOR IF EXISTS <= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1017 DROP OPERATOR IF EXISTS <> (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
1018 DROP OPERATOR IF EXISTS == (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1019 DROP OPERATOR IF EXISTS > (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1020 DROP OPERATOR IF EXISTS >= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1021 DROP OPERATOR IF EXISTS *|| (int4, MADLIB_SCHEMA.svec) ;
1022 */
1023 
-- svec < svec, comparing L2 norms through svec_l2_lt.
-- Commutes with > and is negated by >=.
CREATE OPERATOR MADLIB_SCHEMA.< (
    procedure = MADLIB_SCHEMA.svec_l2_lt,
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    commutator = operator(MADLIB_SCHEMA.>),
    negator = operator(MADLIB_SCHEMA.>=),
    restrict = scalarltsel,
    join = scalarltjoinsel
);

-- svec <= svec, comparing L2 norms through svec_l2_le.
-- Commutes with >= and is negated by >.
CREATE OPERATOR MADLIB_SCHEMA.<= (
    procedure = MADLIB_SCHEMA.svec_l2_le,
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    commutator = operator(MADLIB_SCHEMA.>=),
    negator = operator(MADLIB_SCHEMA.>),
    restrict = scalarltsel,
    join = scalarltjoinsel
);
-- svec <> svec: true when the L2 norms of the two vectors differ.
--
-- The operator must be bound to svec_l2_ne. Binding it to svec_l2_eq (as was
-- done previously) makes <> return true exactly when the norms are EQUAL.
-- Its negator is the L2-norm equality operator ==, which in turn names <> as
-- its negator; the = operator is a different comparison (svec_eq) and is not
-- the logical inverse of this one. Inequality selectivity estimators are
-- used so the planner does not treat <> as if it were an equality test.
CREATE OPERATOR MADLIB_SCHEMA.<> (
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    procedure = MADLIB_SCHEMA.svec_l2_ne,
    commutator = operator(MADLIB_SCHEMA.<>),
    negator = operator(MADLIB_SCHEMA.==),
    restrict = neqsel,
    join = neqjoinsel
);
-- svec == svec: true when the L2 norms of the two vectors are equal
-- (svec_l2_eq). This is the equality member of the svec_l2_ops btree
-- operator class.
--
-- Swapping the operands of an L2-norm equality test gives the same test, so
-- the operator is its own commutator. It must not name = as its commutator:
-- that operator is bound to svec_eq, a different comparison, and the planner
-- is allowed to substitute a commutator when it flips the operands.
CREATE OPERATOR MADLIB_SCHEMA.== (
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    procedure = MADLIB_SCHEMA.svec_l2_eq,
    commutator = operator(MADLIB_SCHEMA.==),
    negator = operator(MADLIB_SCHEMA.<>),
    restrict = eqsel,
    join = eqjoinsel
);
-- svec >= svec, comparing L2 norms through svec_l2_ge.
-- Commutes with <= and is negated by <.
CREATE OPERATOR MADLIB_SCHEMA.>= (
    procedure = MADLIB_SCHEMA.svec_l2_ge,
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    commutator = operator(MADLIB_SCHEMA.<=),
    negator = operator(MADLIB_SCHEMA.<),
    restrict = scalargtsel,
    join = scalargtjoinsel
);

-- svec > svec, comparing L2 norms through svec_l2_gt.
-- Commutes with < and is negated by <=.
CREATE OPERATOR MADLIB_SCHEMA.> (
    procedure = MADLIB_SCHEMA.svec_l2_gt,
    leftarg = MADLIB_SCHEMA.svec,
    rightarg = MADLIB_SCHEMA.svec,
    commutator = operator(MADLIB_SCHEMA.<),
    negator = operator(MADLIB_SCHEMA.<=),
    restrict = scalargtsel,
    join = scalargtjoinsel
);
1056 
-- int4 *|| svec: bound to svec_concat_replicate, which replicates the SVEC
-- the given number of times and concatenates the copies.
CREATE OPERATOR MADLIB_SCHEMA.*|| (
    procedure = MADLIB_SCHEMA.svec_concat_replicate,
    leftarg = int4,
    rightarg = MADLIB_SCHEMA.svec
);
1060 
-- Default btree operator class for svec. All five strategy operators and the
-- support function are the svec_l2_* comparisons, so btree indexes and
-- sort-based operations on svec follow the L2-norm ordering. Note that the
-- equality strategy is ==, not =.
CREATE OPERATOR CLASS MADLIB_SCHEMA.svec_l2_ops
    DEFAULT FOR TYPE MADLIB_SCHEMA.svec USING btree AS
        OPERATOR 1 MADLIB_SCHEMA.< ,   -- strategy 1: less than
        OPERATOR 2 MADLIB_SCHEMA.<= ,  -- strategy 2: less than or equal
        OPERATOR 3 MADLIB_SCHEMA.== ,  -- strategy 3: equal
        OPERATOR 4 MADLIB_SCHEMA.>= ,  -- strategy 4: greater than or equal
        OPERATOR 5 MADLIB_SCHEMA.> ,   -- strategy 5: greater than
        FUNCTION 1 MADLIB_SCHEMA.svec_l2_cmp(MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
1069