User Documentation
 All Files Functions Groups
svec.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file svec.sql_in
4  *
5  * @brief SQL type definitions and functions for sparse vector data type
6  * <tt>svec</tt>
7  *
8  * @sa For an introduction to the sparse vector implementation, see the module
9  * description \ref grp_svec.
10  *
11  *//* ----------------------------------------------------------------------- */
12 
13 m4_include(`SQLCommon.m4')
14 
15 /**
16 @addtogroup grp_svec
17 
18 @about
19 
20 This module implements a sparse vector data type, named "svec", which
21 provides compressed storage of vectors that have many duplicate elements.
22 
23 Arrays of floating point numbers for various calculations sometimes have
24 long runs of zeros (or some other default value). This is common in
25 applications like scientific computing, retail optimization, and text
26 processing. Each floating point number takes 8 bytes of storage in memory
27 and/or disk, so saving those zeros is often worthwhile. There are also
28 many computations that can benefit from skipping over the zeros.
29 
30 Consider, for example, the following
31 array of doubles stored as a Postgres/Greenplum "float8[]" data type:
32 
33 \code
34 '{0, 33,...40,000 zeros..., 12, 22 }'::float8[]
35 \endcode
36 
37 This array would occupy slightly more than 320KB of memory or disk, most of
38 it zeros. Even if we were to exploit the null bitmap and store the zeros
39 as nulls, we would still end up with a 5KB null bitmap, which is still not
40 nearly as memory efficient as we'd like. Also, as we perform various
41 operations on the array, we do work on 40,000 fields that turn out to be
42 unimportant.
43 
44 To solve the problems associated with the processing of vectors discussed
45 above, the svec type employs a simple Run Length Encoding (RLE) scheme to
46 represent sparse vectors as pairs of count-value arrays. For example, the
47 array above would be represented as
48 
49 \code
50 '{1,1,40000,1,1}:{0,33,0,12,22}'::MADLIB_SCHEMA.svec
51 \endcode
52 
53 which says there is 1 occurrence of 0, followed by 1 occurrence of 33,
54 followed by 40,000 occurrences of 0, etc. This uses just 5 integers and 5
55 floating point numbers to store the array. Further, it is easy to
56 implement vector operations that can take advantage of the RLE
57 representation to make computations faster. The SVEC module provides a library
58 of such functions.
59 
60 The current version only supports sparse vectors of float8 values. Future
61 versions will support other base types.
62 
63 @usage
64 
65 An SVEC can be constructed directly with a constant expression, as follows:
66 \code
67 SELECT '{n1,n2,...,nk}:{v1,v2,...vk}'::MADLIB_SCHEMA.svec;
68 \endcode
69 where <tt>n1,n2,...,nk</tt> specifies the counts for the values <tt>v1,v2,...,vk</tt>.
70 
71 A float array can be cast to an SVEC:
72 \code
73 SELECT ('{v1,v2,...vk}'::float[])::MADLIB_SCHEMA.svec;
74 \endcode
75 
76 An SVEC can be created with an aggregation:
77 \code
78 SELECT MADLIB_SCHEMA.svec_agg(v1) FROM generate_series(1,k);
79 \endcode
80 
81 An SVEC can be created using the
82 <tt>madlib.svec_cast_positions_float8arr()</tt> function by supplying an
83 array of positions and an array of values at those positions:
84 \code
85 SELECT MADLIB_SCHEMA.svec_cast_positions_float8arr(
86  array[n1,n2,...nk], -- positions of values in vector
87  array[v1,v2,...vk], -- values at each position
88  length, -- length of vector
89  base) -- value at unspecified positions
90 \endcode
91 For example, the following expression:
92 \code
93 SELECT MADLIB_SCHEMA.svec_cast_positions_float8arr(
94  array[1,3,5],
95  array[2,4,6],
96  10,
97  0.0)
98 \endcode
99 produces this SVEC:
100 \code
101  svec_cast_positions_float8arr
102  -------------------------------
103  {1,1,1,1,1,5}:{2,0,4,0,6,0}
104 \endcode
105 
106 Add MADLIB_SCHEMA to the search_path to use the svec operators
107 defined in the module.
108 
109 See the file svec.sql_in for complete syntax.
110 
111 @examp
112 
113 We can use operations with svec type like <, >, *, **, /, =, +, SUM, etc,
114 and they have meanings associated with typical vector operations. For
115 example, the plus (+) operator adds each of the terms of two vectors having
116 the same dimension together.
117 \code
118 sql> SELECT ('{0,1,5}'::float8[]::MADLIB_SCHEMA.svec + '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec)::float8[];
119  float8
120 ---------
121  {4,4,7}
122 \endcode
123 
124 Without the casting into float8[] at the end, we get:
125 \code
126 sql> SELECT '{0,1,5}'::float8[]::MADLIB_SCHEMA.svec + '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec;
127  ?column?
128 ----------
129 {2,1}:{4,7}
130 \endcode
131 
132 A dot product (%*%) between the two vectors will result in a scalar
133 result of type float8. The dot product should be (0*4 + 1*3 + 5*2) = 13,
134 like this:
135 \code
136 sql> SELECT '{0,1,5}'::float8[]::MADLIB_SCHEMA.svec %*% '{4,3,2}'::float8[]::MADLIB_SCHEMA.svec;
137  ?column?
138 ----------
139  13
140 \endcode
141 
142 Special vector aggregate functions are also available. SUM is self
143 explanatory. SVEC_COUNT_NONZERO evaluates the count of non-zero terms
144 in each column found in a set of n-dimensional svecs and returns an
145 svec with the counts. For instance, if we have the vectors {0,1,5},
146 {10,0,3},{0,0,3},{0,1,0}, then executing the SVEC_COUNT_NONZERO() aggregate
147 function would result in {1,2,3}:
148 
149 \code
150 sql> create table list (a MADLIB_SCHEMA.svec);
151 sql> insert into list values ('{0,1,5}'::float8[]), ('{10,0,3}'::float8[]), ('{0,0,3}'::float8[]),('{0,1,0}'::float8[]);
152 
153 sql> SELECT MADLIB_SCHEMA.svec_count_nonzero(a)::float8[] FROM list;
154 svec_count_nonzero
155 -----------------
156  {1,2,3}
157 \endcode
158 
159 We do not use null bitmaps in the svec data type. A null value in an svec
160 is represented explicitly as an NVP (No Value Present) value. For example,
161 we have:
162 \code
163 sql> SELECT '{1,2,3}:{4,null,5}'::MADLIB_SCHEMA.svec;
164  svec
165 -------------------
166  {1,2,3}:{4,NVP,5}
167 
168 sql> SELECT '{1,2,3}:{4,null,5}'::MADLIB_SCHEMA.svec + '{2,2,2}:{8,9,10}'::MADLIB_SCHEMA.svec;
169  ?column?
170  --------------------------
171  {1,2,1,2}:{12,NVP,14,15}
172 \endcode
173 
174 An element of an svec can be accessed using the svec_proj() function,
175 which takes an svec and the index of the element desired.
176 \code
177 sql> SELECT MADLIB_SCHEMA.svec_proj('{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec, 1) + MADLIB_SCHEMA.svec_proj('{4,5,6}:{1,2,3}'::MADLIB_SCHEMA.svec, 15);
178  ?column?
179 ----------
180  7
181 \endcode
182 
183 A subvector of an svec can be accessed using the svec_subvec() function,
184 which takes an svec and the start and end index of the subvector desired.
185 \code
186 sql> SELECT MADLIB_SCHEMA.svec_subvec('{2,4,6}:{1,3,5}'::MADLIB_SCHEMA.svec, 2, 11);
187  svec_subvec
188 -----------------
189  {1,4,5}:{1,3,5}
190 \endcode
191 
192 The elements/subvector of an svec can be changed using the function
193 svec_change(). It takes three arguments: an m-dimensional svec sv1, a
194 start index j, and an n-dimensional svec sv2 such that j + n - 1 <= m,
195 and returns an svec like sv1 but with the subvector sv1[j:j+n-1]
196 replaced by sv2. An example follows:
197 \code
198 sql> SELECT MADLIB_SCHEMA.svec_change('{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec,3,'{2}:{3}'::MADLIB_SCHEMA.svec);
199  svec_change
200 ---------------------
201  {1,1,2,2}:{4,5,3,6}
202 \endcode
203 
204 There are also higher-order functions for processing svecs. For example,
205 the following is the corresponding function for lapply() in R.
206 \code
207 sql> SELECT MADLIB_SCHEMA.svec_lapply('sqrt', '{1,2,3}:{4,5,6}'::MADLIB_SCHEMA.svec);
208  svec_lapply
209 -----------------------------------------------
210  {1,2,3}:{2,2.23606797749979,2.44948974278318}
211 \endcode
212 
213 The full list of functions for operating on svecs is available
214 in svec.sql_in.
215 
216 <b> A More Extensive Example</b>
217 
218  For a text classification example, let's assume we have a dictionary
219  composed of words in a sorted text array:
220 \code
221 sql> create table features (a text[]);
222 sql> insert into features values
223  ('{am,before,being,bothered,corpus,document,i,in,is,me,
224  never,now,one,really,second,the,third,this,until}');
225 \endcode
226  We have a set of documents, each represented as an array of words:
227 \code
228 sql> create table documents(a int,b text[]);
229 sql> insert into documents values
230  (1,'{this,is,one,document,in,the,corpus}'),
231  (2,'{i,am,the,second,document,in,the,corpus}'),
232  (3,'{being,third,never,really,bothered,me,until,now}'),
233  (4,'{the,document,before,me,is,the,third,document}');
234 \endcode
235 
236  Now that we have a dictionary and some documents, we would like to do some
237  document categorization using vector arithmetic on word counts and
238  proportions of dictionary words in each document.
239 
240  To start this process, we'll need to find the dictionary words in each
241  document. We'll prepare what is called a Sparse Feature Vector or SFV
242  for each document. An SFV is a vector of dimension N, where N is the
243  number of dictionary words, and in each cell of an SFV is a count of
244  each dictionary word in the document.
245 
246  Inside the sparse vector library, we have a function that will create
247  an SFV from a document, so we can just do this:
248 \code
249 sql> SELECT MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b)::float8[]
250  FROM documents;
251 
252  svec_sfv
253 -----------------------------------------
254  {0,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0}
255  {0,0,1,1,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1}
256  {1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,2,0,0,0}
257  {0,1,0,0,0,2,0,0,1,1,0,0,0,0,0,2,1,0,0}
258 \endcode
259  Note that the output of MADLIB_SCHEMA.svec_sfv() is an svec for each
260  document containing the count of each of the dictionary words in the
261  ordinal positions of the dictionary. This can more easily be understood
262  by lining up the feature vector and text like this:
263 \code
264 sql> SELECT MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b)::float8[]
265  , b
266  FROM documents;
267 
268  svec_sfv | b
269 -----------------------------------------+--------------------------------------------------
270  {1,0,0,0,1,1,1,1,0,0,0,0,0,0,1,2,0,0,0} | {i,am,the,second,document,in,the,corpus}
271  {0,1,0,0,0,2,0,0,1,1,0,0,0,0,0,2,1,0,0} | {the,document,before,me,is,the,third,document}
272  {0,0,0,0,1,1,0,1,1,0,0,0,1,0,0,1,0,1,0} | {this,is,one,document,in,the,corpus}
273  {0,0,1,1,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1} | {being,third,never,really,bothered,me,until,now}
274 
275 sql> SELECT * FROM features;
276  a
277 --------------------------------------------------------------------------------------------------------
278 {am,before,being,bothered,corpus,document,i,in,is,me,never,now,one,really,second,the,third,this,until}
279 \endcode
280 
281  Now when we look at the document "i am the second document in the corpus",
282  its SFV is {1,3*0,1,1,1,1,6*0,1,2,3*0}. The word "am" is the first ordinate in
283  the dictionary and there is 1 instance of it in the SFV. The word "before"
284  has no instances in the document, so its value is "0" and so on.
285 
286  The function MADLIB_SCHEMA.svec_sfv() can process large
287  numbers of documents into their SFVs in parallel at high speed.
288 
289  The rest of the categorization process is all vector math. The actual
290  count is hardly ever used. Instead, it's turned into a weight. The most
291  common weight is called tf/idf for Term Frequency / Inverse Document
292  Frequency. The calculation for a given term in a given document is
293 \code
294 {#Times in document} * log {#Documents / #Documents the term appears in}.
295 \endcode
296  For instance, the term "document" in document 1 would have weight
297  1 * log (4/3). In document 4, it would have weight 2 * log (4/3).
298  Terms that appear in every document would have tf/idf weight 0, since
299  log (4/4) = log(1) = 0. (Our example has no term like that.) That
300  usually sends a lot of values to 0.
301 
302  For this part of the processing, we'll need to have a sparse vector of
303  the dictionary dimension (19) with the values
304 \code
305 log(#documents/#Documents each term appears in).
306 \endcode
307  There will be one such vector for the whole list of documents (aka the
308  "corpus"). The #documents is just a count of all of the documents, in
309  this case 4, but there is one divisor for each dictionary word and its
310  value is the number of documents in which that word appears.
311  This single vector for the whole corpus can then be scalar product
312  multiplied by each document SFV to produce the Term Frequency/Inverse
313  Document Frequency weights.
314 
315  This can be done as follows:
316 \code
317 sql> create table corpus as
318  (SELECT a, MADLIB_SCHEMA.svec_sfv((SELECT a FROM features LIMIT 1),b) sfv
319  FROM documents);
320 sql> create table weights as
321  (SELECT a docnum, MADLIB_SCHEMA.svec_mult(sfv, logidf) tf_idf
322  FROM (SELECT MADLIB_SCHEMA.svec_log(MADLIB_SCHEMA.svec_div(count(sfv)::MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec_count_nonzero(sfv))) logidf
323  FROM corpus) foo, corpus ORDER BY docnum);
324 sql> SELECT * FROM weights;
325 
326 docnum | tf_idf
327 -------+----------------------------------------------------------------------
328  1 | {4,1,1,1,2,3,1,2,1,1,1,1}:{0,0.69,0.28,0,0.69,0,1.38,0,0.28,0,1.38,0}
329  2 | {1,3,1,1,1,1,6,1,1,3}:{1.38,0,0.69,0.28,1.38,0.69,0,1.38,0.57,0}
330  3 | {2,2,5,1,2,1,1,2,1,1,1}:{0,1.38,0,0.69,1.38,0,1.38,0,0.69,0,1.38}
331  4 | {1,1,3,1,2,2,5,1,1,2}:{0,1.38,0,0.57,0,0.69,0,0.57,0.69,0}
332 \endcode
333 
334  We can now get the "angular distance" between one document and the rest
335  of the documents using the ACOS of the normalized dot product of the document vectors.
336  The following calculates the angular distance between the first document
337  and each of the other documents:
338 \code
339 sql> SELECT docnum,
340  180. * ( ACOS( MADLIB_SCHEMA.svec_dmin( 1., MADLIB_SCHEMA.svec_dot(tf_idf, testdoc)
341  / (MADLIB_SCHEMA.svec_l2norm(tf_idf)*MADLIB_SCHEMA.svec_l2norm(testdoc))))/3.141592654) angular_distance
342  FROM weights,(SELECT tf_idf testdoc FROM weights WHERE docnum = 1 LIMIT 1) foo
343  ORDER BY 1;
344 
345 docnum | angular_distance
346 --------+------------------
347  1 | 0
348  2 | 78.8235846096986
349  3 | 89.9999999882484
350  4 | 80.0232034288617
351 \endcode
352  We can see that the angular distance between document 1 and itself
353  is 0 degrees and between document 1 and 3 is 90 degrees because they
354  share no features at all. The angular distance can now be plugged into
355  machine learning algorithms that rely on a distance measure between
356  data points.
357 
358  SVEC also provides functionality for declaring an array given an array of positions and an array of values; intermediate values between those
359  are set to a base value that the user provides in the same function call. In the example below, the first array of integers gives the
360  positions for the second array (the array of floats). Positions do not need to come in sorted order.
361  The third value represents the desired maximum size of the array. This ensures that the array is of that size
362  even if the last position is not. If max size < 1, that value is ignored and the array will end at the last position in the position vector. The final value is a float representing the base value to be used between the declared ones (0 would be a common candidate):
363 \code
364 sql> SELECT MADLIB_SCHEMA.svec_cast_positions_float8arr(ARRAY[1,2,7,5,87],ARRAY[.1,.2,.7,.5,.87],90,0.0);
365 
366  svec_cast_positions_float8arr
367 -----------------------------------------------------
368 {1,1,2,1,1,1,79,1,3}:{0.1,0.2,0,0.5,0,0.7,0,0.87,0}
369 (1 row)
370 \endcode
371 
372  Other examples of svecs usage can be found in the k-means module.
373 
374 @sa File svec.sql_in documenting the SQL functions.
375 
376 @internal
377 @sa File sparse_vector.c documenting the implementation in C.
378 @endinternal
379 */
380 
381 
382 --! @file svec.sql_in
383 --!
384 
385 -- DROP SCHEMA MADLIB_SCHEMA CASCADE;
386 -- CREATE SCHEMA MADLIB_SCHEMA;
387 
388 -- DROP TYPE IF EXISTS MADLIB_SCHEMA.svec CASCADE;
389 CREATE TYPE MADLIB_SCHEMA.svec; -- shell type: forward declaration so the I/O functions that follow can name svec before its full definition
390 
391 --! SVEC constructor from CSTRING; registered as the input function of the svec type.
392 --! No link symbol is given, so the C symbol defaults to the SQL function name. STRICT: returns NULL for a NULL argument without calling the C code.
393 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_in(cstring)
394  RETURNS MADLIB_SCHEMA.svec
395  AS 'MODULE_PATHNAME'
396  LANGUAGE C IMMUTABLE STRICT;
397 
398 --! Converts SVEC to CSTRING; registered as the output function of the svec type.
399 --! No link symbol is given, so the C symbol defaults to the SQL function name. STRICT: returns NULL for a NULL argument without calling the C code.
400 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_out(MADLIB_SCHEMA.svec)
401  RETURNS cstring
402  AS 'MODULE_PATHNAME'
403  LANGUAGE C IMMUTABLE STRICT;
404 
405 --! Converts SVEC internal (binary) representation to SVEC; registered as the receive function of the svec type.
406 --! No link symbol is given, so the C symbol defaults to the SQL function name. STRICT: returns NULL for a NULL argument without calling the C code.
407 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_recv(internal)
408  RETURNS MADLIB_SCHEMA.svec
409  AS 'MODULE_PATHNAME'
410  LANGUAGE C IMMUTABLE STRICT;
411 
412 --! Converts SVEC to BYTEA; registered as the send function of the svec type.
413 --! No link symbol is given, so the C symbol defaults to the SQL function name. STRICT: returns NULL for a NULL argument without calling the C code.
414 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_send(MADLIB_SCHEMA.svec)
415  RETURNS bytea
416  AS 'MODULE_PATHNAME'
417  LANGUAGE C IMMUTABLE STRICT;
418 
419 CREATE TYPE MADLIB_SCHEMA.svec ( -- full definition of the svec base type
420  internallength = VARIABLE, -- values are variable-length
421  input = MADLIB_SCHEMA.svec_in, -- text (cstring) to svec
422  output = MADLIB_SCHEMA.svec_out, -- svec to text (cstring)
423  send = MADLIB_SCHEMA.svec_send, -- svec to binary (bytea)
424  receive = MADLIB_SCHEMA.svec_recv, -- binary to svec
425  storage=EXTENDED, -- long values may be compressed and moved out of line
426  alignment = double -- stored on 8-byte boundaries
427 );
428 
429 --! Basic floating point scalar operator: MIN of two float8 values.
430 --! C symbol: float8_min. Not STRICT: the C code is also called when an argument is NULL.
431 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmin(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_min' LANGUAGE C IMMUTABLE;
432 
433 --! Basic floating point scalar operator: MAX of two float8 values.
434 --! C symbol: float8_max. Not STRICT: the C code is also called when an argument is NULL.
435 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmax(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_max' LANGUAGE C IMMUTABLE;
437 --! Counts the number of non-zero entries in the input vector: the second argument is capped at 1, then added to the first. Used as the sfunc of the svec_count_nonzero() aggregate.
438 --! C symbol: svec_count. STRICT: returns NULL for any NULL argument without calling the C code.
439 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_count(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec
440 AS 'MODULE_PATHNAME', 'svec_count' STRICT LANGUAGE C IMMUTABLE;
441 
442 --! Adds two SVECs together, element by element, and returns the sum as an SVEC.
443 --! C symbol: svec_plus. STRICT: returns NULL for any NULL argument without calling the C code.
444 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_plus' STRICT LANGUAGE C IMMUTABLE;
445 
446 --! Subtracts the second SVEC from the first, element by element.
447 --! C symbol: svec_minus. STRICT: returns NULL for any NULL argument without calling the C code.
448 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_minus' STRICT LANGUAGE C IMMUTABLE;
449 
450 --! Computes the logarithm of each element of the input SVEC and returns the results as an SVEC.
451 --! C symbol: svec_log. STRICT: returns NULL for a NULL argument without calling the C code.
452 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_log(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_log' STRICT LANGUAGE C IMMUTABLE;
454 --! Divides the first SVEC by the second, element by element.
455 --! C symbol: svec_div. STRICT: returns NULL for any NULL argument without calling the C code.
456 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_div' STRICT LANGUAGE C IMMUTABLE;
458 --! Multiplies two SVECs together, element by element.
459 --! C symbol: svec_mult. STRICT: returns NULL for any NULL argument without calling the C code.
460 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_mult' STRICT LANGUAGE C IMMUTABLE;
462 --! Raises each element of the first SVEC to the power given by the second SVEC, which must have dimension 1 (a scalar).
463 --! C symbol: svec_pow. STRICT: returns NULL for any NULL argument without calling the C code.
464 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pow(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pow' STRICT LANGUAGE C IMMUTABLE;
465 
466 --! Returns true if two SVECs are equal. If the two SVECs are of different sizes, returns false.
467 --! C symbol: svec_eq. STRICT: returns NULL for any NULL argument without calling the C code.
468 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_eq' STRICT LANGUAGE C IMMUTABLE;
469 
470 --! Returns true if two SVECs are equal, not counting zeros (zero equals anything). If the two SVECs are of different sizes, the function essentially zero-pads the shorter one and performs the comparison.
471 --! C symbol: svec_eq_non_zero. STRICT: returns NULL for any NULL argument without calling the C code.
472 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq_non_zero(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_eq_non_zero' STRICT LANGUAGE C IMMUTABLE;
473 
474 --! Returns true if the left SVEC contains the right one, meaning that every non-zero value in the right SVEC equals the corresponding value in the left one.
475 --! C symbol: svec_contains. STRICT: returns NULL for any NULL argument without calling the C code.
476 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_contains(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_contains' STRICT LANGUAGE C IMMUTABLE;
477 
478 --! Returns true if two float8 arrays are equal.
479 --! C symbol: float8arr_equals. Not STRICT: the C code is also called when an argument is NULL.
480 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_eq(float8[],float8[]) RETURNS boolean AS 'MODULE_PATHNAME', 'float8arr_equals' LANGUAGE C IMMUTABLE;
481 
482 --! Subtracts the second array from the first array, element by element; the result is returned as an SVEC.
483 --! C symbol: float8arr_minus_float8arr. Not STRICT: the C code is also called when an argument is NULL.
484 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_minus_float8arr' LANGUAGE C IMMUTABLE;
485 
486 --! Subtracts the second argument (an SVEC) from the first (a float8 array), element by element; the result is returned as an SVEC.
487 --! C symbol: float8arr_minus_svec. Not STRICT: the C code is also called when an argument is NULL.
488 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_minus_svec' LANGUAGE C IMMUTABLE;
489 
490 --! Subtracts the second argument (a float8 array) from the first (an SVEC), element by element; the result is returned as an SVEC.
491 --! C symbol: svec_minus_float8arr. Not STRICT: the C code is also called when an argument is NULL.
492 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_minus_float8arr' LANGUAGE C IMMUTABLE;
493 
494 --! Adds two float8 arrays together, element by element; the result is returned as an SVEC.
495 --! C symbol: float8arr_plus_float8arr. Not STRICT: the C code is also called when an argument is NULL.
496 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_plus_float8arr' LANGUAGE C IMMUTABLE;
497 
498 --! Adds a float8 array and an SVEC, element by element; the result is returned as an SVEC.
499 --! C symbol: float8arr_plus_svec. Not STRICT: the C code is also called when an argument is NULL.
500 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_plus_svec' LANGUAGE C IMMUTABLE;
501 
502 --! Adds an SVEC and a float8 array, element by element; the result is returned as an SVEC.
503 --! C symbol: svec_plus_float8arr. Not STRICT: the C code is also called when an argument is NULL.
504 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_plus_float8arr' LANGUAGE C IMMUTABLE;
505 
506 --! Multiplies two float8 arrays, element by element; the result is returned as an SVEC.
507 --! C symbol: float8arr_mult_float8arr. Not STRICT: the C code is also called when an argument is NULL.
508 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_mult_float8arr' LANGUAGE C IMMUTABLE;
509 
510 --! Multiplies a float8 array and an SVEC, element by element; the result is returned as an SVEC.
511 --! C symbol: float8arr_mult_svec. Not STRICT: the C code is also called when an argument is NULL.
512 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_mult_svec' LANGUAGE C IMMUTABLE;
513 
514 --! Multiplies an SVEC and a float8 array, element by element; the result is returned as an SVEC.
515 --! C symbol: svec_mult_float8arr. Not STRICT: the C code is also called when an argument is NULL.
516 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_mult_float8arr' LANGUAGE C IMMUTABLE;
517 
518 --! Divides the first float8 array by the second, element by element; the result is returned as an SVEC.
519 --! C symbol: float8arr_div_float8arr. Not STRICT: the C code is also called when an argument is NULL.
520 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_div_float8arr' LANGUAGE C IMMUTABLE;
521 
522 --! Divides a float8 array by an SVEC, element by element; the result is returned as an SVEC.
523 --! C symbol: float8arr_div_svec. Not STRICT: the C code is also called when an argument is NULL.
524 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_div_svec' LANGUAGE C IMMUTABLE;
525 
526 --! Divides an SVEC by a float8 array, element by element; the result is returned as an SVEC.
527 --! C symbol: svec_div_float8arr. Not STRICT: the C code is also called when an argument is NULL.
528 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_div_float8arr' LANGUAGE C IMMUTABLE;
529 
530 --! Computes the dot product of two SVECs as a float8 scalar.
531 --! C symbol: svec_dot. STRICT: returns NULL for any NULL argument without calling the C code.
532 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_dot' STRICT LANGUAGE C IMMUTABLE;
533 
534 --! Computes the dot product of two float8 arrays as a float8 scalar (overload of svec_dot).
535 --! C symbol: float8arr_dot. STRICT: returns NULL for any NULL argument without calling the C code.
536 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(float8[],float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_dot' STRICT LANGUAGE C IMMUTABLE;
537 
538 --! Computes the dot product of an SVEC and a float8 array as a float8 scalar (overload of svec_dot).
539 --! C symbol: svec_dot_float8arr. STRICT: returns NULL for any NULL argument without calling the C code.
540 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(MADLIB_SCHEMA.svec,float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_dot_float8arr' STRICT LANGUAGE C IMMUTABLE;
541 
542 --! Computes the dot product of a float8 array and an SVEC as a float8 scalar (overload of svec_dot).
543 --! C symbol: float8arr_dot_svec. STRICT: returns NULL for any NULL argument without calling the C code.
544 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dot(float8[],MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_dot_svec' STRICT LANGUAGE C IMMUTABLE;
545 
546 --! Computes the l2norm of an SVEC as a float8 scalar.
547 --! C symbol: svec_l2norm. STRICT: returns NULL for a NULL argument without calling the C code.
548 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l2norm' STRICT LANGUAGE C IMMUTABLE;
549 
550 --! Computes the l2norm of a float8 array as a float8 scalar (overload of svec_l2norm).
551 --! C symbol: float8arr_l2norm. Not STRICT: the C code is also called when the argument is NULL. NOTE(review): the float8[] overload of svec_l1norm is STRICT; confirm the difference is intended.
552 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l2norm' LANGUAGE C IMMUTABLE;
553 
554 --! Computes the l2norm distance between two SVECs as a float8 scalar.
555 --! C symbol: svec_svec_l2norm. STRICT: returns NULL for any NULL argument without calling the C code.
556 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
557 RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l2norm' LANGUAGE C STRICT IMMUTABLE;
559 --! Computes the l1norm distance between two SVECs as a float8 scalar.
560 --! C symbol: svec_svec_l1norm. STRICT: returns NULL for any NULL argument without calling the C code.
561 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
562 RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l1norm' LANGUAGE C STRICT IMMUTABLE;
563 
564 --! Computes the l1norm of an SVEC as a float8 scalar.
565 --! C symbol: svec_l1norm. STRICT: returns NULL for a NULL argument without calling the C code.
566 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l1norm' STRICT LANGUAGE C IMMUTABLE;
567 
568 --! Computes the l1norm of a float8 array as a float8 scalar (overload of svec_l1norm).
569 --! C symbol: float8arr_l1norm. STRICT: returns NULL for a NULL argument without calling the C code.
570 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l1norm' STRICT LANGUAGE C IMMUTABLE;
571 
572 --! Computes the angle between two SVECs in radians, returned as float8.
573 --! C symbol: svec_svec_angle. STRICT: returns NULL for any NULL argument without calling the C code.
574 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.angle(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
575 RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_angle' LANGUAGE C STRICT IMMUTABLE;
576 
577 --! Computes the Tanimoto distance between two SVECs, returned as float8.
578 --! C symbol: svec_svec_tanimoto_distance. STRICT: returns NULL for any NULL argument without calling the C code.
579 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tanimoto_distance(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec)
580 RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_tanimoto_distance' LANGUAGE C STRICT IMMUTABLE;
581 
582 --! Unnests an SVEC into a table of uncompressed values (set-returning: one float8 row per value).
583 --! C symbol: svec_unnest. Not STRICT: the C code is also called when the argument is NULL.
584 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_unnest(MADLIB_SCHEMA.svec) RETURNS setof float8 AS 'MODULE_PATHNAME', 'svec_unnest' LANGUAGE C IMMUTABLE;
585 
586 --! Appends an element (the float8 argument) to the back of an SVEC and returns the resulting SVEC.
587 --! C symbol: svec_pivot. Not STRICT: the C code is also called when an argument is NULL.
588 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pivot(MADLIB_SCHEMA.svec,float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pivot' LANGUAGE C IMMUTABLE;
589 
590 --! Sums the elements of an SVEC into a float8 scalar.
591 --! C symbol: svec_summate. STRICT: returns NULL for a NULL argument without calling the C code.
592 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_summate' STRICT LANGUAGE C IMMUTABLE;
593 
594 --! Sums the elements of a float8 array into a float8 scalar (overload of svec_elsum).
595 --! C symbol: float8arr_summate. STRICT: returns NULL for a NULL argument without calling the C code.
596 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_summate' STRICT LANGUAGE C IMMUTABLE;
597 
598 --! Computes the median element of a float8 array, returned as float8.
599 --! C symbol: float8arr_median. STRICT: returns NULL for a NULL argument without calling the C code.
600 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_median' STRICT LANGUAGE C IMMUTABLE;
602 --! Computes the median element of an SVEC, returned as float8 (overload of svec_median).
603 --! C symbol: svec_median. STRICT: returns NULL for a NULL argument without calling the C code.
604 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_median' STRICT LANGUAGE C IMMUTABLE;
605 
606 --! Compares an SVEC to a float8 base value and returns the positions of all elements not equal to it as an INT8 array. Element index here starts at 0.
607 --! C symbol: svec_nonbase_positions. STRICT: returns NULL for any NULL argument without calling the C code.
608 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_positions(MADLIB_SCHEMA.svec, FLOAT8) RETURNS INT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_positions' STRICT LANGUAGE C IMMUTABLE;
609 
610 --! Compares an SVEC to a float8 base value and returns the values of all elements not equal to it as a FLOAT8 array.
611 --! C symbol: svec_nonbase_values. STRICT: returns NULL for any NULL argument without calling the C code.
612 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_values(MADLIB_SCHEMA.svec, FLOAT8) RETURNS FLOAT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_values' STRICT LANGUAGE C IMMUTABLE;
613 
615 --! Casts an int2 scalar into an SVEC.
616 --! C symbol: svec_cast_int2. STRICT: returns NULL for a NULL argument without calling the C code.
617 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int2(int2) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int2' STRICT LANGUAGE C IMMUTABLE;
619 --! Casts an int4 scalar into an SVEC.
620 --! C symbol: svec_cast_int4. STRICT: returns NULL for a NULL argument without calling the C code.
621 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int4(int4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int4' STRICT LANGUAGE C IMMUTABLE;
623 --! Casts an int8 (bigint) scalar into an SVEC.
624 --! C symbol: svec_cast_int8. STRICT: returns NULL for a NULL argument without calling the C code.
625 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int8(bigint) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int8' STRICT LANGUAGE C IMMUTABLE;
627 --! Casts a float4 scalar into an SVEC.
628 --! C symbol: svec_cast_float4. STRICT: returns NULL for a NULL argument without calling the C code.
629 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float4(float4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float4' STRICT LANGUAGE C IMMUTABLE;
631 --! Casts a float8 scalar into an SVEC.
632 --! C symbol: svec_cast_float8. STRICT: returns NULL for a NULL argument without calling the C code.
633 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8(float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float8' STRICT LANGUAGE C IMMUTABLE;
635 --! Casts a numeric scalar into an SVEC.
636 --! C symbol: svec_cast_numeric. STRICT: returns NULL for a NULL argument without calling the C code.
637 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_numeric(numeric) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_numeric' STRICT LANGUAGE C IMMUTABLE;
638 
639 --! Casts an int2 scalar into a float8 array.
640 --! C symbol: float8arr_cast_int2. STRICT: returns NULL for a NULL argument without calling the C code.
641 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(int2) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int2' STRICT LANGUAGE C IMMUTABLE;
642 
643 --! Casts an int4 scalar into a float8 array.
644 --! C symbol: float8arr_cast_int4. STRICT: returns NULL for a NULL argument without calling the C code.
645 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(int4) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int4' STRICT LANGUAGE C IMMUTABLE;
646 
647 --! Casts an int8 (bigint) scalar into a float8 array.
648 --! C symbol: float8arr_cast_int8. STRICT: returns NULL for a NULL argument without calling the C code.
649 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(bigint) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int8' STRICT LANGUAGE C IMMUTABLE;
650 
651 --! Casts a float4 scalar into a float8 array.
652 --! C symbol: float8arr_cast_float4. STRICT: returns NULL for a NULL argument without calling the C code.
653 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(float4) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_float4' STRICT LANGUAGE C IMMUTABLE;
654 
655 --! Casts a float8 scalar into a float8 array.
656 --! C symbol: float8arr_cast_float8. STRICT: returns NULL for a NULL argument without calling the C code.
657 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(float8) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_float8' STRICT LANGUAGE C IMMUTABLE;
658 
659 --! Casts a numeric scalar into a float8 array.
660 --! C symbol: float8arr_cast_numeric. STRICT: returns NULL for a NULL argument without calling the C code.
661 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(numeric) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_numeric' STRICT LANGUAGE C IMMUTABLE;
662 
663 --! Casts a float8 array into an SVEC.
664 --! C symbol: svec_cast_float8arr. STRICT: returns NULL for a NULL argument without calling the C code.
665 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float8arr' STRICT LANGUAGE C IMMUTABLE;
666 
667 --! Builds an SVEC from an array of int8 positions and an array of float8 values at those positions; the remaining int8 and float8 arguments are the vector length and the base value used at unspecified positions.
668 --! C symbol: svec_cast_positions_float8arr. STRICT: returns NULL for any NULL argument without calling the C code.
669 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_positions_float8arr(int8[],float8[],int8,float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_positions_float8arr' STRICT LANGUAGE C IMMUTABLE;
670 
--! Casts an SVEC into a float8 array.
--!
--! Not declared STRICT, so the C routine is invoked even for a NULL
--! argument. This function backs the svec-to-float8[] cast declared further
--! down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_return_array(MADLIB_SCHEMA.svec)
    RETURNS float8[]
    AS 'MODULE_PATHNAME', 'svec_return_array'
    IMMUTABLE LANGUAGE C;
674 
--! Concatenates two SVECs.
--!
--! Not declared STRICT. Bound to the || operator on two svec operands
--! further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_concat'
    IMMUTABLE LANGUAGE C;
678 
--! Replicates n copies of an SVEC and concatenates them together.
--!
--! The int4 argument is presumably the copy count n (it is the only integer
--! argument); the second argument is the SVEC to replicate. Not declared
--! STRICT. Bound to the *|| operator further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat_replicate(
    int4,
    MADLIB_SCHEMA.svec
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_concat_replicate'
    IMMUTABLE LANGUAGE C;
682 
--! Returns the dimension of an SVEC.
--!
--! The result is an integer. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dimension(MADLIB_SCHEMA.svec)
    RETURNS integer
    AS 'MODULE_PATHNAME', 'svec_dimension'
    IMMUTABLE LANGUAGE C;
686 
--! Applies a given function to each element of an SVEC.
--!
--! NOTE(review): the text argument presumably names the function to apply;
--! confirm the accepted format against the C implementation.
--! Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_lapply(
    text,
    MADLIB_SCHEMA.svec
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_lapply'
    IMMUTABLE LANGUAGE C;
690 
--! Appends a run-length block to the back of an SVEC.
--!
--! NOTE(review): the float8 and int8 arguments are presumably the value and
--! the run length of the appended block; confirm against the C
--! implementation. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_append(
    MADLIB_SCHEMA.svec,
    float8,
    int8
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_append'
    IMMUTABLE LANGUAGE C;
694 
--! Projects onto an element of an SVEC.
--!
--! Returns a single float8. NOTE(review): the int4 argument is presumably
--! the index of the element to return; confirm the index base against the C
--! implementation. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_proj(
    MADLIB_SCHEMA.svec,
    int4
)
    RETURNS float8
    AS 'MODULE_PATHNAME', 'svec_proj'
    IMMUTABLE LANGUAGE C;
698 
--! Extracts a subvector of an SVEC given the start and end indices of the
--! subvector.
--!
--! The two int4 arguments are the start index and the end index, in that
--! order. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_subvec(
    MADLIB_SCHEMA.svec,
    int4,
    int4
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_subvec'
    IMMUTABLE LANGUAGE C;
702 
--! Reverses the elements of an SVEC.
--!
--! Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_reverse'
    IMMUTABLE LANGUAGE C;
706 
--! Replaces the subvector of a given SVEC at a given start index with
--! another SVEC. Note that element indices start at 1.
--!
--! Arguments, in order: the SVEC to modify, the int4 start index, and the
--! replacement SVEC. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_change(
    MADLIB_SCHEMA.svec,
    int4,
    MADLIB_SCHEMA.svec
)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_change'
    IMMUTABLE LANGUAGE C;
710 
--! Computes the hash of an SVEC.
--!
--! Returns an int4. Declared STRICT: a NULL argument yields NULL without the
--! C routine being called.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_hash(MADLIB_SCHEMA.svec)
    RETURNS int4
    AS 'MODULE_PATHNAME', 'svec_hash'
    LANGUAGE C IMMUTABLE STRICT;
714 
--! Computes the word-occurrence vector of a document.
--!
--! The SQL name svec_sfv is bound to the C symbol
--! gp_extract_feature_histogram. NOTE(review): the two text[] arguments are
--! presumably the dictionary and the document, in that order; confirm
--! against the C implementation. Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sfv(text[], text[])
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'gp_extract_feature_histogram'
    IMMUTABLE LANGUAGE C;
--! Sorts an array of texts. This function should be in MADlib common.
--!
--! Implemented in plain SQL: the input array is expanded with unnest, the
--! elements are ordered ascending, and the ordered rows are collected back
--! into an array. No volatility category is declared, so the default
--! (VOLATILE) applies.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sort(text[])
    RETURNS text[]
AS $$
    SELECT ARRAY(
        SELECT unnest($1::text[]) AS word
        ORDER BY word
    );
$$ LANGUAGE SQL;
725 
--! Converts an svec to a text string.
--!
--! Declared STRICT: a NULL argument yields NULL without the C routine being
--! called.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_to_string(MADLIB_SCHEMA.svec)
    RETURNS text
    AS 'MODULE_PATHNAME', 'svec_to_string'
    LANGUAGE C IMMUTABLE STRICT;
730 
--! Converts a text string to an svec.
--!
--! Declared STRICT: a NULL argument yields NULL without the C routine being
--! called.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_from_string(text)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_from_string'
    LANGUAGE C IMMUTABLE STRICT;
736 
737 /*
738 DROP OPERATOR IF EXISTS || ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
739 DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
740 DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
741 DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
742 DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
743 DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
744 DROP OPERATOR IF EXISTS ^ ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
745 */
746 
-- Operators taking an svec on both sides. Each statement binds an operator
-- symbol to the named function; no optimization clauses are attached.

-- svec || svec, implemented by svec_concat (concatenation).
CREATE OPERATOR MADLIB_SCHEMA.|| (
    PROCEDURE = MADLIB_SCHEMA.svec_concat,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec - svec, implemented by svec_minus.
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.svec_minus,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec + svec, implemented by svec_plus.
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.svec_plus,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec / svec, implemented by svec_div.
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.svec_div,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec %*% svec, implemented by svec_dot.
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec * svec, implemented by svec_mult.
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.svec_mult,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec ^ svec, implemented by svec_pow.
CREATE OPERATOR MADLIB_SCHEMA.^ (
    PROCEDURE = MADLIB_SCHEMA.svec_pow,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = MADLIB_SCHEMA.svec
);
783 
784 -- float8[] operators
785 -- DROP OPERATOR IF EXISTS = ( float8[], float8[]);
786 /*
787 DROP OPERATOR IF EXISTS %*% ( float8[], MADLIB_SCHEMA.svec);
788 DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, float8[]);
789 DROP OPERATOR IF EXISTS %*% ( float8[], float8[]);
790 DROP OPERATOR IF EXISTS - ( float8[], float8[]);
791 DROP OPERATOR IF EXISTS + ( float8[], float8[]);
792 DROP OPERATOR IF EXISTS * ( float8[], float8[]);
793 DROP OPERATOR IF EXISTS / ( float8[], float8[]);
794 DROP OPERATOR IF EXISTS - ( float8[], MADLIB_SCHEMA.svec);
795 DROP OPERATOR IF EXISTS + ( float8[], MADLIB_SCHEMA.svec);
796 DROP OPERATOR IF EXISTS * ( float8[], MADLIB_SCHEMA.svec);
797 DROP OPERATOR IF EXISTS / ( float8[], MADLIB_SCHEMA.svec);
798 DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, float8[]);
799 DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, float8[]);
800 DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, float8[]);
801 DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, float8[]);
802 */
803 
804 /*
805 CREATE OPERATOR MADLIB_SCHEMA.= (
806  leftarg = float8[],
807  rightarg = float8[],
808  procedure = MADLIB_SCHEMA.float8arr_eq,
809  commutator = operator(MADLIB_SCHEMA.=) ,
810 -- negator = operator(MADLIB_SCHEMA.<>) ,
811  restrict = eqsel, join = eqjoinsel
812 );
813 */
814 
-- The %*% operator for operand pairs that involve float8[]. All three
-- variants name svec_dot as their procedure, so each one relies on an
-- svec_dot overload with the matching argument types (presumably declared
-- earlier in the file; verify).

-- float8[] %*% float8[]
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] %*% svec
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- svec %*% float8[]
CREATE OPERATOR MADLIB_SCHEMA.%*% (
    PROCEDURE = MADLIB_SCHEMA.svec_dot,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);
-- Arithmetic operators with float8[] on both sides. Each binds the symbol
-- to the float8arr_<op>_float8arr function of the same flavour.

-- float8[] - float8[]
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.float8arr_minus_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] + float8[]
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_plus_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] * float8[]
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.float8arr_mult_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);

-- float8[] / float8[]
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_div_float8arr,
    LEFTARG   = float8[],
    RIGHTARG  = float8[]
);
850 
-- Arithmetic operators with float8[] on the left and svec on the right.
-- Each binds the symbol to the float8arr_<op>_svec function.

-- float8[] - svec
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.float8arr_minus_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] + svec
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_plus_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] * svec
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.float8arr_mult_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);

-- float8[] / svec
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.float8arr_div_svec,
    LEFTARG   = float8[],
    RIGHTARG  = MADLIB_SCHEMA.svec
);
871 
-- Arithmetic operators with svec on the left and float8[] on the right.
-- Each binds the symbol to the svec_<op>_float8arr function.

-- svec - float8[]
CREATE OPERATOR MADLIB_SCHEMA.- (
    PROCEDURE = MADLIB_SCHEMA.svec_minus_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec + float8[]
CREATE OPERATOR MADLIB_SCHEMA.+ (
    PROCEDURE = MADLIB_SCHEMA.svec_plus_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec * float8[]
CREATE OPERATOR MADLIB_SCHEMA.* (
    PROCEDURE = MADLIB_SCHEMA.svec_mult_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);

-- svec / float8[]
CREATE OPERATOR MADLIB_SCHEMA./ (
    PROCEDURE = MADLIB_SCHEMA.svec_div_float8arr,
    LEFTARG   = MADLIB_SCHEMA.svec,
    RIGHTARG  = float8[]
);
892 
893 /*
894 DROP CAST IF EXISTS (int2 AS MADLIB_SCHEMA.svec) ;
895 DROP CAST IF EXISTS (integer AS MADLIB_SCHEMA.svec) ;
896 DROP CAST IF EXISTS (bigint AS MADLIB_SCHEMA.svec) ;
897 DROP CAST IF EXISTS (float4 AS MADLIB_SCHEMA.svec) ;
898 DROP CAST IF EXISTS (float8 AS MADLIB_SCHEMA.svec) ;
899 DROP CAST IF EXISTS (numeric AS MADLIB_SCHEMA.svec) ;
900 */
901 
-- Casts from the scalar numeric types to svec, one per source type, each
-- routed through the matching svec_cast_<type> function. The AS IMPLICIT
-- option is left commented out, so none of these casts is applied
-- implicitly.
CREATE CAST (int2 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int2(int2);       -- AS IMPLICIT
CREATE CAST (integer AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int4(integer);    -- AS IMPLICIT
CREATE CAST (bigint AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_int8(bigint);     -- AS IMPLICIT
CREATE CAST (float4 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float4(float4);   -- AS IMPLICIT
CREATE CAST (float8 AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8(float8);   -- AS IMPLICIT
CREATE CAST (numeric AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_numeric(numeric); -- AS IMPLICIT
908 
909 /*
910 DROP CAST IF EXISTS (int2 AS float8[]) ;
911 DROP CAST IF EXISTS (integer AS float8[]) ;
912 DROP CAST IF EXISTS (bigint AS float8[]) ;
913 DROP CAST IF EXISTS (float4 AS float8[]) ;
914 DROP CAST IF EXISTS (float8 AS float8[]) ;
915 DROP CAST IF EXISTS (numeric AS float8[]) ;
916 */
917 
918 -- CREATE CAST (int2 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(int2) ; -- AS IMPLICIT;
919 -- CREATE CAST (integer AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(integer) ; -- AS IMPLICIT;
920 -- CREATE CAST (bigint AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(bigint) ; -- AS IMPLICIT;
921 -- CREATE CAST (float4 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(float4) ; -- AS IMPLICIT;
922 -- CREATE CAST (float8 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(float8) ; -- AS IMPLICIT;
923 -- CREATE CAST (numeric AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(numeric) ; -- AS IMPLICIT;
924 
925 -- DROP CAST IF EXISTS (MADLIB_SCHEMA.svec AS float8[]) ;
926 -- DROP CAST IF EXISTS (float8[] AS MADLIB_SCHEMA.svec) ;
927 
-- Casts between svec and float8[] in both directions. The AS IMPLICIT
-- option is left commented out, so neither cast is applied implicitly.
CREATE CAST (MADLIB_SCHEMA.svec AS float8[])
    WITH FUNCTION MADLIB_SCHEMA.svec_return_array(MADLIB_SCHEMA.svec); -- AS IMPLICIT
CREATE CAST (float8[] AS MADLIB_SCHEMA.svec)
    WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(float8[]);         -- AS IMPLICIT
930 
931 -- DROP OPERATOR IF EXISTS = (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
932 
933 
-- svec = svec, implemented by svec_eq. The operator is declared as its own
-- commutator; no negator is declared here (that clause is commented out).
-- Equality selectivity estimators are used for restriction and join.
CREATE OPERATOR MADLIB_SCHEMA.= (
    PROCEDURE  = MADLIB_SCHEMA.svec_eq,
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.=),
--  NEGATOR    = OPERATOR(MADLIB_SCHEMA.<>),
    RESTRICT   = eqsel,
    JOIN       = eqjoinsel
);
940 
--! Transition function for mean(svec) aggregate
--!
--! Takes the running FLOAT[] state and the next svec, and returns the new
--! FLOAT[] state. No link symbol is given, so the C symbol defaults to the
--! SQL function name. Not declared STRICT; the mean aggregate declares no
--! INITCOND, so the state starts out as NULL.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_transition(
    FLOAT[],
    MADLIB_SCHEMA.svec
)
    RETURNS FLOAT[]
    AS 'MODULE_PATHNAME'
    IMMUTABLE LANGUAGE C;
946 
--! Preliminary merge function for mean(svec) aggregate
--!
--! Combines two FLOAT[] partial states into one. No link symbol is given,
--! so the C symbol defaults to the SQL function name. Not declared STRICT.
--! The mean aggregate attaches it as prefunc on Greenplum builds only.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_prefunc(
    FLOAT[],
    FLOAT[]
)
    RETURNS FLOAT[]
    AS 'MODULE_PATHNAME'
    IMMUTABLE LANGUAGE C;
952 
--! Final function for mean(svec) aggregate
--!
--! Turns the accumulated FLOAT[] state into the resulting svec. No link
--! symbol is given, so the C symbol defaults to the SQL function name.
--! Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_final(FLOAT[])
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME'
    IMMUTABLE LANGUAGE C;
958 
--! Aggregate that computes the element-wise mean of a list of vectors.
--!
--! State type is FLOAT[] with no initial condition. The conditional line
--! adds the prefunc attribute, including its trailing comma, on Greenplum
--! builds only, so it must stay in front of another attribute.
--!
CREATE AGGREGATE MADLIB_SCHEMA.mean(MADLIB_SCHEMA.svec) (
    STYPE = FLOAT[],
    SFUNC = MADLIB_SCHEMA.svec_mean_transition,
    m4_ifdef(`__GREENPLUM__',`prefunc = MADLIB_SCHEMA.svec_mean_prefunc,')
    FINALFUNC = MADLIB_SCHEMA.svec_mean_final
);
967 
--! Aggregate that provides the element-wise sum of a list of vectors.
--!
--! The state is an svec that starts at zero: the initial condition is the
--! svec literal with one run of count 1 holding the value 0. Both the
--! transition step and, on Greenplum builds, the merge step use svec_plus.
--! The conditional line carries its own trailing comma, so it must stay in
--! front of another attribute.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec) (
    STYPE = MADLIB_SCHEMA.svec,
    INITCOND = '{1}:{0.}', -- Zero
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_plus,')
    SFUNC = MADLIB_SCHEMA.svec_plus
);
--! Aggregate that provides a tally of nonzero entries in a list of vectors.
--!
--! The state is an svec that starts at zero: the initial condition is the
--! svec literal with one run of count 1 holding the value 0. Rows are
--! folded in with svec_count; on Greenplum builds partial states are merged
--! with svec_plus. The conditional line carries its own trailing comma, so
--! it must stay in front of another attribute.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec) (
    STYPE = MADLIB_SCHEMA.svec,
    INITCOND = '{1}:{0.}', -- Zero
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_plus,')
    SFUNC = MADLIB_SCHEMA.svec_count
);
987 
--! Aggregate that turns a list of float8 values into an SVEC.
--!
--! Values are folded into the svec state with svec_pivot and there is no
--! initial condition. On Greenplum builds the two conditional lines pick
--! one of two variants: where ordered aggregates are available the
--! aggregate is created with the ORDERED keyword and no prefunc, otherwise
--! svec_concat is attached as prefunc. The second conditional line carries
--! its own trailing comma, so it must stay in front of another attribute.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_agg(float8);
CREATE
m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED'))
AGGREGATE MADLIB_SCHEMA.svec_agg(float8) (
    STYPE = MADLIB_SCHEMA.svec,
    m4_ifdef(`__GREENPLUM__', m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `', ``prefunc=MADLIB_SCHEMA.svec_concat,''))
    SFUNC = MADLIB_SCHEMA.svec_pivot
);
998 
--! Aggregate that computes the median element of a list of float8 values.
--!
--! Values are folded into an svec state with svec_pivot, and svec_median is
--! applied to the final state. On Greenplum builds partial states are
--! merged with svec_concat. The conditional line carries its own trailing
--! comma, so it must stay in front of another attribute.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory(float8);
CREATE AGGREGATE MADLIB_SCHEMA.svec_median_inmemory(float8) (
    STYPE = MADLIB_SCHEMA.svec,
    SFUNC = MADLIB_SCHEMA.svec_pivot,
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.svec_concat,')
    FINALFUNC = MADLIB_SCHEMA.svec_median
);
1008 
1009 -- Comparisons based on L2 Norm
--! Returns true if the l2 norm of the first SVEC is less than that of the
--! second SVEC.
--!
--! Not declared STRICT. Bound to the < operator on svec further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_lt(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_lt'
    IMMUTABLE LANGUAGE C;
--! Returns true if the l2 norm of the first SVEC is less than or equal to
--! that of the second SVEC.
--!
--! Not declared STRICT. Bound to the <= operator on svec further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_le(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_le'
    IMMUTABLE LANGUAGE C;
1017 
--! Returns true if the l2 norm of the first SVEC is equal to that of the
--! second SVEC.
--!
--! Not declared STRICT. Bound to the == operator on svec further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_eq(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_eq'
    IMMUTABLE LANGUAGE C;
1021 
--! Returns true if the l2 norm of the first SVEC is not equal to that of
--! the second SVEC.
--!
--! Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ne(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_ne'
    IMMUTABLE LANGUAGE C;
1025 
--! Returns true if the l2 norm of the first SVEC is greater than that of
--! the second SVEC.
--!
--! Not declared STRICT. Bound to the > operator on svec further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_gt(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_gt'
    IMMUTABLE LANGUAGE C;
1029 
--! Returns true if the l2 norm of the first SVEC is greater than or equal
--! to that of the second SVEC.
--!
--! Not declared STRICT. Bound to the >= operator on svec further down.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ge(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS bool
    AS 'MODULE_PATHNAME', 'svec_l2_ge'
    IMMUTABLE LANGUAGE C;
1033 
--! Returns a value indicating the relative values of the l2 norms of two
--! SVECs.
--!
--! The result is an integer. This function is registered as support
--! function 1 of the btree operator class svec_l2_ops further down.
--! Not declared STRICT.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_cmp(
    MADLIB_SCHEMA.svec,
    MADLIB_SCHEMA.svec
)
    RETURNS integer
    AS 'MODULE_PATHNAME', 'svec_l2_cmp'
    IMMUTABLE LANGUAGE C;
1037 
--! Normalizes an SVEC, that is, divides all elements by its norm/magnitude.
--!
--! The SQL name normalize is bound to the C symbol svec_normalize.
--! Declared STRICT: a NULL argument yields NULL without the C routine being
--! called.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalize(MADLIB_SCHEMA.svec)
    RETURNS MADLIB_SCHEMA.svec
    AS 'MODULE_PATHNAME', 'svec_normalize'
    STRICT IMMUTABLE LANGUAGE C;
1043 /*
1044 DROP OPERATOR IF EXISTS < (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1045 DROP OPERATOR IF EXISTS <= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1046 DROP OPERATOR IF EXISTS <> (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
1047 DROP OPERATOR IF EXISTS == (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1048 DROP OPERATOR IF EXISTS > (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1049 DROP OPERATOR IF EXISTS >= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
1050 DROP OPERATOR IF EXISTS *|| (int4, MADLIB_SCHEMA.svec) ;
1051 */
1052 
-- svec < svec, comparing l2 norms through svec_l2_lt.
-- Commutator is > and negator is >=.
CREATE OPERATOR MADLIB_SCHEMA.< (
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_lt,
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.>),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.>=),
    RESTRICT   = scalarltsel,
    JOIN       = scalarltjoinsel
);
-- svec <= svec, comparing l2 norms through svec_l2_le.
-- Commutator is >= and negator is >.
CREATE OPERATOR MADLIB_SCHEMA.<= (
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_le,
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.>=),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.>),
    RESTRICT   = scalarltsel,
    JOIN       = scalarltjoinsel
);
-- svec <> svec: true when the l2 norms of the two operands differ.
--
-- The procedure is svec_l2_ne. It was previously bound to svec_l2_eq, which
-- made the operator return true exactly when the norms were EQUAL.
-- The operator is its own commutator. Its negator is ==, the norm-equality
-- operator backed by svec_l2_eq, which in turn names <> as its negator.
-- (The = operator is backed by svec_eq and is not the logical negation of a
-- norm comparison.)
-- Inequality selectivity estimators are used to match the semantics.
CREATE OPERATOR MADLIB_SCHEMA.<> (
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_ne,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.<>),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.==),
    RESTRICT   = neqsel,
    JOIN       = neqjoinsel
);
-- svec == svec: true when the l2 norms of the two operands are equal
-- (svec_l2_eq). This is the equality member (strategy 3) of the btree
-- operator class svec_l2_ops.
--
-- The operator is its own commutator: swapping the operands of a norm
-- equality test gives the same operator. It was previously declared with =
-- as commutator, but = is a different operator (backed by svec_eq) that
-- already names itself as its commutator, so the two declarations
-- contradicted each other.
-- The negator is <>.
CREATE OPERATOR MADLIB_SCHEMA.== (
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_eq,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.==),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.<>),
    RESTRICT   = eqsel,
    JOIN       = eqjoinsel
);
-- svec >= svec, comparing l2 norms through svec_l2_ge.
-- Commutator is <= and negator is <.
CREATE OPERATOR MADLIB_SCHEMA.>= (
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_ge,
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.<=),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.<),
    RESTRICT   = scalargtsel,
    JOIN       = scalargtjoinsel
);
-- svec > svec, comparing l2 norms through svec_l2_gt.
-- Commutator is < and negator is <=.
CREATE OPERATOR MADLIB_SCHEMA.> (
    PROCEDURE  = MADLIB_SCHEMA.svec_l2_gt,
    LEFTARG    = MADLIB_SCHEMA.svec,
    RIGHTARG   = MADLIB_SCHEMA.svec,
    COMMUTATOR = OPERATOR(MADLIB_SCHEMA.<),
    NEGATOR    = OPERATOR(MADLIB_SCHEMA.<=),
    RESTRICT   = scalargtsel,
    JOIN       = scalargtjoinsel
);
1085 
-- int4 *|| svec, implemented by svec_concat_replicate (replicates the svec
-- and concatenates the copies).
CREATE OPERATOR MADLIB_SCHEMA.*|| (
    PROCEDURE = MADLIB_SCHEMA.svec_concat_replicate,
    LEFTARG   = int4,
    RIGHTARG  = MADLIB_SCHEMA.svec
);
1089 
-- Default btree operator class for svec. Ordering is by l2 norm: the five
-- strategy slots map to the norm-based comparison operators, and
-- svec_l2_cmp is the comparison support function.
CREATE OPERATOR CLASS MADLIB_SCHEMA.svec_l2_ops
    DEFAULT FOR TYPE MADLIB_SCHEMA.svec USING btree AS
        OPERATOR 1 MADLIB_SCHEMA.<  ,   -- less than
        OPERATOR 2 MADLIB_SCHEMA.<= ,   -- less than or equal
        OPERATOR 3 MADLIB_SCHEMA.== ,   -- equal
        OPERATOR 4 MADLIB_SCHEMA.>= ,   -- greater than or equal
        OPERATOR 5 MADLIB_SCHEMA.>  ,   -- greater than
        FUNCTION 1 MADLIB_SCHEMA.svec_l2_cmp(MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
1098