User Documentation
 All Files Functions Groups
utilities.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file utilities.sql_in
4  *
5  * @brief SQL functions for carrying out routine tasks
6  *
7  * @sa For a brief overview of utility functions, see the
8  * module description \ref grp_utilities.
9  *
10  *//* ----------------------------------------------------------------------- */
11 
12 m4_include(`SQLCommon.m4') --'
13 m4_changequote(<!,!>)
14 
15 /**
16 @addtogroup grp_utilities
17 
18 \warning <em> This MADlib method is still in early stage development. There may be some
19 issues that will be addressed in a future version. Interface and implementation
20 is subject to change. </em>
21 
22 @about
23 
24 The utility module consists of useful utility functions to assist data
25 scientists in using the product. Several of these functions can be used
26 while implementing new algorithms.
27 
28 Refer to the file for documentation on each of the utlity functions.
29 
30 
31 @sa File utilities.sql_in documenting the SQL functions.
32 
33 */
34 
35 /**
36  * @brief Return MADlib build information.
37  *
38  * @returns Summary of MADlib build information, consisting of MADlib version,
39  * git revision, cmake configuration time, build type, build system,
40  * C compiler, and C++ compiler
41  */
42 CREATE FUNCTION MADLIB_SCHEMA.version()
43 RETURNS TEXT
44 LANGUAGE sql
45 IMMUTABLE
46 AS $$
47  SELECT (
48  'MADlib version: __MADLIB_VERSION__, '
49  'git revision: __MADLIB_GIT_REVISION__, '
50  'cmake configuration time: __MADLIB_BUILD_TIME__, '
51  'build type: __MADLIB_BUILD_TYPE__, '
52  'build system: __MADLIB_BUILD_SYSTEM__, '
53  'C compiler: __MADLIB_C_COMPILER__, '
54  'C++ compiler: __MADLIB_CXX_COMPILER__')::TEXT
55 $$;
56 
57 
58 /**
59  * @brief Raise an exception if the given condition is not satisfied.
60  */
61 CREATE FUNCTION MADLIB_SCHEMA.assert(condition BOOLEAN, msg VARCHAR)
62 RETURNS VOID
63 LANGUAGE plpgsql
64 IMMUTABLE
65 AS $$
66 BEGIN
67  IF NOT condition THEN
68  RAISE EXCEPTION 'Failed assertion: %', msg;
69  END IF;
70 END
71 $$;
72 
73 ------------------------------------------------------------------------
74 
75 /**
76  * @brief Compute the relative error of an approximate value
77  */
78 CREATE FUNCTION MADLIB_SCHEMA.relative_error(
79  approx DOUBLE PRECISION,
80  value DOUBLE PRECISION
81 ) RETURNS DOUBLE PRECISION
82 LANGUAGE sql
83 AS $$
84  SELECT abs(($1 - $2)/$2)
85 $$;
86 
87 ------------------------------------------------------------------------
88 
89 /**
90  * @brief Compute the relative error (w.r.t. the 2-norm) of an apprixmate vector
91  */
92 CREATE FUNCTION MADLIB_SCHEMA.relative_error(
93  approx DOUBLE PRECISION[],
94  value DOUBLE PRECISION[]
95 ) RETURNS DOUBLE PRECISION
96 LANGUAGE sql
97 AS $$
98  SELECT MADLIB_SCHEMA.dist_norm2($1, $2) / MADLIB_SCHEMA.norm2($2)
99 $$;
101 ------------------------------------------------------------------------
102 
103 /**
104  * @brief Check if a SQL statement raises an error
105  *
106  * @param sql The SQL statement
107  * @returns \c TRUE if an exception is raised while executing \c sql, \c FALSE
108  * otherwise.
109  */
110 CREATE FUNCTION MADLIB_SCHEMA.check_if_raises_error(
111  sql TEXT
112 ) RETURNS BOOLEAN
113 LANGUAGE plpgsql
114 AS $$
115 BEGIN
116  EXECUTE sql;
117  RETURN FALSE;
118 EXCEPTION
119  WHEN OTHERS THEN
120  RETURN TRUE;
121 END;
122 $$;
123 
124 ------------------------------------------------------------------------
125 
126 /**
127  * @brief Check if a column exists in a table
128  *
129  * @param source_table Source table
130  * @param column_name Column name in the table
131  * @returns \c TRUE if it exsists and FALSE if not
132  */
133 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_col_exists(
134  source_table TEXT,
135  column_name TEXT
136 )
137 RETURNS BOOLEAN AS $$
138 DECLARE
139  sql TEXT;
140  input_table_name VARCHAR[];
141  actual_table_name VARCHAR;
142  schema_name VARCHAR;
143 BEGIN
144 
145  input_table_name = regexp_split_to_array(source_table, E'\\.');
146  IF array_upper(input_table_name, 1) = 1 THEN
147  actual_table_name = input_table_name[1];
148  schema_name := current_schema();
149  ELSIF array_upper(input_table_name, 1) = 2 THEN
150  actual_table_name = input_table_name[2];
151  schema_name = input_table_name[1];
152  ELSE
153  RAISE EXCEPTION 'Incorrect input source table name provided';
154  END IF;
155  sql := 'SELECT MADLIB_SCHEMA.assert(count( column_name )>0, ''Error'') FROM information_schema.columns WHERE table_schema = ''' || schema_name || ''' AND table_name = ''' || actual_table_name || ''' AND column_name= ''' || column_name || '''';
156  raise notice '%', sql;
157 
158  RETURN NOT MADLIB_SCHEMA.check_if_raises_error(sql);
159 END;
160 $$ LANGUAGE plpgsql VOLATILE;
161 ------------------------------------------------------------------------
162 
163 /**
164  * @brief Check if a floating-point number is NaN (not a number)
165  *
166  * This function exists for portability. Some DBMSs like PostgreSQL treat
167  * floating-point numbers as fully ordered -- contrary to IEEE 754. (See, e.g.,
168  * the <a href=
169  * "http://www.postgresql.org/docs/current/static/datatype-numeric.html#DATATYPE-FLOAT"
170  * >PostgreSQL documentation</a>. For portability, MADlib code should not make
171  * use of such "features" directly, but only use isnan() instead.
172  *
173  * @param number
174  * @returns \c TRUE if \c number is \c NaN, \c FALSE otherwise
175  */
176 CREATE FUNCTION MADLIB_SCHEMA.isnan(
177  number DOUBLE PRECISION
178 ) RETURNS BOOLEAN
179 LANGUAGE sql
180 AS $$
181  SELECT $1 = 'NaN'::DOUBLE PRECISION;
182 $$;
183 
184 ------------------------------------------------------------------------
185 
186 /**
187  * @brief Create the temporary schema if it does not exist yet
188  */
189 CREATE FUNCTION MADLIB_SCHEMA.create_schema_pg_temp()
190 RETURNS VOID
191 LANGUAGE plpgsql
192 VOLATILE
193 AS $$
194 BEGIN
195  -- pg_my_temp_schema() is a built-in function
196  IF pg_my_temp_schema() = 0 THEN
197  -- The pg_temp schema does not exist, yet. Creating a temporary table
198  -- will create it. Note: There is *no* race condition here, because
199  -- every session has its own temp schema.
200  EXECUTE 'CREATE TEMPORARY TABLE _madlib_temp_table AS SELECT 1;
201  DROP TABLE pg_temp._madlib_temp_table CASCADE;';
202  END IF;
203 END;
204 $$;
205 
206 ------------------------------------------------------------------------
207 
208 /**
209  * @brief Create volatile noop function
210  *
211  * The only use of this function is as an optimization fence when used in the
212  * SELECT list of a query. See, e.g.,
213  * http://archives.postgresql.org/pgsql-sql/2012-07/msg00030.php
214  */
215 CREATE FUNCTION MADLIB_SCHEMA.noop()
216 RETURNS VOID
217 VOLATILE
218 LANGUAGE c
219 AS 'MODULE_PATHNAME';
220 
221 ------------------------------------------------------------------------
222 
223 /*
224  * Create type bytea8 with 8-byte alignment.
225  */
226 CREATE TYPE MADLIB_SCHEMA.bytea8;
227 
228 CREATE FUNCTION MADLIB_SCHEMA.bytea8in(cstring)
229 RETURNS MADLIB_SCHEMA.bytea8 AS 'byteain'
230 LANGUAGE internal IMMUTABLE STRICT;
231 
232 CREATE FUNCTION MADLIB_SCHEMA.bytea8out(MADLIB_SCHEMA.bytea8)
233 RETURNS cstring AS 'byteaout'
234 LANGUAGE internal IMMUTABLE STRICT;
235 
236 CREATE FUNCTION MADLIB_SCHEMA.bytea8recv(internal)
237 RETURNS MADLIB_SCHEMA.bytea8 AS 'bytearecv'
238 LANGUAGE internal IMMUTABLE STRICT;
239 
240 CREATE FUNCTION MADLIB_SCHEMA.bytea8send(MADLIB_SCHEMA.bytea8)
241 RETURNS bytea AS 'byteasend'
242 LANGUAGE internal IMMUTABLE STRICT;
243 
244 CREATE TYPE MADLIB_SCHEMA.bytea8(
245  INPUT = MADLIB_SCHEMA.bytea8in,
246  OUTPUT = MADLIB_SCHEMA.bytea8out,
247  RECEIVE = MADLIB_SCHEMA.bytea8recv,
248  SEND = MADLIB_SCHEMA.bytea8send,
249  ALIGNMENT = double
250 );
251 
252 /**
253  * @brief Get all column names except dependent variable
254 **/
255 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(
256  source_table VARCHAR -- name of input table
257  , dependent_varname VARCHAR -- name of dependent variable
258 )
259 RETURNS VARCHAR AS $$
260 DECLARE
261 col_names VARCHAR[];
262 BEGIN
263  EXECUTE 'SELECT ARRAY(SELECT DISTINCT column_name::varchar from ' ||
264  ' information_schema.columns WHERE ' ||
265  'column_name NOT LIKE ''' || dependent_varname || '''' ||
266  'AND table_name LIKE ''' || source_table || ''')'
267  INTO col_names;
268  RETURN 'ARRAY[' || array_to_string(col_names, ',') || ']';
269 END;
270 $$ LANGUAGE plpgsql VOLATILE;
271 
272 ------------------------------------------------------------------------
273 /**
274  * @brief Generate random remporary names for temp table and other names
275  */
276 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__unique_string ()
277 RETURNS VARCHAR AS $$
278 PythonFunction(utilities, utilities, __unique_string)
279 $$ LANGUAGE plpythonu;
280 
281 ------------------------------------------------------------------------
282 /**
283  * @brief Takes a string of comma separated values and puts it into an array
284  */
285 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._string_to_array (
286  s VARCHAR
287 )
288 RETURNS VARCHAR[] AS $$
289 PythonFunction(utilities, utilities, _string_to_sql_array)
290 $$ LANGUAGE plpythonu;
291 
292 ------------------------------------------------------------------------
293 /**
294  * @brief Cast boolean into text
295  */
296 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bool_to_text (BOOLEAN)
297 RETURNS TEXT
298 STRICT
299 LANGUAGE SQL AS '
300  SELECT CASE
301  WHEN $1 THEN ''t''
302  ELSE ''f''
303  END;
304 ';
305 
306 m4_ifdef(`__GREENPLUM__', `
307 CREATE CAST (BOOLEAN AS TEXT)
308  WITH FUNCTION MADLIB_SCHEMA.bool_to_text(BOOLEAN)
309  AS ASSIGNMENT;
310 ')
311 
312 
313 ------------------------------------------------------------------------
314 
315 /*
316  * An array_agg() function is defined in module array_ops (to compatibility with
317  * GP 4.0.
318  */
319 
320 m4_changequote(<!`!>,<!'!>)