User Documentation
 All Files Functions Groups
dt_utility.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file dt_utility.sql_in
4  *
5  * @brief Utility functions widely used in C4.5 and random forest.
6  * @date April 5, 2012
7  *
8  *//* ----------------------------------------------------------------------- */
9 
10 
11 /*
12  * @brief Cast any value to text.
13  *
14  * @param val A value with any specific type.
15  *
16  * @return The text format string for the value.
17  *
18  * @note Greenplum doesn't support boolean to text casting.
19  *
20  */
21 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char
22  (
23  val anyelement
24  )
25 RETURNS TEXT
26 AS 'MODULE_PATHNAME', 'dt_to_text'
27 LANGUAGE C STRICT IMMUTABLE;
28 
29 
30 /*
31  * @brief Cast regclass to text. we will not create a cast,
32  * since it may override the existing cast.
33  * Although there is no cast for regclass to text,
34  * PL/PGSQL can coerce it to text automatically.
35  * Another implementation can use sql function:
36  * select textin(regclassout('pg_class'::regclass));
37  *
38  * @param rc The regclass of the table.
39  *
40  * @return The text representation for the regclass.
41  *
42  */
43 CREATE or replace FUNCTION MADLIB_SCHEMA.__regclass_to_text
44  (
45  rc regclass
46  )
47 RETURNS TEXT
48 AS $$
49 BEGIN
50  RETURN rc;
51 END
52 $$ LANGUAGE PLPGSQL IMMUTABLE;
53 
54 
55 /*
56  * @brief Format a string with the value in args array.
57  *
58  * @param fmt The format string.
59  * @param args The specified elements in format string.
60  *
61  * @return The formated string.
62  *
63  * @note Each '%' in fmt will be replaced with the corresponding value of args.
64  * The number of '%'s should equal to the length of array args.
65  *
66  */
67 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
68  (
69  fmt TEXT,
70  args TEXT[]
71  )
72 RETURNS TEXT
73 AS 'MODULE_PATHNAME', 'dt_text_format'
74 LANGUAGE C IMMUTABLE;
75 
76 
77 /*
78  * @brief Short form to format a string with four parameters.
79  *
80  * @param arg1 The first argument.
81  * @param arg2 The second argument.
82  * @param arg3 The third argument.
83  * @param arg4 The fouth argument.
84  *
85  * @return The formated string.
86  *
87  */
88 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
89  (
90  fmt TEXT,
91  arg1 TEXT,
92  arg2 TEXT,
93  arg3 TEXT,
94  arg4 TEXT
95  )
96 RETURNS TEXT AS $$
97  SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4, $5]);
98 $$ LANGUAGE sql IMMUTABLE;
99 
100 
101 /*
102  * @brief Short form to format a string with three parameters.
103  *
104  * @param arg1 The first argument.
105  * @param arg2 The second argument.
106  * @param arg3 The third argument.
107  *
108  * @return The formated string.
109  *
110  */
111 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
112  (
113  fmt TEXT,
114  arg1 TEXT,
115  arg2 TEXT,
116  arg3 TEXT
117 )
118 RETURNS TEXT AS $$
119  SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4]);
120 $$ LANGUAGE sql IMMUTABLE;
121 
122 
123 /*
124  * @brief Short form to format a string with two parameters.
125  *
126  * @param arg1 The first argument.
127  * @param arg2 The second argument.
128  *
129  * @return The formated string.
130  *
131  */
132 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
133  (
134  fmt TEXT,
135  arg1 TEXT,
136  arg2 TEXT
137  )
138 RETURNS TEXT AS $$
139  SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3]);
140 $$ LANGUAGE sql IMMUTABLE;
141 
142 
143 /*
144  * @brief Short form to format a string with a parameter.
145  *
146  * @param arg1 The first argument.
147  *
148  * @return The formated string.
149  *
150  */
151 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
152  (
153  fmt TEXT,
154  arg1 TEXT
155  )
156 RETURNS TEXT AS $$
157  SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2]);
158 $$ LANGUAGE sql IMMUTABLE;
159 
160 
161 /*
162  * @brief Raise exception if the condition is false.
163  *
164  * @param condition The assert condition.
165  * @param reason The reason string displayed when assert failure.
166  *
167  */
168 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert
169  (
170  condition BOOLEAN,
171  reason TEXT
172  )
173 RETURNS void AS $$
174 BEGIN
175  IF (NOT condition) THEN
176  RAISE EXCEPTION 'ERROR: %', reason;
177  END IF;
178 END
179 $$ LANGUAGE PLPGSQL IMMUTABLE;
180 
181 
182 /*
183  * @brief Test if the specified table exists or not.
184  *
185  * @param full_table_name The full table name.
186  *
187  */
188 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__table_exists
189  (
190  full_table_name TEXT
191  )
192 RETURNS BOOLEAN AS
193 'MODULE_PATHNAME', 'table_exists'
194 LANGUAGE C IMMUTABLE;
195 
196 
197 /*
198  * @brief Test if the specified column exists or not.
199  *
200  * @param full_table_name The full table name.
201  *
202  * @return True if the column exists, otherwise return false.
203  *
204  */
205 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__column_exists
206  (
207  full_table_name TEXT,
208  column_name TEXT
209  )
210 RETURNS BOOLEAN AS $$
211 DECLARE
212  curstmt TEXT := '';
213  result INT := 0;
214 BEGIN
215  PERFORM MADLIB_SCHEMA.__assert
216  (
217  (full_table_name IS NOT NULL) AND (column_name IS NOT NULL),
218  'the table name and column name must not be null'
219  );
220 
221  IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
222  SELECT MADLIB_SCHEMA.__format
223  (
224  'SELECT COUNT(*)
225  FROM pg_catalog.pg_attribute
226  WHERE attnum > 0 AND
227  (NOT attisdropped) AND
228  attname = ''%'' AND
229  attrelid = ''%''::regclass',
230  ARRAY[
231  column_name,
232  full_table_name
233  ]
234  ) INTO curstmt;
235 
236  EXECUTE curstmt INTO result;
237 
238  RETURN result >= 1;
239  END IF;
240 
241  RETURN 'f';
242 END
243 $$ LANGUAGE PLPGSQL STABLE;
244 
245 
246 /*
247  * @brief Assert if the specified table exists or not.
248  *
249  * @param full_table_name The full table name.
250  *
251  */
252 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert_table
253  (
254  full_table_name TEXT,
255  existence BOOLEAN
256  )
257 RETURNS void AS $$
258 DECLARE
259  err_msg TEXT;
260 BEGIN
261  IF (existence) THEN
262  err_msg = 'assertion failure. Table: ''' || full_table_name ||
263  ''' does not exist';
264  ELSE
265  err_msg = 'assertion failure. Table: ''' || full_table_name ||
266  ''' already exists';
267  END IF;
268 
269  PERFORM MADLIB_SCHEMA.__assert
270  (
271  MADLIB_SCHEMA.__table_exists(full_table_name) = existence,
272  err_msg
273  );
274 END
275 $$ LANGUAGE PLPGSQL STABLE;
276 
277 
278 /*
279  * @brief Strip the schema name from the full table name.
280  *
281  * @param full_table_name The full table name.
282  *
283  * @return The table name without schema name.
284  *
285  */
286 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__strip_schema_name
287  (
288  full_table_name TEXT
289  )
290 RETURNS TEXT AS $$
291 DECLARE
292  str_val TEXT;
293 BEGIN
294  PERFORM MADLIB_SCHEMA.__assert
295  (
296  full_table_name IS NOT NULL,
297  'table name should not be null'
298  );
299 
300 
301  str_val = trim(both ' ' FROM split_part(full_table_name, '.', 2));
302 
303  IF (length(str_val) = 0) THEN
304  str_val = btrim(full_table_name, ' ');
305  END IF;
306 
307  RETURN lower(str_val);
308 end
309 $$ LANGUAGE PLPGSQL IMMUTABLE;
310 
311 
312 /*
313  * @brief Get the schema name from a full table name.
314  * if there is no schema name in the full table name, then
315  * if the table exists, we return the schema name from catalog
316  * else the current schema name,
317  * else return the schema name from the full table name directly.
318  *
319  * @param full_table_name The full table name.
320  *
321  * @return The schema name of the table.
322  *
323  * @note This function should be VOLATILE, since we may get schema name
324  * from a new created table in the same transaction.
325  *
326  */
327 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_schema_name
328  (
329  full_table_name TEXT
330  )
331 RETURNS TEXT AS $$
332 DECLARE
333  table_name TEXT;
334  temp TEXT[];
335  len INT;
336  curstmt TEXT;
337  schema_name TEXT;
338 BEGIN
339  PERFORM MADLIB_SCHEMA.__assert
340  (
341  full_table_name IS NOT NULL,
342  'table name should not be null'
343  );
344 
345  temp = string_to_array(full_table_name, '.');
346  len = array_upper(temp, 1);
347 
348  IF (1 = len) THEN
349  -- if table exists, return the schema name from catalog
350  IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
351  SELECT nspname
352  FROM pg_catalog.pg_namespace n
353  WHERE n.oid =
354  (
355  SELECT relnamespace FROM pg_catalog.pg_class
356  WHERE oid= full_table_name::regclass
357  )
358 
359  INTO schema_name;
360  ELSE
361  -- get the current schema name
362  schema_name = current_schema();
363  END IF;
364  ELSE
365  PERFORM MADLIB_SCHEMA.__assert
366  (
367  len = 2,
368  'wrong full table name<' || full_table_name || '>'
369  );
370  -- get the shema name directly
371  schema_name = lower(btrim(temp[1], ' '));
372  END IF;
373 
374  RETURN schema_name;
375 end
376 $$ LANGUAGE PLPGSQL;
377 
378 
379 /*
380  * @brief Test if the given element is in the specified array or not.
381  *
382  * @param find The element to be found.
383  * @param arr The array containing the elements.
384  *
385  * @return True the element is in the array. Otherwise returns false.
386  *
387  */
388 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_search
389  (
390  find ANYELEMENT,
391  arr ANYARRAY
392  )
393 RETURNS BOOLEAN AS $$
394  SELECT count(*) = 1
395  FROM
396  (
397  SELECT unnest($2) as elem
398  ) t
399  WHERE $1 IS NOT DISTINCT FROM elem
400 $$ LANGUAGE sql IMMUTABLE;
401 
402 
403 /*
404  * @brief Test if each element in the given array is a column of the table.
405  *
406  * @param column_names The array containing the columns to be tested.
407  * @param table_name The full table name.
408  *
409  * @return True if each element of column_names is a column of the table.
410  *
411  */
412 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__columns_in_table
413  (
414  column_names TEXT[],
415  table_name TEXT
416  )
417 RETURNS BOOLEAN AS $$
418  SELECT count(*) = 0
419  FROM
420  (
421  SELECT unnest($1)
422  EXCEPT
423  SELECT quote_ident(attname)
424  FROM pg_attribute
425  WHERE attrelid = $2::regclass AND
426  attnum > 0 AND
427  NOT attisdropped
428  ) t;
429 $$ LANGUAGE sql STABLE;
430 
431 
432 /*
433  * @brief Get the number of columns for a given table.
434  *
435  * @param table_name The full table name.
436  *
437  * @return The number of columns in the given table.
438  *
439  */
440 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_columns
441  (
442  table_name TEXT
443  )
444 RETURNS INT AS $$
445  SELECT count(attname)::INT
446  FROM pg_attribute
447  WHERE attrelid = $1::regclass AND
448  attnum > 0 AND
449  NOT attisdropped
450 $$ LANGUAGE sql STABLE;
451 
452 
453 /*
454  * @brief Convert a string with delimiter ',' to an array.
455  * Each element in the array is trimed from the start
456  * and end using a space.
457  *
458  * @param csv_str The string with elements delimited by ','.
459  *
460  * @return The splitting string array.
461  *
462  * @note If the input string is NULL or an empty string
463  * after trimmed with ' ', then NULL will be returned.
464  *
465  */
466 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__csvstr_to_array
467  (
468  csv_str TEXT
469  )
470 RETURNS TEXT[] AS $$
471 DECLARE
472  ret TEXT[];
473  str_val TEXT;
474  index INTEGER;
475 BEGIN
476  ret = string_to_array(lower(btrim(csv_str, ' ')), ',');
477 
478  -- if the input array is NULL or an empty one,
479  -- then we return NULL directly
480  -- (array_upper will return non-NULL otherwise)
481  IF (array_upper(ret, 1) IS NULL) THEN
482  RETURN NULL;
483  END IF;
484 
485  -- null or empty array will be filtered
486  FOR index IN 1..array_upper(ret, 1) LOOP
487  ret[index] = quote_ident(btrim(ret[index], ' '));
488  END LOOP;
489 
490  RETURN ret;
491 END
492 $$ LANGUAGE PLPGSQL IMMUTABLE;
493 
494 
495 /*
496  * @brief Retrieve a BOOL array. The ith element in the array
497  * indicate whether arr2[i] is in arr1 or not. The size
498  * of the BOOL array is the same as arr2. For example,
499  * arr1 = ['aa', 'bb', 'dd'],
500  * arr2 = ['aa', 'dd', 'bb', 'ee', 'ccc']
501  * then the BOOL array is:
502  * ['t', 't', 't', 'f', 'f']
503  *
504  * @param src_arr The source array.
505  * @param tst_arr The array to be tested.
506  *
507  * @return A BOOL array.
508  *
509  */
510 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_elem_in
511  (
512  src_arr ANYARRAY,
513  tst_arr ANYARRAY
514  )
515 RETURNS BOOLEAN[] AS $$
516  SELECT array_agg(elem in (SELECT unnest($1)))
517  FROM
518  (
519  SELECT unnest($2) as elem
520  ) t
521 $$ LANGUAGE sql IMMUTABLE;
522 
523 
524 /*
525  * @brief Sort the given array.
526  *
527  * @param arr The array to be sorted.
528  *
529  * @return The sorted array.
530  *
531  */
532 
533 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_sort
534  (
535  arr ANYARRAY
536  )
537 RETURNS ANYARRAY AS $$
538  SELECT ARRAY
539  (SELECT elem FROM unnest($1) elem ORDER BY elem LIMIT ALL)
540 $$ LANGUAGE sql IMMUTABLE;
541