13 m4_include(`SQLCommon.m4
')
15 /* Own macro definitions */
18 m4_ifdef(`__GREENPLUM__', 1, 0) &&
19 __DBMS_VERSION_MAJOR__ * 100 + __DBMS_VERSION_MINOR__ < 401
21 `m4_define(`__GREENPLUM_PRE_4_1__
')'
25 m4_ifdef(`__POSTGRESQL__
', 1, 0) &&
26 __DBMS_VERSION_MAJOR__ < 9
28 `m4_define(`__POSTGRESQL_PRE_9_0__')
'
32 m4_ifdef(`__GREENPLUM__', 1, 0) &&
33 __DBMS_VERSION_MAJOR__ * 10000 +
34 __DBMS_VERSION_MINOR__ * 100 +
35 __DBMS_VERSION_PATCH__ >= 40201
37 `m4_define(`__GREENPLUM_GE_4_2_1__
')'
250 DROP TYPE IF EXISTS MADLIB_SCHEMA.__enc_tbl_result;
251 CREATE TYPE MADLIB_SCHEMA.__enc_tbl_result AS
253 pre_proc_time INTERVAL,
254 breakup_tbl_time INTERVAL,
255 gen_kv_time INTERVAL,
256 gen_enc_time INTERVAL,
257 post_proc_time INTERVAL
274 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__validate_input_table
276 full_table_name TEXT,
277 feature_columns TEXT[],
285 all_columns TEXT := '';
288 -- find the first (LIMIT 1) unsupported data type if the input table has.
289 stmt= 'SELECT atttypid
291 WHERE attrelid ='||quote_literal(full_table_name)||'::regclass AND
293 (not attisdropped) AND
299 ''SMALLINT''::regtype::oid,
300 ''INT''::regtype::oid,
301 ''BIGINT''::regtype::oid,
302 ''FLOAT8''::regtype::oid,
303 ''REAL''::regtype::oid,
304 ''DECIMAL''::regtype::oid,
305 ''INET''::regtype::oid,
306 ''CIDR''::regtype::oid,
307 ''MACADDR''::regtype::oid,
308 ''BOOLEAN''::regtype::oid,
309 ''CHAR''::regtype::oid,
310 ''VARCHAR''::regtype::oid,
311 ''TEXT''::regtype::oid,
312 ''"
char"''::regtype::oid,
313 ''DATE''::regtype::oid,
314 ''TIME''::regtype::oid,
315 ''TIMETZ''::regtype::oid,
316 ''TIMESTAMP''::regtype::oid,
317 ''TIMESTAMPTZ''::regtype::oid,
318 ''INTERVAL''::regtype::oid
323 IF (feature_columns IS NOT NULL) THEN
324 -- If user do not specify feature columns, we use all those columns.
325 -- Otherwise, we just need to check those specified columns.
326 index = array_lower(feature_columns, 1);
327 WHILE (index <= array_upper(feature_columns, 1)) LOOP
328 all_columns = all_columns ||
329 quote_literal(feature_columns[index]) ||
334 all_columns = all_columns || quote_literal(id_column) || ',';
335 all_columns = all_columns || quote_literal(class_column);
336 stmt = stmt ||' AND attname IN ('||all_columns||') ';
339 stmt = stmt||' LIMIT 1;';
341 EXECUTE stmt INTO rec;
343 IF (rec IS NOT NULL) THEN
344 -- Print the first unsupported data type, and supported types.
345 RAISE EXCEPTION 'Unsupported data type [%]. Supported types include:
346 SMALLINT, INT, BIGINT, FLOAT8, REAL,
347 DECIMAL, INET, CIDR, MACADDR, BOOLEAN,
348 CHAR, VARCHAR, TEXT, "
char",
349 DATE, TIME, TIMETZ, TIMESTAMP, TIMESTAMPTZ, and INTERVAL',
350 rec.atttypid::regtype;
353 SELECT MADLIB_SCHEMA.__format
368 EXECUTE stmt INTO rec;
370 -- check if the
id column has duplicated value
371 PERFORM MADLIB_SCHEMA.__assert
374 'The training table ' || full_table_name || ' must not have duplicated
id'
390 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_classtable_name
396 classtable_name TEXT;
400 PERFORM MADLIB_SCHEMA.__assert_table
406 curstmt = MADLIB_SCHEMA.__format
408 'SELECT MADLIB_SCHEMA.__regclass_to_text
409 (table_oid) as table_name
411 WHERE column_type = ''c''',
417 EXECUTE curstmt INTO classtable_name;
419 RETURN classtable_name;
431 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__drop_metatable
440 IF (meta_tbl_name is NULL ) THEN
444 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 't');
446 -- get the Key-Value tables
447 curstmt = MADLIB_SCHEMA.__format
449 'SELECT MADLIB_SCHEMA.__regclass_to_text
450 (table_oid) as table_name
455 WHERE table_oid IS NOT NULL
463 -- drop all the Key-Value tables
464 FOR name IN EXECUTE curstmt LOOP
465 EXECUTE 'DROP TABLE IF EXISTS ' || name || ' CASCADE;';
468 -- drop the metatable
469 EXECUTE 'DROP TABLE ' || meta_tbl_name || ' CASCADE;';
480 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__create_metatable
489 -- the maximum length of an identifier is 63
490 PERFORM MADLIB_SCHEMA.__assert
492 length(MADLIB_SCHEMA.__strip_schema_name(meta_tbl_name)) <= 63,
493 'The maximum length of ' ||
494 MADLIB_SCHEMA.__strip_schema_name(meta_tbl_name) ||
498 -- must not be existence
499 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 'f');
501 -- 'f' for feature, 'c' for class, 'i' for
id
502 -- 't' for continuous value, 'f' for discrete value
503 curstmt = MADLIB_SCHEMA.__format
512 ) m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (
id)')',
536 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__insert_into_metatable
549 tbl_txt TEXT := 'NULL';
551 PERFORM MADLIB_SCHEMA.__assert
553 column_type = 'f' OR column_type = 'i' OR column_type = 'c',
554 'column type must be ''f'', ''i'' or ''c'''
556 IF (table_name IS NOT NULL) THEN
557 tbl_txt = '''' || table_name || '''';
560 curstmt = MADLIB_SCHEMA.__format
562 'INSERT INTO % VALUES
563 (%, ''%'', ''%'', ''%'', %::regclass, %);',
569 MADLIB_SCHEMA.__to_char(is_cont),
587 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__validate_metatable
596 PERFORM MADLIB_SCHEMA.__assert_table(meta_tbl_name, 't');
598 -- if one of those KV tables doesn't exist,
599 -- we raise exception.
600 curstmt = MADLIB_SCHEMA.__format
602 'SELECT MADLIB_SCHEMA.__assert_table
603 (MADLIB_SCHEMA.__regclass_to_text(table_oid), ''t'')
608 WHERE table_oid IS NOT NULL
628 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__distinct_feature_value
638 curstmt = MADLIB_SCHEMA.__format
640 'SELECT num_dist_value
642 WHERE column_type=''f'' AND
id = %',
647 EXECUTE curstmt INTO result;
662 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_feature
671 curstmt = MADLIB_SCHEMA.__format
675 WHERE column_type=''f''',
679 EXECUTE curstmt INTO result;
694 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_class
702 class_table_name TEXT := '';
704 curstmt = MADLIB_SCHEMA.__format
706 'SELECT MADLIB_SCHEMA.__regclass_to_text(table_oid)
708 WHERE column_type=''c''',
712 EXECUTE curstmt INTO class_table_name;
714 curstmt = MADLIB_SCHEMA.__format
721 EXECUTE curstmt INTO result;
737 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_feature_name
747 curstmt = MADLIB_SCHEMA.__format
751 WHERE
id = % AND column_type = ''f'';',
753 MADLIB_SCHEMA.__to_char(feature_index)
756 EXECUTE curstmt INTO result;
775 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_column_value
787 tmp_txt TEXT := ' WHERE column_type = ''c''';
789 PERFORM MADLIB_SCHEMA.__assert
792 'the code of the value should not be null'
795 IF (column_type <> 'c') THEN
796 tmp_txt = MADLIB_SCHEMA.__format
798 ' WHERE
id = % AND column_type = ''%''',
804 curstmt = MADLIB_SCHEMA.__format
808 MADLIB_SCHEMA.__regclass_to_text(table_oid)]
815 EXECUTE curstmt INTO names;
817 PERFORM MADLIB_SCHEMA.__assert(names[1] IS NOT NULL, 'No such column name');
818 PERFORM MADLIB_SCHEMA.__assert(names[2] IS NOT NULL, 'No such table name');
820 curstmt = MADLIB_SCHEMA.__format
822 'SELECT MADLIB_SCHEMA.__to_char(fval)
829 EXECUTE curstmt INTO result;
831 IF (result IS NULL) THEN
851 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_feature_value
861 result = MADLIB_SCHEMA.__get_column_value
882 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_id_column_name
891 PERFORM MADLIB_SCHEMA.__assert_table
897 curstmt = MADLIB_SCHEMA.__format
901 WHERE column_type = ''i''
906 EXECUTE curstmt INTO result;
921 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_class_column_name
930 PERFORM MADLIB_SCHEMA.__assert_table
936 curstmt = MADLIB_SCHEMA.__format
940 WHERE column_type = ''c'' LIMIT 1',
944 EXECUTE curstmt INTO result;
960 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_class_value
969 result = MADLIB_SCHEMA.__get_column_value(0, code, 'c', meta_tbl_name);
1010 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__breakup_table
1012 input_tbl_name TEXT,
1013 breakup_tbl_name TEXT,
1017 attr_col_names TEXT[],
1019 h2hmv_routine_id INT,
1025 exec_begin TIMESTAMP;
1026 where_txt TEXT := '';
1027 fval_txt TEXT := 'fval';
1029 exec_begin = clock_timestamp();
1031 EXECUTE 'DROP TABLE IF EXISTS ' || breakup_tbl_name;
1033 m4_changequote(`>>>', `<<<')
1034 m4_ifdef(>>>__GREENPLUM_GE_4_2_1__<<<, >>>
1035 -- if the DB is GPDB and its
version is greater than or equal
1036 -- to 4.2, then we will use RLE compression for the encoded table.
1037 curstmt = MADLIB_SCHEMA.__format
1039 'CREATE TEMP TABLE %
1041 id BIGINT ENCODING (compresstype=RLE_TYPE),
1042 fid INT ENCODING (compresstype=RLE_TYPE),
1043 fval TEXT ENCODING (compresstype=RLE_TYPE),
1044 is_cont BOOL ENCODING (compresstype=RLE_TYPE),
1045 class INT ENCODING (compresstype=RLE_TYPE)
1047 WITH(appendonly=true, orientation=column)
1048 DISTRIBUTED BY(
id)',
1054 curstmt = MADLIB_SCHEMA.__format
1056 'CREATE TEMP TABLE %
1064 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (
id)')',
1070 m4_changequote(>>>`<<<, >>>'<<<)
1074 -- the supported missing value representation (' ', '?' and NULL) will
1075 -- be replace with NULL for easy processing later.
1076 -- the function __to_char is needed because on some databases an explicit
1077 -- cast to text is unavailable.
1078 IF (h2hmv_routine_id = 1) THEN
1079 where_txt = ' WHERE NULLIF(NULLIF(btrim(fval, '' ''), ''?''), '''') IS NOT NULL';
1081 fval_txt = ' CASE WHEN NULLIF(NULLIF(btrim(fval, '' ''), ''?''), '''') IS NULL THEN
1088 IF (cls_col_name IS NULL) THEN
1089 -- if the kv_cls_name is null, then the class column will be null
1090 curstmt = MADLIB_SCHEMA.__format
1092 'INSERT INTO %(
id, fid, fval, is_cont, class)
1093 SELECT
id, fid, % as fval, is_cont, class
1096 SELECT %, generate_series(1, %) as fid,
1097 unnest(array[MADLIB_SCHEMA.__to_char(%)]) as fval,
1098 unnest(array[''%''::BOOL]::BOOL[]) as is_cont, NULL as class
1107 array_upper(is_conts, 1)::TEXT,
1108 array_to_string(attr_col_names, '), MADLIB_SCHEMA.__to_char('),
1109 array_to_string(is_conts, ''','''),
1116 -- for scoring, as the class column may have some values which are not
1117 -- appear in the training table, we need use left join here to ensure all
1118 -- the rows of the input table were breakup. Here, we simple encode those
1119 -- values to 0. Therefore, during scoring, the samples with 0 (encoded
1120 -- value) as class label will be recognized as mis-classified.
1121 curstmt = MADLIB_SCHEMA.__format
1123 'INSERT INTO %(
id, fid, fval, is_cont, class)
1124 SELECT
id, fid, % as fval, is_cont, class
1127 SELECT %, generate_series(1, %) as fid,
1128 unnest(array[MADLIB_SCHEMA.__to_char(%)]) as fval,
1129 unnest(array[''%''::BOOL]::BOOL[]) as is_cont,
1130 coalesce(code, 0::INT) as class
1133 ON MADLIB_SCHEMA.__to_char(t1.%) = t2.fval
1140 array_upper(is_conts, 1)::TEXT,
1141 array_to_string(attr_col_names, '), MADLIB_SCHEMA.__to_char('),
1142 array_to_string(is_conts, ''','''),
1153 IF (verbosity > 0) THEN
1154 RAISE INFO '%', curstmt;
1155 RAISE INFO 'time of breaking up the training table:%',
1156 clock_timestamp() - exec_begin;
1159 $$ LANGUAGE PLPGSQL;
1180 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_vertical_encoded_table
1182 breakup_tbl_name TEXT,
1191 exec_begin TIMESTAMP;
1194 IF (is_tbl_tmp) THEN
1198 EXECUTE 'DROP TABLE IF EXISTS ' || enc_tbl_name;
1200 m4_changequote(`>>>', `<<<')
1201 m4_ifdef(>>>__GREENPLUM_GE_4_2_1__<<<, >>>
1202 curstmt = MADLIB_SCHEMA.__format
1206 id BIGINT ENCODING (compresstype=RLE_TYPE),
1207 fid INT ENCODING (compresstype=RLE_TYPE),
1208 fval FLOAT8 ENCODING (compresstype=RLE_TYPE),
1209 is_cont BOOL ENCODING (compresstype=RLE_TYPE),
1210 class INT ENCODING (compresstype=RLE_TYPE)
1212 WITH(appendonly=true, orientation=column)
1213 DISTRIBUTED BY(
id)',
1220 curstmt = MADLIB_SCHEMA.__format
1230 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (
id)')',
1237 m4_changequote(>>>`<<<, >>>'<<<)
1239 IF (verbosity > 0) THEN
1240 RAISE INFO '%', curstmt;
1244 -- Generating the encoded table through join the breakup table with
1245 -- the KV table for all the features
1246 curstmt = MADLIB_SCHEMA.__format
1248 'INSERT INTO %(
id, fid, fval, is_cont, class)
1249 SELECT p.
id AS
id, p.fid AS fid,
1250 CASE WHEN (p.is_cont AND p.fval IS NOT NULL) THEN
1255 p.is_cont AS is_cont,
1256 p.class::INT AS class
1261 (coalesce(m.fval, '''') = (coalesce(p.fval, '''')))',
1269 IF (verbosity > 0) THEN
1270 RAISE INFO '%', curstmt;
1274 $$ LANGUAGE PLPGSQL;
1286 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_horizontal_encoded_table
1296 exec_begin TIMESTAMP;
1298 exec_begin = clock_timestamp();
1300 EXECUTE 'DROP TABLE IF EXISTS ' || hor_tbl_name;
1301 curstmt = MADLIB_SCHEMA.__format
1303 'CREATE TEMP TABLE %(
id, fvals, class) AS
1306 MADLIB_SCHEMA.__array_indexed_agg(fval, %, fid) as fvals,
1307 min(class)::INT as class
1310 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (
id)')',
1319 IF (verbosity > 0) THEN
1320 RAISE INFO 'time of generating horizontal table from vertical table:%',
1321 clock_timestamp() - exec_begin;
1324 $$ LANGUAGE PLPGSQL;
1347 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_columns
1350 breakup_tbl_name TEXT,
1351 h2hmv_routine_id INT,
1360 -- This table will be used to generate the KV table
1361 -- for the discrete features and retrieve the number
1362 -- of distinct values for a feature outside of this
1363 -- function. Therefore, don't drop this table in this
1365 EXECUTE 'DROP TABLE IF EXISTS tmp_dist_table';
1366 curstmt = MADLIB_SCHEMA.__format
1368 'CREATE TEMP TABLE tmp_dist_table AS
1369 SELECT fid, fval, is_cont
1371 GROUP BY fid, fval, is_cont',
1376 IF (verbosity > 0) THEN
1377 RAISE INFO '%', curstmt;
1382 -- create the KV table for all the features and
1383 -- populate the keys of the discrete features
1385 EXECUTE 'DROP TABLE IF EXISTS ' || kv_attr_name;
1386 curstmt = MADLIB_SCHEMA.__format
1388 'CREATE TABLE %(fid, fval, code) AS
1392 (rank() OVER (PARTITION BY fid ORDER BY fval))::FLOAT8 AS code
1395 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (fid, fval)')',
1400 IF (verbosity > 0) THEN
1401 RAISE INFO '%', curstmt;
1405 -- In "explicit" mode, we need to replace the missing
1406 -- value with the average value. Therefore, we keep
1407 -- those values to the KV table.
1408 IF (h2hmv_routine_id = 2) THEN
1409 curstmt = MADLIB_SCHEMA.__format
1411 'INSERT INTO %(fid, fval, code)
1415 coalesce(
avg(fval::FLOAT8), 0.0)
1425 IF (verbosity > 0) THEN
1426 RAISE INFO '%', curstmt;
1432 $$ LANGUAGE PLPGSQL;
1452 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_table
1454 input_tbl_name TEXT,
1456 feature_names TEXT[],
1458 cont_attr_names TEXT[],
1459 enc_table_name TEXT,
1461 h2hmv_routine_id INT,
1467 attr_col_names TEXT[];
1468 lit_attr_col_names TEXT[];
1469 kv_attr_name TEXT := enc_table_name || '_col';
1470 kv_cls_name TEXT := enc_table_name || '_class';
1472 breakup_tbl_name TEXT := 'tmp_breakup_table';
1473 exec_begin TIMESTAMP;
1474 ret MADLIB_SCHEMA.__enc_tbl_result;
1476 exec_begin = clock_timestamp();
1478 -- validate the training table
1479 PERFORM MADLIB_SCHEMA.__validate_input_table
1488 PERFORM MADLIB_SCHEMA.__create_metatable(meta_tbl_name);
1490 -- retrieve all the features' names
1491 IF (feature_names IS NULL) THEN
1492 m4_changequote(`>>>', `<<<')
1493 m4_ifdef(`__HAS_ORDERED_AGGREGATES__', >>>
1494 curstmt = MADLIB_SCHEMA.__format
1496 'SELECT
array_agg(quote_ident(attname) ORDER BY attname) as attnames
1498 WHERE attrelid = ''%''::regclass and attnum > 0 AND
1499 attname <> ''%'' AND
1500 attname <> ''%'' AND
1509 EXECUTE curstmt INTO attr_col_names;
1511 curstmt = MADLIB_SCHEMA.__format
1515 SELECT quote_ident(attname)
1517 WHERE attrelid = ''%''::regclass and attnum > 0 AND
1518 attname <> ''%'' AND
1519 attname <> ''%'' AND
1530 EXECUTE curstmt INTO attr_col_names;
1532 m4_changequote(>>>`<<<, >>>'<<<)
1534 attr_col_names = MADLIB_SCHEMA.__array_sort(feature_names);
1537 -- an array contains if a feature is continuous or not
1538 -- the subscript is corresponding to the feature's ID
1539 is_conts = MADLIB_SCHEMA.__array_elem_in(cont_attr_names, attr_col_names);
1541 ret.pre_proc_time = clock_timestamp() - exec_begin;
1542 exec_begin = clock_timestamp();
1544 -- create the KV table for the class column.
1545 EXECUTE 'DROP TABLE IF EXISTS ' || kv_cls_name;
1546 curstmt = MADLIB_SCHEMA.__format
1550 MADLIB_SCHEMA.__to_char(%) AS fval,
1551 rank() OVER (ORDER BY %) AS code
1554 SELECT % FROM % GROUP BY %
1556 m4_ifdef(`__GREENPLUM__', `DISTRIBUTED BY (fval)')',
1566 IF (verbosity > 0) THEN
1567 RAISE INFO '%', curstmt;
1571 ret.gen_kv_time = clock_timestamp() - exec_begin;
1572 exec_begin = clock_timestamp();
1574 -- breakup each record of the training table and keep the result
1575 -- into a new table.
1576 PERFORM MADLIB_SCHEMA.__breakup_table
1589 ret.breakup_tbl_time= clock_timestamp() - exec_begin;
1590 exec_begin = clock_timestamp();
1592 -- generate the KV table for both continuous features
1593 -- and discrete features.
1594 PERFORM MADLIB_SCHEMA.__encode_columns
1602 ret.gen_kv_time = ret.gen_kv_time + (clock_timestamp() - exec_begin);
1603 exec_begin = clock_timestamp();
1605 -- generate the encoded table using the breakup table
1606 -- and KV table for all the features.
1607 PERFORM MADLIB_SCHEMA.__gen_vertical_encoded_table
1616 ret.gen_enc_time = clock_timestamp() - exec_begin;
1617 exec_begin = clock_timestamp();
1619 SELECT ARRAY(SELECT quote_literal(unnest(attr_col_names))) INTO lit_attr_col_names;
1621 -- put the features' meta information to the metatable
1622 curstmt = MADLIB_SCHEMA.__format
1625 SELECT fid as
id, (ARRAY[%])[fid] as column_name,
1626 ''f'' as column_type,
1628 ''%''::regclass::OID,
1629 count(fid) as num_dist_value
1631 GROUP BY fid, is_cont',
1634 array_to_string(lit_attr_col_names, ','),
1642 IF (h2hmv_routine_id = 1) THEN
1643 -- retrieve the information of the columns (all the values in those
1644 -- columns are missing), and insert them to the meta table.
1645 curstmt = MADLIB_SCHEMA.__format
1648 SELECT
id, (ARRAY[%])[
id] as column_name,
1649 ''f'' as column_type, ''t'', NULL, 0
1651 SELECT generate_series(1, %)
id
1653 SELECT
id FROM % WHERE column_type = ''f''
1657 array_to_string(lit_attr_col_names, ','),
1658 array_upper(attr_col_names, 1)::TEXT,
1665 -- no need this table
1666 EXECUTE 'DROP TABLE IF EXISTS tmp_dist_table';
1668 -- put the class's meta information to the metatable
1669 curstmt = MADLIB_SCHEMA.__format
1672 SELECT 0 as
id,''%'',
1673 ''c'' as column_type,
1675 ''%''::regclass::OID,
1676 count(code) as num_dist_value
1689 -- put the
id's meta information to the metatable
1690 PERFORM MADLIB_SCHEMA.__insert_into_metatable
1693 array_upper(attr_col_names, 1) + 1,
1698 -- analyze the table, so that later the optimizer has the statistics
1699 -- information about this table
1700 EXECUTE 'ANALYZE ' || enc_table_name;
1702 ret.post_proc_time = clock_timestamp() - exec_begin;
1704 IF (verbosity > 0) THEN
1705 RAISE INFO 'time of encoding: %', ret;
1708 $$ LANGUAGE PLPGSQL;
1722 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__encode_table
1724 input_tbl_name TEXT,
1727 h2hmv_routine_id INT,
1733 attr_col_names TEXT[];
1739 exec_begin TIMESTAMP;
1740 breakup_tbl_name TEXT := 'tmp_breakup_table';
1742 exec_begin = clock_timestamp();
1744 curstmt = MADLIB_SCHEMA.__format
1746 'SELECT column_name FROM % WHERE column_type=''i''',
1749 EXECUTE curstmt INTO id_col_name;
1751 curstmt = MADLIB_SCHEMA.__format
1753 'SELECT column_name FROM % WHERE column_type=''c''',
1756 EXECUTE curstmt INTO cls_col_name;
1758 IF (NOT MADLIB_SCHEMA.__column_exists(input_tbl_name, cls_col_name)) THEN
1759 cls_col_name = NULL;
1762 m4_changequote(`>>>', `<<<')
1763 m4_ifdef(`__HAS_ORDERED_AGGREGATES__', >>>
1764 curstmt = MADLIB_SCHEMA.__format
1766 'SELECT
array_agg(column_name order by
id)
1767 FROM % WHERE column_type=''f''',
1770 EXECUTE curstmt INTO attr_col_names;
1772 curstmt = MADLIB_SCHEMA.__format
1777 WHERE column_type=''f''',
1780 EXECUTE curstmt INTO is_conts;
1782 curstmt = MADLIB_SCHEMA.__format
1787 FROM % WHERE column_type=''f''
1793 EXECUTE curstmt INTO attr_col_names;
1795 curstmt = MADLIB_SCHEMA.__format
1801 WHERE column_type=''f''
1807 EXECUTE curstmt INTO is_conts;
1809 m4_changequote(>>>`<<<, >>>'<<<)
1811 curstmt = MADLIB_SCHEMA.__format
1814 MADLIB_SCHEMA.__regclass_to_text(table_oid) as table_name
1816 WHERE column_type=''f'' limit 1',
1819 EXECUTE curstmt INTO kv_attr_name;
1821 curstmt = MADLIB_SCHEMA.__format
1824 MADLIB_SCHEMA.__regclass_to_text(table_oid) as table_name
1826 WHERE column_type=''c'' limit 1',
1829 EXECUTE curstmt INTO kv_cls_name;
1831 PERFORM MADLIB_SCHEMA.__validate_input_table
1839 -- breakup each record from the classification/scoring
1840 -- table and kept the results into a new table.
1841 PERFORM MADLIB_SCHEMA.__breakup_table
1854 -- generate the vertical encoded table.
1855 PERFORM MADLIB_SCHEMA.__gen_vertical_encoded_table
1864 -- generate the horizontal encoded table.
1865 EXECUTE 'DROP TABLE IF EXISTS ' || enc_tbl_name;
1866 PERFORM MADLIB_SCHEMA.__gen_horizontal_encoded_table
1870 array_upper(is_conts, 1),
1874 EXECUTE 'DROP TABLE IF EXISTS dt_tmp_ver_table';
1876 IF (verbosity > 0) THEN
1877 RAISE INFO 'Encoding time:%', clock_timestamp() - exec_begin;
1880 $$ LANGUAGE PLPGSQL;