13 m4_include(`SQLCommon.m4
')
140 CREATE TYPE MADLIB_SCHEMA.lmf_result AS (
141 matrix_u DOUBLE PRECISION[],
142 matrix_v DOUBLE PRECISION[],
143 rmse DOUBLE PRECISION
146 --------------------------------------------------------------------------
147 -- create SQL functions for IGD optimizer
148 --------------------------------------------------------------------------
149 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_transition(
150 state DOUBLE PRECISION[],
153 val DOUBLE PRECISION,
154 previous_state DOUBLE PRECISION[],
158 stepsize DOUBLE PRECISION,
159 scale_factor DOUBLE PRECISION)
160 RETURNS DOUBLE PRECISION[]
162 LANGUAGE C IMMUTABLE;
164 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_merge(
165 state1 DOUBLE PRECISION[],
166 state2 DOUBLE PRECISION[])
167 RETURNS DOUBLE PRECISION[]
169 LANGUAGE C IMMUTABLE STRICT;
171 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_final(
172 state DOUBLE PRECISION[])
173 RETURNS DOUBLE PRECISION[]
175 LANGUAGE C IMMUTABLE STRICT;
182 CREATE AGGREGATE MADLIB_SCHEMA.lmf_igd_step(
183 /*+ row_num */ SMALLINT,
184 /*+ column_num */ SMALLINT,
185 /*+ val */ DOUBLE PRECISION,
186 /*+ previous_state */ DOUBLE PRECISION[],
187 /*+ row_dim */ SMALLINT,
188 /*+ column_dim */ SMALLINT,
189 /*+ max_rank */ SMALLINT,
190 /*+ stepsize */ DOUBLE PRECISION,
191 /*+ scale_factor */ DOUBLE PRECISION) (
192 STYPE=DOUBLE PRECISION[],
193 SFUNC=MADLIB_SCHEMA.lmf_igd_transition,
194 -- m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.lmf_igd_merge,
')
195 FINALFUNC=MADLIB_SCHEMA.lmf_igd_final,
196 INITCOND='{0,0,0,0,0,0,0,0,0}
'
199 CREATE FUNCTION MADLIB_SCHEMA.internal_lmf_igd_distance(
200 /*+ state1 */ DOUBLE PRECISION[],
201 /*+ state2 */ DOUBLE PRECISION[])
202 RETURNS DOUBLE PRECISION AS
204 LANGUAGE c IMMUTABLE STRICT;
206 CREATE FUNCTION MADLIB_SCHEMA.internal_lmf_igd_result(
207 /*+ state */ DOUBLE PRECISION[])
208 RETURNS MADLIB_SCHEMA.lmf_result AS
210 LANGUAGE c IMMUTABLE STRICT;
213 CREATE FUNCTION MADLIB_SCHEMA.internal_execute_using_lmf_igd_args(
214 sql VARCHAR, INTEGER, INTEGER, INTEGER, DOUBLE PRECISION,
215 DOUBLE PRECISION, INTEGER, DOUBLE PRECISION
220 AS 'MODULE_PATHNAME
', 'exec_sql_using
';
222 CREATE FUNCTION MADLIB_SCHEMA.internal_compute_lmf_igd(
230 AS $$PythonFunction(convex, lmf_igd, compute_lmf_igd)$$
231 LANGUAGE plpythonu VOLATILE;
256 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run(
262 row_dim INTEGER /*+ DEFAULT 'SELECT max(col_row) FROM rel_source' */,
265 stepsize DOUBLE PRECISION ,
266 scale_factor DOUBLE PRECISION ,
267 num_iterations INTEGER ,
268 tolerance DOUBLE PRECISION )
269 RETURNS INTEGER AS $$
271 iteration_run INTEGER;
273 rmse DOUBLE PRECISION;
274 old_messages VARCHAR;
276 RAISE NOTICE 'Matrix % to be factorized: % x %', rel_source, row_dim, column_dim;
278 -- We first setup the argument table. Rationale: We want to avoid all data
279 -- conversion between native types and Python code. Instead, we use Python
280 -- as a pure driver layer.
282 (SELECT setting FROM pg_settings WHERE name = 'client_min_messages');
283 EXECUTE 'SET client_min_messages TO warning';
285 -- Unfortunately, the EXECUTE USING syntax is only available starting
288 -- We therefore have to emulate.
289 PERFORM MADLIB_SCHEMA.internal_execute_using_lmf_igd_args($sql$
290 DROP TABLE IF EXISTS pg_temp._madlib_lmf_igd_args;
291 CREATE TABLE pg_temp._madlib_lmf_igd_args AS
298 $6 AS num_iterations,
301 row_dim, column_dim, max_rank, stepsize,
302 scale_factor, num_iterations, tolerance);
303 EXECUTE 'SET client_min_messages TO ' || old_messages;
305 -- Perform acutal computation.
306 -- Unfortunately, Greenplum and PostgreSQL <= 8.2 do not have conversion
307 -- operators from regclass to varchar/text.
308 iteration_run := MADLIB_SCHEMA.internal_compute_lmf_igd(
309 '_madlib_lmf_igd_args', '_madlib_lmf_igd_state',
310 textin(regclassout(rel_source)), col_row, col_column, col_value);
312 -- create result table if it does not exist
314 EXECUTE 'SELECT 1 FROM ' || rel_output || ' LIMIT 0';
316 WHEN undefined_table THEN
318 CREATE TABLE ' || rel_output || ' (
320 matrix_u DOUBLE PRECISION[],
321 matrix_v DOUBLE PRECISION[],
322 rmse DOUBLE PRECISION)';
325 -- A work-around for GPDB not supporting RETURNING for INSERT
326 -- We generate an
id using nextval before INSERT
328 SELECT nextval(' || quote_literal(rel_output || '_id_seq') ||'::regclass)'
332 -- Retrieve result from state table and insert it
334 INSERT INTO ' || rel_output || '
335 SELECT ' || model_id || ', (result).*
337 SELECT MADLIB_SCHEMA.internal_lmf_igd_result(_state) AS result
338 FROM _madlib_lmf_igd_state
339 WHERE _iteration = ' || iteration_run || '
344 FROM ' || rel_output || '
345 WHERE
id = ' || model_id
348 -- return description
350 Finished low-rank matrix factorization using incremental gradient
351 * table : % (%, %, %)
355 * view : SELECT * FROM % WHERE
id = %',
356 rel_source, col_row, col_column, col_value, rmse, rel_output, model_id;
360 $$ LANGUAGE plpgsql VOLATILE;
371 stepsize DOUBLE PRECISION,
372 scale_factor DOUBLE PRECISION)
373 RETURNS INTEGER AS $$
374 SELECT MADLIB_SCHEMA.
lmf_igd_run($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, 10, 0.0001);
375 $$ LANGUAGE sql VOLATILE;
377 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run(
386 stepsize DOUBLE PRECISION)
387 RETURNS INTEGER AS $$
388 -- set scale_factor as default 0.1
389 SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, $6, $7, $8, $9, 0.1);
390 $$ LANGUAGE sql VOLATILE;
392 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run(
401 RETURNS INTEGER AS $$
402 -- set stepsize as default 0.01
403 SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, $6, $7, $8, 0.01);
404 $$ LANGUAGE sql VOLATILE;
406 CREATE FUNCTION MADLIB_SCHEMA.lmf_igd_run(
412 RETURNS INTEGER AS $$
418 SELECT max(' || col_row || '), max(' || col_column || ')
419 FROM ' || textin(regclassout(rel_source))
420 INTO row_dim, column_dim;
422 RETURN (SELECT MADLIB_SCHEMA.lmf_igd_run($1, $2, $3, $4, $5, row_dim, column_dim, 20));
424 $$ LANGUAGE plpgsql VOLATILE;