User Documentation
sample.sql_in
Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------- *//**
00002  *
00003  * @file sample.sql_in
00004  *
00005  * @brief SQL functions for random sampling
00006  *
00007  * @sa For an overview of random-sampling functions, see the module
00008  *     description \ref grp_sample.
00009  *
00010  *//* ----------------------------------------------------------------------- */
00011 
00012 m4_include(`SQLCommon.m4')
00013 
00014 /**
00015 @addtogroup grp_sample
00016 
00017 @about
00018 
00019 Random-sampling functions.
00020 
00021 @sa File sample.sql_in documenting the SQL functions.
00022 */
00023 
00024 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_int64(  /* SFUNC of weighted_sample(BIGINT, DOUBLE PRECISION) */
00025     state  MADLIB_SCHEMA.bytea8,  /* aggregate state passed in by the executor */
00026     value  BIGINT,                /* value of the current row */
00027     weight DOUBLE PRECISION       /* weight of the current row */
00028 )
00029 RETURNS MADLIB_SCHEMA.bytea8      /* state handed to the next call */
00030 LANGUAGE C                        /* link symbol defaults to the SQL function name */
00031 STRICT                            /* not executed when any argument is NULL */
00032 VOLATILE AS 'MODULE_PATHNAME';    /* VOLATILE: results are not assumed repeatable */
00033 
00034 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_int64(  /* prefunc of weighted_sample(BIGINT, DOUBLE PRECISION) on Greenplum builds */
00035     state_left  MADLIB_SCHEMA.bytea8,  /* first state to merge */
00036     state_right MADLIB_SCHEMA.bytea8   /* second state to merge */
00037 )
00038 RETURNS MADLIB_SCHEMA.bytea8           /* same type as both inputs */
00039 LANGUAGE C                             /* link symbol defaults to the SQL function name */
00040 STRICT                                 /* not executed when either state is NULL */
00041 VOLATILE AS 'MODULE_PATHNAME';         /* VOLATILE: results are not assumed repeatable */
00042 
00043 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_int64(  /* FINALFUNC of weighted_sample(BIGINT, DOUBLE PRECISION) */
00044     state MADLIB_SCHEMA.bytea8   /* aggregate state after the last transition */
00045 )
00046 RETURNS BIGINT                   /* becomes the result of the aggregate */
00047 LANGUAGE C                       /* link symbol defaults to the SQL function name */
00048 STRICT IMMUTABLE                 /* skipped on NULL state; declared free of side effects */
00049 AS 'MODULE_PATHNAME';
00050 
00051 /**
00052  * @brief Sample a single row according to weights
00053  *
00054  * @param value Value of row. Uniqueness is not enforced. If a value occurs
00055  *     multiple times, the probability of sampling this value is proportional to
00056  *     the sum of its weights.
00057  * @param weight Weight for row. A negative value here is treated as zero
00058  *     weight.
00059  * @return The \c value of the selected row. The probability of sampling any
00060  *     particular row <tt>(value, weight)</tt> is
00061  *     <tt>weight/SUM(weight)</tt>.
00062  */
00063 CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
00064     /*+ value */ BIGINT,
00065     /*+ weight */ DOUBLE PRECISION
00066 ) (
00067     STYPE     = MADLIB_SCHEMA.bytea8,                            /* type of the running state */
00068     SFUNC     = MADLIB_SCHEMA.weighted_sample_transition_int64,  /* applied once per input row */
00069     FINALFUNC = MADLIB_SCHEMA.weighted_sample_final_int64,       /* turns the state into the BIGINT result */
00070     m4_ifdef(`__GREENPLUM__', `PREFUNC = MADLIB_SCHEMA.weighted_sample_merge_int64,')  /* emitted for Greenplum builds only */
00071     INITCOND  = ''                                               /* state starts out as the empty string */
00072 );
00073 
00074 
00075 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_vector(  /* SFUNC of weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION) */
00076     state  MADLIB_SCHEMA.bytea8,  /* aggregate state passed in by the executor */
00077     value  DOUBLE PRECISION[],    /* array value of the current row */
00078     weight DOUBLE PRECISION       /* weight of the current row */
00079 )
00080 RETURNS MADLIB_SCHEMA.bytea8      /* state handed to the next call */
00081 LANGUAGE C                        /* link symbol defaults to the SQL function name */
00082 STRICT                            /* not executed when any argument is NULL */
00083 VOLATILE AS 'MODULE_PATHNAME';    /* VOLATILE: results are not assumed repeatable */
00084 
00085 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_vector(  /* prefunc of weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION) on Greenplum builds */
00086     state_left  MADLIB_SCHEMA.bytea8,  /* first state to merge */
00087     state_right MADLIB_SCHEMA.bytea8   /* second state to merge */
00088 )
00089 RETURNS MADLIB_SCHEMA.bytea8           /* same type as both inputs */
00090 LANGUAGE C                             /* link symbol defaults to the SQL function name */
00091 STRICT                                 /* not executed when either state is NULL */
00092 VOLATILE AS 'MODULE_PATHNAME';         /* VOLATILE: results are not assumed repeatable */
00093 
00094 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_vector(  /* FINALFUNC of weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION) */
00095     state MADLIB_SCHEMA.bytea8   /* aggregate state after the last transition */
00096 )
00097 RETURNS DOUBLE PRECISION[]       /* becomes the result of the aggregate */
00098 LANGUAGE C                       /* link symbol defaults to the SQL function name */
00099 STRICT IMMUTABLE                 /* skipped on NULL state; declared free of side effects */
00100 AS 'MODULE_PATHNAME';
00101 
00102 CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(  /* overload taking a DOUBLE PRECISION[] value instead of a BIGINT */
00103     /*+ value */ DOUBLE PRECISION[],
00104     /*+ weight */ DOUBLE PRECISION
00105 ) (
00106     STYPE     = MADLIB_SCHEMA.bytea8,                             /* type of the running state */
00107     SFUNC     = MADLIB_SCHEMA.weighted_sample_transition_vector,  /* applied once per input row */
00108     FINALFUNC = MADLIB_SCHEMA.weighted_sample_final_vector,       /* turns the state into the DOUBLE PRECISION[] result */
00109     m4_ifdef(`__GREENPLUM__', `PREFUNC = MADLIB_SCHEMA.weighted_sample_merge_vector,')  /* emitted for Greenplum builds only */
00110     INITCOND  = ''                                                /* state starts out as the empty string */
00111 );