MADlib
0.7 A newer version is available
User Documentation
|
/* ----------------------------------------------------------------------- *//**
 *
 * @file sample.sql_in
 *
 * @brief SQL functions for random sampling
 *
 * @sa For an overview of random-sampling functions, see the module
 *     description \ref grp_sample.
 *
 *//* ----------------------------------------------------------------------- */

m4_include(`SQLCommon.m4')

/**
@addtogroup grp_sample

@about

Random-sampling functions.

@sa File sample.sql_in documenting the SQL functions.
*/

/*
 * Transition function of the weighted_sample aggregate over BIGINT values
 * (referenced as SFUNC below). Takes the current aggregate state plus one
 * (value, weight) pair and returns the updated state. Implemented in the C
 * module.
 *
 * STRICT: per PostgreSQL rules a strict transition function is never called
 * with NULL arguments, so rows with a NULL value or NULL weight are skipped
 * by the aggregate.
 * VOLATILE: presumably because the implementation draws random numbers --
 * TODO confirm against the C source.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_int64(
    state MADLIB_SCHEMA.bytea8,
    value BIGINT,
    weight DOUBLE PRECISION
) RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C
VOLATILE
STRICT;

/*
 * Merge function for two partial states of the BIGINT weighted_sample
 * aggregate. It is only wired into the aggregate (as prefunc) when the
 * __GREENPLUM__ macro is defined at m4 time; see the aggregate below.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_int64(
    state_left MADLIB_SCHEMA.bytea8,
    state_right MADLIB_SCHEMA.bytea8
) RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C
VOLATILE
STRICT;

/*
 * Final function of the BIGINT weighted_sample aggregate (referenced as
 * FINALFUNC below): maps the accumulated state to the BIGINT result.
 * Declared IMMUTABLE, unlike the transition and merge functions.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_int64(
    state MADLIB_SCHEMA.bytea8
) RETURNS BIGINT
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT;

/**
 * @brief Sample a single row according to weights
 *
 * @param value Value of row. Uniqueness is not enforced. If a value occurs
 *     multiple times, the probability of sampling this value is proportional to
 *     the sum of its weights.
 * @param weight Weight for row. A negative value here is treated as zero
 *     weight.
 * @return \c identifier of the selected row. The probability of sampling any
 *     particular row <tt>(value, weight)</tt> is
 *     <tt>weight/SUM(weight)</tt>.
 */
CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
    /*+ value */ BIGINT,
    /*+ weight */ DOUBLE PRECISION) (

    SFUNC=MADLIB_SCHEMA.weighted_sample_transition_int64,
    STYPE=MADLIB_SCHEMA.bytea8,
    FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_int64,
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.weighted_sample_merge_int64,')
    INITCOND=''
);


/*
 * Transition function of the weighted_sample aggregate over
 * DOUBLE PRECISION[] values (referenced as SFUNC below). Same signature
 * shape as the BIGINT variant, with an array-valued sample.
 *
 * STRICT: per PostgreSQL rules, rows with a NULL value or NULL weight are
 * skipped by the aggregate.
 * VOLATILE: presumably because the implementation draws random numbers --
 * TODO confirm against the C source.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_vector(
    state MADLIB_SCHEMA.bytea8,
    value DOUBLE PRECISION[],
    weight DOUBLE PRECISION
) RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C
VOLATILE
STRICT;

/*
 * Merge function for two partial states of the array-valued weighted_sample
 * aggregate. Only used (as prefunc) when __GREENPLUM__ is defined at m4 time.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_vector(
    state_left MADLIB_SCHEMA.bytea8,
    state_right MADLIB_SCHEMA.bytea8
) RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C
VOLATILE
STRICT;

/*
 * Final function of the array-valued weighted_sample aggregate (referenced
 * as FINALFUNC below): maps the accumulated state to the DOUBLE PRECISION[]
 * result. Declared IMMUTABLE, unlike the transition and merge functions.
 */
CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_vector(
    state MADLIB_SCHEMA.bytea8
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT;

/*
 * Overload of weighted_sample for array-valued rows: takes a
 * DOUBLE PRECISION[] value and a DOUBLE PRECISION weight and returns a
 * DOUBLE PRECISION[]. It is assembled exactly like the BIGINT overload
 * (transition, final, optional Greenplum prefunc, empty-string INITCOND).
 * NOTE(review): the sampling semantics are presumably the same as documented
 * for the BIGINT overload -- confirm against the C implementation.
 */
CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
    /*+ value */ DOUBLE PRECISION[],
    /*+ weight */ DOUBLE PRECISION) (

    SFUNC=MADLIB_SCHEMA.weighted_sample_transition_vector,
    STYPE=MADLIB_SCHEMA.bytea8,
    FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_vector,
    m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.weighted_sample_merge_vector,')
    INITCOND=''
);