User Documentation
 All Files Functions Groups
sample.sql_in
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------- *//**
2  *
3  * @file sample.sql_in
4  *
5  * @brief SQL functions for random sampling
6  *
7  * @sa For an overview of random-sampling functions, see the module
8  * description \ref grp_sample.
9  *
10  *//* ----------------------------------------------------------------------- */
11 
12 m4_include(`SQLCommon.m4')
13 
14 /**
15 @addtogroup grp_sample
16 
17 \warning <em> This MADlib method is still in early stage development. There may be some
18 issues that will be addressed in a future version. Interface and implementation
19 are subject to change. </em>
20 
21 @about
22 
23 The random sampling module consists of useful utility functions for sampling
24 operations. Several of these functions can be used while implementing
25 new algorithms.
26 
27 Refer to the file for documentation on each of the utility functions.
28 
29 @sa File sample.sql_in documenting the SQL functions.
30 */
31 
/**
 * @brief Transition function of the weighted_sample aggregate for BIGINT values
 *
 * Registered as the SFUNC of
 * MADLIB_SCHEMA.weighted_sample(BIGINT, DOUBLE PRECISION). Only the SQL
 * binding is visible here; the body is a C function in the shared library.
 *
 * @param state Current aggregate state (opaque MADLIB_SCHEMA.bytea8 value)
 * @param value Value of the current row
 * @param weight Weight of the current row
 * @return The next aggregate state
 *
 * Declared STRICT, so the aggregate skips rows in which value or weight is
 * NULL. Declared VOLATILE, presumably because the C code draws random
 * numbers (TODO confirm against the C implementation).
 */
32 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_int64(
33  state MADLIB_SCHEMA.bytea8,
34  value BIGINT,
35  weight DOUBLE PRECISION
36 ) RETURNS MADLIB_SCHEMA.bytea8
37 AS 'MODULE_PATHNAME'
38 LANGUAGE C
39 VOLATILE
40 STRICT;
41 
/**
 * @brief Merge function of the weighted_sample aggregate for BIGINT values
 *
 * Combines two aggregate states into one. It is registered as the prefunc
 * of MADLIB_SCHEMA.weighted_sample(BIGINT, DOUBLE PRECISION) only when the
 * file is built for Greenplum; on other targets nothing in this file
 * references it.
 *
 * @param state_left First aggregate state
 * @param state_right Second aggregate state
 * @return The merged aggregate state
 *
 * Declared STRICT, so the result is NULL if either state is NULL. Declared
 * VOLATILE, presumably because choosing between the two partial samples is
 * randomized (TODO confirm against the C implementation).
 */
42 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_int64(
43  state_left MADLIB_SCHEMA.bytea8,
44  state_right MADLIB_SCHEMA.bytea8
45 ) RETURNS MADLIB_SCHEMA.bytea8
46 AS 'MODULE_PATHNAME'
47 LANGUAGE C
48 VOLATILE
49 STRICT;
50 
/**
 * @brief Final function of the weighted_sample aggregate for BIGINT values
 *
 * Registered as the FINALFUNC of
 * MADLIB_SCHEMA.weighted_sample(BIGINT, DOUBLE PRECISION); converts the
 * final aggregate state into the BIGINT result of the aggregate.
 *
 * @param state Final aggregate state
 * @return The BIGINT value stored in the state (presumably the sampled
 *     value; the extraction itself happens in C and is not visible here)
 *
 * Declared IMMUTABLE and STRICT: the result depends only on the state, and
 * a NULL state yields NULL.
 */
51 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_int64(
52  state MADLIB_SCHEMA.bytea8
53 ) RETURNS BIGINT
54 AS 'MODULE_PATHNAME'
55 LANGUAGE C
56 IMMUTABLE
57 STRICT;
58 
59 /**
60  * @brief Sample a single row according to weights
61  *
62  * @param value Value of row. Uniqueness is not enforced. If a value occurs
63  * multiple times, the probability of sampling this value is proportional to
64  * the sum of its weights.
65  * @param weight Weight for row. A negative value here is treated as zero
66  * weight.
67  * @return \c identifier of the selected row. The probability of sampling any
68  * particular row <tt>(value, weight)</tt> is
69  * <tt>weight/SUM(weight)</tt>.
 *
 * @note The transition function is STRICT, so rows in which \c value or
 * \c weight is NULL are skipped. The merge function is attached as prefunc
 * only when the file is built for Greenplum. The initial state is given as
 * the empty string.
70  */
71 CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
72  /*+ value */ BIGINT,
73  /*+ weight */ DOUBLE PRECISION) (
74 
75  SFUNC=MADLIB_SCHEMA.weighted_sample_transition_int64,
76  STYPE=MADLIB_SCHEMA.bytea8,
77  FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_int64,
78  m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.weighted_sample_merge_int64,')
79  INITCOND=''
80 );
81 
82 
/**
 * @brief Transition function of the weighted_sample aggregate for
 *     DOUBLE PRECISION[] values
 *
 * Registered as the SFUNC of
 * MADLIB_SCHEMA.weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION). Only
 * the SQL binding is visible here; the body is a C function in the shared
 * library.
 *
 * @param state Current aggregate state (opaque MADLIB_SCHEMA.bytea8 value)
 * @param value Value of the current row (array of DOUBLE PRECISION)
 * @param weight Weight of the current row
 * @return The next aggregate state
 *
 * Declared STRICT, so the aggregate skips rows in which value or weight is
 * NULL. Declared VOLATILE, presumably because the C code draws random
 * numbers (TODO confirm against the C implementation).
 */
83 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_vector(
84  state MADLIB_SCHEMA.bytea8,
85  value DOUBLE PRECISION[],
86  weight DOUBLE PRECISION
87 ) RETURNS MADLIB_SCHEMA.bytea8
88 AS 'MODULE_PATHNAME'
89 LANGUAGE C
90 VOLATILE
91 STRICT;
92 
/**
 * @brief Merge function of the weighted_sample aggregate for
 *     DOUBLE PRECISION[] values
 *
 * Combines two aggregate states into one. It is registered as the prefunc
 * of MADLIB_SCHEMA.weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION)
 * only when the file is built for Greenplum; on other targets nothing in
 * this file references it.
 *
 * @param state_left First aggregate state
 * @param state_right Second aggregate state
 * @return The merged aggregate state
 *
 * Declared STRICT, so the result is NULL if either state is NULL. Declared
 * VOLATILE, presumably because choosing between the two partial samples is
 * randomized (TODO confirm against the C implementation).
 */
93 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_vector(
94  state_left MADLIB_SCHEMA.bytea8,
95  state_right MADLIB_SCHEMA.bytea8
96 ) RETURNS MADLIB_SCHEMA.bytea8
97 AS 'MODULE_PATHNAME'
98 LANGUAGE C
99 VOLATILE
100 STRICT;
101 
/**
 * @brief Final function of the weighted_sample aggregate for
 *     DOUBLE PRECISION[] values
 *
 * Registered as the FINALFUNC of
 * MADLIB_SCHEMA.weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION);
 * converts the final aggregate state into the array result of the
 * aggregate.
 *
 * @param state Final aggregate state
 * @return The DOUBLE PRECISION[] value stored in the state (presumably the
 *     sampled value; the extraction itself happens in C and is not visible
 *     here)
 *
 * Declared IMMUTABLE and STRICT: the result depends only on the state, and
 * a NULL state yields NULL.
 */
102 CREATE FUNCTION MADLIB_SCHEMA.weighted_sample_final_vector(
103  state MADLIB_SCHEMA.bytea8
104 ) RETURNS DOUBLE PRECISION[]
105 AS 'MODULE_PATHNAME'
106 LANGUAGE C
107 IMMUTABLE
108 STRICT;
109 
/**
 * @brief Overload of weighted_sample for DOUBLE PRECISION[] values
 *
 * @param value Value of row, given as an array of DOUBLE PRECISION.
 * @param weight Weight for row.
 * @return The DOUBLE PRECISION[] produced by weighted_sample_final_vector
 *     from the final aggregate state.
 *
 * @note The transition function is STRICT, so rows in which \c value or
 * \c weight is NULL are skipped. The merge function is attached as prefunc
 * only when the file is built for Greenplum. The initial state is given as
 * the empty string.
 *
 * NOTE(review): presumably this follows the same sampling semantics as the
 * BIGINT overload (probability proportional to weight, negative weights
 * treated as zero) -- confirm against the C implementation.
 */
110 CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
111  /*+ value */ DOUBLE PRECISION[],
112  /*+ weight */ DOUBLE PRECISION) (
113 
114  SFUNC=MADLIB_SCHEMA.weighted_sample_transition_vector,
115  STYPE=MADLIB_SCHEMA.bytea8,
116  FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_vector,
117  m4_ifdef(`__GREENPLUM__',`prefunc=MADLIB_SCHEMA.weighted_sample_merge_vector,')
118  INITCOND=''
119 );