Functions | |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket, integer n_bins, text pruning_params, text null_handling_params, boolean verbose_mode) |
Training of decision tree. More... | |
void | __build_tree (boolean is_classification, text split_criterion, text training_table_name, text output_table_name, text id_col_name, text dependent_variable, boolean dep_is_bool, text list_of_features, varchar[] cat_features, varchar[] ordered_cat_features, varchar[] boolean_cats, varchar[] con_features, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket, integer n_bins, text cp_table, smallint max_n_surr, text msg_level, text null_proxy, integer n_folds) |
text | tree_train (text message) |
text | tree_train () |
bytea8 | _dst_compute_con_splits_transition (bytea8 state, float8[] con_features, integer n_per_seg, smallint num_splits) |
bytea8 | _dst_compute_con_splits_final (bytea8 state) |
aggregate bytea8 | _dst_compute_con_splits (float8[], integer, smallint) |
integer [] | _dst_compute_entropy_transition (integer[] state, integer encoded_dep_var, integer num_dep_var) |
integer [] | _dst_compute_entropy_merge (integer[] state1, integer[] state2) |
float8 | _dst_compute_entropy_final (integer[] state) |
aggregate float8 | _dst_compute_entropy (integer, integer) |
integer [] | _map_catlevel_to_int (text[] cat_values_in_text, text[] cat_levels_in_text, integer[] cat_n_levels, boolean null_as_category) |
bytea8 | _initialize_decision_tree (boolean is_regression_tree, text impurity_function, smallint num_response_labels, smallint max_n_surr) |
bytea8 | _compute_leaf_stats_transition (bytea8 state, bytea8 tree_state, integer[] cat_features, float8[] con_features, float8 response, float8 weight, integer[] cat_levels, bytea8 con_splits, smallint n_response_labels, boolean weights_as_rows) |
bytea8 | _compute_leaf_stats_merge (bytea8 state1, bytea8 state2) |
aggregate bytea8 | _compute_leaf_stats (bytea8, integer[], float8[], float8, float8, integer[], bytea8, smallint, boolean) |
_tree_result_type | _dt_apply (bytea8 tree, bytea8 state, bytea8 con_splits, smallint min_split, smallint min_bucket, smallint max_depth, boolean subsample, integer num_random_features) |
bytea8 | _compute_surr_stats_transition (bytea8 state, bytea8 tree_state, integer[] cat_features, float8[] con_features, integer[] cat_levels, bytea8 con_splits, integer dup_count) |
aggregate bytea8 | _compute_surr_stats (bytea8, integer[], float8[], integer[], bytea8, integer) |
bytea8 | _dt_surr_apply (bytea8 tree, bytea8 state, bytea8 con_splits) |
_flattened_tree | _print_decision_tree (bytea8 tree) |
float8 [] | _compute_var_importance (bytea8 tree, integer n_cat_features, integer n_con_features) |
float8 | _predict_dt_response (bytea8 tree, integer[] cat_features, float8[] con_features) |
float8 [] | _predict_dt_prob (bytea8 tree, integer[] cat_features, float8[] con_features) |
void | tree_predict (text model, text source, text output, text pred_type) |
Use decision tree model to make predictions. More... | |
void | __tree_predict (text model, text source, text output, text pred_type, boolean use_existing_tables, integer k) |
void | tree_predict (text model, text source, text output) |
text | tree_predict (text message) |
text | tree_predict () |
varchar | tree_surr_display (text model_table) |
Display decision tree in dot or text format. More... | |
varchar | tree_surr_display () |
varchar | tree_display (text model_table, boolean dot_format, boolean verbose) |
Display decision tree in dot or text format. More... | |
varchar | tree_display (text model_table, boolean dot_format) |
varchar | tree_display (text model_table) |
varchar | tree_display () |
text | _display_decision_tree (bytea8 tree, text[] cat_features, text[] con_features, text[] cat_levels_in_text, integer[] cat_n_levels, text[] dependent_levels, text id_prefix, boolean verbose) |
text | _display_decision_tree (bytea8 tree, text[] cat_features, text[] con_features, text[] cat_levels_in_text, integer[] cat_n_levels, text[] dependent_levels, text id_prefix) |
text | _display_decision_tree_surrogate (bytea8 tree, text[] cat_features, text[] con_features, text[] cat_levels_in_text, integer[] cat_n_levels) |
text | _display_text_decision_tree (bytea8 tree, text[] cat_features, text[] con_features, text[] cat_levels_in_text, integer[] cat_n_levels, text[] dependent_levels) |
set< _cat_levels_type > | _gen_cat_levels_set (text[] grp_keys, integer[] cat_n_levels, integer n_cat, text[] cat_sorted_origin) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket, integer n_bins, text pruning_params, text null_handling_params) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket, integer n_bins, text pruning_params) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket, integer n_bins) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split, integer min_bucket) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth, integer min_split) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights, integer max_depth) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols, text weights) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion, text grouping_cols) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude, text split_criterion) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features, text list_of_features_to_exclude) |
void | tree_train (text training_table_name, text output_table_name, text id_col_name, text dependent_variable, text list_of_features) |
void __build_tree | ( | boolean | is_classification, |
text | split_criterion, | ||
text | training_table_name, | ||
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
boolean | dep_is_bool, | ||
text | list_of_features, | ||
varchar [] | cat_features, | ||
varchar [] | ordered_cat_features, | ||
varchar [] | boolean_cats, | ||
varchar [] | con_features, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket, | ||
integer | n_bins, | ||
text | cp_table, | ||
smallint | max_n_surr, | ||
text | msg_level, | ||
text | null_proxy, | ||
integer | n_folds | ||
) |
void __tree_predict | ( | text | model, |
text | source, | ||
text | output, | ||
text | pred_type, | ||
boolean | use_existing_tables, | ||
integer | k | ||
) |
aggregate bytea8 _compute_leaf_stats | ( | bytea8 | , |
integer | [], | ||
float8 | [], | ||
float8 | , | ||
float8 | , | ||
integer | [], | ||
bytea8 | , | ||
smallint | , | ||
boolean | |||
) |
bytea8 _compute_leaf_stats_merge | ( | bytea8 | state1, |
bytea8 | state2 | ||
) |
bytea8 _compute_leaf_stats_transition | ( | bytea8 | state, |
bytea8 | tree_state, | ||
integer [] | cat_features, | ||
float8 [] | con_features, | ||
float8 | response, | ||
float8 | weight, | ||
integer [] | cat_levels, | ||
bytea8 | con_splits, | ||
smallint | n_response_labels, | ||
boolean | weights_as_rows | ||
) |
aggregate bytea8 _compute_surr_stats | ( | bytea8 | , |
integer | [], | ||
float8 | [], | ||
integer | [], | ||
bytea8 | , | ||
integer | |||
) |
bytea8 _compute_surr_stats_transition | ( | bytea8 | state, |
bytea8 | tree_state, | ||
integer [] | cat_features, | ||
float8 [] | con_features, | ||
integer [] | cat_levels, | ||
bytea8 | con_splits, | ||
integer | dup_count | ||
) |
float8 [] _compute_var_importance | ( | bytea8 | tree, |
integer | n_cat_features, | ||
integer | n_con_features | ||
) |
text _display_decision_tree | ( | bytea8 | tree, |
text [] | cat_features, | ||
text [] | con_features, | ||
text [] | cat_levels_in_text, | ||
integer [] | cat_n_levels, | ||
text [] | dependent_levels, | ||
text | id_prefix, | ||
boolean | verbose | ||
) |
text _display_decision_tree | ( | bytea8 | tree, |
text [] | cat_features, | ||
text [] | con_features, | ||
text [] | cat_levels_in_text, | ||
integer [] | cat_n_levels, | ||
text [] | dependent_levels, | ||
text | id_prefix | ||
) |
text _display_decision_tree_surrogate | ( | bytea8 | tree, |
text [] | cat_features, | ||
text [] | con_features, | ||
text [] | cat_levels_in_text, | ||
integer [] | cat_n_levels | ||
) |
text _display_text_decision_tree | ( | bytea8 | tree, |
text [] | cat_features, | ||
text [] | con_features, | ||
text [] | cat_levels_in_text, | ||
integer [] | cat_n_levels, | ||
text [] | dependent_levels | ||
) |
aggregate bytea8 _dst_compute_con_splits | ( | float8 | [], |
integer | , | ||
smallint | |||
) |
bytea8 _dst_compute_con_splits_final | ( | bytea8 | state | ) |
bytea8 _dst_compute_con_splits_transition | ( | bytea8 | state, |
float8 [] | con_features, | ||
integer | n_per_seg, | ||
smallint | num_splits | ||
) |
aggregate float8 _dst_compute_entropy | ( | integer | , |
integer | |||
) |
float8 _dst_compute_entropy_final | ( | integer [] | state | ) |
integer [] _dst_compute_entropy_merge | ( | integer [] | state1, |
integer [] | state2 | ||
) |
integer [] _dst_compute_entropy_transition | ( | integer [] | state, |
integer | encoded_dep_var, | ||
integer | num_dep_var | ||
) |
_tree_result_type _dt_apply | ( | bytea8 | tree, |
bytea8 | state, | ||
bytea8 | con_splits, | ||
smallint | min_split, | ||
smallint | min_bucket, | ||
smallint | max_depth, | ||
boolean | subsample, | ||
integer | num_random_features | ||
) |
bytea8 _dt_surr_apply | ( | bytea8 | tree, |
bytea8 | state, | ||
bytea8 | con_splits | ||
) |
set<_cat_levels_type> _gen_cat_levels_set | ( | text [] | grp_keys, |
integer [] | cat_n_levels, | ||
integer | n_cat, | ||
text [] | cat_sorted_origin | ||
) |
bytea8 _initialize_decision_tree | ( | boolean | is_regression_tree, |
text | impurity_function, | ||
smallint | num_response_labels, | ||
smallint | max_n_surr | ||
) |
integer [] _map_catlevel_to_int | ( | text [] | cat_values_in_text, |
text [] | cat_levels_in_text, | ||
integer [] | cat_n_levels, | ||
boolean | null_as_category | ||
) |
float8 [] _predict_dt_prob | ( | bytea8 | tree, |
integer [] | cat_features, | ||
float8 [] | con_features | ||
) |
float8 _predict_dt_response | ( | bytea8 | tree, |
integer [] | cat_features, | ||
float8 [] | con_features | ||
) |
_flattened_tree _print_decision_tree | ( | bytea8 | tree | ) |
varchar tree_display | ( | text | model_table, |
boolean | dot_format, | ||
boolean | verbose | ||
) |
model_table | Name of the table containing the decision tree model. |
varchar tree_display | ( | text | model_table, |
boolean | dot_format | ||
) |
varchar tree_display | ( | text | model_table | ) |
varchar tree_display | ( | ) |
void tree_predict | ( | text | model, |
text | source, | ||
text | output, | ||
text | pred_type | ||
) |
model | Name of the table containing the decision tree model |
source | Name of table containing prediction data |
output | Name of table to output prediction results |
pred_type | OPTIONAL (Default = 'response'). For regression trees, 'response' means the output is the predicted value. For classification trees, this can be 'response', giving the classification prediction as output, or 'prob', giving the class probabilities as output. For two classes, only a single probability value is output, corresponding to the first class when the two classes are sorted by name; for more than two classes, an array of class probabilities (one probability per class) is output. |
See Decision Tree for more details.
void tree_predict | ( | text | model, |
text | source, | ||
text | output | ||
) |
text tree_predict | ( | text | message | ) |
text tree_predict | ( | ) |
varchar tree_surr_display | ( | text | model_table | ) |
model_table | Name of the table containing the decision tree model. |
varchar tree_surr_display | ( | ) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket, | ||
integer | n_bins, | ||
text | pruning_params, | ||
text | null_handling_params, | ||
boolean | verbose_mode | ||
) |
split_criterion | Various options to compute the feature to split a node. Available options are 'gini', 'cross-entropy', and 'misclassification'. The "cart" algorithm provides an additional option of 'mse'. |
training_table_name | Name of the table containing data. |
output_table_name | Name of the table to output the model. |
id_col_name | Name of column containing the id information in training data. |
dependent_variable | Name of the column that contains the output for training. Boolean, integer and text are considered classification outputs, while float values are considered regression outputs. |
list_of_features | List of column names (comma-separated string) to use as predictors. Can also be '*', implying that all columns are to be used as predictors (except the ones included in the next argument). Boolean, integer, and text columns are considered categorical columns. |
list_of_features_to_exclude | OPTIONAL. List of column names (comma-separated string) to exclude from the predictors list. |
grouping_cols | OPTIONAL. List of column names (comma-separated string) to group the data by. This will lead to creating multiple decision trees, one for each group. |
weights | OPTIONAL. Column name containing weights for each observation. |
max_depth | OPTIONAL (Default = 7). Set the maximum depth of any node of the final tree, with the root node counted as depth 0. A deeper tree can lead to better prediction but will also result in longer processing time and higher memory usage. |
min_split | OPTIONAL (Default = 20). Minimum number of observations that must exist in a node for a split to be attempted. |
min_bucket | OPTIONAL (Default = min_split/3). Minimum number of observations in any terminal node. If only one of min_bucket or min_split is specified, min_split is set to min_bucket*3 or min_bucket to min_split/3, as appropriate. |
n_bins | OPTIONAL (Default = 20). Number of bins to use during binning. Continuous-valued features are binned into discrete bins (per the quantile values) to compute split boundaries. This global parameter is used to compute the resolution of the bins. A higher number of bins will lead to higher processing time. |
pruning_params | OPTIONAL (Default: 'cp=0'). Pruning parameter string containing comma-separated key-value pairs. The keys can be: cp (Default = 0), a complexity parameter that determines that a split is attempted only if it decreases the overall lack of fit by a factor of 'cp'; and n_folds (Default = 0), the number of cross-validation folds. |
verbose_mode | OPTIONAL (Default = false). Prints status information on the splits performed and any other information useful for debugging. |
See Decision Tree for more details.
text tree_train | ( | text | message | ) |
text tree_train | ( | ) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket, | ||
integer | n_bins, | ||
text | pruning_params, | ||
text | null_handling_params | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket, | ||
integer | n_bins, | ||
text | pruning_params | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket, | ||
integer | n_bins | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split, | ||
integer | min_bucket | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth, | ||
integer | min_split | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights, | ||
integer | max_depth | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols, | ||
text | weights | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion, | ||
text | grouping_cols | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude, | ||
text | split_criterion | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features, | ||
text | list_of_features_to_exclude | ||
) |
void tree_train | ( | text | training_table_name, |
text | output_table_name, | ||
text | id_col_name, | ||
text | dependent_variable, | ||
text | list_of_features | ||
) |