SQL functions for logistic regression. More...

Functions
float8 []	__logregr_cg_step_transition (float8[], boolean, float8[], float8[])

float8 []	__logregr_irls_step_transition (float8[], boolean, float8[], float8[])

float8 []	__logregr_igd_step_transition (float8[], boolean, float8[], float8[])

float8 []	__logregr_cg_step_merge_states (float8[] state1, float8[] state2)

float8 []	__logregr_irls_step_merge_states (float8[] state1, float8[] state2)

float8 []	__logregr_igd_step_merge_states (float8[] state1, float8[] state2)

float8 []	__logregr_cg_step_final (float8[] state)

float8 []	__logregr_irls_step_final (float8[] state)

float8 []	__logregr_igd_step_final (float8[] state)

aggregate float8 []	__logregr_cg_step (boolean y, float8[] x, float8[] previous_state)

aggregate float8 []	__logregr_irls_step (boolean y, float8[] x, float8[] previous_state)

aggregate float8 []	__logregr_igd_step (boolean y, float8[] x, float8[] previous_state)

float8	__logregr_cg_step_distance (float8[] state1, float8[] state2)

__logregr_result	__logregr_cg_result (float8[] state)

float8	__logregr_irls_step_distance (float8[] state1, float8[] state2)

__logregr_result	__logregr_irls_result (float8[] state)

float8	__logregr_igd_step_distance (float8[] state1, float8[] state2)

__logregr_result	__logregr_igd_result (float8[] state)

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer, float8 tolerance, boolean verbose)
	Compute logistic-regression coefficients and diagnostic statistics. More...

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname)

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols)

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter)

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer)

void	logregr_train (varchar source_table, varchar out_table, varchar dependent_varname, varchar independent_varname, varchar grouping_cols, integer max_iter, varchar optimizer, float8 tolerance)

text	logregr_train (text message)

text	logregr_train ()

float8	logistic (float8 x)
	Evaluate the usual logistic function in an under-/overflow-safe way. More...

boolean	logregr_predict (float8[] coef, float8[] col_ind_var)
	Predict the boolean value of a dependent variable for a specific independent variable value in a logistic regression model. More...

text	logregr_predict (text message)

text	logregr_predict ()

float8	logregr_predict_prob (float8[] coef, float8[] col_ind_var)
	Compute the probability of the boolean dependent variable being True for a specific independent variable value in a logistic regression model. More...

text	logregr_predict_prob (text message)

text	logregr_predict_prob ()

Detailed Description

Date: January 2011

See also: For a brief introduction to logistic regression, see the module description Logistic Regression.

Function Documentation

◆ __logregr_cg_result()

__logregr_result __logregr_cg_result ( float8 [] state )

◆ __logregr_cg_step()

aggregate float8 [] __logregr_cg_step	(	boolean	y,
		float8 []	x,
		float8 []	previous_state
	)

◆ __logregr_cg_step_distance()

float8 __logregr_cg_step_distance	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_cg_step_final()

float8 [] __logregr_cg_step_final ( float8 [] state )

◆ __logregr_cg_step_merge_states()

float8 [] __logregr_cg_step_merge_states	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_cg_step_transition()

float8 [] __logregr_cg_step_transition	(	float8	[],
		boolean	,
		float8	[],
		float8	[]
	)

◆ __logregr_igd_result()

__logregr_result __logregr_igd_result ( float8 [] state )

◆ __logregr_igd_step()

aggregate float8 [] __logregr_igd_step	(	boolean	y,
		float8 []	x,
		float8 []	previous_state
	)

◆ __logregr_igd_step_distance()

float8 __logregr_igd_step_distance	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_igd_step_final()

float8 [] __logregr_igd_step_final ( float8 [] state )

◆ __logregr_igd_step_merge_states()

float8 [] __logregr_igd_step_merge_states	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_igd_step_transition()

float8 [] __logregr_igd_step_transition	(	float8	[],
		boolean	,
		float8	[],
		float8	[]
	)

◆ __logregr_irls_result()

__logregr_result __logregr_irls_result ( float8 [] state )

◆ __logregr_irls_step()

aggregate float8 [] __logregr_irls_step	(	boolean	y,
		float8 []	x,
		float8 []	previous_state
	)

◆ __logregr_irls_step_distance()

float8 __logregr_irls_step_distance	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_irls_step_final()

float8 [] __logregr_irls_step_final ( float8 [] state )

◆ __logregr_irls_step_merge_states()

float8 [] __logregr_irls_step_merge_states	(	float8 []	state1,
		float8 []	state2
	)

◆ __logregr_irls_step_transition()

float8 [] __logregr_irls_step_transition	(	float8	[],
		boolean	,
		float8	[],
		float8	[]
	)

◆ logistic()

float8 logistic ( float8 x )

Parameters

x

Returns: \( \frac{1}{1 + \exp(-x)} \)

Evaluating this expression directly can lead to under- or overflows. This function performs the evaluation in a safe manner, making use of the following observations:

In order for the outcome of \( \exp(x) \) to be within the range of the minimum positive double-precision number (i.e., \( 2^{-1074} \)) and the maximum positive double-precision number (i.e., \( (1 + (1 - 2^{-52})) \cdot 2^{1023} \)), \( x \) has to be within the natural logarithm of these numbers, so roughly in between -744 and 709. However, \( 1 + \exp(x) \) will just evaluate to 1 if \( \exp(x) \) is less than the machine epsilon (i.e., \( 2^{-52} \)) or, equivalently, if \( x \) is less than the natural logarithm of that; i.e., in any case if \( x \) is less than -37. Note that taking the reciprocal of the largest double-precision number will not cause an underflow. Hence, no further checks are necessary.

◆ logregr_predict() [1/3]

boolean logregr_predict	(	float8 []	coef,
		float8 []	col_ind_var
	)

Parameters

coef	Coefficients obtained by running logistic regression.
col_ind_var	Independent variable array

Returns: Boolean value of the dependent variable

This function computes the dot product of the independent variables and the coefficients. This requires the length of the two vectors to be the same.

◆ logregr_predict() [2/3]

text logregr_predict ( text message )

◆ logregr_predict() [3/3]

text logregr_predict ( )

◆ logregr_predict_prob() [1/3]

float8 logregr_predict_prob	(	float8 []	coef,
		float8 []	col_ind_var
	)

Parameters

coef	Coefficients obtained by running logistic regression.
col_ind_var	Independent variable array

Returns: Probability value of the dependent variable being True

This function computes the dot product of the independent variables and the coefficients, hence requires the length of the two vectors to be the same.

◆ logregr_predict_prob() [2/3]

text logregr_predict_prob ( text message )

◆ logregr_predict_prob() [3/3]

text logregr_predict_prob ( )

◆ logregr_train() [1/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		varchar	grouping_cols,
		integer	max_iter,
		varchar	optimizer,
		float8	tolerance,
		boolean	verbose
	)

To include an intercept in the model, set one coordinate in the independentVariables array to 1.

Parameters

source_table	Name of the source relation containing the training data
out_table	Name of the output relation to store the model results. Columns of the output relation are as follows: `coef FLOAT8[]` - Array of coefficients, \( \boldsymbol c \); `log_likelihood FLOAT8` - Log-likelihood, \( l(\boldsymbol c) \); `std_err FLOAT8[]` - Array of standard errors, \( \mathit{se}(c_1), \dots, \mathit{se}(c_k) \); `z_stats FLOAT8[]` - Array of Wald z-statistics, \( \boldsymbol z \); `p_values FLOAT8[]` - Array of Wald p-values, \( \boldsymbol p \); `odds_ratios FLOAT8[]` - Array of odds ratios, \( \mathit{odds}(c_1), \dots, \mathit{odds}(c_k) \); `condition_no FLOAT8` - The condition number of matrix \( X^T A X \) during the iteration immediately preceding convergence (i.e., \( A \) is computed using the coefficients of the previous iteration).
dependent_varname	Name of the dependent column (of type BOOLEAN)
independent_varname	Name of the independent column (of type DOUBLE PRECISION[])
grouping_cols	Comma-delimited list of column names to group by
max_iter	The maximum number of iterations
optimizer	The optimizer to use (either `'irls'`/`'newton'` for iteratively reweighted least squares or `'cg'` for conjugate gradient)
tolerance	The difference between log-likelihood values in successive iterations that should indicate convergence. This value must be non-negative. A value of zero disables the convergence criterion, so execution stops only after `max_iter` iterations.
verbose	If true, any error or warning message will be printed to the console (irrespective of the 'client_min_messages' set by server). If false, no error/warning message is printed to console.

Usage

Get vector of coefficients \( \boldsymbol c \) and all diagnostic statistics:

```
SELECT logregr_train('sourceName', 'outName',
                     'dependentVariable', 'independentVariables');
SELECT * FROM outName;
```

Get vector of coefficients \( \boldsymbol c \):
```
SELECT coef from outName;
```
Get a subset of the output columns, e.g., only the array of coefficients \( \boldsymbol c \), the log-likelihood \( l(\boldsymbol c) \), and the array of p-values \( \boldsymbol p \):
```
SELECT coef, log_likelihood, p_values FROM outName;
```

Note: This function starts an iterative algorithm. It is not an aggregate function. Source, output, and column names have to be passed as strings (due to limitations of the SQL syntax).

◆ logregr_train() [2/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname
	)

◆ logregr_train() [3/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		varchar	grouping_cols
	)

◆ logregr_train() [4/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		varchar	grouping_cols,
		integer	max_iter
	)

◆ logregr_train() [5/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		varchar	grouping_cols,
		integer	max_iter,
		varchar	optimizer
	)

◆ logregr_train() [6/8]

void logregr_train	(	varchar	source_table,
		varchar	out_table,
		varchar	dependent_varname,
		varchar	independent_varname,
		varchar	grouping_cols,
		integer	max_iter,
		varchar	optimizer,
		float8	tolerance
	)

◆ logregr_train() [7/8]

text logregr_train ( text message )

◆ logregr_train() [8/8]

text logregr_train ( )

Functions

Detailed Description

Function Documentation

◆ __logregr_cg_result()

◆ __logregr_cg_step()

◆ __logregr_cg_step_distance()

◆ __logregr_cg_step_final()

◆ __logregr_cg_step_merge_states()

◆ __logregr_cg_step_transition()

◆ __logregr_igd_result()

◆ __logregr_igd_step()

◆ __logregr_igd_step_distance()

◆ __logregr_igd_step_final()

◆ __logregr_igd_step_merge_states()

◆ __logregr_igd_step_transition()

◆ __logregr_irls_result()

◆ __logregr_irls_step()

◆ __logregr_irls_step_distance()

◆ __logregr_irls_step_final()

◆ __logregr_irls_step_merge_states()

◆ __logregr_irls_step_transition()

◆ logistic()

◆ logregr_predict() [1/3]

◆ logregr_predict() [2/3]

◆ logregr_predict() [3/3]

◆ logregr_predict_prob() [1/3]

◆ logregr_predict_prob() [2/3]

◆ logregr_predict_prob() [3/3]

◆ logregr_train() [1/8]

◆ logregr_train() [2/8]

◆ logregr_train() [3/8]

◆ logregr_train() [4/8]

◆ logregr_train() [5/8]

◆ logregr_train() [6/8]

◆ logregr_train() [7/8]

◆ logregr_train() [8/8]