We currently only support the dummy coding technique. Dummy coding is used when a researcher wants to compare other groups of the predictor variable with one specific group of the predictor variable. Often, the specific group to compare with is called the reference group.
create_indicator_variables( source_table, output_table, categorical_cols, keep_null, distributed_by )
Arguments
DROP TABLE IF EXISTS abalone; CREATE TABLE abalone ( sex character varying, length double precision, diameter double precision, height double precision ); COPY abalone (sex, length, diameter, height) FROM stdin WITH DELIMITER '|' NULL as '@'; M| 0.455 | 0.365 | 0.095 F| 0.53 | 0.42 | 0.135 M| 0.35 | 0.265 | 0.09 F| 0.53 | 0.415 | 0.15 M| 0.44 | 0.365 | 0.125 F| 0.545 | 0.425 | 0.125 I| 0.33 | 0.255 | 0.08 F| 0.55 | 0.44 | 0.15 I| 0.425 | 0.30 | 0.095 F| 0.525 | 0.38 | 0.140 M| 0.475 | 0.37 | 0.125 F| 0.535 | 0.405 | 0.145 M| 0.43 | 0.358 | 0.11 F| 0.47 | 0.355 | 0.100 M| 0.49 | 0.38 | 0.135 F| 0.44 | 0.340 | 0.100 M| 0.5 | 0.400 | 0.13 F| 0.565 | 0.44 | 0.155 I| 0.355 | 0.280 | 0.085 F| 0.550 | 0.415 | 0.135 | 0.475 | 0.37 | 0.125 \.
drop table if exists abalone_out; select madlib.create_indicator_variables ('abalone', 'abalone_out', 'sex'); select * from abalone_out;
sex | length | diameter | height | sex_F | sex_I | sex_M -----+--------+----------+--------+--------+--------+------- F | 0.53 | 0.42 | 0.135 | 1 | 0 | 0 F | 0.53 | 0.415 | 0.15 | 1 | 0 | 0 F | 0.545 | 0.425 | 0.125 | 1 | 0 | 0 F | 0.55 | 0.44 | 0.15 | 1 | 0 | 0 F | 0.525 | 0.38 | 0.14 | 1 | 0 | 0 F | 0.535 | 0.405 | 0.145 | 1 | 0 | 0 F | 0.47 | 0.355 | 0.1 | 1 | 0 | 0 F | 0.44 | 0.34 | 0.1 | 1 | 0 | 0 F | 0.565 | 0.44 | 0.155 | 1 | 0 | 0 F | 0.55 | 0.415 | 0.135 | 1 | 0 | 0 M | 0.455 | 0.365 | 0.095 | 0 | 0 | 1 M | 0.35 | 0.265 | 0.09 | 0 | 0 | 0 M | 0.44 | 0.365 | 0.125 | 0 | 0 | 0 I | 0.33 | 0.255 | 0.08 | 0 | 1 | 0 I | 0.425 | 0.3 | 0.095 | 0 | 1 | 0 M | 0.475 | 0.37 | 0.125 | 0 | 0 | 0 M | 0.43 | 0.358 | 0.11 | 0 | 0 | 0 M | 0.49 | 0.38 | 0.135 | 0 | 0 | 0 M | 0.5 | 0.4 | 0.13 | 0 | 0 | 0 I | 0.355 | 0.28 | 0.085 | 0 | 1 | 0 NULL | 0.55 | 0.415 | 0.135 | NULL | NULL | NULL
drop table if exists abalone_out; select madlib.create_indicator_variables'abalone', 'abalone_out', 'sex', True); select * from abalone_out;
sex | length | diameter | height | sex_F | sex_I | sex_M | sex_NULL ---—+-----—+-------—+-----—+-----—+-----—+----—+----— F | 0.53 | 0.42 | 0.135 | 1 | 0 | 0 | 0 F | 0.53 | 0.415 | 0.15 | 1 | 0 | 0 | 0 F | 0.545 | 0.425 | 0.125 | 1 | 0 | 0 | 0 F | 0.55 | 0.44 | 0.15 | 1 | 0 | 0 | 0 F | 0.525 | 0.38 | 0.14 | 1 | 0 | 0 | 0 F | 0.535 | 0.405 | 0.145 | 1 | 0 | 0 | 0 F | 0.47 | 0.355 | 0.1 | 1 | 0 | 0 | 0 F | 0.44 | 0.34 | 0.1 | 1 | 0 | 0 | 0 F | 0.565 | 0.44 | 0.155 | 1 | 0 | 0 | 0 F | 0.55 | 0.415 | 0.135 | 1 | 0 | 0 | 0 M | 0.455 | 0.365 | 0.095 | 0 | 0 | 1 | 0 M | 0.35 | 0.265 | 0.09 | 0 | 0 | 0 | 0 M | 0.44 | 0.365 | 0.125 | 0 | 0 | 0 | 0 I | 0.33 | 0.255 | 0.08 | 0 | 1 | 0 | 0 I | 0.425 | 0.3 | 0.095 | 0 | 1 | 0 | 0 M | 0.475 | 0.37 | 0.125 | 0 | 0 | 0 | 0 M | 0.43 | 0.358 | 0.11 | 0 | 0 | 0 | 0 M | 0.49 | 0.38 | 0.135 | 0 | 0 | 0 | 0 M | 0.5 | 0.4 | 0.13 | 0 | 0 | 0 | 0 I | 0.355 | 0.28 | 0.085 | 0 | 1 | 0 | 0 NULL | 0.55 | 0.415 | 0.135 | 0 | 0 | 0 | 1