10 m4_include(`SQLCommon.m4
')
/*
 * crf_train_data(datapath)
 *
 * Drops and recreates the CRF training tables, then bulk-loads the three
 * training input files found under datapath:
 *
 *   datapath/crf_label.tab      -> MADLIB_SCHEMA.crf_label
 *   datapath/crf_regex.tab      -> MADLIB_SCHEMA.crf_regex
 *   datapath/crf_traindata.tab  -> MADLIB_SCHEMA.train_segmenttbl
 *
 * It also (re)creates four empty tables: crf_feature, crf_dictionary,
 * featuretbl and crf_feature_dic. Nothing in this function populates them.
 *
 * Parameter:
 *   datapath  directory that holds the .tab files, without a trailing
 *             slash. COPY ... FROM a file name is resolved by the database
 *             server, so the path must be readable from the server side.
 *
 * Returns: void. Because the function is STRICT, a NULL datapath makes the
 * call return NULL without running the body.
 *
 * Any existing contents of the tables listed above are destroyed.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.crf_train_data(datapath text) RETURNS void AS
$$
    # Render datapath/file_name as an escape-string literal for COPY.
    # Backslashes and single quotes are doubled so that a path containing
    # either cannot terminate the literal early or be read as an escape,
    # whatever the standard_conforming_strings setting is.
    def copy_source(file_name):
        full_path = datapath + "/" + file_name
        return "E'" + full_path.replace("\\", "\\\\").replace("'", "''") + "'"

    # import label data to the database
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.crf_label CASCADE")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.crf_label(id integer,label text)")
    plpy.execute("COPY MADLIB_SCHEMA.crf_label(id,label) FROM " +
                 copy_source("crf_label.tab"))

    # import regex to regex table
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.crf_regex CASCADE")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.crf_regex (pattern text,name text)")
    plpy.execute("COPY MADLIB_SCHEMA.crf_regex(pattern,name) FROM " +
                 copy_source("crf_regex.tab"))

    # import training data to the database
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.train_segmenttbl CASCADE")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.train_segmenttbl(start_pos integer,doc_id integer,seg_text text,label integer,max_pos integer)")
    plpy.execute("COPY MADLIB_SCHEMA.train_segmenttbl(start_pos,doc_id,seg_text,label,max_pos) FROM " +
                 copy_source("crf_traindata.tab"))

    # The remaining tables are only (re)created here and start out empty.
    # Unlike the tables above they are dropped without CASCADE, so the drop
    # fails if other objects still depend on them.
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.crf_feature")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.crf_feature (id integer,name text,prev_label_id integer,label_id integer,weight float)")

    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.crf_dictionary")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.crf_dictionary(token text,total integer)")

    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.featuretbl")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.featuretbl(doc_id integer,f_size FLOAT8,sparse_r FLOAT8[],dense_m FLOAT8[],sparse_m FLOAT8[])")

    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.crf_feature_dic")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.crf_feature_dic(f_index integer, f_name text, feature integer[])")
$$ LANGUAGE plpythonu STRICT;
/*
 * crf_test_data(datapath)
 *
 * Drops and recreates the tables used for CRF testing, then bulk-loads the
 * tokenized test document from datapath/crf_testdata.tab into
 * MADLIB_SCHEMA.test_segmenttbl.
 *
 * Two further tables, viterbi_rtbl and viterbi_mtbl, are (re)created empty.
 * Nothing in this function populates them.
 *
 * Parameter:
 *   datapath  directory that holds crf_testdata.tab, without a trailing
 *             slash. COPY ... FROM a file name is resolved by the database
 *             server, so the path must be readable from the server side.
 *
 * Returns: void. Because the function is STRICT, a NULL datapath makes the
 * call return NULL without running the body.
 *
 * Any existing contents of the three tables are destroyed.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.crf_test_data(datapath text) RETURNS void AS
$$
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.test_segmenttbl CASCADE")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.test_segmenttbl (start_pos integer,doc_id integer,seg_text text, max_pos integer)")

    # Dropped without CASCADE, so the drop fails if other objects still
    # depend on these tables.
    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.viterbi_rtbl")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.viterbi_rtbl (seg_text text, label integer, score integer)")

    plpy.execute("DROP TABLE IF EXISTS MADLIB_SCHEMA.viterbi_mtbl")
    plpy.execute("CREATE TABLE MADLIB_SCHEMA.viterbi_mtbl (score integer[])")

    # import tokenized document to the segment table
    # The file name is rendered as an escape-string literal with backslashes
    # and single quotes doubled, so a path containing either cannot end the
    # literal early or be read as an escape sequence.
    source_path = datapath + "/crf_testdata.tab"
    source_literal = "E'" + source_path.replace("\\", "\\\\").replace("'", "''") + "'"
    plpy.execute("COPY MADLIB_SCHEMA.test_segmenttbl (start_pos,doc_id,seg_text,max_pos) FROM " +
                 source_literal)
$$ language plpythonu STRICT;