11 m4_include(`SQLCommon.m4
')
21 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_view (segtbl text, labeltbl text, result_tbl text, vw text) returns text AS
23 rv = plpy.execute('SELECT COUNT(*) AS total FROM
' + labeltbl);
24 nlabel = rv[0]['total
']
25 query = """create view """ + vw + """ AS
26 select segs.doc_id, start_pos, seg_text, L.label, (L.id+1) as id, (result.label[max_pos+2]::float/1000000) as prob
27 from """ + segtbl + """ segs, """ + labeltbl + """ L, """ + result_tbl + """ result
28 where result.label[segs.start_pos+1]=L.id and segs.doc_id=result.doc_id
29 order by doc_id, start_pos;"""
32 $$ language plpythonu strict;
41 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vcrf_top1_label(mArray int[], rArray int[], nlabel int)
42 returns int[] as 'MODULE_PATHNAME
' language c strict;
55 CREATE OR REPLACE FUNCTION
56 MADLIB_SCHEMA.vcrf_label(segtbl text, factor_mtbl text, factor_rtbl text, labeltbl text, resulttbl text) RETURNS text AS
58 origClientMinMessages = plpy.execute("SELECT setting AS setting FROM pg_settings WHERE name = \'client_min_messages\';")
59 plpy.execute("SET client_min_messages TO warning;")
61 plpy.execute("SELECT MADLIB_SCHEMA.create_schema_pg_temp();");
63 m_factors = "pg_temp._madlib_m_factors"
64 r_factors = "pg_temp._madlib_r_factors"
65 segtbl_digits = "pg_temp._madlib_segtbl_digits"
66 resulttbl_raw = "pg_temp._madlib_" + resulttbl + "_raw"
68 plpy.execute("""DROP TABLE IF EXISTS """ + m_factors + """,""" + r_factors + """,""" + segtbl_digits + """,""" + resulttbl_raw + """;""")
69 plpy.execute("""CREATE TABLE """ + resulttbl_raw + """(doc_id integer, label integer[]);""")
71 plpy.execute("""SET client_min_messages TO """ + str(origClientMinMessages[0]['setting
']) + """;""")
73 # replace digits with "DIGIT" keyword
74 plpy.execute("""SELECT start_pos,doc_id,seg_text,max_pos INTO """ + segtbl_digits + """ FROM """ + segtbl + """ WHERE
75 NOT (seg_text ~ E'^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$
' OR seg_text ~ E'^[-+]?[0-9]*[.][0-9]+$
');""")
76 plpy.execute("""INSERT INTO """ + segtbl_digits + """ SELECT start_pos,doc_id,'DIGIT
',max_pos FROM """ + segtbl + """ WHERE
77 seg_text ~ E'^[-+]?([0-9]{1,3}[,]?)*[0-9]{1,3}$
' OR seg_text ~E'^[-+]?[0-9]*[.][0-9]+$
';""")
80 -- for each sentence, store array representation of r_factors
81 m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `
82 select doc_id,
array_agg(score order by start_pos, label) as score
86 from """ + factor_rtbl + """ factors,
87 """ + segtbl_digits + """ seg
88 where factors.seg_text = seg.seg_text
89 and doc_id = ss.doc_id
90 order by start_pos, label
93 into """ + r_factors + """
94 from (select doc_id, start_pos, label, score
95 from """ + factor_rtbl + """ factors,
96 """ + segtbl_digits + """ seg
97 where factors.seg_text=seg.seg_text) as ss
101 plpy.execute("analyze " + r_factors + ";")
104 -- array representation of m_factor
106 into """ + m_factors + """
108 from """ + factor_mtbl + """ factors) as ss; """
111 rv = plpy.execute('SELECT COUNT(*) AS total FROM ' + labeltbl);
112 nlabel = rv[0]['total']
114 query = (""" INSERT INTO """ + resulttbl_raw + """
115 SELECT doc_id, MADLIB_SCHEMA.
vcrf_top1_label(mfactors.score, rfactors.score, """ + str(nlabel) + """ )
116 FROM """ + m_factors + """ mfactors, """ + r_factors + """ rfactors;""")
120 query = "SELECT * FROM MADLIB_SCHEMA.
vcrf_top1_view(\'" + segtbl + "\', \'" + labeltbl + "\', \'" + resulttbl_raw + "\', \'" + resulttbl + "\');"
123 $$ LANGUAGE plpythonu STRICT;