Package pylal :: Module ligolw_dbinjfind
[hide private]
[frames] | no frames]

Source Code for Module pylal.ligolw_dbinjfind

  1  import sys 
  2   
  3  from glue.ligolw import lsctables 
  4  from glue.ligolw import ilwd 
  5   
  6  from pylal import ligolw_sqlutils as sqlutils 
  7  from pylal import ligolw_dataUtils as dataUtils 
  8   
9 -def make_rec_sngls_table( connection, recovery_table ):
10 """ 11 Makes a temporary table containing events from the given recovery table 12 that could potentially be injections --- i.e., events from the "simulation" 13 datatype --- and the process id of the injection jobs that created them. 14 This allows for quick matching between injections and single events later 15 on. 16 """ 17 sqlquery = ''.join([''' 18 CREATE TEMP TABLE rec_sngls AS 19 SELECT 20 experiment_summary.sim_proc_id AS sim_proc_id, 21 ''', recovery_table, '''.* 22 FROM 23 ''', recovery_table, ''' 24 ''', sqlutils.join_experiment_tables_to_sngl_table( recovery_table ), ''' 25 WHERE 26 experiment_summary.datatype == "simulation"''' ]) 27 connection.cursor().execute(sqlquery)
28 29
30 -def dbinjfind( connection, simulation_table, recovery_table, match_criteria, rough_match = None, rejection_criteria = [], rough_rejection = None, verbose = False ):
31 32 # validate simulation_table and recovery_table 33 simulation_table = sqlutils.validate_option( simulation_table ) 34 recovery_table = sqlutils.validate_option( recovery_table ) 35 36 # create DataRow classes to store data for each table 37 simColumns = sqlutils.get_column_names_from_table( connection, simulation_table ) 38 recColumns = sqlutils.get_column_names_from_table( connection, recovery_table ) 39 40 SimDataRow = dataUtils.createDataRowClass( simulation_table, columns = simColumns ) 41 RecDataRow = dataUtils.createDataRowClass( recovery_table, columns = recColumns ) 42 43 # create a temporary table to store the eligible foreground events that can be matched 44 if verbose: 45 print >> sys.stdout, "Getting eligible events..." 46 make_rec_sngls_table( connection, recovery_table ) 47 48 # if using rough match, create an index for it 49 rough_match_test = '' 50 if rough_match is not None: 51 simRough, recRough, winRough = rough_match 52 simRough = sqlutils.validate_option( simRough ) 53 recRough = sqlutils.validate_option( recRough ) 54 sqlquery = "CREATE INDEX rs_rmtch_idx ON rec_sngls (%s)" % recRough 55 connection.cursor().execute( sqlquery ) 56 rough_match_test = "rec_sngls.%s >= sim.%s - %f AND rec_sngls.%s <= sim.%s + %f AND\n" %( recRough, simRough, winRough, recRough, simRough, winRough ) 57 58 59 # 60 # Remove triggers that match all_data triggers 61 # 62 if rejection_criteria != []: 63 if verbose: 64 print >> sys.stdout, "Applying rejection test to eligible events..." 65 # If comparing to all_data for rejection, create a temp table of all data events 66 # This rejection test only uses the single-ifo triggers from coincident events 67 sqlquery = ''.join([''' 68 CREATE TEMP TABLE all_data_sngls AS 69 SELECT 70 ''', recovery_table, '''.* 71 FROM 72 ''', recovery_table, ''' 73 ''', sqlutils.join_experiment_tables_to_sngl_table( recovery_table ), ''' 74 WHERE 75 experiment_summary.datatype == "all_data"''' ]) 76 connection.cursor().execute(sqlquery) 77 78 rough_test = '' 79 if rough_rejection is not None: 80 rejRough, rejRoughWin = rough_rejection 81 rejRough = sqlutils.validate_option( rejRough ) 82 sqlquery = "CREATE INDEX ads_rmtch_idx ON all_data_sngls (%s)" % rejRough 83 connection.cursor().execute( sqlquery ) 84 rough_test = "all_data_sngls.%s >= rec_sngls.%s - %f AND all_data_sngls.%s <= rec_sngls.%s + %f AND\n" % ( rejRough, rejRough, rejRoughWin, rejRough, rejRough, rejRoughWin ) 85 86 # cycle over the rejection criteria, creating a function in the database for each 87 rejection_tests = [] 88 for n,(thisFunc, window) in enumerate(rejection_criteria): 89 compF = dataUtils.CompareDataRows(RecDataRow, RecDataRow) 90 funcName = 'matches_all_data%i' % n 91 # Note: setting the match criteria also sets the needed columns 92 compF.set_matchCriteriaA(thisFunc) 93 compF.set_matchCriteriaB(thisFunc) 94 # need different diff function if using eThinca 95 if thisFunc == 'eThinca': 96 diffFunc = compF.eThincaSngl 97 else: 98 diffFunc = compF.diffRowARowB 99 compF.create_dbCompF(connection, diffFunc, funcName, window) 100 simSnglCols = ','.join(['rec_sngls.%s' %(col) for col in compF.neededColumnsA]) 101 allSnglCols = ','.join(['all_data_sngls.%s' %(col) for col in compF.neededColumnsB]) 102 rejection_tests.append( '%s(%s, %s)' %(funcName, simSnglCols, allSnglCols) ) 103 104 # now remove triggers 105 sqlquery = ''.join([ ''' 106 DELETE FROM 107 rec_sngls 108 WHERE EXISTS ( 109 SELECT 110 * 111 FROM 112 all_data_sngls 113 WHERE 114 ''', rough_test, '\nAND '.join( rejection_tests ), ')' ]) 115 connection.cursor().execute(sqlquery) 116 connection.commit() 117 118 # 119 # Determine Sim-Sngl matches 120 # 121 122 if verbose: 123 print >> sys.stdout, "Applying match criteria to find sim-sngl maps..." 124 # cycle over the match criteria, creating a function in the database for each 125 match_tests = [] 126 for n,(simFunc, snglFunc, window) in enumerate(match_criteria): 127 compF = dataUtils.CompareDataRows(SimDataRow, RecDataRow) 128 # set the name of the compare function to use in the database 129 funcName = 'are_match%i' % n 130 compF.set_matchCriteriaA(simFunc) 131 compF.set_matchCriteriaB(snglFunc) 132 # need different diff function if using eThinca 133 if simFunc == 'eThinca': 134 diffFunc = compF.eThincaSim 135 else: 136 diffFunc = compF.diffSimSngl 137 compF.create_dbCompF(connection, diffFunc, funcName, window) 138 simCols = ','.join(['sim.%s'%(col) for col in compF.neededColumnsA]) 139 snglCols = ','.join(['rec_sngls.%s'%(col) for col in compF.neededColumnsB]) 140 match_tests.append( '%s(%s, %s)' %(funcName, simCols, snglCols) ) 141 142 # determine matches 143 sqlquery = ''.join([""" 144 CREATE TEMP TABLE found_inj AS 145 SELECT 146 sim.simulation_id AS sim_id, 147 rec_sngls.event_id AS event_id 148 FROM 149 """, simulation_table, """ AS sim, rec_sngls 150 WHERE 151 sim.process_id == rec_sngls.sim_proc_id AND 152 """, rough_match_test, '\n\t\tAND'.join( match_tests ) ]) 153 connection.cursor().execute(sqlquery) 154 connection.commit()
155
156 -def strlst_is_subset(stringA, stringB):
157 return set(stringA.split(',')).issubset(set(stringB.split(',')))
158
159 -def write_coincidences(connection, map_label, search, process_id, verbose = False):
160 """ 161 Writes coincidences to coinc_event_map table. 162 """ 163 # for all the maps, see if there is another coincidence 164 if verbose: 165 print >> sys.stdout, "Getting mapped sngls belonging to a coincident event..." 166 connection.create_aggregate("ag_cat", 1, sqlutils.aggregate_concatenate) 167 connection.create_function("issubset", 2, strlst_is_subset) 168 169 sqlquery = ''' 170 CREATE INDEX finj_simid_idx ON found_inj (sim_id); 171 CREATE INDEX finj_eid_idx ON found_inj (event_id); 172 CREATE TEMP TABLE coinc_inj AS 173 SELECT 174 found_inj.sim_id AS sid, 175 found_inj.event_id AS evid, 176 coinc_event_map.coinc_event_id AS ceid 177 FROM 178 found_inj 179 JOIN 180 coinc_event_map 181 ON ( 182 coinc_event_map.event_id == evid ) 183 WHERE issubset( 184 ( 185 SELECT ag_cat(c.event_id) 186 FROM coinc_event_map AS c 187 WHERE c.coinc_event_id == ceid 188 GROUP BY c.coinc_event_id 189 ORDER BY c.event_id ASC), 190 ( 191 SELECT ag_cat(b.event_id) 192 FROM found_inj AS b 193 WHERE b.sim_id == sid 194 GROUP BY b.sim_id 195 ORDER BY b.event_id ASC) 196 ); 197 CREATE INDEX cij_eid_idx ON coinc_inj (evid); 198 ''' 199 connection.cursor().executescript(sqlquery) 200 # get the sim_coincs 201 sqlquery = "SELECT DISTINCT sid, ceid FROM coinc_inj" 202 sim_coincs = [(ilwd.ilwdchar(sim_id), ilwd.ilwdchar(ceid)) for ceid, sim_id in connection.cursor().execute( sqlquery ).fetchall()] 203 204 # get the sim_sngls 205 sqlquery = "SELECT sim_id, event_id FROM found_inj WHERE event_id NOT IN (SELECT DISTINCT evid FROM coinc_inj)" 206 sim_sngls = [(ilwd.ilwdchar(sim_id), ilwd.ilwdchar(eid)) for sim_id, eid in connection.cursor().execute( sqlquery ).fetchall()] 207 208 # create a new coinc_def id for this map label, if it already doesn't exist 209 coinc_def_id = sqlutils.write_newstyle_coinc_def_entry( connection, map_label, search=search ) 210 211 # get the time_slide id 212 # XXX: NOTE: We are assuming that all simulation entries have the same time_slide id 213 sqlquery = 'SELECT DISTINCT time_slide_id FROM experiment_summary WHERE datatype LIKE "simulation%"' 214 time_slide_id = connection.cursor().execute(sqlquery).fetchall() 215 if len(time_slide_id) > 1: 216 raise ValueError, "more than one time_slide_id found for the simulation datatype" 217 elif len(time_slide_id) == 0: 218 raise ValueError, "no time_slide_id found for the simulation datatype" 219 time_slide_id = ilwd.ilwdchar(time_slide_id.pop()[0]) 220 221 # write the number of new entries needed for the sim_coincs to the coinc_event table 222 if verbose: 223 print >> sys.stdout, "Adding injection maps to coinc_event table..." 224 new_ceids = sqlutils.add_coinc_event_entries( connection, process_id, coinc_def_id, time_slide_id, len(sim_coincs) ) 225 226 # add these new entries to coinc_event_map table 227 if verbose: 228 print >> sys.stdout, "Adding injection-coinc_event maps to coinc_event_map table..." 229 sqlquery = 'INSERT INTO coinc_event_map (coinc_event_id, table_name, event_id) VALUES (?,?,?)' 230 connection.cursor().executemany( sqlquery, [(str(ceid), sim_id.table_name, str(sim_id)) for ceid, (sim_id, _) in zip(new_ceids, sim_coincs)] ) 231 connection.cursor().executemany( sqlquery, [(str(ceid), coinc_ceid.table_name, str(coinc_ceid)) for ceid, (_, coinc_ceid) in zip(new_ceids, sim_coincs)] ) 232 233 # ditto for the sim-sngls 234 if verbose: 235 print >> sys.stdout, "Adding injection-sngl maps to coinc_event_map table..." 236 new_ceids = sqlutils.add_coinc_event_entries( connection, process_id, coinc_def_id, time_slide_id, len(sim_sngls) ) 237 connection.cursor().executemany( sqlquery, [(str(ceid), sim_id.table_name, str(sim_id)) for ceid, (sim_id, _) in zip(new_ceids, sim_sngls)] ) 238 connection.cursor().executemany( sqlquery, [(str(ceid), eid.table_name, str(eid)) for ceid, (_, eid) in zip(new_ceids, sim_sngls)] ) 239 240 # update the number of events in the coinc_event table 241 if verbose: 242 print >> sys.stdout, "Updating coinc_event nevents column..." 243 sqlutils.update_coinctab_nevents( connection )
244