Package pylal :: Module cbc_table_utils
[hide private]
[frames] | no frames]

Source Code for Module pylal.cbc_table_utils

  1  # Copyright (C) 2012  Matthew West 
  2  # 
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 2 of the License, or (at your 
  6  # option) any later version. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16   
 17   
 18  # 
 19  # ============================================================================= 
 20  # 
 21  #                                    Preamble 
 22  # 
 23  # ============================================================================= 
 24  # 
 25   
 26  """ 
 27   
 28  """ 
 29   
 30  import itertools 
 31  import math 
 32  from optparse import OptionParser 
 33  import re 
 34  import sys 
 35  import os 
 36  try: 
 37      any 
 38      all 
 39  except NameError: 
 40      # Python < 2.5 
 41      from glue.iterutils import any, all 
 42   
 43  from glue import iterutils 
 44  from glue import lal 
 45  from glue.ligolw import ligolw 
 46  from glue.ligolw import table 
 47  from glue.ligolw import lsctables 
 48  from glue.ligolw import utils 
 49  from glue import git_version 
 50   
 51  __author__ = "Matt West <matthew.west@ligo.org>" 
 52  __version__ = "git id %s" % git_version.id 
 53  __date__ = git_version.date 
 54   
 55   
 56   
 57  # 
 58  # ============================================================================= 
 59  # 
 60  #                         Depopulate ligolw_xml tables  
 61  # 
 62  # ============================================================================= 
 63  # 
 64   
65 -def depopulate_sngl_inspiral(xmldoc, verbose = False):
66 """ 67 This function takes the lists of event_ids from the sngl_inspiral and coinc_event_map 68 tables and determine the difference, such that the newlist contains only non-coinc 69 single-ifo triggers. Then it remove these non-coinc triggers from the sngl_inspiral table. 70 """ 71 sngls_tbl = lsctables.SnglInspiralTable.table.get_table(xmldoc) 72 sngls_tbl_eid = sngls_tbl.getColumnByName("event_id") 73 74 coinc_map_tbl = lsctables.CoincMapTable.get_table(xmldoc) 75 76 if len(coinc_map_tbl) == 0: 77 if verbose: 78 print >> sys.stderr, "This file lacks any coincident events. All %i single-ifo "\ 79 % len(sngls_tbl_eid) + "inspiral triggers have been removed." 80 del sngls_tbl[:] 81 else: 82 coinc_map_tbl_eid = set(coinc_map_tbl.getColumnByName("event_id")) 83 non_coincs = set(sngls_tbl_eid) - coinc_map_tbl_eid 84 if verbose: 85 print >> sys.stderr, "%i single-ifo inspiral triggers not associated with a "\ 86 % len(non_coincs) + "coincident event are being removed." 87 88 coinc_sngls_tbl = xmldoc.childNodes[0].insertBefore( lsctables.New(lsctables.SnglInspiralTable), sngls_tbl) 89 if verbose: 90 print >> sys.stderr, "Creating mapping dictionary." 91 # Create a mapping dictionary 92 sngls_tbl_dict = {} 93 for idx, eid in enumerate(sngls_tbl_eid): 94 sngls_tbl_dict[eid] = idx 95 if verbose: 96 print >> sys.stderr, "Creating new table." 97 # Create the new coincident sngls table 98 for idx, event_id in enumerate(coinc_map_tbl_eid): 99 coinc_sngls_tbl.insert( idx, sngls_tbl[sngls_tbl_dict[event_id]] ) 100 xmldoc.childNodes[0].removeChild(sngls_tbl) 101 if verbose: 102 print >> sys.stderr, "Done removing non-coincident events."
103 104
105 -def remove_process_rows(xmldoc, process_ids, verbose = False):
106 proc_tbl = lsctables.ProcessTable.get_table(xmldoc) 107 proc_param_tbl = lsctables.ProcessParamsTable.get_table(xmldoc) 108 109 # Remove rows in process table whose process_id are found in sd_pids 110 len_p_tbl = len(proc_tbl) 111 for i, row in enumerate(proc_tbl[::-1]): 112 if row.process_id in process_ids: 113 del proc_tbl[len_p_tbl-i-1] 114 if verbose: 115 print >> sys.stderr, "\t%i rows in the process table have been removed" \ 116 %(len_p_tbl - len(proc_tbl)) 117 118 # Remove rows in process_params table whose process_id are found in sd_pids 119 len_pp_tbl = len(proc_param_tbl) 120 for i, row in enumerate(proc_param_tbl[::-1]): 121 if row.process_id in process_ids: 122 del proc_param_tbl[len_pp_tbl-i-1] 123 if verbose: 124 print >> sys.stderr, "\t%i rows in the process param table have been removed" \ 125 %(len_pp_tbl - len(proc_param_tbl))
126 127
128 -def drop_segment_tables(xmldoc, verbose = False):
129 """ 130 Drop the segment, segment_definer & segment_summary tables from the 131 xmldoc. In addition, remove the rows in the process & process_params 132 tables that have process_ids found in the segment_definer table. 133 """ 134 seg_tbl = lsctables.SegmentTable.get_table(xmldoc) 135 seg_sum_tbl = lsctables.SegmentSumTable.get_table(xmldoc) 136 seg_def_tbl = lsctables.SegmentDefTable.get_table(xmldoc) 137 # determine the unique process_ids for the segment tables 138 sd_pids = set(seg_def_tbl.getColumnByName("process_id")) 139 140 if verbose: 141 print >> sys.stderr, "Depopulate process tables of segment process_ids" 142 remove_process_rows(xmldoc, sd_pids, verbose=verbose) 143 144 # remove segment, segment_definer & segment_summary tables from xmldoc 145 xmldoc.childNodes[0].removeChild(seg_tbl) 146 seg_tbl.unlink() 147 xmldoc.childNodes[0].removeChild(seg_sum_tbl) 148 seg_sum_tbl.unlink() 149 xmldoc.childNodes[0].removeChild(seg_def_tbl) 150 seg_def_tbl.unlink() 151 if verbose: 152 print >> sys.stderr, "segment, segment-definer & segment-summary tables dropped from xmldoc"
153 154
155 -def drop_vetodef_table(xmldoc, verbose = False):
156 """ 157 Drop the veto_definer table from the xmldoc and remove the rows in the 158 process & process_params tables that have process_ids found in the 159 veto_definer table. 160 """ 161 veto_def_tbl = lsctables.VetoDefTable.get_table(xmldoc) 162 # determine the unique process_ids for the segment tables 163 vd_pids = set(veto_def_tbl.getColumnByName("process_id")) 164 165 if verbose: 166 print >> sys.stderr, "Depopulate process tables of veto_definer process_ids" 167 remove_process_rows(xmldoc, vd_pids, verbose=verbose) 168 169 # remove veto_definer table from xmldoc 170 xmldoc.childNodes[0].removeChild(veto_def_tbl) 171 veto_def_tbl.unlink() 172 if verbose: 173 print >> sys.stderr, "Veto-Definer table dropped from xmldoc"
174 175
176 -def depopulate_experiment_tables(xmldoc, verbose = False):
177 """ 178 Removes entries from the experiment tables that do not have events 179 or durations in them. In other words, if none of the rows in the 180 experiment_summary table that are assoicated with a single experiment_id 181 have neither an event in them nor any duration then all of the rows in the 182 experiment_summary table associated with that experiment_id are deleted, 183 as well as the corresponding row in the experiment table. If, however, just 184 one of the rows in the experiment_summary table associated with an experiment_id 185 have at least a 1 in their nevents column or at least 1 second in the 186 durations column then nothing associated with that experiment_id are deleted. 187 (In other words, if just one time slide in an experiment has just 1 event or 188 is just 1 second long, then none of the time slides in that experiment are deleted, 189 even if all of the other time slides in that experiment have nothing in them.) 190 """ 191 192 if verbose: 193 print >>sys.stderr, "Depopulating the experiment tables...", 194 195 # 196 # find the experiment and experiment summary table 197 # 198 199 try: 200 experiment_table = lsctables.ExperimentTable.get_table(xmldoc) 201 except ValueError: 202 # no table --> no-op 203 if verbose: 204 print >>sys.stderr, "Cannot find the experiment table" 205 return 206 207 try: 208 experiment_summ_table = lsctables.ExperimentSummaryTable.get_table(xmldoc) 209 except ValueError: 210 # no table --> no-op 211 if verbose: 212 print >>sys.stderr, "Cannot find the experiment_summary table" 213 return 214 215 del_eid_indices = [] 216 del_esid_indices = [] 217 218 for mm, erow in enumerate(experiment_table): 219 this_eid = erow.experiment_id 220 es_index_list = [] 221 for nn, esrow in enumerate(experiment_summ_table): 222 if esrow.experiment_id == this_eid and (esrow.duration or esrow.nevents): 223 # something in this experiment, go on to next experiment_id 224 break 225 if esrow.experiment_id == this_eid: 226 es_index_list.append(nn) 227 if nn == len(experiment_summ_table) - 1: 228 # if get to here, nothing in that experiment, mark id and indices 229 # for removal 230 del_eid_indices.append(mm) 231 del_esid_indices += es_index_list 232 233 # delte all experiments who's eids fall in del_eid_indices 234 del_eid_indices.sort(reverse = True) 235 for eid_index in del_eid_indices: 236 del experiment_table[eid_index] 237 # delete all experiment_summaries whose esids fall in del_esid_indices 238 del_esid_indices.sort(reverse = True) 239 for esid_index in del_esid_indices: 240 del experiment_summ_table[esid_index] 241 242 if verbose: 243 print >> sys.stderr, "removed %i empty experiment(s) from the experiment table and %i " + \ 244 "associated time slides from the experiment_summary table." \ 245 %( len(del_eid_indices), len(del_esid_indices) )
246 247 248 # 249 # ============================================================================= 250 # 251 # experiment and experiment_summ tables 252 # 253 # ============================================================================= 254 # 255
256 -def get_experiment_times(xmldoc):
257 """ 258 Use the start & end-times stored in the segment_summary table to define 259 the experiment times. This presumes that the vetoes file has been added 260 to the file being analyzed. 261 """ 262 segment_summary_tbl = lsctables.SegmentSumTable.get_table(xmldoc) 263 expr_start_time = min(segment_summary_tbl.getColumnByName("start_time")) 264 expr_end_time = max(segment_summary_tbl.getColumnByName("end_time")) 265 266 return expr_start_time, expr_end_time
267
268 -def populate_experiment_table( 269 xmldoc, 270 search_group, 271 trigger_program, 272 lars_id, 273 instruments, 274 comments = None, 275 add_inst_subsets = False, 276 verbose = False 277 ):
278 """ 279 Populate the experiment table using the given entries. If 280 add_inst_subsets is set to True, will write additional 281 entries for every possible sub-combination of the given instrument 282 set. Returns a dictionary of experiment_ids keyed by the instrument 283 set. 284 285 @xmldoc: xmldoc to get/write table to 286 @lars_id: lars_id of the experiment 287 @search_group: lsc group that performed the experiment (e.g., cbc) 288 @trigger_program: name of the program that performed the analysis 289 (e.g., inspiral, ringdown, etc.) 290 @comments: any desired comments 291 @add_inst_subsets: will write an entry for every possible subset 292 of @instruments 293 @verbose: be verbose 294 """ 295 296 if verbose: 297 print >> sys.stderr, "\tPopulating the Experiment table..." 298 299 # find the experiment table or create one if needed 300 try: 301 expr_table = lsctables.ExperimentTable.get_table(xmldoc) 302 except ValueError: 303 expr_table = xmldoc.childNodes[0].appendChild(lsctables.New(lsctables.ExperimentTable)) 304 305 # determine experiment start and end times 306 expr_start_time, expr_end_time = get_experiment_times(xmldoc) 307 308 # write entry to the experiment table for the given instruments if it doesn't already exist 309 experiment_ids = {} 310 311 for nn in range(len(instruments), 1, -1): 312 if not add_inst_subsets and (nn != len(instruments)): 313 break 314 # add every possible sub-combination of the instrument set if 315 # they're not already in the table 316 for sub_combo in iterutils.choices( list(instruments), nn ): 317 if frozenset(sub_combo) not in experiment_ids: 318 experiment_ids[frozenset(sub_combo)] = expr_table.write_new_expr_id( 319 search_group, 320 trigger_program, 321 lars_id, 322 sub_combo, 323 expr_start_time, 324 expr_end_time, 325 comments = comments 326 ) 327 328 return experiment_ids
329
330 -def get_experiment_type(xmldoc, time_slide_dict):
331 """ 332 Determine the which experiment type(s) the coincident triggers in this 333 file belong to. It uses information from the inspiral files stored in 334 the process params table to decide if the triggers come from playground 335 time or are from an injection run. If the time_slide_dict has more than 336 one entry, then the triggers are from a slide run. 337 """ 338 # get the param column of the process_params table 339 process_params_tbl = lsctables.ProcessParamsTable.get_table(xmldoc) 340 pp_value = set(process_params_tbl.getColumnByName("value")) 341 pp_param = set(process_params_tbl.getColumnByName("param")) 342 343 zero_lag_dict = dict([dict_entry for dict_entry in time_slide_dict.items() if not any( dict_entry[1].values() )]) 344 345 # determine experiment type(s) 346 datatypes = ['all_data', 'exclude_play', 'playground'] 347 348 usertags = set(['FULL_DATA','PLAYGROUND']) 349 if len(zero_lag_dict): 350 if ('--injection-file' in pp_param) and not (pp_value & usertags): 351 datatypes = ['simulation'] 352 elif 'PLAYGROUND' in pp_value: 353 datatypes = ['playground'] 354 355 if len(time_slide_dict) > len(zero_lag_dict): 356 datatypes += ['slide'] 357 358 return datatypes
359 360
361 -def populate_experiment_summ_table( 362 xmldoc, 363 experiment_id, 364 time_slide_dict, 365 veto_def_name, 366 verbose = False 367 ):
368 """ 369 Populate the experiment_summ_table using an experiment_id, a 370 veto_def_name, and a list of time_slide ids. 371 372 @xmldoc: xmldoc to get/write table to 373 @experiment_id: experiment_id to be added to the table. 374 @veto_def_name: veto_def_name to be added to the table. 375 @time_slide_dict: time_slide table as dictionary; used to set time_slide_id 376 column and figure out whether or not is zero-lag. Can either be the result 377 of lsctables.time_slide_table.as_dict or any dictionary having same format. 378 """ 379 380 if verbose: 381 print >> sys.stderr, "\tPopulating the Experiment Summary table..." 382 383 # find the experiment_summary table or create one if needed 384 try: 385 expr_summ_table = lsctables.ExperimentSummaryTable.get_table(xmldoc) 386 except ValueError: 387 expr_summ_table = xmldoc.childNodes[0].appendChild(lsctables.New(lsctables.ExperimentSummaryTable)) 388 389 # populate the experiment_summary table 390 datatypes = get_experiment_type(xmldoc, time_slide_dict) 391 392 for type in datatypes: 393 for slide_id in time_slide_dict: 394 if type == "slide" and not any( time_slide_dict[slide_id].values() ): 395 continue 396 if type != "slide" and any( time_slide_dict[slide_id].values() ): 397 continue 398 expr_summ_table.write_experiment_summ( 399 experiment_id, 400 slide_id, 401 veto_def_name, 402 type, 403 sim_proc_id = None 404 ) 405 if type != "slide": 406 break
407
408 -def get_on_instruments(xmldoc, trigger_program):
409 process_tbl = lsctables.ProcessTable.get_table(xmldoc) 410 instruments = set([]) 411 for row in process_tbl: 412 if row.program == trigger_program: 413 instruments.add(row.ifos) 414 return instruments
415 416
417 -def generate_experiment_tables(xmldoc, **cmdline_opts):
418 """ 419 Create or adds entries to the experiment table and experiment_summ 420 table using instruments pulled from the search summary table and 421 offsets pulled from the time_slide table. 422 """ 423 424 if cmdline_opts["verbose"]: 425 print >> sys.stderr, "Populating the experiment and experiment_summary tables using " + \ 426 "search_summary and time_slide tables..." 427 428 # Get the instruments that were on 429 instruments = get_on_instruments(xmldoc, cmdline_opts["trigger_program"]) 430 431 # find the experiment & experiment_summary table or create one if needed 432 try: 433 lsctables.ExperimentSummaryTable.get_table(xmldoc) 434 except ValueError: 435 xmldoc.childNodes[0].appendChild(lsctables.New(lsctables.ExperimentSummaryTable)) 436 437 try: 438 lsctables.ExperimentTable.get_table(xmldoc) 439 except ValueError: 440 xmldoc.childNodes[0].appendChild(lsctables.New(lsctables.ExperimentTable)) 441 442 # Populate the experiment table 443 experiment_ids = populate_experiment_table( 444 xmldoc, 445 cmdline_opts["search_group"], 446 cmdline_opts["trigger_program"], 447 cmdline_opts["lars_id"], 448 instruments, 449 comments = cmdline_opts["comment"], 450 add_inst_subsets = True, 451 verbose = cmdline_opts["verbose"] 452 ) 453 454 # Get the time_slide table as dict 455 time_slide_dict = lsctables.TimeSlideTable.get_table(xmldoc).as_dict() 456 457 # Populate the experiment_summary table 458 for instruments in experiment_ids: 459 populate_experiment_summ_table( 460 xmldoc, 461 experiment_ids[instruments], 462 time_slide_dict, 463 cmdline_opts["vetoes_name"], 464 verbose = cmdline_opts["verbose"] 465 )
466 467
468 -def populate_experiment_map(xmldoc, veto_def_name, verbose = False):
469 from glue.pipeline import s2play as is_in_playground 470 471 # 472 # find the experiment_map table or create one if needed 473 # 474 if verbose: 475 print >> sys.stderr, "\tMapping coinc events to experiment_summary table..." 476 477 try: 478 expr_map_table = lsctables.ExperimentMapTable.get_table(xmldoc) 479 except ValueError: 480 expr_map_table = xmldoc.childNodes[0].appendChild(lsctables.New(lsctables.ExperimentMapTable)) 481 482 # 483 # find the coinc_event table 484 # 485 486 coinc_event_table = lsctables.CoincTable.get_table(xmldoc) 487 488 # 489 # Index the coinc_inspiral table as a dictionary 490 # 491 492 coinc_index = dict((row.coinc_event_id, row) for row in lsctables.CoincInspiralTable.get_table(xmldoc)) 493 494 # 495 # Get the time_slide_table as dict 496 # 497 498 time_slide_dict = lsctables.TimeSlideTable.get_table(xmldoc).as_dict() 499 500 # 501 # find the experiment & experiment summary tables 502 # 503 504 expr_table = lsctables.ExperimentTable.get_table(xmldoc) 505 expr_summ_table = lsctables.ExperimentSummaryTable.get_table(xmldoc) 506 507 # 508 # determine what experiment datatypes are in this file 509 # 510 511 datatypes = get_experiment_type(xmldoc, time_slide_dict) 512 513 # 514 # cycle through the coincs in the coinc_inspiral table 515 # 516 for coinc in coinc_event_table: 517 518 # 519 # get the experiment and experiment_summ_id for this coinc 520 # 521 522 for expr in expr_table: 523 if expr.instruments == coinc.instruments: 524 expr_id = expr.experiment_id 525 526 in_playground = is_in_playground( coinc_index[coinc.coinc_event_id].end_time ) 527 for type in datatypes: 528 # make sure that all_data triggers also get mapped to the right 529 # all_data sub-type: exclude_play or playground 530 if in_playground & (type == "exclude_play"): 531 continue 532 elif (not in_playground) & (type == "playground"): 533 continue 534 # if doing zerolag and slides together, make sure the triggers are mapped correctly 535 elif type == "slide" and not any( time_slide_dict[coinc.time_slide_id].values() ): 536 continue 537 elif type != "slide" and any( time_slide_dict[coinc.time_slide_id].values() ): 538 continue 539 else: 540 # map the coinc to an experiment 541 expr_map = lsctables.ExperimentMap() 542 expr_map.coinc_event_id = coinc.coinc_event_id 543 544 expr_map.experiment_summ_id = expr_summ_table.get_expr_summ_id( 545 expr_id, 546 coinc.time_slide_id, 547 veto_def_name, 548 type, 549 sim_proc_id = None 550 ) 551 if not expr_map.experiment_summ_id: 552 raise ValueError, "%s experiment_summ_id could not be found with %s" \ 553 %( type, ','.join([ str(expr_id), str(coinc.time_slide_id), veto_def_name ])) 554 555 # map the experiment 556 expr_map_table.append(expr_map) 557 # Increment number of events in nevents column by 1 558 expr_summ_table.add_nevents( expr_map.experiment_summ_id, 1 )
559