Package pylal :: Module KW_veto_getTriggers
[hide private]
[frames] | no frames]

Source Code for Module pylal.KW_veto_getTriggers

  1  #!/usr/bin/env python 
  2  # 
  3  # Copyright (C) 2009  Tomoki Isogai 
  4  # 
  5  # This program is free software; you can redistribute it and/or modify it 
  6  # under the terms of the GNU General Public License as published by the 
  7  # Free Software Foundation; either version 3 of the License, or (at your 
  8  # option) any later version. 
  9  # 
 10  # This program is distributed in the hope that it will be useful, but 
 11  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 13  # Public License for more details. 
 14  # 
 15  # You should have received a copy of the GNU General Public License along 
 16  # with this program; if not, write to the Free Software Foundation, Inc., 
 17  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 18   
 19  """ 
 20  %prog --trigger_files=Files [--segment_file=File | --gps_start_time=GPSTime --gps_end_time=GPSTime] [options] 
 21   
 22  Tomoki Isogai (isogait@carleton.edu) 
 23   
 24  This program gets GPS time and SNR of GW triggers that fall into a specified segments above a given SNR threshold. 
 25  Input file must be ligolw xml or ascii format. For ligolw xml files, the code deals with sngl_inspiral, sngl_ringdown or singl_burst table. For ascii files, it is assumed that the first column represents GPS time of triggers and the second column represents SNR. The code ignores any empty lines and lines starting with "#" or "%", and gives an error if more that two columns are found. 
 26   
 27  If --out_format is given, a file will be created for each channel with the name 
 28  (start_time)_(end_time)_(SNR threshold)_GWtrigs.(specified extention) 
 29  If --name_tag is given, (name_tag)_ will be added to the name as prefix. 
 30  If --out_format is not given, the code prints out the result in stdout. 
 31  You can specify --order_by to sort the output. Supported options are 'GPSTime asc' for ascending time, 'GPSTime desc' for descending time, 'SNR asc' for ascending SNR, and 'SNR desc' for descending SNR. Default is 'GPSTime asc'. 
 32   
 33  """ 
 34   
 35  # ============================================================================= 
 36  # 
 37  #                               PREAMBLE 
 38  #  
 39  # ============================================================================= 
 40   
 41  from __future__ import division 
 42   
 43  import os 
 44  import sys 
 45  import shutil 
 46  import optparse 
 47  import cPickle 
 48   
 49  import sqlite3 
 50   
 51  from glue import segmentsUtils 
 52  from glue.segments import segment, segmentlist 
 53   
 54  from pylal import git_version 
 55  from pylal import KW_veto_utils 
 56   
 57  __author__ = "Tomoki Isogai <isogait@carleton.edu>" 
 58  __date__ = "7/10/2009" 
 59  __version__ = "2.0" 
 60   
61 -def parse_commandline():
62 """ 63 Parse the options given on the command-line. 64 """ 65 parser = optparse.OptionParser(usage=__doc__,version=git_version.verbose_msg) 66 67 parser.add_option("-t", "--trigger_files", action="append", default=[], 68 help="File containing triggers. Required.") 69 parser.add_option("-S", "--segment_file", default=None, 70 help="Segments file by which triggers are filtered. This option or --gps_start_time and --gps_end_time are required.") 71 parser.add_option("-s", "--gps_start_time", type="int", 72 help="GPS start time on which triggers are retrieved. Required unless --segment_file is specified.") 73 parser.add_option("-e", "--gps_end_time", type="int", 74 help="GPS end time on which triggers are retrieved. Required unless --segment_file is specified.") 75 parser.add_option("-m", "--min_thresh", default=8, type="float", 76 help="Code filters triggers below this minimum SNR value. (Default: 8)") 77 parser.add_option("-n", "--name_tag", default=None, 78 help="If given, this will be added as prefix in the output file name.") 79 parser.add_option("-f", "--out_format", default='.stdout', 80 help="Save in this format if specified, otherwise output in stdout.") 81 parser.add_option("-o", "--order_by", default="GPSTime asc", 82 help="Order of the output. See --help for the supported format and output file name. (Default: GPSTime asc)") 83 parser.add_option('-l', "--scratch_dir", default='.', 84 help="Scratch directory to be used for database engine. Specify local scratch directory for better performance and less fileserver load. (Default: current directory)") 85 parser.add_option("-v", "--verbose", action="store_true",default=False, 86 help="Run verbosely. (Default: False)") 87 88 opts, args = parser.parse_args() 89 90 91 ############################## Sanity Checks ############################## 92 93 ## check if necessary input exists 94 # trigger file 95 if opts.trigger_files == []: 96 parser.error("Error: --trigger_files is a required parameter") 97 for t in opts.trigger_files: 98 if not os.path.isfile(t): 99 parser.error("Error: --trigger_files %s not found"%t) 100 101 # segments 102 if opts.segment_file is not None: 103 if not os.path.isfile(opts.segment_file): 104 parser.error("Error: segment file %s not found"%opts.segment_file) 105 else: 106 for t in ('gps_start_time','gps_end_time'): 107 if getattr(opts,t) is None: 108 parser.error("Error: either --segment_file or --gps_start/end_time must be specified.") 109 110 # check if scratch directory exists 111 if not os.path.exists(opts.scratch_dir): 112 parser.error("Error: %s does not exist"%opts.scratch_dir) 113 114 # standardize the format (starting from period .) and see if it's supported 115 if opts.out_format[0] != ".": 116 opts.out_format = "." + opts.out_format 117 if opts.out_format not in (\ 118 ".stdout",".pickle",".pickle.gz",".mat",".txt",".txt.gz",".db"): 119 parser.error("Error: %s is not a supported format"%opts.out_format) 120 121 # check order_by is valid and avoid SQL injection attack 122 if opts.order_by not in ("GPSTime asc","GPSTime desc","SNR asc","SNR desc"): 123 parser.error("Error: %s is not valid. See -help for the supported order."%opes.order_by) 124 125 ## show parameters 126 if opts.verbose: 127 print >> sys.stderr, "running KW_veto_getTriggers..." 128 print >> sys.stderr, git_version.verbose_msg 129 print >> sys.stderr, "" 130 print >> sys.stderr, "******************** PARAMETERS *****************" 131 print >> sys.stderr, 'trigger file:' 132 print >> sys.stderr, opts.trigger_files; 133 if opts.segment_file is not None: 134 print >> sys.stderr, 'segment file:' 135 print >> sys.stderr, opts.segment_file; 136 else: 137 print >> sys.stderr, 'start/end GPS time:' 138 print >> sys.stderr, "%d - %d"%(opts.gps_start_time,opts.gps_end_time) 139 print >> sys.stderr, 'minimum SNR:' 140 print >> sys.stderr, opts.min_thresh; 141 print >> sys.stderr, 'name tag:' 142 print >> sys.stderr, opts.name_tag; 143 print >> sys.stderr, 'output format:' 144 print >> sys.stderr, opts.out_format[1:]; 145 print >> sys.stderr, 'order by:' 146 print >> sys.stderr, opts.order_by; 147 print >> sys.stderr, 'scratch directory:' 148 print >> sys.stderr, opts.scratch_dir; 149 print >> sys.stderr, '' 150 151 return opts
152
153 -def get_trigs_txt(GWcursor,trigger_file,segs,min_thresh,tracker,verbose):
154 """ 155 Read trigger data from text file. 156 It is assumed that the first column is GPS time and the second is SNR. 157 """ 158 # read lines avoiding white space and comment lines 159 trigs = [line.split() for line in open(trigger_file) if (line.split() != [] and not line.startswith("%") and not line.startswith("#"))] 160 161 # check if there is triggers 162 if len(trigs) == 0: 163 print >> sys.stderr, "Error: no triggers found. Please check your trigger file %s."%trigger_file 164 sys.exit(1) 165 166 # check the number of columns 167 if len(trigs[0]) != 2: 168 print >> sys.stderr, "Error: two columns are assumed (1. GPS time 2. SNR), found %d columns. Please check the trigger file."%len(trigs[0]) 169 sys.exit(1) 170 171 ## get times and snrs 172 for trig in trigs: 173 t = float(trig[0]) 174 s = float(trig[1]) 175 if t in segs: 176 if s >= min_thresh and s != float('inf'): 177 # insert into the database 178 # micro second for GPS time is accurate enough 179 GWcursor.execute("insert into GWtrigs values (?, ?)",("%.3f"%t,"%.2f"%s)) 180 tracker['counter'] += 1 181 else: 182 # keep track if some triggers are outside of segments and later 183 # warn the user (for veto code use) 184 tracker['outside'] = True 185 return GWcursor
186
187 -def get_trigs_xml(GWcursor,trigger_file,segs,min_thresh,tracker,verbose):
188 """ 189 Read trigger data from xml file that has ligolw xml format. 190 Many lines in this function are adapted from ligolw_print by Kipp Cannon 191 and modified to suit the need. 192 """ 193 from glue.ligolw import ligolw 194 from glue.ligolw import table 195 from glue.ligolw import lsctables 196 from glue.ligolw import utils 197 # speed hacks 198 # replace Glue's pure Python LIGOTimeGPS class with pyLAL's C version 199 from pylal.xlal.datatypes.ligotimegps import LIGOTimeGPS 200 lsctables.LIGOTimeGPS = LIGOTimeGPS 201 202 # Enable column interning to save memory 203 table.TableStream.RowBuilder = table.InterningRowBuilder 204 205 # for now, hardcode the table/column names 206 # FIXME: assuming there is only one of those tables in the file 207 tables = ['sngl_burst','sngl_inspiral','sngl_ringdown'] 208 columns = ['end_time','end_time_ns','snr'] 209 210 211 # don't parse other tables so as to improve parsing speed and reduce memory 212 # requirements. 213 214 def ContentHandler(xmldoc): 215 return ligolw.PartialLIGOLWContentHandler(xmldoc, lambda name, attrs:\ 216 (name == ligolw.Table.tagName) and\ 217 (table.Table.TableName(attrs["Name"]) in tables))
218 try: 219 lsctables.use_in(ligolw.PartialLIGOLWContentHandler) 220 except AttributeError: 221 # old glue did not allow .use_in(). 222 # FIXME: remove when we can require the latest version of glue 223 pass 224 225 # FIXME: find a way to load only columns necessary 226 # something like lsctables.SnglInspiral.loadcolumns = columns? 227 228 xmldoc = utils.load_url(trigger_file, verbose = verbose,\ 229 gz = trigger_file.endswith(".gz"), 230 contenthandler = ContentHandler) 231 table.InterningRowBuilder.strings.clear() 232 for table_elem in xmldoc.getElements(lambda e:\ 233 (e.tagName == ligolw.Table.tagName)): 234 # trigger specific time retrieval functions 235 if table_elem.tableName in ('sngl_inspiral'): 236 get_time = lambda row: row.get_end() 237 elif table_elem.tableName in ('sngl_burst'): 238 get_time = lambda row: row.get_peak() 239 elif table_elem.tableName in ('sngl_ringdown'): 240 get_time = lambda row: row.get_start() 241 else: 242 print >> sys.stderr, "Error: This should not be happening. Please contact to the author with the error trace." 243 sys.exit(1) 244 245 for row in table_elem: 246 t = get_time(row) 247 if t in segs: 248 if row.snr > min_thresh: 249 # insert into the database 250 # micro second for GPS time is accurate enough 251 GWcursor.execute("insert into GWtrigs values (?, ?)",("%.3f"%t, "%.2f"%row.snr)) 252 tracker['counter'] += 1 253 else: 254 # some triggers are outside of segments 255 tracker['outside'] = True 256 xmldoc.unlink() 257 return GWcursor 258
259 -def get_trigs(trigger_files,segs,min_thresh,name_tag=None,scratch_dir=".",verbose=True):
260 """ 261 prepare the SQL database and retrieve the triggers 262 """ 263 # initialize SQLite database 264 start_time = segs[0][0] 265 end_time = segs[-1][1] 266 prefix = "%d_%d_%d_GWtrigs"%(start_time,end_time,min_thresh) 267 if name_tag != None: 268 prefix = name_tag + "_" + prefix 269 dbname = prefix+".db" 270 # if the name already exists, rename the old one to avoid collisions 271 KW_veto_utils.rename(dbname) 272 273 global GW_working_filename # so that it can be erased when error occurs 274 GW_working_filename = KW_veto_utils.get_connection_filename(\ 275 dbname,tmp_path=scratch_dir,verbose=verbose) 276 277 GWconnection = sqlite3.connect(GW_working_filename) 278 GWcursor = GWconnection.cursor() 279 280 GWcursor.execute('create table GWtrigs (GPSTime double, SNR double)') 281 282 ## find out file format and read in the data 283 # tracker tracks 1) number of triggers retrieved and 2) if any triggers outside of specified segments 284 tracker = {'counter': 0,'outside': False} 285 for t in trigger_files: 286 ext=os.path.splitext(t)[-1] 287 if ext == '.txt' or ext == '.dat': 288 if verbose: print >> sys.stderr, "getting triggers from txt/dat file..." 289 GWcursor = get_trigs_txt(GWcursor,t,segs,min_thresh,tracker,verbose) 290 elif ext == '.xml': 291 if verbose: print >> sys.stderr, "getting triggers from xml file..." 292 GWcursor = get_trigs_xml(GWcursor,t,segs,min_thresh,tracker,verbose) 293 else: 294 print >> sys.stderr, """ 295 Error: unrecognized file format: please see --help and modify the 296 file to a supported format 297 """ 298 sys.exit(1) 299 300 GWconnection.commit() 301 302 if verbose: print >> sys.stderr, "times and SNR for triggers retrieved!" 303 304 # if some triggers were outside of given segments, warn the user 305 if tracker['outside']: 306 print >> sys.stderr, """ 307 Warning: Some of the triggers are outside of the segment list. 308 Unless intentional (using DQ flags etc.), make sure you 309 are using the right segment list. 310 Ignoring... 311 """ 312 313 # check how many triggers remained after cuts 314 if tracker['counter'] == 0: 315 print >> sys.stderr, """ 316 Error : No triggers remained after cut. 317 Please check trigger files and segments. 318 """ 319 sys.exit(1) 320 321 if verbose: 322 print >> sys.stderr, "%d triggers are retrieved."%tracker['counter'] 323 324 return dbname, GW_working_filename, GWconnection, GWcursor, tracker['counter']
325 326 # ============================================================================= 327 # 328 # Main 329 # 330 # ============================================================================= 331
332 -def main():
333 # parse commandline 334 opts = parse_commandline() 335 336 ## get the segments on which triggers are retrieved 337 # case 1: segment file is given 338 if opts.segment_file is not None: 339 # read segment file 340 seg_list = KW_veto_utils.read_segfile(opts.segment_file) 341 # case 2: start and end GPS time are given 342 else: 343 seg_list = segmentlist([segment(opts.gps_start_time,opts.gps_end_time)]) 344 345 try: 346 dbname, GW_working_filename, GWconnection, GWcursor, GWnum =\ 347 get_trigs(opts.trigger_files, seg_list, opts.min_thresh, name_tag = opts.name_tag, scratch_dir=opts.scratch_dir, verbose=opts.verbose) 348 349 # save/display the result 350 outname = os.path.splitext(dbname)[0]+opts.out_format 351 KW_veto_utils.save_db(GWcursor,"GWtrigs",outname,GW_working_filename, 352 order_by=opts.order_by,verbose=opts.verbose) 353 354 # close the connection to the database 355 GWconnection.close() 356 finally: 357 # erase temporal database 358 if globals().has_key('GW_working_filename'): 359 db = globals()["GW_working_filename"] 360 if opts.verbose: 361 print >> sys.stderr, "removing temporary workspace '%s'..." % db 362 os.remove(db)
363 364 if __name__=="__main__": 365 main() 366