Package pylal :: Module auxmvc_utils
[hide private]
[frames] | [no frames]

Source Code for Module pylal.auxmvc_utils

  1  import os 
  2  import sys 
  3  import numpy 
  4  import copy 
  5  import pickle 
  6  import tempfile 
  7   
def ROC(clean_ranks, glitch_ranks):
    """
    Calculates ROC curves based on the ranks assigned by a classifier to clean and glitch aux triggers.

    clean_ranks  - ranks assigned to clean (background) samples.
    glitch_ranks - ranks assigned to glitch samples.

    Returns (FAP, Eff): arrays of false alarm probability and detection
    efficiency evaluated at every clean-sample rank, in ascending rank order.
    """
    # Both helpers require their rank arrays sorted in ascending order.
    clean_ranks_sorted = numpy.sort(clean_ranks)
    glitch_ranks_sorted = numpy.sort(glitch_ranks)
    # (The original also built number_of_false_alarms / number_of_true_alarms
    # lists that were never used; they are removed here.)
    FAP = []
    Eff = []
    for rank in clean_ranks_sorted:
        FAP.append(compute_FAP(clean_ranks_sorted, glitch_ranks_sorted, rank))
        Eff.append(compute_Eff(clean_ranks_sorted, glitch_ranks_sorted, rank))

    return numpy.asarray(FAP), numpy.asarray(Eff)
23 24
def compute_FAP(clean_ranks, glitch_ranks, rank):
    """
    Return the false alarm probability at the given rank.

    The FAP is the fraction of clean samples whose rank is at or above
    ``rank``. Input arrays clean_ranks and glitch_ranks must be sorted in
    ascending order (glitch_ranks is accepted for interface symmetry with
    compute_Eff but is not used).
    """
    n_clean = len(clean_ranks)
    n_at_or_above = n_clean - numpy.searchsorted(clean_ranks, rank)
    return n_at_or_above / float(n_clean)
33
def compute_Eff(clean_ranks, glitch_ranks, rank):
    """
    Return the detection probability (efficiency) at the given rank.

    The efficiency is the fraction of glitch samples whose rank is at or
    above ``rank``. Input arrays clean_ranks and glitch_ranks must be sorted
    in ascending order (clean_ranks is accepted for interface symmetry with
    compute_FAP but is not used).
    """
    n_glitch = len(glitch_ranks)
    n_at_or_above = n_glitch - numpy.searchsorted(glitch_ranks, rank)
    return n_at_or_above / float(n_glitch)
42 43
def split_array(array, Nparts = 2):
    """
    Split a 2-d record array into Nparts pieces of (near) equal size.

    If the number of elements is not divisible by Nparts, all pieces except
    the last have equal length and the last piece absorbs the remainder.
    Returns the list of sub-arrays.
    """
    chunk = int(len(array) / float(Nparts))
    pieces = []
    # All but the final piece take exactly `chunk` consecutive elements.
    for k in range(Nparts - 1):
        pieces.append(array[:][k * chunk:(k + 1) * chunk])
    # The final piece runs to the end, picking up any remainder.
    pieces.append(array[:][(Nparts - 1) * chunk:])
    return pieces
61
def get_clean_samples(Triggers):
    """
    Return only the clean samples from the set of random samples.

    By definition, a sample is unclean if there is a KW trigger within a
    0.1 second time window; such samples carry a non-zero 'unclean' flag.

    Triggers - record array with an 'unclean' field.
    """
    # NOTE(review): the trailing ", :" index on what appears to be a 1-D
    # structured array relies on legacy numpy indexing semantics; modern
    # numpy raises IndexError for a second index on a 1-D array — verify
    # against the numpy version this module is run with.
    return Triggers[numpy.nonzero(Triggers['unclean'] == 0.0)[0],:]
68
def get_samples_in_segments(Triggers, segments):
    """
    Return a new record array containing only the triggers inside segments.

    segments is a list of (t1, t2) tuples; a trigger is kept when its time
    (GPS_s + GPS_ms/1000) lies in any closed interval [t1, t2].

    This linear scan is not as efficient as it could be: it could be sped up
    by sorting Triggers and segments, but sorting would mix glitches and
    clean samples, so clarity is preferred here.
    """
    drop_indices = []

    for idx in range(len(Triggers)):
        # Reassemble the trigger time from its second / millisecond fields.
        t = Triggers[idx]['GPS_s'] + Triggers[idx]['GPS_ms'] * 10**(-3)
        covered = False
        for (t1, t2) in segments:
            if t1 <= t <= t2:
                covered = True
        if not covered:
            drop_indices.append(idx)

    return numpy.delete(Triggers, drop_indices, 0)
89 90
def getKWAuxTriggerFromDQCAT(Triggers, DQ_category):
    """
    Return the subset of Triggers belonging to the given data-quality category.

    The DQ2/DQ3/DQ4 fields are 0.0/1.0 flags; elementwise '*' acts as a
    logical AND and '+' as a logical OR on the boolean comparison arrays.

    DQ_category - one of:
      'DQ2'    : DQ2 set
      'DQ3'    : DQ3 set and DQ2 not set
      'DQ4'    : DQ4 set and neither DQ2 nor DQ3 set
      'DQ23'   : DQ2 or DQ3 set
      'DQ234'  : DQ2, DQ3 or DQ4 set
      'aDQ2'   : DQ2 not set
      'aDQ23'  : neither DQ2 nor DQ3 set
      'aDQ234' : none of DQ2, DQ3, DQ4 set
      'ALL'    : all triggers, unchanged

    Raises ValueError for an unknown category.
    """
    # NOTE(review): the trailing ", :" index on what appears to be a 1-D
    # structured array relies on legacy numpy indexing semantics; modern
    # numpy raises IndexError for a second index on a 1-D array — verify
    # against the numpy version this module is run with.
    if DQ_category == 'DQ2':
        return Triggers[numpy.nonzero(Triggers['DQ2'] == 1.0)[0],:]
    elif DQ_category == 'DQ3':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 1.0))[0],:]
    elif DQ_category == 'DQ4':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0) * (Triggers['DQ4'] == 1.0))[0],:]
    elif DQ_category == 'DQ23':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 1.0) + (Triggers['DQ3'] == 1.0))[0],:]
    elif DQ_category == 'DQ234':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 1.0) + (Triggers['DQ3'] == 1.0) + (Triggers['DQ4'] == 1.0))[0],:]
    elif DQ_category == 'aDQ2':
        return Triggers[numpy.nonzero(Triggers['DQ2'] == 0.0)[0],:]
    elif DQ_category == 'aDQ23':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0))[0],:]
    elif DQ_category == 'aDQ234':
        return Triggers[numpy.nonzero((Triggers['DQ2'] == 0.0) * (Triggers['DQ3'] == 0.0) * (Triggers['DQ4'] == 0.0))[0],:]
    elif DQ_category == 'ALL':
        return Triggers
    else:
        raise ValueError("Unknown DQ category")
113
def ReadKWAuxTriggers(files):
    """
    Reads in KW auxiliary triggers from files. Triggers are stored in a
    1-D record array.

    The fields of the array are labelled by the names of the variables,
    which are read off of the first line of each input file. Every line
    (except the first) of the input file(s) corresponds to one KW trigger
    (one row) in the array. Triggers from all files are concatenated in
    the order the files are given.

    files - list of whitespace-separated ASCII trigger files, each with a
            header line of variable names.
    """
    for (i,f) in enumerate(files):
        flines = open(f).readlines()
        # Column names come from the header line of this file.
        variables = flines[0].split()
        # NOTE(review): 'g8' requests an 8-byte long double; this is only a
        # valid dtype on platforms where long double is 8 bytes — confirm on
        # the target platform.
        formats = ['g8' for a in range(len(variables))]
        if i > 0:
            # Append this file's triggers to the ones read so far.
            KWAuxTriggers = numpy.concatenate((KWAuxTriggers ,numpy.loadtxt(f,skiprows=1, dtype={'names': variables,'formats':formats})),axis=0)
        else:
            KWAuxTriggers = numpy.loadtxt(f,skiprows=1, dtype={'names': variables,'formats':formats})

    return KWAuxTriggers
132
def ShuffleKWAuxTriggers(KWAuxTriggers, dT=60.0):
    """
    Shuffle segmented trigger packets.

    The triggers are grouped into packets of dT seconds of GPS time, the
    packet order is randomized, and the triggers are written back out packet
    by packet. KWAuxTriggers must be sorted by 'GPS' (required by the
    numpy.searchsorted packet-boundary lookups below).
    """
    gps = KWAuxTriggers['GPS']
    gps_start_time = int(numpy.min(gps))
    gps_end_time = int(numpy.max(gps))+1
    duration = (gps_end_time - gps_start_time)
    # Number of dT-long packets needed to cover [start, end].
    n_minutes = int(duration / dT) + 1
    start_times = gps_start_time + dT * numpy.arange(n_minutes)
    # Randomize the packet order in place (uses numpy's global RNG state).
    numpy.random.shuffle(start_times)
    end_times = start_times + dT

    # Packet boundaries expressed as indices into the GPS-sorted triggers.
    start_indexes = numpy.searchsorted(gps, start_times)
    end_indexes = numpy.searchsorted(gps, end_times)
    n_triggers = len(KWAuxTriggers)
    ShuffledKWAuxTriggers = numpy.empty((n_triggers,), dtype=KWAuxTriggers.dtype)
    current_start_index = 0
    current_end_index = 0
    # Copy each non-empty packet into the next free slot of the output array.
    for (start_index, end_index) in zip(start_indexes, end_indexes):
        if start_index < end_index:
            current_end_index = current_start_index + end_index - start_index
            ShuffledKWAuxTriggers[:][current_start_index:current_end_index] = KWAuxTriggers[:][start_index:end_index]
            current_start_index = current_end_index

    return ShuffledKWAuxTriggers
160
def FilterKWAuxTriggers(KWAuxTriggers,excludeparameters,excludechannels):
    """
    Exclude irrelevant channels and trigger parameters from KW aux triggers.

    KWAuxTriggers     - record array of KW auxiliary triggers.
    excludeparameters - comma separated string of parameter substrings to
                        drop, e.g. "dur,freq,npts"; falsy to skip.
    excludechannels   - list of channel-name substrings to drop; falsy to
                        skip.

    Returns a new record array with only the surviving columns; when neither
    exclusion is given the input array is returned unchanged.
    """
    KWAuxvariables = list(KWAuxTriggers.dtype.names)
    filteredvariables = copy.copy(KWAuxvariables)

    # Both exclusion lists are empty: nothing to filter.
    # (Bug fix: the original condition `not excludeparameters and
    # excludechannels` parsed as `(not excludeparameters) and
    # excludechannels`, which returned the unfiltered array — skipping all
    # channel filtering — whenever only excludechannels was supplied.)
    if not (excludeparameters or excludechannels):
        return KWAuxTriggers

    # Drop every column whose name contains one of the excluded parameter
    # substrings.
    if excludeparameters:
        for exp in excludeparameters.split(","):
            for pvar in KWAuxvariables:
                if exp.strip() in pvar.strip() and pvar in filteredvariables:
                    filteredvariables.remove(pvar)

    # Drop every column whose name contains one of the excluded channel
    # substrings.
    if excludechannels:
        for exc in excludechannels:
            for cvar in KWAuxvariables:
                if cvar in filteredvariables and exc.strip() in cvar.strip():
                    filteredvariables.remove(cvar)

    n_triggers = len(KWAuxTriggers)
    # Preserve each surviving column's original dtype instead of forcing the
    # legacy 'g8' format ('g8' is an 8-byte long double, which is not a valid
    # dtype on platforms where long double is a different size).
    out_dtype = [(name, KWAuxTriggers.dtype[name]) for name in filteredvariables]
    FilteredKWAuxTriggers = numpy.empty((n_triggers,), dtype=out_dtype)

    for fvariable in filteredvariables:
        FilteredKWAuxTriggers[fvariable] = KWAuxTriggers[fvariable]

    return FilteredKWAuxTriggers
203 204 205 206
def ConvertKWAuxToMVSC(KWAuxGlitchTriggers, KWAuxCleanTriggers, ExcludeVariables = None):
    """
    Converts KW auxiliary triggers into MVSC triggers.

    KWAuxGlitchTriggers - KW triggers corresponding to glitches in DARM.
    KWAuxCleanTriggers  - KW triggers corresponding to clean DARM data.
    ExcludeVariables    - optional list of KW variable names to leave out.

    Returns a record array with glitch rows placed before clean rows,
    bookkeeping columns 'index' (1-based row number), 'i' (1 for glitch
    rows, 0 for clean rows), 'w' (weights, all ones) and 'glitch-rank'
    (initialized to zero) added, and the 'GPS' column split into integer
    'GPS_s'/'GPS_ms' columns.
    """
    KWvariables = list(KWAuxGlitchTriggers.dtype.names)
    if ExcludeVariables:
        for variable in ExcludeVariables:
            KWvariables.remove(variable)

    # remove GPS time; integer GPS seconds and milliseconds columns are
    # added instead below
    if 'GPS' in KWvariables:
        KWvariables.remove('GPS')

    MVSCvariables = ['index', 'i', 'w', 'GPS_s', 'GPS_ms']+ KWvariables + ['glitch-rank']
    # NOTE(review): 'g8' requests an 8-byte long double, which is only a
    # valid dtype on platforms where long double is 8 bytes — confirm on the
    # target platform.
    formats = ['i','i','g8','i', 'i'] + ['g8' for a in range(len(MVSCvariables) - 5)]
    n_triggers = len(KWAuxGlitchTriggers) + len(KWAuxCleanTriggers)

    # Class labels: glitch samples are 1, clean samples are 0.
    i_row = numpy.concatenate((numpy.ones(len(KWAuxGlitchTriggers)), numpy.zeros(len(KWAuxCleanTriggers))))
    index_row = numpy.arange(1, n_triggers + 1)
    w_row = numpy.ones(n_triggers)
    glitch_rank_row = numpy.zeros(n_triggers)

    MVSCTriggers = numpy.empty((n_triggers,), dtype={'names': MVSCvariables,'formats':formats})
    MVSCTriggers['index'] = index_row
    MVSCTriggers['i'] = i_row
    MVSCTriggers['w'] = w_row
    MVSCTriggers['glitch-rank'] = glitch_rank_row
    # set seconds and milliseconds columns: integer part of GPS, and the
    # fractional part rounded to 3 decimals then scaled to milliseconds
    MVSCTriggers['GPS_s'] = (numpy.concatenate((KWAuxGlitchTriggers['GPS'], KWAuxCleanTriggers['GPS'])) // 1.0).astype('int')
    MVSCTriggers['GPS_ms'] = ((numpy.around((numpy.concatenate((KWAuxGlitchTriggers['GPS'], KWAuxCleanTriggers['GPS'])) % 1.0), decimals=3) * 10**3) // 1.0).astype('int')
    # Copy the remaining KW variables, glitch rows first.
    for variable in MVSCvariables:
        if not variable in ['index', 'i', 'w', 'glitch-rank', 'GPS_s', 'GPS_ms']:
            MVSCTriggers[variable] = numpy.concatenate((KWAuxGlitchTriggers[variable], KWAuxCleanTriggers[variable]))


    return MVSCTriggers
246
def WriteMVSCTriggers(MVSCTriggers, output_filename, Classified = False):
    """
    Write MVSC triggers to file.

    If Classified = False, triggers are treated as unclassified and saved in
    the input format expected by MVSC: a line with the number of variables,
    a line with the variable names, then one row per trigger with the 'i'
    label moved to the last column.
    If Classified = True, triggers are saved in the same format as the
    output of MVSC: a single header line of variable names, then the rows.
    All values are written with three decimal places.
    """
    if not Classified:
        Unclassified_variables = list(MVSCTriggers.dtype.names)
        # Drop the classification bookkeeping columns, then re-append 'i' so
        # it becomes the final column as the MVSC input format requires.
        for var in ['index', 'i', 'w', 'glitch-rank']:
            if var in Unclassified_variables:
                Unclassified_variables.remove(var)
        Unclassified_variables.append('i')
        formats = []
        for var in Unclassified_variables:
            if var in ['GPS_s', 'GPS_ms', 'i']:
                formats.append('i')
            else:
                # NOTE(review): 'g8' (8-byte long double) is only a valid
                # dtype on platforms where long double is 8 bytes — confirm
                # on the target platform.
                formats.append('g8')

        Triggers = numpy.empty(MVSCTriggers.shape, dtype={'names': Unclassified_variables,'formats':formats})

        for variable in Unclassified_variables:
            Triggers[variable] = MVSCTriggers[variable]

    else:
        Triggers = MVSCTriggers

    # Context manager replaces the original bare open()/close() pair, which
    # also shadowed the `file` builtin and duplicated the close in both
    # branches.
    with open(output_filename, "w") as outfile:
        if Classified:
            first_line = " ".join(list(Triggers.dtype.names))
            outfile.write(first_line + "\n")
        else:
            first_line = str(len(list(Triggers.dtype.names)[:-1]))
            second_line = " ".join(list(Triggers.dtype.names)[:-1])
            outfile.write(first_line + "\n")
            outfile.write(second_line + "\n")

        # A 0-d (single record) array requires special handling: it cannot
        # be iterated row by row.
        if len(Triggers.shape):
            for i in range(len(Triggers)):
                line = " ".join(["%0.3f" % (var) for var in Triggers[i]])
                outfile.write(line + "\n")
        else:
            line = " ".join(["%0.3f" % (Triggers[var]) for var in list(Triggers.dtype.names)])
            outfile.write(line + "\n")
297 298 299
def ReadMVSCTriggers(files, Classified=True):
    """
    Reads in MVSC triggers from files into a 1-D record array.

    The variable (column) names are read off of the first (or second) line
    of the first input file, depending on whether the triggers were
    classified. Every line after the header line(s) of the input file(s)
    corresponds to one MVSC trigger (one row) in the array.

    files      - non-empty list of file names (exits with an error message
                 otherwise).
    Classified - True: files are MVSC output (one header line);
                 False: files are MVSC input (two header lines) and an 'i'
                 column is appended to the variable list.
    """
    if Classified == True:
        varline = 0
        nskiplines = 1
    else:
        varline = 1
        nskiplines = 2
    if len(files) == 0:
        # print() as a function keeps this routine runnable under both
        # Python 2 and Python 3 (the original used the py2-only statement).
        print("Error: Empty input file list.")
        sys.exit(1)
    flines = open(files[0]).readlines()
    variables = flines[varline].split()
    if Classified == False:
        variables.append("i")
    formats = []
    for var in variables:
        if var in ['GPS_s', 'GPS_ms', 'i', 'index']:
            formats.append('i')
        else:
            formats.append('g8')

    # Append the trigger rows (header lines stripped) of the remaining files.
    for fname in files[1:]:
        flines.extend(open(fname).readlines()[nskiplines:])

    # Concatenate everything into one temporary file so numpy.loadtxt can
    # parse the combined trigger set in a single pass. Text mode is required:
    # flines holds str lines (the original binary-mode TemporaryFile breaks
    # under Python 3).
    tmpfile = tempfile.TemporaryFile(mode='w+')
    tmpfile.writelines(flines)
    tmpfile.seek(0)
    if not (len(flines) == nskiplines):  # combined file contains triggers
        trigs = numpy.loadtxt(tmpfile, skiprows=nskiplines, dtype={'names': variables, 'formats': formats})
        if not trigs.shape:  # single row loaded from file, requires reshaping of the array
            trigs = trigs.reshape((1,))
        MVSCTriggers = trigs
    else:  # no triggers in the file, create empty array
        MVSCTriggers = numpy.empty((0,), dtype={'names': variables, 'formats': formats})

    tmpfile.close()
    return MVSCTriggers
346 347 348 349
def LoadOVL(filename):
    """
    Reads in the pickled output of CV data (eg: kwl1-35.track.9.pickle) and inverts the data storage.
    Returns a list of gwtrg's removed by the Cveto method.
    Returned gwtrg's are labeled by tcent (and only tcent) and are associated with a given vconfig (vchan, vthr, vwin) and vstats (dsec, c_dsec, etc).

    input arg:
      filename: the file that is to be loaded. This must be a string.
    output arg:
      gwtrg_vtd_tcent: a list of gwtrgs (labeled by tcent) with associated vconfig and vstats.
      the storage structure is: under each column: [tcent, [vconfig], [vstats]]
      check below for exact formats of vconfig and vstats.
    """

    # define column names for KW trig's
    col_kw = {'tstart':0, 'tstop':1, 'tcent':2, 'fcent':3, 'uenergy':4, 'nenergy':5, 'npix':6, 'signif':7}

    # load and unpickle the .track.run.pickle file to be loaded
    # NOTE(review): pickle.load can execute arbitrary code — only load
    # trusted pickle files. The handle is never closed here (legacy code).
    file = open(filename, 'r')
    tfp = pickle.load(file)

    # define a dictionary that labels the indexes in tfp (header row ->
    # column index)
    h = dict([[tfp[0][i], i] for i in range(len(tfp[0]))])

    # load global parameters used for c_stats
    c_livetime = tfp[1][h['livetime']]
    c_ngwtrg = tfp[1][h['#gwtrg']]
    # NOTE(review): ngwtrg_vtd is computed but never used below.
    ngwtrg_vtd = tfp[1][h['#gwtrg']] - tfp[len(tfp) - 1][h['#gwtrg']] + tfp[len(tfp) -1][h['vact']]

    # instantiate counters for c_stats (cumulative over the veto configs)
    c_dsec = 0.0
    c_csec = 0.0
    c_vact = 0.0

    # instantiate the storage device and the index counter
    gwtrg_vtd_tcent = []
    # tcentidx = 0

    # iterate over tfp and fill in gwtrg_vtd_tcent
    for lineidx in range(len(tfp)):
        line = tfp[lineidx]
        if line[0] == 'livetime':
            # header row: skip it
            pass
        else:
            # compute cumulative quantities and lists to be stored under gwtcent
            c_dsec += line[h['dsec']]
            c_csec += line[h['csec']]
            c_vact += line[h['vact']]
            vconfig = [line[h['vchan']], line[h['vthr']], line[h['vwin']]]
            vstats = [line[h['livetime']], line[h['#gwtrg']], line[h['dsec']], line[h['csec']], line[h['vact']], line[h['vsig']], c_livetime, c_ngwtrg, c_dsec, c_csec, c_vact, lineidx]
            gwtrg_vtd = line[h['gwtrg_vetoed']]
            # iterate through trg's and fill in gwtrg_vtd_tcent
            if gwtrg_vtd[0] != 'NONE':
                for trg in gwtrg_vtd:
                    gwtrg_vtd_tcent.append([trg[col_kw['tcent']], vconfig, vstats])
                    # tcentidx += 1

    # sort gwtrg_vtd_tcent by tcent for ease of use by the caller
    return sorted(gwtrg_vtd_tcent, key=lambda gwtrg: gwtrg[0])
407
def Convert_ovl_chan(chan, channels):
    """
    Convert a channel name into an integer (or vice versa) using channels
    (a file) as a dictionary.

    channels should correspond to a file in which each line is a channel
    name, and the (0-based) line number defines the int. The idea is to
    convert between OVL channel integers and channel names quickly.

    Returns the mapped value, or False when chan is not found / out of
    range. (Bug fix: the original int branch called len() and indexing on
    the open file object, which raises TypeError, and the string branch
    compared against lines still carrying their trailing newline, so it
    could never match a plain channel name.)
    """
    # Read the whole map once; strip newlines so names compare cleanly.
    with open(channels, 'r') as channels_file:
        names = [line.rstrip('\n') for line in channels_file]

    # if chan is a string, we return an int
    if isinstance(chan, str):
        for chan_int, name in enumerate(names):
            if name == chan:
                return chan_int
        return False  # chan wasn't in channels_file

    # if chan is an int (or float), we return a string
    if isinstance(chan, (int, float)):
        chan_int = int(chan)
        if 0 <= chan_int < len(names):
            return names[chan_int]
        else:
            return False  # not in channels_file
435