pylal.sngl_tmplt

1 # Copyright (C) 2012 Matthew West 2 # 3 # This program is free software; you can redistribute it and/or modify it 4 # under the terms of the GNU General Public License as published by the 5 # Free Software Foundation; either version 2 of the License, or (at your 6 # option) any later version. 7 # 8 # This program is distributed in the hope that it will be useful, but 9 # WITHOUT ANY WARRANTY; without even the implied warranty of 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 # Public License for more details. 12 # 13 # You should have received a copy of the GNU General Public License along 14 # with this program; if not, write to the Free Software Foundation, Inc., 15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 17 18 # 19 # ============================================================================= 20 # 21 # Preamble 22 # 23 # ============================================================================= 24 # 25 26 """ 27 A collection of utilities that allow one to estimate a false alarm rate from 28 triggers of a single template without the need for doing time-slides. 29 """ 30 31 import sqlite3 32 import numpy as np 33 from scipy import stats 34 import bisect 35 36 from glue import iterutils 37 from glue import segments 38 from glue.ligolw import dbtables 39 40 from pylal import ligolw_sqlutils as sqlutils 41 from pylal import ligolw_cbc_compute_durations as compute_dur 42 43 44 45 # 46 # ============================================================================= 47 # 48 # Library API 49 # 50 # ============================================================================= 51 # 52

53 -def get_newsnr(snr, chisq, chisq_dof):

54 fac = 6.0 55 rchisq = chisq/(2*chisq_dof-2) 56 newsnr = snr/(0.5*(1+rchisq**(fac/2.0)))**(1.0/fac) 57 if rchisq < 1.0: 58 newsnr = snr 59 return newsnr

60

61 -def get_effsnr(snr, chisq, chisq_dof):

62 fac = 250.0 63 rchisq = chisq/(2*chisq_dof-2) 64 effsnr = snr/((1 + snr**2/fac)*rchisq)**(1./4) 65 return effsnr

66

67 -def get_snr(snr, chisq, chisq_dof):

68 return snr

69

70 -def get_snr_over_chi(snr, chisq, chisq_dof):

71 return snr/chisq**(1./2)

72

73 -def set_getsnr_function(connection, statistic):

74 """ 75 Set the get_snr SQL function. Available options are: 76 -- "rawsnr", "newsnr", "effsnr", "snroverchi" 77 """ 78 if statistic == "rawsnr": 79 connection.create_function('get_snr', 3, get_snr) 80 elif statistic == "newsnr": 81 connection.create_function('get_snr', 3, get_newsnr) 82 elif statistic == "effsnr": 83 connection.create_function('get_snr', 3, get_effsnr) 84 elif statistic == "snroverchi": 85 connection.create_function('get_snr', 3, get_snr_over_chi) 86 else: 87 raise ValueError, "%s is not a valid snr statistic" % statistic

88

89 -def quadrature_sum(tuple):

90 snrsq_array = np.array(tuple)**2. 91 comb_snr = np.sum( snrsq_array )**(1/2.) 92 return comb_snr

93

94 -def end_time_w_ns(end_time, end_time_ns):

95 time = end_time + 1e-9*end_time_ns 96 return time

97

98 -def compute_cumrate(hist, T):

99 # create the survival function 100 cum_hist = np.cumsum( hist[::-1] )[::-1] 101 102 rate = {} 103 # moments of rate pdf assuming that the bkgd coincs are a Poisson process 104 rate["mode"] = cum_hist / T 105 rate["mean"] = (cum_hist + 1) / T 106 rate["stdev"] = (cum_hist + 1)**(1/2.) / T 107 108 # compute 1-sigma peak density confidence interval for each snr-bin in 109 # cumulative rate histogram 110 sigma = 0.674490 111 rate['lower_lim'] = np.zeros(len(rate['mode'])) 112 rate['upper_lim'] = np.zeros(len(rate['mode'])) 113 for i, R in enumerate(rate['mode']): 114 # define limits to likely rate given peak and stdev 115 max_range = rate['mode'][i]+5*rate['stdev'][i] 116 if 5*rate['stdev'][i] < rate['mode'][i]: 117 min_range = rate['mode'][i]-5*rate['stdev'][i] 118 else: 119 min_range = 0.0 120 r = np.linspace(min_range, max_range, 1e4+1) 121 122 #FIXME: using self-made gamma distribution because one in scipy 123 # version 0.7.2 is broken 124 cdf = stats.gamma.cdf(r, cum_hist[i]+1, scale=1./T) 125 pdf = (cdf[1:]-cdf[:-1])/(r[1]-r[0]) 126 127 sort_idx = np.argsort(pdf)[::-1] 128 norm_cumsum = np.cumsum(np.sort(pdf)[::-1])/np.sum(pdf) 129 idx = np.argmin(np.abs(norm_cumsum - sigma)) 130 rate['lower_lim'][i] += R - r[np.min(sort_idx[:idx+1])] 131 rate['upper_lim'][i] += r[np.max(sort_idx[:idx+1])] - R 132 133 return rate

134

135 -def calc_far( snrs, cn_snr, snr_bins, far_column, t_fgd, t_bkgd ):

136 137 significance = [] 138 for snr in snrs: 139 # which bin the coinc event falls into 140 idx = bisect.bisect_right(snr_bins, snr) - 1 141 if idx == (len(snr_bins) - 1): 142 N = 0.0 143 else: 144 N = cn_snr[idx] 145 146 # compute desired statistic 147 if far_column == 'peak_far': 148 significance.append( N/t_bkgd ) 149 elif far_column == 'mean_far': 150 significance.append( (N+1)/t_bkgd ) 151 elif far_column == 'marginalized_fap': 152 significance.append( 1-(1+t_fgd/t_bkgd)**(-N-1) ) 153 154 return significance

155 156 # 157 # ============================================================================= 158 # 159 # SNR Histograms 160 # 161 # ============================================================================= 162 # 163

164 -def sngl_snr_hist( 165 connection, 166 ifo, 167 mchirp, 168 eta, 169 min_snr, 170 snr_stat = None, 171 sngls_width = None, 172 usertag = "FULL_DATA", 173 datatype = None, 174 sngls_bins = None):

175 """ 176 Creates a histogram of sngl_inspiral triggers and returns a list of counts 177 and the associated snr bins. 178 179 @connection: connection to a SQLite database with lsctables 180 @ifo: the instrument one desires triggers from 181 @mchirp: the chirp mass from the desired template 182 @eta: the symmetric mass ratio from the desired template 183 @min_snr: a lower threshold on the value of the snr_stat 184 @sngls_width: the bin width for the histogram 185 @usertag: the usertag for the triggers. The default is "FULL_DATA". 186 @datatype: the datatype (all_data, slide, ...) if single-ifo triggers from 187 coincident events is desired. The default is to collect all triggers. 188 @sngls_bins: a list of bin edges for the snr-histogram 189 """ 190 191 # create function for the desired snr statistic 192 set_getsnr_function(connection, snr_stat) 193 194 connection.create_function('end_time_w_ns', 2, end_time_w_ns) 195 196 # set SQL statement parameters 197 sql_params_dict = { 198 "mchirp": mchirp, "eta": eta, 199 "min_snr": min_snr, "ifo": ifo, 200 "usertag": usertag} 201 202 # SQLite query to get a list of (snr, gps-time) tuples for inspiral triggers 203 sqlquery = """ 204 SELECT DISTINCT 205 get_snr(snr, chisq, chisq_dof) as snr_stat, 206 end_time_w_ns(end_time, end_time_ns) 207 FROM sngl_inspiral 208 JOIN process_params ON ( 209 process_params.process_id == sngl_inspiral.process_id) """ 210 # if a datatype is given, get only the inspiral triggers from coincs of that type 211 if datatype: 212 sqlquery += """ 213 JOIN coinc_event_map, experiment_map, experiment_summary ON ( 214 coinc_event_map.event_id == sngl_inspiral.event_id 215 AND experiment_map.coinc_event_id == coinc_event_map.coinc_event_id 216 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id) """ 217 sqlquery += """ 218 WHERE 219 sngl_inspiral.ifo == :ifo 220 AND snr_stat >= :min_snr 221 AND sngl_inspiral.mchirp == :mchirp 222 AND sngl_inspiral.eta == :eta 223 AND process_params.value == :usertag """ 224 if datatype: 225 sqlquery += """ 226 AND experiment_summary.datatype == :type 227 """ 228 sql_params_dict["type"] = datatype 229 230 # get dq-veto segments 231 xmldoc = dbtables.get_xml(connection) 232 veto_segments = compute_dur.get_veto_segments(xmldoc, False) 233 veto_segments = veto_segments[ veto_segments.keys()[0] ] 234 235 snr_array = np.array([]) 236 # apply vetoes to the list of trigger times 237 for snr, trig_time in connection.execute( sqlquery, sql_params_dict ): 238 trig_segment = segments.segment(trig_time, trig_time) 239 if not veto_segments[ifo].intersects_segment( trig_segment ): 240 snr_array = np.append( snr_array, snr ) 241 242 if sngls_bins is None: 243 sngls_bins = np.arange(min_snr, np.max(snr_array) + sngls_width, sngls_width) 244 245 # make the binned snr histogram 246 sngls_hist, _ = np.histogram(snr_array, bins=sngls_bins) 247 248 return sngls_hist, sngls_bins

249 250

251 -def all_sngl_snr_hist( 252 connection, 253 mchirp, 254 eta, 255 all_ifos, 256 min_snr = 5.5, 257 sngls_width = 0.01, 258 snr_stat = None):

259 """ 260 Creates a pair of dictionaries containing single-ifo snr histograms and 261 their associated snr bins. The keys are the instruments filtered in the 262 analysis. 263 264 @connection: connection to a SQLite database with lsctables 265 @min_snr: a lower threshold on the value of the snr_stat 266 @sngls_width: the bin width for the histogram 267 @mchirp: the chirp mass from the desired template 268 @eta: the symmetric mass ratio from the desired template 269 @all_ifos: a list containing the instruments used in the analysis 270 """ 271 272 sngl_ifo_hist = {} 273 sngl_ifo_midbins = {} 274 # loop over the analyzed ifos 275 for ifo in all_ifos: 276 sngl_ifo_hist[ifo], bins = sngl_snr_hist( 277 connection, 278 ifo, 279 mchirp, eta, 280 min_snr, 281 sngls_width = sngls_width, 282 snr_stat = snr_stat) 283 # define the midpoint of each snr bin 284 sngl_ifo_midbins[ifo] = 0.5*( bins[1:] + bins[:-1] ) 285 286 return sngl_ifo_hist, sngl_ifo_midbins

287 288

289 -def coinc_snr_hist( 290 connection, 291 ifos, 292 mchirp, 293 eta, 294 min_snr = 5.5, 295 datatype = None, 296 slide_id = None, 297 combined_bins = None, 298 snr_stat = None):

299 """ 300 Creates a histogram of coinc_inspiral triggers and returns a list of counts 301 in each of the snr bins. 302 303 @connection: connection to a SQLite database with lsctables 304 @sngls_threshold: a lower threshold on the value of the snr_stat 305 @ifos: a tuple containing the ifos a coinc must come from 306 @mchirp: the chirp mass from the desired template 307 @eta: the symmetric mass ratio from the desired template 308 @datatype: the datatype (all_data, slide, ...) if single-ifo triggers from 309 coincident events is desired. The default is to collect all triggers. 310 @combined_bins: a list of bin edges for the snr-histogram 311 """ 312 313 # create function for the desired snr statistic 314 set_getsnr_function(connection, snr_stat) 315 316 # set SQL statement parameters 317 query_params_dict = { 318 "ifo1": ifos[0], "ifo2": ifos[1], 319 "type": datatype, 320 "min_snr": min_snr, 321 "mchirp": mchirp, "eta": eta 322 } 323 324 # get a list of the combined snrs from the coinc_inspiral table 325 sqlquery = """ 326 SELECT 327 coinc_inspiral.coinc_event_id, 328 get_snr(si_ifo1.snr, si_ifo1.chisq, si_ifo1.chisq_dof), 329 get_snr(si_ifo2.snr, si_ifo2.chisq, si_ifo2.chisq_dof)""" 330 if len(ifos) > 2: 331 sqlquery += """, 332 get_snr(si_ifo3.snr, si_ifo3.chisq, si_ifo3.chisq_dof)""" 333 query_params_dict["ifo3"] = ifos[2] 334 335 sqlquery += """ 336 FROM coinc_inspiral 337 JOIN sngl_inspiral AS si_ifo1, coinc_event_map AS cem_ifo1 ON ( 338 coinc_inspiral.coinc_event_id == cem_ifo1.coinc_event_id 339 AND cem_ifo1.event_id == si_ifo1.event_id 340 AND si_ifo1.ifo == :ifo1) 341 JOIN sngl_inspiral AS si_ifo2, coinc_event_map AS cem_ifo2 ON ( 342 coinc_inspiral.coinc_event_id == cem_ifo2.coinc_event_id 343 AND cem_ifo2.event_id == si_ifo2.event_id 344 AND si_ifo2.ifo == :ifo2)""" 345 if len(ifos) > 2: 346 sqlquery += """ 347 JOIN sngl_inspiral AS si_ifo3, coinc_event_map AS cem_ifo3 ON ( 348 coinc_inspiral.coinc_event_id == cem_ifo3.coinc_event_id 349 AND cem_ifo3.event_id == si_ifo3.event_id 350 AND si_ifo3.ifo == :ifo3)""" 351 352 sqlquery += """ 353 JOIN experiment_map AS expr_map, experiment_summary AS expr_summ ON ( 354 expr_map.coinc_event_id == coinc_inspiral.coinc_event_id 355 AND expr_summ.experiment_summ_id == expr_map.experiment_summ_id) 356 WHERE 357 expr_summ.datatype == :type 358 AND si_ifo1.mchirp == :mchirp 359 AND si_ifo1.eta == :eta 360 AND get_snr(si_ifo1.snr, si_ifo1.chisq, si_ifo1.chisq_dof) >= :min_snr 361 AND get_snr(si_ifo2.snr, si_ifo2.chisq, si_ifo2.chisq_dof) >= :min_snr""" 362 if len(ifos) > 2: 363 sqlquery += """ 364 AND get_snr(si_ifo3.snr, si_ifo3.chisq, si_ifo3.chisq_dof) >= :min_snr""" 365 if slide_id: 366 query_params_dict['tsid'] = slide_id 367 sqlquery += """ 368 AND expr_summ.time_slide_id == :tsid """ 369 370 # execute query 371 events = connection.execute(sqlquery, query_params_dict).fetchall() 372 373 coincs = {} 374 coincs['snrs'] = np.array([ quadrature_sum(event[1:]) for event in events ]) 375 coincs['ids'] = [event[0] for event in events] 376 377 # make histogram of coinc snr 378 binned_snr, _ = np.histogram(coincs['snrs'], bins=combined_bins) 379 coincs['snr_hist'] = map(float, binned_snr) 380 381 return coincs

382 383

384 -def all_possible_coincs( 385 sngl_ifo_hist, 386 sngl_ifo_midbins, 387 combined_bins, 388 ifos 389 ):

390 """ 391 Creates a histogram of all possible coincident events and returns a list of counts 392 in each of the snr bins. This is made using the single-ifo snr histograms. 393 394 @sngl_ifo_hist: a dictionary containing an snr histogram for each IFO 395 @sngl_ifo_midbins: a dictionary the snr mid-bin values for sngl_ifo_hist 396 @combined_bins: a list of bin edges for the combined-snr histogram 397 @zerolag_coinc_hist: the snr histogram for zerolag coincident events 398 @ifos: a list of analyzed instruments to generate a coinc 399 """ 400 401 if len(ifos) > 3: 402 raise ValueError, "Can only estimate FARs for doubles & triples" 403 404 binWidth = combined_bins[1] - combined_bins[0] 405 combined_counts = np.zeros(len(combined_bins)-1) 406 407 # for computing doubles rate: N01_ij = n0_i*n1_j 408 N01 = np.outer(sngl_ifo_hist[ifos[0]], sngl_ifo_hist[ifos[1]]) 409 len0, len1 = N01.shape 410 for idx0, snr0 in enumerate(sngl_ifo_midbins[ifos[0]]): 411 for idx1, snr1 in enumerate(sngl_ifo_midbins[ifos[1]]): 412 if len(ifos) == 2: 413 combined_snr = quadrature_sum( (snr0, snr1) ) 414 index = int( np.floor((combined_snr - min(combined_bins))/binWidth) ) 415 combined_counts[index] += N01[idx0,idx1] 416 else: 417 # for computing triples rate: N012_ijk = n0_i*n1_j*n2_k 418 N012 = np.outer(N01, sngl_ifo_hist[ifos[2]]) 419 N012 = N_012.reshape(len0, len1, N012.size/(len0*len1)) 420 for idx2, snr2 in enumerate(sngl_ifo_midbins[ifos[2]]): 421 combined_snr = quadrature_sum( (snr0, snr1, snr2) ) 422 index = int( np.floor((combined_snr - min(combined_bins))/binWidth) ) 423 combined_counts[index] += N012[idx0,idx1,idx2] 424 425 return combined_counts

426 427 # 428 # ============================================================================= 429 # 430 # Time Functions 431 # 432 # ============================================================================= 433 # 434

435 -def coinc_time(connection, datatype, inc_ifo_list, unit, tsid=None):

436 params_dict = {'type': datatype} 437 cursor = connection.cursor() 438 439 sqlquery = """ 440 SELECT duration, instruments 441 FROM experiment_summary 442 JOIN experiment ON ( 443 experiment.experiment_id == experiment_summary.experiment_id) 444 WHERE datatype = :type """ 445 if tsid: 446 params_dict['tsid'] = tsid 447 sqlquery += " AND time_slide_id = :tsid" 448 cursor.execute( sqlquery, params_dict ) 449 450 # all elements of inclusive ifo_list must be found in ifos for T to be included 451 livetime = np.sum([ T for T, ifos in cursor if set(ifos.split(',')).issuperset(set(inc_ifo_list)) ]) 452 453 return sqlutils.convert_duration(livetime, unit)

454 455

456 -def get_singles_times( connection, verbose = False ):

457 # get single-ifo filtered segments 458 ifo_segments = compute_dur.get_single_ifo_segments( 459 connection, program_name = "inspiral", usertag = "FULL_DATA") 460 461 # get all veto segments 462 xmldoc = dbtables.get_xml(connection) 463 veto_segments = compute_dur.get_veto_segments(xmldoc, verbose) 464 465 sngls_durations = {} 466 467 for veto_def_name, veto_seg_dict in veto_segments.items(): 468 post_vetoes_ifosegs = ifo_segments - veto_seg_dict 469 for ifo, segs_list in post_vetoes_ifosegs.items(): 470 sngls_durations[ifo] = float( abs(segs_list) ) 471 472 return sngls_durations

473 474

475 -def get_coinc_window(connection, ifos):

476 # determine the minimum time shift 477 sqlquery = """ 478 SELECT MIN(ABS(offset)) 479 FROM time_slide 480 WHERE offset != 0.0 481 """ 482 shift = connection.execute( sqlquery ).fetchone()[0] 483 484 # SQL query to get gps end-times for a given type of double 485 sqlquery = """ 486 SELECT DISTINCT 487 si_ifo1.end_time, si_ifo2.end_time, 488 si_ifo1.end_time_ns, si_ifo2.end_time_ns 489 FROM coinc_inspiral 490 JOIN sngl_inspiral AS si_ifo1, coinc_event_map AS cem_ifo1 ON ( 491 coinc_inspiral.coinc_event_id == cem_ifo1.coinc_event_id 492 AND cem_ifo1.event_id == si_ifo1.event_id 493 AND si_ifo1.ifo == ?) 494 JOIN sngl_inspiral AS si_ifo2, coinc_event_map AS cem_ifo2 ON ( 495 coinc_inspiral.coinc_event_id == cem_ifo2.coinc_event_id 496 AND cem_ifo2.event_id == si_ifo2.event_id 497 AND si_ifo2.ifo == ?) 498 """ 499 # loop over pairs of instruments from the ifos list 500 tau = {} 501 for ifo_pair in iterutils.choices(ifos, 2): 502 503 toa_diff = np.array([]) 504 # determine the difference in trigger end-times after sliding the times 505 for coinc in connection.execute( sqlquery, tuple(ifos) ): 506 dT_sec = (coinc[0]-coinc[1]) + (coinc[2]-coinc[3])*1e-9 507 num_shifts = np.round(dT_sec/shift) 508 toa_diff = np.append( toa_diff, dT_sec - num_shifts*shift ) 509 510 # the max-min of time differences defines the size of the coinc window 511 tau[','.join(ifos)] = np.max(toa_diff) - np.min(toa_diff) 512 513 return tau

514 515

516 -def eff_bkgd_time(T_i, tau_ij, ifos, unit):

517 # numerator is the product of the relevant single-ifo analyzed times 518 numerator = np.prod([T for (ifo, T) in T_i.items() if ifo in ifos]) 519 520 # sorted list of the coincidence windows from smallest to largest 521 taus = sorted(tau_ij.values()) 522 # denominator is a non-trivial combination of the coincident windows 523 if len(ifos) == 2: 524 denominator = taus[0] 525 elif len(ifos) == 3: 526 denominator = 0.5*tau[0]*(tau[2] + tau[1]) - \ 527 0.25*( (tau[2]-tau[1])**2. + tau[0]**2. ) 528 else: 529 raise ValueError, "Can only estimate background times for double & triples" 530 531 return sqlutils.convert_duration(numerator/denominator, unit)

532

Source Code for Module pylal.sngl_tmplt_far