Package pylal :: Module ligolw_cbc_plotvt_utils
[hide private]
[frames] | no frames]

Source Code for Module pylal.ligolw_cbc_plotvt_utils

  1  # Copyright (C) 2012  Matthew West 
  2  # 
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 2 of the License, or (at your 
  6  # option) any later version. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16   
 17   
 18  # 
 19  # ============================================================================= 
 20  # 
 21  #                                 Preamble 
 22  # 
 23  # ============================================================================= 
 24  # 
 25   
 26  """ 
 27  Collection of functions to compute the efficiency and effective 4-volume 
 28  """ 
 29   
 30  import sqlite3 
 31  import math 
 32  import pdb 
 33  from operator import itemgetter 
 34  import numpy 
 35  from scipy import special 
 36   
 37  from glue import segments 
 38  from glue.ligolw import table 
 39  from glue.ligolw import lsctables 
 40  from glue.ligolw import dbtables 
 41   
 42  from pylal import antenna 
 43  from pylal import ligolw_sqlutils as sqlutils 
 44  from pylal import ligolw_cbc_compute_durations as compute_dur 
 45   
 46   
 47  # 
 48  # ============================================================================= 
 49  # 
 50  #                                  
 51  # 
 52  # ============================================================================= 
 53  # 
 54   
 55   
56 -def chirp_dist(distance, mchirp):
57 mchirp_DNS = (1.4+1.4) * (1./4)**(3.0/5.0) 58 59 return distance * (mchirp_DNS/mchirp)**(5.0/6.0)
60
61 -def decisive_dist( 62 h_dist, l_dist, v_dist, 63 mchirp, weight_dist, ifos):
64 65 dist_list = [] 66 if 'H1' in ifos or 'H2' in ifos: 67 dist_list.append(h_dist) 68 if 'L1' in ifos: 69 dist_list.append(l_dist) 70 if 'V1' in ifos: 71 dist_list.append(v_dist) 72 73 if weight_dist: 74 return chirp_dist(sorted(dist_list)[1], mchirp) 75 else: 76 return sorted(dist_list)[1]
77
78 -def end_time_with_ns(end_time, end_time_ns):
79 time = end_time + 1e-9*end_time_ns 80 return time
81
82 -def get_livetime(connection, veto_cat, on_ifos, datatype):
83 sqlquery = """ 84 SELECT duration 85 FROM experiment_summary 86 JOIN experiment ON ( 87 experiment_summary.experiment_id == experiment.experiment_id) 88 WHERE 89 datatype = :0 90 AND veto_def_name = :1 91 AND instruments = :2 """ 92 93 # total livetime in seconds 94 total_dur = numpy.sum(connection.execute(sqlquery, (datatype, veto_cat, on_ifos)).fetchall() ) 95 96 return total_dur
97 98 # 99 # ============================================================================= 100 # 101 # Injections Functions 102 # 103 # ============================================================================= 104 # 105
106 -def inj_dist_range(dist_bounds, dist_scale = "linear", step = 4.0):
107 108 if dist_scale == "linear": 109 dist_bin_edges = numpy.arange(dist_bounds[0]-step, dist_bounds[1]+step, step) 110 elif dist_scale == "log": 111 log_limits = numpy.log10([dist_bounds[0], dist_bounds[1]])/numpy.log10(step) 112 dist_bin_edges = numpy.power( 113 step, 114 numpy.arange(log_limits[0]-1, log_limits[1]+1) 115 ) 116 117 return dist_bin_edges
118 119
120 -def successful_injections( 121 connection, 122 tag, 123 on_ifos, 124 veto_cat, 125 dist_type = "distance", 126 weight_dist = False, 127 verbose = False):
128 129 """ 130 My attempt to get a list of the simulations that actually made 131 it into some level of coincident time 132 """ 133 134 xmldoc = dbtables.get_xml(connection) 135 connection.create_function('end_time_with_ns', 2, end_time_with_ns) 136 137 # Get the veto segments as dictionaries, keyed by veto category 138 veto_segments = compute_dur.get_veto_segments(xmldoc, verbose) 139 140 # ------------------------ Get List of Injections ------------------------ # 141 sql_params_dict = {} 142 sqlquery = """ 143 SELECT DISTINCT 144 simulation_id, 145 end_time_with_ns(geocent_end_time, geocent_end_time_ns),""" 146 # add the desired distance measure to the SQL query 147 if dist_type == "distance": 148 connection.create_function('distance_func', 2, chirp_dist) 149 sqlquery += """ 150 distance_func(distance, sim_inspiral.mchirp) 151 FROM sim_inspiral """ 152 elif dist_type == "decisive_distance": 153 connection.create_function('decisive_dist_func', 6, decisive_dist) 154 sql_params_dict['ifos'] = on_ifos 155 sql_params_dict['weight_dist'] = weight_dist 156 sqlquery += """ 157 decisive_dist_func( 158 eff_dist_h, eff_dist_l, eff_dist_v, 159 sim_inspiral.mchirp, :weight_dist, :ifos) 160 FROM sim_inspiral """ 161 162 if tag != 'ALL_INJ': 163 # if a specific injection set is wanted 164 sqlquery += """ 165 JOIN process_params ON ( 166 process_params.process_id == sim_inspiral.process_id) 167 WHERE process_params.value = :usertag) """ 168 sql_params_dict["usertag"] = tag 169 else: 170 # for all injections 171 tag = 'FULL_DATA' 172 173 # Get segments that define which time was filtered 174 ifo_segments = compute_dur.get_single_ifo_segments( 175 connection, 176 program_name = "inspiral", 177 usertag = tag) 178 179 zero_lag_dict = dict([(ifo, 0.0) for ifo in ifo_segments]) 180 181 successful_inj = [] 182 # determine coincident segments for that veto category 183 coinc_segs = compute_dur.get_coinc_segments( 184 ifo_segments - veto_segments[veto_cat], 185 zero_lag_dict) 186 187 # Apply vetoes to single-ifo filter segments 188 for injection in connection.execute(sqlquery, sql_params_dict): 189 inj_segment = segments.segment(injection[1], injection[1]) 190 if coinc_segs[on_ifos].intersects_segment( inj_segment ): 191 successful_inj.append( injection ) 192 193 return successful_inj
194 195
196 -def found_injections( 197 connection, 198 tag, 199 on_ifos, 200 dist_type = "distance", 201 weight_dist = False, 202 verbose = False):
203 204 connection.create_function('end_time_with_ns', 2, end_time_with_ns) 205 206 sql_params_dict = {'ifos': on_ifos} 207 sqlquery = """ 208 SELECT DISTINCT 209 sim_inspiral.simulation_id, 210 end_time_with_ns(geocent_end_time, geocent_end_time_ns), """ 211 # add the desired distance measure to the SQL query 212 if dist_type == "distance": 213 connection.create_function('distance_func', 2, chirp_dist) 214 sqlquery += """ 215 distance_func(distance, sim_inspiral.mchirp), """ 216 elif dist_type == "decisive_distance": 217 connection.create_function('decisive_dist_func', 6, decisive_dist) 218 sql_params_dict['weight_dist'] = weight_dist 219 sqlquery += """ 220 decisive_dist_func( 221 eff_dist_h, eff_dist_l, eff_dist_v, 222 sim_inspiral.mchirp, :weight_dist, :ifos), """ 223 224 sqlquery += """ 225 false_alarm_rate, 226 coinc_inspiral.snr 227 FROM 228 coinc_event_map AS coincs 229 JOIN coinc_event_map AS sims, coinc_inspiral, coinc_event, sim_inspiral ON ( 230 coincs.coinc_event_id == sims.coinc_event_id 231 AND coinc_event.coinc_event_id == coincs.event_id 232 AND coinc_inspiral.coinc_event_id == coincs.event_id 233 AND sim_inspiral.simulation_id == sims.event_id) 234 JOIN process_params ON ( 235 process_params.process_id == sim_inspiral.process_id) 236 WHERE 237 coincs.table_name = "coinc_event" 238 AND sims.table_name = "sim_inspiral" 239 AND coinc_event.instruments = :ifos """ 240 241 if tag != 'ALL_INJ': 242 sqlquery += """ 243 AND process_params.value = :usertag 244 """ 245 sql_params_dict["tag"] = tag 246 247 injections = set(connection.execute(sqlquery, sql_params_dict).fetchall()) 248 249 # Get foreground coinc events 250 sqlquery = """ 251 SELECT 252 end_time_with_ns(end_time, end_time_ns) AS trig_time, 253 snr AS trig_snr 254 FROM coinc_inspiral AS ci 255 JOIN experiment_map AS em, experiment_summary AS es ON ( 256 ci.coinc_event_id == em.coinc_event_id 257 AND em.experiment_summ_id == es.experiment_summ_id ) 258 WHERE es.datatype == 'all_data'; 259 """ 260 foreground = connection.executescript(sqlquery).fetchall() 261 262 # Remove injection coincs that correspond closely in time and snr to foreground coincs 263 inj_snr = numpy.array([inj[3] for inj in injections]) 264 inj_time = numpy.array([inj[1] for inj in injections]) 265 idx2remove = [] 266 for time, snr in foreground: 267 indices = numpy.where(numpy.abs(inj_time - time) < 1.0) 268 if len(indices[0]): 269 idx = numpy.where(inj_snr[indices]/snr < 1.25) 270 if len(idx[0]): 271 idx2remove += list(indices[idx[0]]) 272 for i in sorted(idx2remove, reverse=True): 273 del injections[i] 274 275 # Sort found injections by FAR (largest to smallest) 276 injections = sorted(injections, key=itemgetter(3), reverse=True) 277 found_inj = [inj[0:3] for inj in injections] 278 inj_fars = [inj[3] for inj in injections] 279 inj_snrs = [inj[4] for inj in injections] 280 281 return found_inj, inj_fars, inj_snrs
282 283
284 -def binomial_confidence(K, N, eff_bin_edges, confidence):
285 """ 286 Calculate the optimal Bayesian credible interval for p(eff|k,n) 287 Posterior generated with binomial p(k|eff,n) and a uniform p(eff) 288 is the beta function: Beta(eff|k+1,n-k+1) where n is the number 289 of injected signals and k is the number of found signals. 290 """ 291 eff_low = numpy.zeros(len(N)) 292 eff_high = numpy.zeros(len(N)) 293 for i, n in enumerate(N): 294 if n!= 0: 295 # construct the point-mass-function 296 eff_cdf = special.betainc(K[i]+1, n-K[i]+1, eff_bin_edges) 297 pmf = ( eff_cdf[1:] - eff_cdf[:-1] ) 298 299 # determine the indices for the highest density interval 300 a = numpy.argsort(pmf)[::-1] 301 sorted_cdf = numpy.cumsum(numpy.sort(pmf)[::-1]) 302 j = numpy.argmin( numpy.abs(sorted_cdf - confidence) ) 303 304 eff_low[i] = eff_bin_edges[:-1][ numpy.min(a[:(j+1)]) ] 305 eff_high[i] = eff_bin_edges[:-1][ numpy.max(a[:(j+1)]) ] 306 307 return eff_low, eff_high
308
309 -def detection_efficiency( 310 successful_inj, 311 found_inj, 312 found_fars, 313 far_list, 314 r, 315 confidence):
316 """ 317 This function determines the peak efficiency for a given bin and associated 318 'highest density' confidence interval. The calculation is done for results 319 from each false-alarm-rate threshold 320 """ 321 # catching any edge cases were the injection end_time is nearly on a second boundary 322 successful_inj = set(successful_inj) | set(found_inj) 323 # histogram of successful injections into coincidence time post vetoes 324 successful_dist = [inj[2] for inj in successful_inj] 325 N, _ = numpy.histogram(successful_dist, bins = r) 326 327 significant_dist = [inj[2] for inj in found_inj] 328 329 eff_bin_edges = numpy.linspace(0, 1, 1e3+1) 330 eff = { 331 'mode': {}, 332 'low': {}, 333 'high': {} 334 } 335 for far in far_list: 336 for idx, coinc_far in enumerate(found_fars): 337 if coinc_far <= far: 338 new_start = idx 339 break 340 # Histogram found injections with FAR < threshold 341 K, _ = numpy.histogram(significant_dist[new_start:], bins = r) 342 eff['mode'][far] = numpy.nan_to_num(numpy.float_(K)/N) 343 344 # computes the confidence interval for each distance bin 345 eff['low'][far], eff['high'][far] = binomial_confidence(K, N, eff_bin_edges, confidence) 346 347 return eff
348 349
350 -def rescale_dist( 351 on_ifos, dist_type, weight_dist, 352 phys_dist=None, param_dist=None 353 ):
354 355 N_signals = int(1e6) 356 trigTime = 0.0 357 358 # if decisive distance is desired, get the antenna responses for each signal 359 if dist_type == 'decisive_distance': 360 # sky position (right ascension & declination) 361 ra = 360 * numpy.random.rand(N_signals) 362 dec = 180 * numpy.random.rand(N_signals) - 90 363 # additional angles 364 inclination = 180 * numpy.random.rand(N_signals) 365 polarization = 360 * numpy.random.rand(N_signals) 366 367 f_q = {} 368 for ifo in on_ifos: 369 f_q[ifo] = numpy.zeros(N_signals) 370 for index in range(N_signals): 371 _, _, _, f_q[ifo][index] = antenna.response( 372 trigTime, 373 ra[index], dec[index], 374 inclination[index], polarization[index], 375 'degree', ifo ) 376 377 prob_d_d = {} 378 for j in range(len(phys_dist)-1): 379 # for this physical distance range, create signals that are uniform in volume 380 volume = 4*numpy.pi/3 * numpy.random.uniform( 381 low = phys_dist[j]**3.0, 382 high = phys_dist[j+1]**3.0, 383 size = N_signals) 384 dist = numpy.power(volume*(3/(4*numpy.pi)), 1./3) 385 386 # create decisive distance (if desired) 387 if dist_type == 'decisive_distance': 388 dist_eff = {} 389 for ifo in on_ifos: 390 dist_eff[ifo] = dist / f_q[ifo] 391 dist_dec = numpy.sort(dist_eff.values(), 0)[1] 392 393 # weight distance measure by chirp mass (if desired) 394 if weight_dist: 395 # Component masses are Gaussian distributed around the Chandrasekar mass 396 mass1, mass2 = 0.13 * numpy.random.randn(2, N_signals) + 1.40 397 mchirp = numpy.power(mass1+mass2, -1./5) * numpy.power(mass1*mass2, 3./5) 398 if dist_type == 'decisive_distance': 399 dist_chirp = chirp_dist(dist_dec, mchirp) 400 if dist_type == 'distance': 401 dist_chirp = chirp_dist(dist, mchirp) 402 N_d, _ = numpy.histogram(dist_chirp, bins=param_dist) 403 else: 404 N_d, _ = numpy.histogram(dist_dec, bins=param_dist) 405 406 prob_d_d[phys_dist[j+1]] = numpy.float_(N_d)/numpy.sum(N_d) 407 408 return prob_d_d
409
410 -def eff_vs_dist(measured_eff, prob_dc_d):
411 """ 412 This function creates a weighted average efficiency as a function of distance 413 by computing eff_wavg(D) = \sum_dc eff_mode(dc)p(dc|d). p(dc|d) is the probability 414 a signal has a parameterized distance dc if its physical distance is d. 415 416 The confidence interval for eff_wavg(d) is constructed from the quadrature sum 417 of the difference between the modes and the bounds, with each term again 418 weighted by p(dc|d). 419 420 This operation is done for each false-alarm-rate threshold. 421 """ 422 eff_dist = { 423 'wavg': {}, 424 'low': {}, 425 'high': {} 426 } 427 for far, modes in measured_eff['mode'].items(): 428 eff_dist['wavg'][far] = numpy.sum(modes * prob_dc_d.values(), 1) 429 eff_dist['low'][far] = numpy.sqrt(numpy.sum( 430 (measured_eff['low'][far] - modes)**2 * prob_dc_d.values(), 1) 431 ) 432 eff_dist['high'][far] = numpy.sqrt(numpy.sum( 433 (measured_eff['high'][far] - modes)**2 * prob_dc_d.values(), 1) 434 ) 435 return eff_dist
436 437
438 -def volume_efficiency(measured_eff, V_shell, prob_dc_d):
439 """ 440 This function creates a weighted average efficiency within a given volume 441 by computing eff_wavg(D) = \sum_dc eff_mode(dc)p(dc|D). p(dc|D) is the 442 probability a signal has a parameterized distance dc if it falls within 443 physical distance D. 444 445 The confidence interval for eff_wavg(D) is constructed from the quadrature sum 446 of the difference between the modes and the bounds, with each term again 447 weighted by p(dc|D). 448 449 This operation is done for each false-alarm-rate threshold. 450 """ 451 452 cumVol = numpy.cumsum(V_shell) 453 p_dc_d = numpy.array(prob_dc_d.values()) 454 V_dc_D = numpy.cumsum((V_shell * p_dc_d.transpose()).transpose(), 0) 455 p_dc_D = (V_dc_D.transpose()/cumVol).transpose() 456 457 vol_eff = { 458 'wavg': {}, 459 'low': {}, 460 'high': {} 461 } 462 for far, modes in measured_eff['mode'].items(): 463 vol_eff['wavg'][far] = numpy.sum(modes * p_dc_D, 1) 464 vol_eff['low'][far] = numpy.sqrt(numpy.sum( 465 (measured_eff['low'][far] - modes)**2 * p_dc_D, 1) 466 ) 467 vol_eff['high'][far] = numpy.sqrt(numpy.sum( 468 (measured_eff['high'][far] - modes)**2 * p_dc_D, 1) 469 ) 470 471 return vol_eff
472