
Source Code for Module pylal.upper_limit_utils

from __future__ import division
import numpy
import bisect
import sys

from glue.ligolw import lsctables
from glue.ligolw import dbtables
from lal import rate

def margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo errors, assumed to be independent between each experiment.
    '''
    if mcerrs is None:
        mcerrs = [0]*len(VTs)

    # combine experiments, propagating statistical uncertainties
    # in the measured efficiency
    likely = 1
    for vA, dvA, mc in zip(VTs, lambs, mcerrs):
        if mc == 0:
            # we have perfectly measured our efficiency in this mass bin,
            # so the posterior is given by eqn (11) in Biswas et al. [arXiv:0710.0465]
            likely *= (1 + mu*vA*dvA)*numpy.exp(-mu*vA)
        else:
            # we have uncertainty in our efficiency in this mass bin and
            # want to marginalize it out using eqn (24) of Biswas et al.
            k = (vA/mc)**2  # k is 1./fractional_error**2
            likely *= (1 + mu*vA*(1/k + dvA))*(1 + mu*vA/k)**(-(k + 1))

    return likely

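A minimal usage sketch; the volumes, loudest-event likelihood parameters and rate grid below are illustrative, not taken from any real search:

    import numpy
    from pylal import upper_limit_utils

    mu = numpy.linspace(0., 1e-6, 1000)   # rate grid
    VTs = numpy.array([2e7, 3e7])         # sensitive <VT> of each experiment
    lambs = numpy.array([0.1, 0.5])       # loudest-event likelihood parameters
    post = upper_limit_utils.margLikelihoodMonteCarlo(VTs, lambs, mu)
    # with e.g. a 10% calibration uncertainty, use margLikelihood instead:
    # post = upper_limit_utils.margLikelihood(VTs, lambs, mu, calerr=0.1)
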
def margLikelihood(VTs, lambs, mu, calerr=0, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo and calibration errors. The vector VTs contains the sensitive
    volumes of the independent searches and lambs is the vector of loudest
    event likelihood parameters. The statistical errors are assumed to be
    independent between experiments, while the calibration errors are applied
    identically to each experiment.
    '''
    if calerr == 0:
        return margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs)

    std = calerr
    mean = 0  # median volume = 1

    # assume we measured the volume to a factor of three or better
    fracerrs = numpy.linspace(0.33, 3, 100)
    # log-normal pdf
    errdist = numpy.exp(-(numpy.log(fracerrs) - mean)**2/(2*std**2))/(fracerrs*std)
    errdist /= errdist.sum()  # normalize

    # marginalize over errors
    likely = sum([pd*margLikelihoodMonteCarlo(delta*VTs, lambs, mu, mcerrs) for delta, pd in zip(fracerrs, errdist)])

    return likely

def integral_element(mu, pdf):
    '''
    Returns an array of elements of the integrand dP = p(mu) dmu
    for a density p(mu) defined at sample values mu; the samples need
    not be equally spaced. Uses a simple trapezium rule.
    The number of dP elements is (number of mu samples) - 1.
    '''
    dmu = mu[1:] - mu[:-1]
    bin_mean = (pdf[1:] + pdf[:-1])/2
    return dmu * bin_mean

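As a quick sanity check (values illustrative), the elements sum to the trapezium-rule integral:

    import numpy
    from pylal import upper_limit_utils

    mu = numpy.array([0., 1., 3., 6.])    # unevenly spaced samples
    pdf = numpy.exp(-mu)
    dp = upper_limit_utils.integral_element(mu, pdf)
    assert abs(dp.sum() - numpy.trapz(pdf, mu)) < 1e-12
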
def normalize_pdf(mu, pofmu):
    """
    Takes a function pofmu defined at rate sample values mu and
    normalizes it to be a suitable pdf. Both mu and pofmu must be
    arrays or lists of the same length.
    """
    if min(pofmu) < 0:
        raise ValueError("Probabilities cannot be negative, don't "
                         "ask me to normalize a function with negative values!")
    if min(mu) < 0:  # maybe not necessary ?
        raise ValueError("Rates cannot be negative, don't "
                         "ask me to normalize a function over a negative domain!")

    dp = integral_element(mu, pofmu)
    return mu, pofmu/sum(dp)

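For instance (illustrative), normalizing an unnormalized bump so its trapezium-rule integral is unity:

    import numpy
    from pylal import upper_limit_utils

    mu = numpy.linspace(0., 10., 501)
    pofmu = numpy.exp(-(mu - 3.)**2)      # unnormalized bump
    mu, pofmu = upper_limit_utils.normalize_pdf(mu, pofmu)
    # the integral elements of the returned pdf now sum to 1
    assert abs(upper_limit_utils.integral_element(mu, pofmu).sum() - 1.) < 1e-9
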
def compute_upper_limit(mu_in, post, alpha=0.9):
    """
    Returns the upper limit mu_high of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        high_idx = bisect.bisect_left(dp.cumsum()/dp.sum(), alpha)
        # if alpha is in (0,1] and post is non-negative, bisect_left
        # will always return an index in the range of mu since
        # dp.cumsum()/dp.sum() will always begin at 0 and end at 1
        mu_high = mu_in[high_idx]
    elif alpha == 1:
        mu_high = numpy.max(mu_in[post > 0])
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_high

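A sketch with illustrative numbers: for a posterior proportional to exp(-mu*VT) (the loudest-event posterior with negligible lambda), the 90% upper limit approaches the analytic value -ln(0.1)/VT:

    import numpy
    from pylal import upper_limit_utils

    VT = 2e7                              # illustrative sensitive <VT>
    mu = numpy.linspace(0., 1e-6, 10001)
    post = numpy.exp(-mu * VT)
    mu90 = upper_limit_utils.compute_upper_limit(mu, post, alpha=0.9)
    # analytically, mu90 = -ln(0.1)/VT ~ 1.15e-7
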
def compute_lower_limit(mu_in, post, alpha=0.9):
    """
    Returns the lower limit mu_low of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        low_idx = bisect.bisect_right(dp.cumsum()/dp.sum(), 1 - alpha)
        # if alpha is in (0,1) and post is non-negative, bisect_right
        # will always return an index in the range of mu since
        # dp.cumsum()/dp.sum() will always begin at 0 and end at 1
        mu_low = mu_in[low_idx]
    elif alpha == 1:
        mu_low = numpy.min(mu_in[post > 0])
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_low

def confidence_interval_min_width(mu, post, alpha=0.9):
    '''
    Returns the minimal-width confidence interval [mu_low, mu_high] of
    confidence level alpha for a posterior distribution post on the parameter mu.
    '''
    if not 0 < alpha < 1:
        raise ValueError("Confidence level must be in (0,1).")

    # choose a step size for the sliding confidence window
    alpha_step = 0.01

    # initialize the lower and upper limits
    mu_low = numpy.min(mu)
    mu_high = numpy.max(mu)

    # find the smallest window (by delta-mu) stepping by dalpha
    for ai in numpy.arange(0, 1 - alpha, alpha_step):
        ml = compute_lower_limit(mu, post, 1 - ai)
        mh = compute_upper_limit(mu, post, alpha + ai)
        if mh - ml < mu_high - mu_low:
            mu_low = ml
            mu_high = mh

    return mu_low, mu_high

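For a symmetric unimodal posterior the minimal-width interval is the central one; a sketch with an illustrative Gaussian bump:

    import numpy
    from pylal import upper_limit_utils

    mu = numpy.linspace(0., 10., 1001)
    post = numpy.exp(-(mu - 5.)**2 / 2.)  # unit-width Gaussian bump
    lo, hi = upper_limit_utils.confidence_interval_min_width(mu, post, alpha=0.9)
    # for this symmetric posterior, expect roughly (5 - 1.64, 5 + 1.64)
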
def hpd_coverage(mu, pdf, thresh):
    '''
    Integrates a pdf over mu, taking only bins where
    the mean over the bin is above the given threshold.
    This gives the coverage of the HPD interval for
    the given threshold.
    '''
    dp = integral_element(mu, pdf)
    bin_mean = (pdf[1:] + pdf[:-1])/2

    return dp[bin_mean > thresh].sum()

def hpd_threshold(mu_in, post, alpha, tol):
    '''
    For a PDF post over samples mu_in, find a density
    threshold such that the region having higher density
    has coverage of at least alpha, and less than alpha
    plus a given tolerance.
    '''
    # work with the normalized pdf so that coverages lie in [0, 1]
    mu_in, post = normalize_pdf(mu_in, post)
    # initialize bisection search
    p_minus = 0.0
    p_plus = max(post)
    while abs(hpd_coverage(mu_in, post, p_minus) - hpd_coverage(mu_in, post, p_plus)) >= tol:
        p_test = (p_minus + p_plus)/2
        if hpd_coverage(mu_in, post, p_test) >= alpha:
            # test value was too low or just right
            p_minus = p_test
        else:
            # test value was too high
            p_plus = p_test
    # p_minus never goes above the required threshold and p_plus never goes below;
    # thus on exiting, p_minus is at or below the required threshold and the
    # difference in coverage is within tolerance

    return p_minus

def hpd_credible_interval(mu_in, post, alpha=0.9, tolerance=1e-3):
    '''
    Returns the minimum and maximum rate values of the HPD
    (Highest Posterior Density) credible interval for a posterior
    post defined at the sample values mu_in. Samples need not be
    uniformly spaced and the posterior need not be normalized.

    Will not return a correct credible interval if the posterior
    is multimodal and the correct interval is not contiguous;
    in this case it will over-cover by including the whole range from
    the minimum to the maximum mu.
    '''
    if alpha == 1:
        nonzero_samples = mu_in[post > 0]
        mu_low = numpy.min(nonzero_samples)
        mu_high = numpy.max(nonzero_samples)
    elif 0 < alpha < 1:
        # determine the highest pdf value for which the region with
        # higher density has sufficient coverage
        pthresh = hpd_threshold(mu_in, post, alpha, tol=tolerance)
        samples_over_threshold = mu_in[post > pthresh]
        mu_low = numpy.min(samples_over_threshold)
        mu_high = numpy.max(samples_over_threshold)
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_low, mu_high

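A sketch of the HPD machinery on the same kind of unimodal posterior (values illustrative):

    import numpy
    from pylal import upper_limit_utils

    mu = numpy.linspace(0., 10., 1001)
    post = numpy.exp(-(mu - 5.)**2 / 2.)
    lo, hi = upper_limit_utils.hpd_credible_interval(mu, post, alpha=0.9)
    # for a symmetric unimodal posterior this agrees with the
    # minimal-width interval above
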
def integrate_efficiency(dbins, eff, err=0, logbins=False):
    '''
    Integrate the efficiency eff over the distance bins dbins to obtain
    the sensitive volume, propagating the efficiency errors err.
    NB: logbins is only used in the unit tests.
    '''
    if logbins:
        logd = numpy.log(dbins)
        dlogd = logd[1:] - logd[:-1]
        # log midpoint of each bin
        dreps = numpy.exp((numpy.log(dbins[1:]) + numpy.log(dbins[:-1])) / 2.)
        vol = numpy.sum(4. * numpy.pi * (dreps ** 3.) * eff * dlogd)
        # propagate errors in eff to errors in v
        verr = numpy.sqrt(numpy.sum((4. * numpy.pi * (dreps ** 3.) * err * dlogd) ** 2.))
    else:
        dd = dbins[1:] - dbins[:-1]
        dreps = (dbins[1:] + dbins[:-1]) / 2.  # bin midpoint
        vol = numpy.sum(4. * numpy.pi * (dreps ** 2.) * eff * dd)
        # propagate errors in eff to errors in v
        verr = numpy.sqrt(numpy.sum((4. * numpy.pi * (dreps ** 2.) * err * dd) ** 2.))

    return vol, verr

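As a sanity check (illustrative), unit efficiency in every bin should recover the volume of the sphere of radius max(dbins):

    import numpy
    from pylal import upper_limit_utils

    dbins = numpy.linspace(0., 100., 51)
    eff = numpy.ones(len(dbins) - 1)      # perfectly efficient in every bin
    vol, verr = upper_limit_utils.integrate_efficiency(dbins, eff)
    # vol approximates (4/3)*pi*100**3, up to the midpoint-rule error
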
def compute_efficiency(f_dist, m_dist, dbins):
    '''
    Compute the efficiency as a function of distance for the given sets of found
    and missed injection distances.
    Note that injections that do not fall into any dbin get lost :(.
    '''
    efficiency = numpy.zeros(len(dbins) - 1)
    error = numpy.zeros(len(dbins) - 1)
    for j, dlow in enumerate(dbins[:-1]):
        dhigh = dbins[j + 1]
        found = numpy.sum((dlow <= f_dist) * (f_dist < dhigh))
        missed = numpy.sum((dlow <= m_dist) * (m_dist < dhigh))
        if found + missed == 0:
            missed = 1.0  # avoid divide by 0 in empty bins
        efficiency[j] = found / (found + missed)  # NB division is imported from __future__!
        error[j] = numpy.sqrt(efficiency[j] * (1 - efficiency[j]) / (found + missed))

    return efficiency, error

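A sketch with synthetic found/missed distances (all values illustrative):

    import numpy
    from pylal import upper_limit_utils

    f_dist = numpy.array([10., 20., 30., 40.])   # found injection distances
    m_dist = numpy.array([35., 45., 55.])        # missed injection distances
    dbins = numpy.linspace(0., 60., 4)           # bins [0,20), [20,40), [40,60)
    eff, err = upper_limit_utils.compute_efficiency(f_dist, m_dist, dbins)
    # eff -> [1., 2/3, 1/3]
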
def mean_efficiency_volume(found, missed, dbins):
    '''
    Compute the binned efficiency and its error from the given found and
    missed injections, then integrate over the distance bins to obtain
    the sensitive volume and its error.
    '''
    if len(found) == 0:
        # no efficiency here
        return numpy.zeros(len(dbins) - 1), numpy.zeros(len(dbins) - 1), 0, 0

    # only need distances
    f_dist = numpy.array([l.distance for l in found])
    m_dist = numpy.array([l.distance for l in missed])

    # compute the efficiency and its variance
    eff, err = compute_efficiency(f_dist, m_dist, dbins)
    vol, verr = integrate_efficiency(dbins, eff, err)

    return eff, err, vol, verr

def volume_montecarlo(found, missed, distribution_param, distribution, limits_param, max_param=None, min_param=None):
    '''
    Compute the sensitive volume and standard error using a direct Monte Carlo integral.

    * distribution_param, D: parameter of the injections used to generate a distribution over distance
      - may be 'distance', 'chirp_distance'
    * distribution: form of the distribution over the parameter
      - 'log' (uniform in log D), 'uniform' (uniform in D), 'distancesquared' (uniform in D**2),
        'volume' (uniform in D**3)
      - It is assumed that injections were carried out over a range of D such that the sensitive
        volume due to signals at distances < D_min is negligible and the efficiency at distances
        > D_max is negligibly small
    * limits_param, Dlim: parameter specifying the limits within which injections were made
      - may be 'distance', 'chirp_distance'
    * max_param: maximum value of Dlim out to which injections were made; if None, the maximum
      value among all found and missed injections will be used
    * min_param: minimum value of Dlim at which injections were made - needed to normalize
      the log distance integral correctly. If None, for the log distribution the minimum
      value among all found and missed injections will be used
    '''
    d_power = {
        'log'             : 3.,
        'uniform'         : 2.,
        'distancesquared' : 1.,
        'volume'          : 0.
    }[distribution]
    mchirp_power = {
        'log'             : 0.,
        'uniform'         : 5. / 6.,
        'distancesquared' : 5. / 3.,
        'volume'          : 5. / 2.
    }[distribution]

    found_d = numpy.array([inj.distance for inj in found])
    missed_d = numpy.array([inj.distance for inj in missed])

    # establish the maximum physical distance: first in the case of a chirp distance limit
    if limits_param == 'chirp_distance':
        mchirp_standard_bns = 1.4 * (2. ** (-1. / 5.))
        found_mchirp = numpy.array([inj.mchirp for inj in found])
        missed_mchirp = numpy.array([inj.mchirp for inj in missed])
        all_mchirp = numpy.concatenate((found_mchirp, missed_mchirp))
        max_mchirp = numpy.max(all_mchirp)
        if max_param is not None:
            # use the largest actually injected mchirp for the conversion
            max_distance = max_param * (max_mchirp / mchirp_standard_bns) ** (5. / 6.)
    elif limits_param == 'distance':
        max_distance = max_param
    else:
        raise NotImplementedError("%s is not a recognized parameter" % limits_param)

    # if no maximum distance is given, use the maximum distance actually injected
    if max_param is None:
        max_distance = max(numpy.max(found_d), numpy.max(missed_d))

    # volume of the sphere out to max_distance
    montecarlo_vtot = (4. / 3.) * numpy.pi * max_distance ** 3.

    # arrays of weights for the MC integral
    if distribution_param == 'distance':
        found_weights = found_d ** d_power
        missed_weights = missed_d ** d_power
    elif distribution_param == 'chirp_distance':
        # weight by a power of mchirp to rescale the injection density to the
        # target mass distribution
        found_weights = found_d ** d_power * found_mchirp ** mchirp_power
        missed_weights = missed_d ** d_power * missed_mchirp ** mchirp_power
    else:
        raise NotImplementedError("%s is not a recognized distance parameter" % distribution_param)

    all_weights = numpy.concatenate((found_weights, missed_weights))

    # the MC integral is the volume of the sphere times
    # (sum of found weights)/(sum of all weights) over injections covering the sphere
    mc_weight_samples = numpy.concatenate((found_weights, 0 * missed_weights))
    mc_sum = sum(mc_weight_samples)

    if limits_param == 'distance':
        mc_norm = sum(all_weights)
    elif limits_param == 'chirp_distance':
        # if injections are made up to a maximum chirp distance, account for the
        # extra missed injections that would occur when injecting up to the
        # maximum physical distance: this works out to a 'chirp volume' factor
        mc_norm = sum(all_weights * (max_mchirp / all_mchirp) ** (5. / 2.))

    # take out a constant factor
    mc_prefactor = montecarlo_vtot / mc_norm

    # count the samples
    if limits_param == 'distance':
        Ninj = len(mc_weight_samples)
    elif limits_param == 'chirp_distance':
        # find the total expected number after extending from the maximum chirp
        # distance up to the maximum physical distance
        if distribution == 'log':
            # need the minimum distance only in this case
            if min_param is not None:
                min_distance = min_param * (numpy.min(all_mchirp) / mchirp_standard_bns) ** (5. / 6.)
            else:
                min_distance = min(numpy.min(found_d), numpy.min(missed_d))
            logrange = numpy.log(max_distance / min_distance)
            Ninj = len(mc_weight_samples) + (5. / 6.) * sum(numpy.log(max_mchirp / all_mchirp) / logrange)
        else:
            Ninj = sum((max_mchirp / all_mchirp) ** mchirp_power)

    # sample variance of the injection efficiency: mean of the square minus square of the mean
    mc_sample_variance = sum(mc_weight_samples ** 2.) / Ninj - (mc_sum / Ninj) ** 2.

    # return the MC integral and its standard deviation; the variance of mc_sum
    # scales relative to the sample variance by Ninj (Bienayme' rule)
    return mc_prefactor * mc_sum, mc_prefactor * (Ninj * mc_sample_variance) ** 0.5

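A usage sketch with mock injections (all values illustrative); only the .distance attribute (and .mchirp for the chirp-distance options) is accessed, so a namedtuple can stand in for sim_inspiral rows:

    from collections import namedtuple
    from pylal import upper_limit_utils

    Inj = namedtuple('Inj', 'distance mchirp')
    found = [Inj(d, 1.2) for d in (10., 20., 30.)]
    missed = [Inj(d, 1.2) for d in (40., 50.)]
    vol, vol_err = upper_limit_utils.volume_montecarlo(
        found, missed, distribution_param='distance',
        distribution='uniform', limits_param='distance')
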
def filter_injections_by_mass(injs, mbins, bin_num, bin_type, bin_num2=None):
    '''
    For a given set of injections (sim_inspiral rows), return the subset
    of injections that fall within the given mass range.
    '''
    if bin_type == "Mass1_Mass2":
        m1bins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
        m1lo = m1bins[bin_num]
        m1hi = m1bins[bin_num + 1]
        m2bins = numpy.concatenate((mbins.lower()[1], numpy.array([mbins.upper()[1][-1]])))
        m2lo = m2bins[bin_num2]
        m2hi = m2bins[bin_num2 + 1]
        newinjs = [l for l in injs if ((m1lo <= l.mass1 < m1hi and m2lo <= l.mass2 < m2hi) or (m1lo <= l.mass2 < m1hi and m2lo <= l.mass1 < m2hi))]
        return newinjs

    mbins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
    mlow = mbins[bin_num]
    mhigh = mbins[bin_num + 1]
    if bin_type == "Chirp_Mass":
        newinjs = [l for l in injs if (mlow <= l.mchirp < mhigh)]
    elif bin_type == "Total_Mass":
        newinjs = [l for l in injs if (mlow <= l.mass1 + l.mass2 < mhigh)]
    elif bin_type == "Component_Mass":
        # it is assumed that m2 is fixed
        newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh)]
    elif bin_type == "BNS_BBH":
        if bin_num in (0, 2):
            # BNS/BBH case
            newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh and mlow <= l.mass2 < mhigh)]
        else:
            # NSBH
            newinjs = [l for l in injs if (mbins[0] <= l.mass1 < mbins[1] and mbins[2] <= l.mass2 < mbins[3])]
            # BHNS
            newinjs += [l for l in injs if (mbins[0] <= l.mass2 < mbins[1] and mbins[2] <= l.mass1 < mbins[3])]

    return newinjs

def compute_volume_vs_mass(found, missed, mass_bins, bin_type, dbins=None,
                           distribution_param=None, distribution=None, limits_param=None,
                           max_param=None, min_param=None):
    """
    Compute the average luminosity to which an experiment was sensitive, given the
    sets of found and missed injections and assuming that luminosity is uniformly
    distributed in space.

    If distribution_param, distribution and limits_param are all not None, use an
    unbinned Monte Carlo integral (which optionally takes max_param and min_param)
    for the volume and its error.
    Otherwise use a simple efficiency * distance**2 binned integral.
    In either case use the distance bins to return the efficiency in each mass bin.
    """
    # initialize MC volume integral method: binned or unbinned
    if distribution_param is not None and distribution is not None and limits_param is not None:
        mc_unbinned = True
    else:
        mc_unbinned = False

    # mean and std estimate for the sensitive volume averaged over the search time
    volArray = rate.BinnedArray(mass_bins)
    vol2Array = rate.BinnedArray(mass_bins)

    # found/missed stats
    foundArray = rate.BinnedArray(mass_bins)
    missedArray = rate.BinnedArray(mass_bins)

    # efficiency over distance and its standard (binomial) error
    effvmass = []
    errvmass = []

    if bin_type == "Mass1_Mass2":  # two-dimensional case first
        for j, mc1 in enumerate(mass_bins.centres()[0]):
            for k, mc2 in enumerate(mass_bins.centres()[1]):

                # filter out injections not in this mass bin
                newfound = filter_injections_by_mass(found, mass_bins, j, bin_type, k)
                newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type, k)

                foundArray[(mc1, mc2)] = len(newfound)
                missedArray[(mc1, mc2)] = len(newmissed)

                # compute the volume using this injection set
                meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
                effvmass.append(meaneff)
                errvmass.append(efferr)
                if mc_unbinned:
                    meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                                        distribution, limits_param, max_param, min_param)
                volArray[(mc1, mc2)] = meanvol
                vol2Array[(mc1, mc2)] = volerr

        return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass

    for j, mc in enumerate(mass_bins.centres()[0]):

        # filter out injections not in this mass bin
        newfound = filter_injections_by_mass(found, mass_bins, j, bin_type)
        newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type)

        foundArray[(mc,)] = len(newfound)
        missedArray[(mc,)] = len(newmissed)

        # compute the volume using this injection set
        meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
        effvmass.append(meaneff)
        errvmass.append(efferr)
        # if the unbinned MC calculation is available, do it
        if mc_unbinned:
            meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                                distribution, limits_param, max_param, min_param)
        volArray[(mc,)] = meanvol
        vol2Array[(mc,)] = volerr

    return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass

def log_volume_derivative_fit(x, vols):
    '''
    Performs a linear least-squares fit to log(vols) as a function of x.
    '''
    if numpy.min(vols) == 0:
        print >> sys.stderr, "Warning: cannot fit log volume derivative, one or more volumes are zero!"
        print >> sys.stderr, vols
        return (0, 0)

    coeffs, resids, rank, svs, rcond = numpy.polyfit(x, numpy.log(vols), 1, full=True)
    if coeffs[0] < 0:
        # negative derivatives may arise from rounding error
        print >> sys.stderr, "Warning: derivative fit resulted in Lambda =", coeffs[0]
        coeffs[0] = 0
        print >> sys.stderr, "The value Lambda = 0 was substituted"

    return coeffs

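A sketch (illustrative): for volumes growing as exp(lambda*x), the fitted slope recovers lambda:

    import numpy
    from pylal import upper_limit_utils

    x = numpy.array([0., 1., 2., 3.])
    vols = 1e6 * numpy.exp(0.3 * x)   # volumes growing as exp(lambda*x)
    coeffs = upper_limit_utils.log_volume_derivative_fit(x, vols)
    # coeffs[0] recovers lambda ~ 0.3
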
def get_loudest_event(connection, coinc_table="coinc_inspiral", datatype="exclude_play"):
    '''
    Generator yielding, for each instrument combination in the database, the
    minimum combined FAR over coincident events of the given datatype.
    '''
    far_threshold_query = """
    SELECT coinc_event.instruments, MIN(combined_far) FROM %s
        JOIN coinc_event ON (%s.coinc_event_id == coinc_event.coinc_event_id)
        JOIN experiment_map ON (coinc_event.coinc_event_id == experiment_map.coinc_event_id)
        JOIN experiment_summary ON (experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id)
    WHERE experiment_summary.datatype == "%s"
    GROUP BY coinc_event.instruments;
    """ % (coinc_table, coinc_table, datatype)

    for inst, far in connection.cursor().execute(far_threshold_query):
        inst = frozenset(lsctables.instrument_set_from_ifos(inst))
        yield inst, far

    return
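
A usage sketch; the database filename is hypothetical, and the connection must point at a coincidence database containing the coinc_inspiral, coinc_event, experiment_map and experiment_summary tables named in the query:

    import sqlite3
    from pylal import upper_limit_utils

    connection = sqlite3.connect("search_results.sqlite")  # hypothetical file
    for inst, far in upper_limit_utils.get_loudest_event(connection):
        print inst, far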