from __future__ import division
from __future__ import print_function
import numpy
import bisect
import sys

from glue.ligolw import lsctables
from glue.ligolw import dbtables
from lal import rate


def margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo errors, assumed to be independent between each experiment.
    '''
    if mcerrs is None:
        # no Monte Carlo errors supplied: treat the volumes as perfectly measured
        mcerrs = [0]*len(VTs)

    # product of the independent loudest-event likelihoods over experiments
    likely = 1
    for vA, dvA, mc in zip(VTs, lambs, mcerrs):
        if mc == 0:
            # loudest-event likelihood for a perfectly known sensitive volume
            likely *= (1+mu*vA*dvA)*numpy.exp(-mu*vA)
        else:
            # marginalize over a Gamma-distributed volume estimate whose
            # fractional uncertainty corresponds to k effective injections
            k = (vA/mc)**2
            likely *= (1+mu*vA*(1/k+dvA))*(1+mu*vA/k)**(-(k+1))

    return likely


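# Illustrative sketch (not part of the original module): an un-normalized rate
# posterior can be built by evaluating the marginalized likelihood on a grid of
# rate values mu. All numbers below are hypothetical.
#
#   VTs = numpy.array([0.03, 0.05])        # sensitive <VT> of each experiment
#   lambs = numpy.array([0.1, 0.4])        # loudest-event likelihood parameters
#   mcerrs = numpy.array([0.003, 0.004])   # Monte Carlo errors on the VTs
#   mus = numpy.linspace(0., 100., 1000)
#   post = numpy.array([margLikelihoodMonteCarlo(VTs, lambs, m, mcerrs) for m in mus])

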
def margLikelihood(VTs, lambs, mu, calerr=0, mcerrs=None):
    '''
    This function marginalizes the loudest event likelihood over unknown
    Monte Carlo and calibration errors. The vector VTs contains the sensitive
    volumes of the independent searches and lambs is the vector of loudest
    event likelihood parameters. The statistical errors are assumed to be
    independent between experiments while the calibration errors are applied
    identically to each experiment.
    '''
    if calerr == 0:
        return margLikelihoodMonteCarlo(VTs, lambs, mu, mcerrs)

    # model the fractional calibration error as log-normally distributed
    std = calerr
    mean = 0

    fracerrs = numpy.linspace(0.33, 3, 100)
    errdist = numpy.exp(-(numpy.log(fracerrs)-mean)**2/(2*std**2))/(fracerrs*std)
    errdist /= errdist.sum()

    # average the Monte-Carlo-marginalized likelihood over the calibration error distribution
    likely = sum([pd*margLikelihoodMonteCarlo(delta*VTs, lambs, mu, mcerrs) for delta, pd in zip(fracerrs, errdist)])

    return likely


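# Illustrative sketch (not part of the original module): the same toy posterior
# as in the sketch above, additionally marginalized over a hypothetical 10%
# calibration uncertainty shared between the experiments.
#
#   post_cal = numpy.array([margLikelihood(VTs, lambs, m, calerr=0.1, mcerrs=mcerrs) for m in mus])

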
def integral_element(mu, pdf):
    '''
    Returns an array of elements of the integrand dP = p(mu) dmu
    for a density p(mu) defined at sample values mu; samples need
    not be equally spaced. Uses a simple trapezium rule.
    The number of dP elements is (number of mu samples) - 1.
    '''
    dmu = mu[1:] - mu[:-1]
    bin_mean = (pdf[1:] + pdf[:-1]) / 2.
    return dmu * bin_mean


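# Illustrative sketch (not part of the original module): a small worked example
# of the trapezium elements for unevenly spaced samples.
#
#   mu = numpy.array([0., 1., 3.])
#   pdf = numpy.array([0., 1., 1.])
#   integral_element(mu, pdf)   # -> array([0.5, 2.0]); total integral 2.5

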
def normalize_pdf(mu, pofmu):
    """
    Takes a function pofmu defined at rate sample values mu and
    normalizes it to be a suitable pdf. Both mu and pofmu must be
    arrays or lists of the same length.
    """
    if min(pofmu) < 0:
        raise ValueError("Probabilities cannot be negative, don't "
                         "ask me to normalize a function with negative values!")
    if min(mu) < 0:
        raise ValueError("Rates cannot be negative, don't "
                         "ask me to normalize a function over a negative domain!")

    dp = integral_element(mu, pofmu)
    return mu, pofmu/sum(dp)


def compute_upper_limit(mu_in, post, alpha=0.9):
    """
    Returns the upper limit mu_high of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        high_idx = bisect.bisect_left( dp.cumsum()/dp.sum(), alpha )
        # since the normalized cumulative sum starts near 0 and ends at 1,
        # bisect_left always returns an index within the range of mu_in
        # for alpha in (0,1)
        mu_high = mu_in[high_idx]
    elif alpha == 1:
        mu_high = numpy.max(mu_in[post > 0])
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_high


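# Illustrative sketch (not part of the original module): a 90% confidence upper
# limit from a toy, un-normalized posterior over the rate mu.
#
#   mu = numpy.linspace(0., 20., 1001)
#   post = mu * numpy.exp(-mu)            # toy posterior shape
#   mu_90 = compute_upper_limit(mu, post, alpha=0.90)

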
def compute_lower_limit(mu_in, post, alpha=0.9):
    """
    Returns the lower limit mu_low of confidence level alpha for a
    posterior distribution post on the given parameter mu.
    The posterior need not be normalized.
    """
    if 0 < alpha < 1:
        dp = integral_element(mu_in, post)
        low_idx = bisect.bisect_right( dp.cumsum()/dp.sum(), 1-alpha )
        # a fraction 1-alpha of the posterior mass lies below mu_low, so the
        # probability that the parameter exceeds mu_low is alpha
        mu_low = mu_in[low_idx]
    elif alpha == 1:
        mu_low = numpy.min(mu_in[post > 0])
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_low


def confidence_interval_min_width(mu, post, alpha=0.9):
    '''
    Returns the minimal-width confidence interval [mu_low, mu_high] of
    confidence level alpha for a posterior distribution post on the parameter mu.
    '''
    if not 0 < alpha < 1:
        raise ValueError("Confidence level must be in (0,1).")

    # step size for scanning over the probability excluded below the interval
    alpha_step = 0.01

    # initialize to the full parameter range
    mu_low = numpy.min(mu)
    mu_high = numpy.max(mu)

    # scan over intervals that exclude a probability ai below and 1-alpha-ai
    # above, keeping the narrowest one found
    for ai in numpy.arange( 0, 1-alpha, alpha_step ):
        ml = compute_lower_limit( mu, post, 1 - ai )
        mh = compute_upper_limit( mu, post, alpha + ai)
        if mh - ml < mu_high - mu_low:
            mu_low = ml
            mu_high = mh

    return mu_low, mu_high


def hpd_coverage(mu, pdf, thresh):
    '''
    Integrates a pdf over mu, taking only bins where
    the mean over the bin is above a given threshold.
    This gives the coverage of the HPD interval for
    the given threshold.
    '''
    dp = integral_element(mu, pdf)
    bin_mean = (pdf[1:] + pdf[:-1]) / 2.

    return dp[bin_mean > thresh].sum()


def hpd_threshold(mu_in, post, alpha, tol):
    '''
    For a PDF post over samples mu_in, find a density
    threshold such that the region having higher density
    has coverage of at least alpha, and less than alpha
    plus a given tolerance.
    '''
    # normalize_pdf returns (mu, pdf); only the normalized density is needed here
    norm_post = normalize_pdf(mu_in, post)[1]

    # bisect on the density threshold: coverage decreases monotonically as the
    # threshold is raised
    p_minus = 0.0
    p_plus = max(norm_post)
    while abs(hpd_coverage(mu_in, norm_post, p_minus)-hpd_coverage(mu_in, norm_post, p_plus)) >= tol:
        p_test = (p_minus + p_plus) / 2.
        if hpd_coverage(mu_in, norm_post, p_test) >= alpha:
            # the trial threshold still covers at least alpha, so it is a
            # valid lower bound on the final threshold
            p_minus = p_test
        else:
            p_plus = p_test

    # on exit p_minus has coverage >= alpha, exceeding alpha by less than tol
    return p_minus


def hpd_credible_interval(mu_in, post, alpha=0.9, tolerance=1e-3):
    '''
    Returns the minimum and maximum rate values of the HPD
    (Highest Posterior Density) credible interval for a posterior
    post defined at the sample values mu_in. Samples need not be
    uniformly spaced and the posterior need not be normalized.

    Will not return a correct credible interval if the posterior
    is multimodal and the correct interval is not contiguous;
    in this case it will over-cover by including the whole range from
    the minimum to the maximum mu above the density threshold.
    '''
    if alpha == 1:
        nonzero_samples = mu_in[post > 0]
        mu_low = numpy.min(nonzero_samples)
        mu_high = numpy.max(nonzero_samples)
    elif 0 < alpha < 1:
        # work with the normalized density, since hpd_threshold returns a
        # threshold on the normalized posterior
        norm_post = normalize_pdf(mu_in, post)[1]
        pthresh = hpd_threshold(mu_in, norm_post, alpha, tol=tolerance)
        samples_over_threshold = mu_in[norm_post > pthresh]
        mu_low = numpy.min(samples_over_threshold)
        mu_high = numpy.max(samples_over_threshold)
    else:
        raise ValueError("Confidence level must be in (0,1].")

    return mu_low, mu_high


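# Illustrative sketch (not part of the original module): the 90% HPD credible
# interval for the same toy posterior used in the upper-limit sketch above.
#
#   mu = numpy.linspace(0., 20., 1001)
#   post = mu * numpy.exp(-mu)
#   lo, hi = hpd_credible_interval(mu, post, alpha=0.9, tolerance=1e-3)

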
def integrate_efficiency(dbins, eff, err, logbins=False):
    '''
    Integrate a binned efficiency curve over the distance bins dbins to
    obtain the sensitive volume and its error.
    '''
    if logbins:
        logd = numpy.log(dbins)
        dlogd = logd[1:] - logd[:-1]
        # geometric mean as the representative distance of each logarithmic bin
        dreps = numpy.exp( (numpy.log(dbins[1:]) + numpy.log(dbins[:-1])) / 2. )
        vol = numpy.sum(4. * numpy.pi * (dreps ** 3.) * eff * dlogd)
        verr = numpy.sqrt( numpy.sum((4. * numpy.pi * (dreps ** 3.) * err * dlogd) ** 2.) )
    else:
        dd = dbins[1:] - dbins[:-1]
        dreps = (dbins[1:] + dbins[:-1]) / 2.
        vol = numpy.sum(4. * numpy.pi * (dreps ** 2.) * eff * dd )
        verr = numpy.sqrt( numpy.sum((4. * numpy.pi * (dreps ** 2.) * err * dd) ** 2.) )

    return vol, verr


def compute_efficiency(f_dist, m_dist, dbins):
    '''
    Compute the efficiency as a function of distance for the given sets of found
    and missed injection distances.
    Note that injections that do not fit into any dbin get lost :(.
    '''
    efficiency = numpy.zeros(len(dbins) - 1)
    error = numpy.zeros(len(dbins) - 1)
    for j, dlow in enumerate(dbins[:-1]):
        dhigh = dbins[j + 1]
        found = numpy.sum( (dlow <= f_dist) * (f_dist < dhigh) )
        missed = numpy.sum( (dlow <= m_dist) * (m_dist < dhigh) )
        # avoid a divide-by-zero in empty bins
        if found + missed == 0: missed = 1.0
        efficiency[j] = found / (found + missed)
        # binomial standard error on the estimated efficiency
        error[j] = numpy.sqrt( efficiency[j] * (1 - efficiency[j]) / (found + missed) )

    return efficiency, error


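# Illustrative sketch (not part of the original module): binned efficiency and
# sensitive volume from made-up found/missed injection distances (in Mpc).
#
#   f_dist = numpy.array([10., 20., 40., 60.])
#   m_dist = numpy.array([50., 80., 90.])
#   dbins = numpy.linspace(0., 100., 11)
#   eff, err = compute_efficiency(f_dist, m_dist, dbins)
#   vol, verr = integrate_efficiency(dbins, eff, err)

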
def mean_efficiency_volume(found, missed, dbins):
    '''
    Compute the binned detection efficiency and the corresponding sensitive
    volume for the given sets of found and missed injections.
    '''
    if len(found) == 0:
        # no found injections: zero efficiency and volume
        return numpy.zeros(len(dbins)-1), numpy.zeros(len(dbins)-1), 0, 0

    # extract the distances of the found and missed injections
    f_dist = numpy.array([l.distance for l in found])
    m_dist = numpy.array([l.distance for l in missed])

    # compute the efficiency as a function of distance and integrate it
    eff, err = compute_efficiency(f_dist, m_dist, dbins)
    vol, verr = integrate_efficiency(dbins, eff, err)

    return eff, err, vol, verr


def volume_montecarlo(found, missed, distribution_param, distribution, limits_param, max_param=None, min_param=None):
    '''
    Compute the sensitive volume and standard error using a direct Monte Carlo integral

    * distribution_param, D: parameter of the injections used to generate a distribution over distance
      - may be 'distance', 'chirp_distance'
    * distribution: form of the distribution over the parameter
      - 'log' (uniform in log D), 'uniform' (uniform in D), 'distancesquared' (uniform in D**2),
        'volume' (uniform in D**3)
      - It is assumed that injections were carried out over a range of D such that the sensitive
        volume due to signals at distances < D_min is negligible and the efficiency at distances
        > D_max is negligibly small
    * limits_param, Dlim: parameter specifying limits in which injections were made
      - may be 'distance', 'chirp_distance'
    * max_param: maximum value of Dlim out to which injections were made; if None, the maximum
      value among all found and missed injections will be used
    * min_param: minimum value of Dlim at which injections were made - needed to normalize
      the log distance integral correctly. If None, for the log distribution the minimum
      value among all found and missed injections will be used
    '''
    # powers of distance and chirp mass which weight each injection so that the
    # weighted sample is equivalent to one distributed uniformly in volume
    d_power = {
        'log'             : 3.,
        'uniform'         : 2.,
        'distancesquared' : 1.,
        'volume'          : 0.
    }[distribution]
    mchirp_power = {
        'log'             : 0.,
        'uniform'         : 5. / 6.,
        'distancesquared' : 5. / 3.,
        'volume'          : 5. / 2.
    }[distribution]

    found_d = numpy.array([inj.distance for inj in found])
    missed_d = numpy.array([inj.distance for inj in missed])

    # determine the maximum physical distance out to which injections were made
    if limits_param == 'chirp_distance':
        # convert a chirp distance limit to a physical distance using the
        # largest chirp mass among the injections
        mchirp_standard_bns = 1.4 * (2. ** (-1. / 5.))
        found_mchirp = numpy.array([inj.mchirp for inj in found])
        missed_mchirp = numpy.array([inj.mchirp for inj in missed])
        all_mchirp = numpy.concatenate((found_mchirp, missed_mchirp))
        max_mchirp = numpy.max(all_mchirp)
        if max_param is not None:
            # physical distance = chirp distance * (mchirp / mchirp_bns)**(5/6)
            max_distance = max_param * (max_mchirp / mchirp_standard_bns) ** (5. / 6.)
    elif limits_param == 'distance':
        max_distance = max_param
    else:
        raise NotImplementedError("%s is not a recognized parameter" % limits_param)

    # if no injection limit was given, fall back to the largest injected distance
    if max_param is None:
        max_distance = max(numpy.max(found_d), numpy.max(missed_d))

    # volume of the sphere enclosing all injections
    montecarlo_vtot = (4. / 3.) * numpy.pi * max_distance ** 3.

    # Monte Carlo weights rescaling the injected distribution to uniform in volume
    if distribution_param == 'distance':
        found_weights = found_d ** d_power
        missed_weights = missed_d ** d_power
    elif distribution_param == 'chirp_distance':
        # weight by a power of chirp mass to rescale the injection density in
        # chirp distance to a density in physical distance
        found_weights = found_d ** d_power * \
                        found_mchirp ** mchirp_power
        missed_weights = missed_d ** d_power * \
                         missed_mchirp ** mchirp_power
    else:
        raise NotImplementedError("%s is not a recognized distance parameter" % distribution_param)

    all_weights = numpy.concatenate((found_weights, missed_weights))

    # the Monte Carlo integrand is the weight for a found injection and zero
    # for a missed injection
    mc_weight_samples = numpy.concatenate((found_weights, 0 * missed_weights))
    mc_sum = sum(mc_weight_samples)

    if limits_param == 'distance':
        mc_norm = sum(all_weights)
    elif limits_param == 'chirp_distance':
        # if injections are made up to a fixed chirp distance, account for the
        # additional injections that a fixed physical distance limit would have
        # required at lower chirp masses
        mc_norm = sum(all_weights * (max_mchirp / all_mchirp) ** (5. / 2.))

    # sensitive volume = enclosing volume * weighted fraction of found injections
    mc_prefactor = montecarlo_vtot / mc_norm

    # effective number of independent injections entering the variance estimate
    if limits_param == 'distance':
        Ninj = len(mc_weight_samples)
    elif limits_param == 'chirp_distance':
        if distribution == 'log':
            # for injections uniform in log distance, count the extra log-range
            # opened up for lower-mass systems by the chirp distance limits
            if min_param is not None:
                min_distance = min_param * (numpy.min(all_mchirp) / mchirp_standard_bns) ** (5. / 6.)
            else:
                min_distance = min(numpy.min(found_d), numpy.min(missed_d))
            logrange = numpy.log(max_distance / min_distance)
            Ninj = len(mc_weight_samples) + (5. / 6.) * sum(numpy.log(max_mchirp / all_mchirp) / logrange)
        else:
            Ninj = sum((max_mchirp / all_mchirp) ** mchirp_power)

    # sample variance of the Monte Carlo integrand
    mc_sample_variance = sum(mc_weight_samples ** 2.) / Ninj - (mc_sum / Ninj) ** 2.

    # return the volume estimate and its standard error
    return mc_prefactor * mc_sum, mc_prefactor * (Ninj * mc_sample_variance) ** 0.5


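# Illustrative sketch (not part of the original module): an unbinned Monte Carlo
# volume estimate, assuming found and missed are lists of sim_inspiral rows and
# that injections were placed uniformly in the log of chirp distance out to a
# hypothetical limit of 120 Mpc (with a 5 Mpc lower cutoff).
#
#   vol, vol_err = volume_montecarlo(found, missed,
#                                    distribution_param='chirp_distance',
#                                    distribution='log',
#                                    limits_param='chirp_distance',
#                                    max_param=120., min_param=5.)

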
def filter_injections_by_mass(injs, mbins, bin_num, bin_type, bin_num2=None):
    '''
    For a given set of injections (sim_inspiral rows), return the subset
    of injections that fall within the given mass range.
    '''
    if bin_type == "Mass1_Mass2":
        # two-dimensional binning in component masses: accept either mass ordering
        m1bins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
        m1lo = m1bins[bin_num]
        m1hi = m1bins[bin_num+1]
        m2bins = numpy.concatenate((mbins.lower()[1], numpy.array([mbins.upper()[1][-1]])))
        m2lo = m2bins[bin_num2]
        m2hi = m2bins[bin_num2+1]
        newinjs = [l for l in injs if ((m1lo <= l.mass1 < m1hi and m2lo <= l.mass2 < m2hi) or (m1lo <= l.mass2 < m1hi and m2lo <= l.mass1 < m2hi))]
        return newinjs

    # one-dimensional binning: build the bin edges for the requested mass parameter
    mbins = numpy.concatenate((mbins.lower()[0], numpy.array([mbins.upper()[0][-1]])))
    mlow = mbins[bin_num]
    mhigh = mbins[bin_num+1]
    if bin_type == "Chirp_Mass":
        newinjs = [l for l in injs if (mlow <= l.mchirp < mhigh)]
    elif bin_type == "Total_Mass":
        newinjs = [l for l in injs if (mlow <= l.mass1+l.mass2 < mhigh)]
    elif bin_type == "Component_Mass":
        newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh)]
    elif bin_type == "BNS_BBH":
        if bin_num == 0 or bin_num == 2:
            # BNS or BBH bin: both component masses lie in the same range
            newinjs = [l for l in injs if (mlow <= l.mass1 < mhigh and mlow <= l.mass2 < mhigh)]
        else:
            # mixed (NSBH) bin: one component in the first range, the other in the second
            newinjs = [l for l in injs if (mbins[0] <= l.mass1 < mbins[1] and mbins[2] <= l.mass2 < mbins[3])]
            newinjs += [l for l in injs if (mbins[0] <= l.mass2 < mbins[1] and mbins[2] <= l.mass1 < mbins[3])]

    return newinjs


def compute_volume_vs_mass(found, missed, mass_bins, bin_type, dbins=None,
                           distribution_param=None, distribution=None, limits_param=None,
                           max_param=None, min_param=None):
    """
    Compute the average luminosity an experiment was sensitive to given the sets
    of found and missed injections and assuming luminosity is uniformly distributed
    in space.

    If distribution_param, distribution and limits_param are not None, use an
    unbinned Monte Carlo integral (which optionally takes max_param and min_param)
    for the volume and its error.
    Otherwise use a simple efficiency * distance**2 binned integral.
    In either case use the distance bins dbins to return the efficiency in each
    mass bin.
    """
    # decide whether the unbinned Monte Carlo integral can be used
    if distribution_param is not None and distribution is not None and limits_param is not None:
        mc_unbinned = True
    else:
        mc_unbinned = False

    # mass-binned arrays for the volume and its error
    volArray = rate.BinnedArray(mass_bins)
    vol2Array = rate.BinnedArray(mass_bins)

    # found/missed injection counts per mass bin
    foundArray = rate.BinnedArray(mass_bins)
    missedArray = rate.BinnedArray(mass_bins)

    # efficiency (and its error) as a function of distance, one entry per mass bin
    effvmass = []
    errvmass = []

    if bin_type == "Mass1_Mass2":
        for j, mc1 in enumerate(mass_bins.centres()[0]):
            for k, mc2 in enumerate(mass_bins.centres()[1]):

                # filter out injections not in this mass bin
                newfound = filter_injections_by_mass(found, mass_bins, j, bin_type, k)
                newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type, k)

                foundArray[(mc1, mc2)] = len(newfound)
                missedArray[(mc1, mc2)] = len(newmissed)

                # compute the volume using only the injections in this mass bin
                meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
                effvmass.append(meaneff)
                errvmass.append(efferr)
                if mc_unbinned:
                    meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                                        distribution, limits_param, max_param, min_param)
                volArray[(mc1, mc2)] = meanvol
                vol2Array[(mc1, mc2)] = volerr

        return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass

    for j, mc in enumerate(mass_bins.centres()[0]):

        # filter out injections not in this mass bin
        newfound = filter_injections_by_mass(found, mass_bins, j, bin_type)
        newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type)

        foundArray[(mc,)] = len(newfound)
        missedArray[(mc,)] = len(newmissed)

        # compute the volume using only the injections in this mass bin
        meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
        effvmass.append(meaneff)
        errvmass.append(efferr)

        if mc_unbinned:
            meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param,
                                                distribution, limits_param, max_param, min_param)
        volArray[(mc,)] = meanvol
        vol2Array[(mc,)] = volerr

    return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass


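# Illustrative sketch (not part of the original module): efficiency and volume
# versus total mass, assuming found and missed are lists of sim_inspiral rows.
# The mass and distance bins below are made up for illustration.
#
#   mass_bins = rate.NDBins((rate.LinearBins(2., 20., 6),))
#   dbins = numpy.linspace(0., 200., 21)
#   vols, vol_errs, nfound, nmissed, effs, eff_errs = compute_volume_vs_mass(
#       found, missed, mass_bins, "Total_Mass", dbins=dbins)

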
def log_volume_derivative_fit(x, vols):
    '''
    Performs a linear least squares fit to log(vols) as a function of x.
    '''
    if numpy.min(vols) == 0:
        print("Warning: cannot fit log volume derivative, one or more volumes are zero!", file=sys.stderr)
        print(vols, file=sys.stderr)
        return (0, 0)

    coeffs, resids, rank, svs, rcond = numpy.polyfit(x, numpy.log(vols), 1, full=True)
    if coeffs[0] < 0:
        # a negative slope is unphysical for the volume derivative Lambda
        print("Warning: Derivative fit resulted in Lambda =", coeffs[0], file=sys.stderr)
        coeffs[0] = 0
        print("The value Lambda = 0 was substituted", file=sys.stderr)

    return coeffs


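# Illustrative sketch (not part of the original module): fit the logarithmic
# derivative of the sensitive volume with respect to a ranking-statistic
# threshold x, using made-up numbers; the first returned coefficient is the
# slope d(ln V)/dx.
#
#   x = numpy.array([8., 9., 10.])
#   vols = numpy.array([5.0e4, 7.0e4, 1.0e5])
#   lam, lnv0 = log_volume_derivative_fit(x, vols)

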
def get_loudest_event(connection, coinc_table="coinc_inspiral", datatype="exclude_play"):
    '''
    Yield the instrument set and minimum combined FAR of the loudest coincident
    event for each instrument combination of the given datatype in the database.
    '''
    far_threshold_query = """
    SELECT coinc_event.instruments, MIN(combined_far) FROM %s JOIN coinc_event ON (%s.coinc_event_id == coinc_event.coinc_event_id) JOIN experiment_map ON (coinc_event.coinc_event_id == experiment_map.coinc_event_id) JOIN experiment_summary ON ( experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id) WHERE experiment_summary.datatype == "%s" GROUP BY coinc_event.instruments;
    """ % (coinc_table, coinc_table, datatype)

    for inst, far in connection.cursor().execute(far_threshold_query):
        inst = frozenset(lsctables.instrument_set_from_ifos(inst))
        yield inst, far

    return

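# Illustrative sketch (not part of the original module): iterate over the
# loudest (lowest combined FAR) coincident event for each instrument
# combination in a hypothetical search result database.
#
#   import sqlite3
#   connection = sqlite3.connect("search_results.sqlite")
#   for instruments, far in get_loudest_event(connection, datatype="all_data"):
#       print(instruments, far)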