43 from fpconst
import NaN, NegInf, PosInf
48 NegInf = float(
"-inf")
49 PosInf = float(
"+inf")
52 from scipy
import optimize
56 from glue.text_progress_bar
import ProgressBar
57 from pylal
import rate
60 from gstlal
import emcee
def maximum_likelihood_rates(ln_f_over_b):
	"""
	Return the rate parameters that maximize RatesLnPDF for the given
	vector of ln(f/b) values, located with scipy's downhill-simplex
	minimizer applied to the negated log PDF.
	"""
	# minimizing -ln P(x) maximizes ln P(x)
	def neg_ln_pdf(x):
		return -RatesLnPDF(x, ln_f_over_b)
	# initial guess:  signal rate of 1, background rate of n + sqrt(n)
	n = len(ln_f_over_b)
	x0 = (1.0, n + n**.5)
	return optimize.fmin(neg_ln_pdf, x0, disp = True)
def run_mcmc(n_walkers, n_dim, n_samples_per_walker, lnprobfunc, pos0 = None, args = (), n_burn = 100, progressbar = None):
	"""
	A generator function that yields samples distributed according to a
	user-supplied probability density function that need not be
	normalized.  lnprobfunc computes and returns the natural logarithm
	of the probability density, up to a constant offset.  n_dim sets
	the number of dimensions of the parameter space over which the PDF
	is defined and args gives any additional arguments to be passed to
	lnprobfunc, whose signature must be

		ln(P) = lnprobfunc(X, *args)

	where X is a numpy array of length n_dim.

	The generator yields a total of n_samples_per_walker arrays each of
	which is n_walkers by n_dim in size.  Each row is a sample drawn
	from the n_dim-dimensional parameter space.

	pos0 is an n_walkers by n_dim array giving the initial positions of
	the walkers (this parameter is currently not optional).  n_burn
	iterations of the MCMC sampler will be executed and discarded to
	allow the system to stabilize before samples are yielded to the
	calling code.  A chain can be continued by passing the last return
	value as pos0 and setting n_burn = 0.

	NOTE:  the samples yielded by this generator are not drawn from the
	PDF independently of one another.  The correlation length is not
	known.
	"""
	#
	# construct the ensemble sampler
	#

	sampler = emcee.EnsembleSampler(n_walkers, n_dim, lnprobfunc, args = args)

	#
	# set walkers at initial positions
	#

	# FIXME:  auto-select initial positions when pos0 is None
	assert pos0 is not None, "auto-selection of initial positions not implemented"

	#
	# burn-in:  run the sampler without keeping the samples so the
	# walkers can settle into the high-probability region
	#

	pos0, ignored, ignored = sampler.run_mcmc(pos0, n_burn, storechain = False)
	if progressbar is not None:
		progressbar.increment(delta = n_burn)
	if n_burn and sampler.acceptance_fraction.min() < 0.4:
		# sys.stderr.write() instead of a print statement so the
		# warning path is Python 2/3 neutral
		sys.stderr.write("\nwarning: low burn-in acceptance fraction (min = %g)\n" % sampler.acceptance_fraction.min())

	#
	# sample:  yield one n_walkers x n_dim array of positions per
	# iteration (this yield was missing;  without it the generator
	# produced nothing and discarded every sample)
	#

	for coordslist, ignored, ignored in sampler.sample(pos0, iterations = n_samples_per_walker, storechain = False):
		yield coordslist
		if progressbar is not None:
			progressbar.increment()
	if n_samples_per_walker and sampler.acceptance_fraction.min() < 0.5:
		sys.stderr.write("\nwarning: low sampler acceptance fraction (min %g)\n" % sampler.acceptance_fraction.min())
def binned_rates_from_samples(samples):
	"""
	Construct and return a BinnedArray containing a histogram of a
	sequence of samples.  The limits of the binning are determined
	automatically from the sequence, so samples must support .min(),
	.max() and len() (e.g., a numpy array);  it cannot be a generator.
	"""
	# integer-aligned limits spanning the sample range
	lo, hi = math.floor(samples.min()), math.ceil(samples.max())
	# roughly one bin per 729 samples, but never fewer than one bin
	# (the original len(samples) // 729 produced 0 bins for small
	# sample sets)
	nbins = max(len(samples) // 729, 1)
	binnedarray = rate.BinnedArray(rate.NDBins((rate.LinearBins(lo, hi, nbins),)))
	for sample in samples:
		binnedarray[sample,] += 1.
	# smooth the histogram, then clip away any negative values the
	# smoothing kernel may have introduced
	rate.filter_array(binnedarray.array, rate.gaussian_window(5))
	numpy.clip(binnedarray.array, 0.0, PosInf, binnedarray.array)
	return binnedarray
def calculate_rate_posteriors(ranking_data, ln_likelihood_ratios, progressbar = None, chain_file = None, nsample = 400000):
	"""
	Estimate posterior PDFs for the foreground (signal) and background
	rates given a sequence of ranking statistic values.  Runs an MCMC
	sampler (run_mcmc) over the rate parameters, optionally
	checkpointing the chain to chain_file under "chain/%08d" keys and
	resuming from a previously saved chain, then histograms the
	samples with binned_rates_from_samples and returns the tuple
	(Rf_pdf, Rb_pdf) of BinnedArray PDFs.

	NOTE(review):  several lines are missing from this extract of the
	source (the original line numbering jumps);  the statements flagged
	below must be restored from the complete file before this function
	can run.
	"""
	# refuse to operate on ranking statistics containing NaNs
	if any(math.isnan(ln_lr) for ln_lr in ln_likelihood_ratios):
		raise ValueError("NaN log likelihood ratio encountered")
	# NOTE(review):  the guard for this raise (presumably
	# "if nsample < 0:") is missing from this extract
	raise ValueError("nsample < 0: %d" % nsample)

	# convert the ranking statistic values to ln(f/b), the log of the
	# signal-to-background likelihood ratio of each candidate
	if ranking_data is not None:
		# signal and background ranking statistic PDFs; the None key
		# presumably selects the "all instruments" category -- confirm
		# against the ranking_data class
		f = ranking_data.signal_likelihood_pdfs[None]
		b = ranking_data.background_likelihood_pdfs[None]
		ln_f_over_b = numpy.log(numpy.array([f[ln_lr,] / b[ln_lr,] for ln_lr in ln_likelihood_ratios]))
		# discard NaNs (0/0 PDF ratios)
		ln_f_over_b = ln_f_over_b[~numpy.isnan(ln_f_over_b)]
		# infinities cannot be tolerated by the rate PDF
		if numpy.isinf(ln_f_over_b).any():
			raise ValueError("infinity encountered in ranking statistic PDF ratios")
	# NOTE(review):  the elif/else guards for the next two statements
	# are missing from this extract;  the raise presumably fires when
	# new samples are requested without ranking data, and the empty
	# array is presumably the no-new-samples branch
	raise ValueError("must supply ranking data to run MCMC sampler")
	ln_f_over_b = numpy.array([])

	# number of walkers:  a multiple of the parameter-space dimension
	# NOTE(review):  ndim's definition is missing from this extract
	nwalkers = 10 * 2 * ndim
	pos0 = numpy.zeros((nwalkers, ndim), dtype = "double")
	if progressbar is not None:
		# NOTE(review):  nburn's definition is missing from this extract
		progressbar.max = nsample + nburn

	# resume from a previously checkpointed chain if one is available
	if chain_file is not None and "chain" in chain_file:
		chain = chain_file["chain"].values()
		length = sum(sample.shape[0] for sample in chain)
		# room for the loaded chain plus any new samples requested
		samples = numpy.empty((max(nsample, length), nwalkers, ndim), dtype = "double")
		if progressbar is not None:
			progressbar.max = samples.shape[0]
		# NOTE(review):  the loop header (presumably
		# "for sample in chain:") and the initialization of i are
		# missing from this extract
		samples[i:i+sample.shape[0],:,:] = sample
		if progressbar is not None:
			progressbar.update(i)
		# continue the chain from the last loaded positions
		pos0 = samples[i - 1,:,:]
	# NOTE(review):  the elif/else guards for the next two statements
	# are missing from this extract
	raise ValueError("no chain file to load or invalid chain file, and no new samples requested")
	samples = numpy.empty((nsample, nwalkers, ndim), dtype = "double")
	# random initial walker positions:  exponential for the signal
	# rate, Poisson about the candidate count for the background rate
	pos0[:,0] = numpy.random.exponential(scale = 1., size = (nwalkers,))
	pos0[:,1] = numpy.random.poisson(lam = len(ln_likelihood_ratios), size = (nwalkers,))

	# NOTE(review):  the posterior class is defined elsewhere in the
	# full file
	log_posterior = posterior(ln_f_over_b)
	# the sampler is fed log_posterior/2, i.e. it samples the square
	# root of the posterior;  the histograms are squared below to
	# recover the PDF -- confirm against the full file's comments
	for j, coordslist in enumerate(run_mcmc(nwalkers, ndim, max(0, nsample - i), (lambda x: log_posterior(x) / 2.), n_burn = nburn, pos0 = pos0, progressbar = progressbar), i):
		samples[j,:,:] = coordslist
		# checkpoint the chain in blocks of 2048 iterations
		if j + 1 >= i + 2048 and chain_file is not None:
			chain_file["chain/%08d" % i] = samples[i:j+1,:,:]
	# flush any final partial block to the chain file
	if chain_file is not None and i < samples.shape[0]:
		chain_file["chain/%08d" % i] = samples[i:,:,:]
	# rates are non-negative by construction;  a negative sample
	# indicates a sampler failure
	if samples.min() < 0:
		raise ValueError("MCMC sampler yielded negative rate(s)")

	# histogram the signal-rate samples (dimension 0);  squaring undoes
	# the square-root sampling above
	Rf_pdf = binned_rates_from_samples(samples[:,:,0].flatten())
	Rf_pdf.array = Rf_pdf.array**2. / Rf_pdf.bins.volumes()
	# likewise for the background-rate samples (dimension 1)
	Rb_pdf = binned_rates_from_samples(samples[:,:,1].flatten())
	Rb_pdf.array = Rb_pdf.array**2. / Rb_pdf.bins.volumes()
	return Rf_pdf, Rb_pdf
def moment_from_pdf(binnedarray, moment, c = 0.):
	"""
	Compute the given moment, taken about the point c, of the PDF
	stored in a 1-dimensional BinnedArray.  Raises ValueError if the
	BinnedArray is not 1-dimensional.
	"""
	bins = binnedarray.bins
	if len(bins) != 1:
		raise ValueError("BinnedArray PDF must have 1 dimension")
	# bin centres and widths along the single dimension
	centres = bins.centres()[0]
	widths = bins.upper()[0] - bins.lower()[0]
	# sum_i (x_i - c)^moment * P(x_i) * dx_i
	integrand = (centres - c)**moment * binnedarray.array * widths
	return integrand.sum()
def mean_from_pdf(binnedarray):
	"""
	Return the mean of the PDF stored in a 1-dimensional BinnedArray,
	i.e. its first moment about 0.
	"""
	first_moment = 1.
	return moment_from_pdf(binnedarray, first_moment, c = 0.)
def variance_from_pdf(binnedarray):
	"""
	Return the variance of the PDF stored in a 1-dimensional
	BinnedArray:  the second moment taken about the distribution's
	mean.
	"""
	mu = mean_from_pdf(binnedarray)
	return moment_from_pdf(binnedarray, 2., c = mu)
def confidence_interval_from_binnedarray(binned_array, confidence = 0.95):
	"""
	Constructs a confidence interval based on a BinnedArray object
	containing a normalized 1-D PDF.  Returns the tuple (mode, lower
	bound, upper bound).

	The interval is grown greedily about the mode:  starting from the
	maximum-probability bin, the neighbouring bin containing the most
	probability is repeatedly absorbed (both, on a tie) until the
	integrated probability reaches the requested confidence.

	Raises ValueError if the PDF contains NaNs, infinities or negative
	values, if confidence is not in [0, 1], if any bin size is not
	positive, if the PDF is not normalized, or if the requested
	confidence cannot be achieved.
	"""
	# check for funny input
	if numpy.isnan(binned_array.array).any():
		raise ValueError("NaNs encountered in rate PDF")
	if numpy.isinf(binned_array.array).any():
		raise ValueError("infinities encountered in rate PDF")
	if (binned_array.array < 0.).any():
		raise ValueError("negative values encountered in rate PDF")
	if not 0.0 <= confidence <= 1.0:
		raise ValueError("confidence must be in [0, 1]")

	# the mode is the bin with the largest PDF value
	mode_index = numpy.argmax(binned_array.array)

	centres, = binned_array.centres()
	upper = binned_array.bins[0].upper()
	lower = binned_array.bins[0].lower()
	bin_widths = upper - lower
	if (bin_widths <= 0.).any():
		raise ValueError("rate PDF bin sizes must be positive")
	assert not numpy.isinf(bin_widths).any(), "infinite bin sizes not supported"
	# probability content of each bin
	P = binned_array.array * bin_widths
	if abs(P.sum() - 1.0) > 1e-13:
		raise ValueError("rate PDF is not normalized")

	# grow the interval [li, ri] outward from the mode until it
	# contains the requested probability (this state initialization was
	# missing from the extracted source)
	li = ri = mode_index
	P_sum = P[mode_index]
	while P_sum < confidence:
		if li <= 0 and ri >= len(P) - 1:
			raise ValueError("failed to achieve requested confidence")
		# probability of the candidate bin on either side;  0 at the
		# array boundaries so that side is never chosen
		P_li = P[li - 1] if li > 0 else 0.
		P_ri = P[ri + 1] if ri < len(P) - 1 else 0.
		assert P_li >= 0. and P_ri >= 0.
		if P_li > P_ri:
			# absorb the bin to the left
			P_sum += P_li
			li -= 1
		elif P_ri > P_li:
			# absorb the bin to the right
			P_sum += P_ri
			ri += 1
		else:
			# tie:  absorb both neighbours, clamping at the edges
			P_sum += P_li + P_ri
			li = max(li - 1, 0)
			ri = min(ri + 1, len(P) - 1)
	return centres[mode_index], lower[li], upper[ri]