3 # Copyright (C) 2013,2014 Kipp Cannon, Chad Hanna, Jacob Peoples
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# A program to compute the signal and noise rate posteriors of a gstlal inspiral analysis
24 # =============================================================================
28 # =============================================================================
# fpconst is not part of the standard library; fall back to the
# builtin float conversion if it is unavailable.  NegInf is used as the
# default log likelihood ratio threshold in the SQL query below.
try:
	from fpconst import NegInf
except ImportError:
	NegInf = float("-inf")
# configure matplotlib for small, consistently-sized plot text before any
# figures are created.  NOTE(review): the original rcParams dictionary
# appears truncated here (e.g. "font.size", dpi settings may have been
# present) -- confirm against the upstream source.
import matplotlib
matplotlib.rcParams.update({
	"axes.titlesize": 10.0,
	"axes.labelsize": 10.0,
	"xtick.labelsize": 8.0,
	"ytick.labelsize": 8.0,
	"legend.fontsize": 8.0
})
from matplotlib import figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from optparse import OptionParser
import bisect
import h5py
import math
import sqlite3
import sys

from glue import segments
from glue.lal import CacheEntry
from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.text_progress_bar import ProgressBar
from pylal import ligolw_thinca

from gstlal import far
from gstlal import rate_estimation
# aspect ratio used for the output figure
golden_ratio = (1. + math.sqrt(5.)) / 2.

# name recorded in the output document's process table
process_name = u"gstlal_inspiral_rate_posterior"

__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
86 # =============================================================================
90 # =============================================================================
def parse_command_line():
	"""
	Parse the command line.

	Returns a three-element tuple:  the optparse options object, a
	dictionary of the option values suitable for recording in the
	output document's process table, and the list of input filenames
	(positional arguments plus the contents of --input-cache, if
	given).

	Raises ValueError if samples are requested but no ranking
	statistic PDF files are provided.
	"""
	parser = OptionParser(
		version = "Name: %%prog\n%s" % ""	# FIXME
	)
	parser.add_option("-c", "--confidence-intervals", metavar = "confidence[,...]", default = "0.68,0.95,0.999999", help = "Compute and report confidence intervals in the signal count for these confidences (default = \".68,.95,.999999\", clear to disable).")
	parser.add_option("-i", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache. See lalapps_path2cache for information on how to produce a LAL cache file.")
	parser.add_option("--chain-file", metavar = "filename", help = "Read chain from this file, save chain to this file.")
	parser.add_option("--likelihood-file", metavar = "filename", action = "append", help = "Load ranking statistic PDFs from this file. Can be given multiple times.")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Load ranking statistic PDFs from the files in this LAL cache.")
	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
	parser.add_option("--with-background", action = "store_true", help = "Show background posterior on plot.")
	parser.add_option("--samples", metavar = "count", type = "int", help = "Run this many samples. Set to 0 to load and plot the contents of a previously-recorded chain file without doing any additional samples.")
	parser.add_option("--threshold", metavar = "log likelihood ratio", type = "float", help = "Derive the rate posterior by considering only events ranked at or above this value of the log likelihood ratio ranking statistic (default = use all events).")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	options, filenames = parser.parse_args()

	# capture the option values before they are modified below so the
	# process table records what the user actually typed
	paramdict = options.__dict__

	options.confidence_intervals = map(float, options.confidence_intervals.split(","))

	# merge --likelihood-cache contents with --likelihood-file options
	options.likelihood_filenames = []
	if options.likelihood_cache is not None:
		options.likelihood_filenames += [CacheEntry(line).path for line in open(options.likelihood_cache)]
	if options.likelihood_file is not None:
		options.likelihood_filenames += options.likelihood_file
	if not options.likelihood_filenames and options.samples > 0:
		raise ValueError("must provide --likelihood-cache and/or one or more --likelihood-file options")

	if options.input_cache:
		filenames += [CacheEntry(line).path for line in open(options.input_cache)]

	return options, paramdict, filenames
129 # =============================================================================
133 # =============================================================================
def load_ranking_data(filenames, ln_likelihood_ratio_threshold = None, verbose = False):
	"""
	Load the ranking statistic PDFs from the XML documents named in
	filenames and sum them into a single ranking statistic data
	object.

	If ln_likelihood_ratio_threshold is not None, the PDFs are zeroed
	below that value of the ranking statistic before .finish() is
	invoked, so that the resulting distributions describe only events
	at or above threshold.

	Raises ValueError if filenames is empty or if any document does
	not contain ranking statistic PDFs.
	"""
	if not filenames:
		raise ValueError("no likelihood files!")
	ranking_data = None
	for n, filename in enumerate(filenames, start = 1):
		if verbose:
			print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
		xmldoc = ligolw_utils.load_filename(filename, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = verbose)
		ignored, this_ranking_data, ignored = far.parse_likelihood_control_doc(xmldoc)
		if this_ranking_data is None:
			raise ValueError("'%s' does not contain ranking statistic PDFs" % filename)
		if ranking_data is None:
			ranking_data = this_ranking_data
		else:
			ranking_data += this_ranking_data

	# affect the zeroing of the PDFs below threshold by hacking the
	# histograms before running .finish().  do the indexing ourselves
	# to not 0 the bin @ threshold
	if ln_likelihood_ratio_threshold is not None:
		for binnedarray in ranking_data.background_likelihood_rates.values():
			binnedarray.array[:binnedarray.bins[0][ln_likelihood_ratio_threshold]] = 0.
		for binnedarray in ranking_data.signal_likelihood_rates.values():
			binnedarray.array[:binnedarray.bins[0][ln_likelihood_ratio_threshold]] = 0.
		for binnedarray in ranking_data.zero_lag_likelihood_rates.values():
			binnedarray.array[:binnedarray.bins[0][ln_likelihood_ratio_threshold]] = 0.

	ranking_data.finish(verbose = verbose)

	return ranking_data
def load_search_results(filenames, ln_likelihood_ratio_threshold = None, tmp_path = None, verbose = False):
	"""
	Extract the log likelihood ratios of the candidates from the
	SQLite coinc databases named in filenames, separating them into
	time-shifted ("background") and zero-lag candidates.

	Only candidates ranked at or above ln_likelihood_ratio_threshold
	are collected (all candidates if None).  tmp_path names a local
	work area for the database files (see
	dbtables.get_connection_filename()).

	Returns a two-element tuple:  (background log likelihood ratios,
	zero-lag log likelihood ratios), each a list of floats.
	"""
	background_ln_likelihood_ratios = []
	zerolag_ln_likelihood_ratios = []

	for n, filename in enumerate(filenames, 1):
		if verbose:
			print >>sys.stderr, "%d/%d: %s" % (n, len(filenames), filename)
		working_filename = dbtables.get_connection_filename(filename, tmp_path = tmp_path, verbose = verbose)
		connection = sqlite3.connect(working_filename)

		xmldoc = dbtables.get_xml(connection)
		definer_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)

		# the EXISTS sub-query is true when the candidate is a
		# time-shifted ("background") candidate.  NOTE(review):
		# parts of this query were reconstructed -- confirm
		# against the upstream source.
		for ln_likelihood_ratio, is_background in connection.cursor().execute("""
SELECT
	coinc_event.likelihood,
	EXISTS (
		SELECT
			*
		FROM
			time_slide
		WHERE
			time_slide.time_slide_id == coinc_event.time_slide_id
			AND time_slide.offset != 0
	)
FROM
	coinc_event
	JOIN coinc_inspiral ON (
		coinc_inspiral.coinc_event_id == coinc_event.coinc_event_id
	)
WHERE
	coinc_event.coinc_def_id == ?
	AND coinc_event.likelihood >= ?""", (definer_id, (ln_likelihood_ratio_threshold if ln_likelihood_ratio_threshold is not None else NegInf))):
			if is_background:
				background_ln_likelihood_ratios.append(ln_likelihood_ratio)
			else:
				zerolag_ln_likelihood_ratios.append(ln_likelihood_ratio)

		connection.close()
		dbtables.discard_connection_filename(filename, working_filename, verbose = verbose)

	return background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios
def plot_rates(signal_rate, confidence_intervals = None):
	"""
	Plot the signal rate posterior PDF (a BinnedArray-like object
	with .bins and .array attributes) and, optionally, shade the
	given confidence intervals.

	confidence_intervals, if not None, maps confidence level to a
	(mode, lo, hi) tuple as produced by
	rate_estimation.confidence_interval_from_binnedarray().

	Returns the matplotlib Figure object.
	"""
	fig = figure.Figure()
	# attach an Agg canvas so fig.savefig() works without a GUI
	FigureCanvas(fig)
	signal_axes = fig.gca()

	x = signal_rate.bins[0].centres()
	y = signal_rate.array
	line1, = signal_axes.plot(x, y, color = "k", linestyle = "-", label = "Signal")
	signal_axes.set_title("Event Rate Posterior")
	signal_axes.set_xlabel("Event Rate ($\mathrm{signals} / \mathrm{experiment}$)")
	signal_axes.set_ylabel(r"$P(\mathrm{signals} / \mathrm{experiment})$")
	# NOTE(review): the 1e-8 lower bound implies a log y axis --
	# confirm against the upstream source
	signal_axes.semilogy()
	signal_axes.set_ylim((1e-8, 1.))

	if confidence_intervals is not None:
		# darkest shading for the smallest (innermost) interval
		alphas = dict(zip(sorted(confidence_intervals), [.6, .4, .2]))

		# convert lo and hi bounds to co-ordinate index segments
		confidence_intervals = sorted((conf, segments.segmentlist([segments.segment(bisect.bisect_left(x, lo), bisect.bisect_right(x, hi))])) for conf, (mode, lo, hi) in confidence_intervals.items())

		# remove from each the indexes spanned by lower confidence regions
		confidence_intervals = [(conf, indexes - sum((segs for conf, segs in confidence_intervals[:i]), segments.segmentlist())) for i, (conf, indexes) in enumerate(confidence_intervals)]

		for conf, segs in confidence_intervals:
			for lo, hi in segs:
				signal_axes.fill_between(x[lo:hi], y[lo:hi], 1e-8, color = "k", alpha = alphas[conf])

	try:
		fig.tight_layout()
	except AttributeError:
		# old matplotlib.  auto-layout not available
		pass

	return fig
252 # =============================================================================
256 # =============================================================================
#
# command line
#


options, paramdict, filenames = parse_command_line()


#
# load ranking statistic PDFs
#


if options.likelihood_filenames:
	ranking_data = load_ranking_data(options.likelihood_filenames, ln_likelihood_ratio_threshold = options.threshold, verbose = options.verbose)
else:
	# no samples requested;  posteriors are loaded from the chain file
	ranking_data = None


#
# load search results
#


background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios = load_search_results(filenames, ln_likelihood_ratio_threshold = options.threshold, tmp_path = options.tmp_space, verbose = options.verbose)


#
# calculate rate posteriors
#


if options.verbose:
	print >>sys.stderr, "calculating rate posteriors using %d likelihood ratios ..." % len(zerolag_ln_likelihood_ratios)
	progressbar = ProgressBar()
else:
	progressbar = None
kwargs = {}
if options.chain_file is not None:
	kwargs["chain_file"] = h5py.File(options.chain_file)
if options.samples is not None:
	kwargs["nsamples"] = options.samples
signal_rate_pdf, noise_rate_pdf = rate_estimation.calculate_rate_posteriors(ranking_data, zerolag_ln_likelihood_ratios, progressbar = progressbar, **kwargs)
del progressbar


#
# find confidence intervals
#


if options.confidence_intervals:
	if options.verbose:
		print >>sys.stderr, "determining confidence intervals ..."
	confidence_intervals = dict((conf, rate_estimation.confidence_interval_from_binnedarray(signal_rate_pdf, conf)) for conf in options.confidence_intervals)
else:
	confidence_intervals = None
if options.verbose and confidence_intervals is not None:
	print >>sys.stderr, "rate posterior mean = %g signals/experiment" % rate_estimation.mean_from_pdf(signal_rate_pdf)
	# all modes are the same, pick one and report it
	print >>sys.stderr, "maximum-likelihood rate = %g signals/experiment" % confidence_intervals.values()[0][0]
	for conf, (mode, lo, hi) in sorted(confidence_intervals.items()):
		print >>sys.stderr, "%g%% confidence interval = [%g, %g] signals/experiment" % (conf * 100., lo, hi)


#
# save posterior PDFs and plot
#


filename = "rate_posteriors.xml.gz"
xmldoc = ligolw.Document()
xmldoc.appendChild(ligolw.LIGO_LW())
process = ligolw_process.register_to_xmldoc(xmldoc, process_name, paramdict)
xmldoc.childNodes[-1].appendChild(signal_rate_pdf.to_xml(u"%s:signal_pdf" % process_name))
xmldoc.childNodes[-1].appendChild(noise_rate_pdf.to_xml(u"%s:noise_pdf" % process_name))
ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = options.verbose)

fig = plot_rates(signal_rate_pdf, confidence_intervals = confidence_intervals)
for filename in ("rate_posteriors.png", "rate_posteriors.pdf"):
	if options.verbose:
		print >>sys.stderr, "writing %s ..." % filename
	fig.savefig(filename)

if options.verbose:
	print >>sys.stderr, "done"