3 # Copyright (C) 2013 Chad Hanna, Kipp Cannon
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 ## @file gstlal_inspiral_plot_background
20 # A program to plot the likelihood distributions in noise of a gstlal inspiral analysis
22 # ### Command line interface
24 # + `--database` [filename]: Retrieve search results from this database (optional). Can be given multiple times.
25 # + `--database-cache` [filename]: Retrieve search results from all databases in this LAL cache (optional). See lalapps_path2cache.
26 # + `--max-snr` [value] (float): Plot SNR PDFs up to this value of SNR (default = 200).
27 # + `--max-log-lambda` [value] (float): Plot ranking statistic CDFs, etc., up to this value of the natural logarithm of the likelihood ratio (default = 100).
28 # + `--min-log-lambda` [value] (float): Plot ranking statistic CDFs, etc., down to this value of the natural logarithm of the likelihood ratio (default = -5).
29 # + `--output-dir` [path]: Write output to this directory (default = ".").
30 # + `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
31 # + `--user-tag` [tag]: Set the adjustable component of the description fields in the output filenames (default = "ALL").
32 # + `--verbose`: Be verbose.
36 matplotlib.rcParams.update({
38 "axes.titlesize": 10.0,
39 "axes.labelsize": 10.0,
40 "xtick.labelsize": 8.0,
41 "ytick.labelsize": 8.0,
42 "legend.fontsize": 8.0,
47 from matplotlib
import figure
48 from matplotlib.backends.backend_agg
import FigureCanvasAgg as FigureCanvas
50 from optparse
import OptionParser
55 from glue.lal
import CacheEntry
56 from glue.ligolw
import dbtables
57 from glue.ligolw
import lsctables
58 from glue.ligolw
import utils as ligolw_utils
59 from pylal
import ligolw_thinca
62 from gstlal
import far
63 from gstlal
import plotfar
64 from gstlal
import inspiral_pipe
67 def parse_command_line():
68 parser = OptionParser()
69 parser.add_option("-d", "--database", metavar = "filename", action = "append", help = "Retrieve search results from this database (optional). Can be given multiple times.")
70 parser.add_option("-c", "--database-cache", metavar = "filename", help = "Retrieve search results from all databases in this LAL cache (optional). See lalapps_path2cache.")
71 parser.add_option("--max-snr", metavar = "SNR", default = 200., type = "
float", help = "Plot SNR PDFs up to this value of SNR (default = 200).")
72 parser.add_option("--max-log-lambda", metavar = "value", default = 100., type = "
float", help = "Plot ranking statistic CCDFs, etc., up to this value of the natural logarithm of the likelihood ratio (default = 100).")
73 parser.add_option("--min-log-lambda", metavar = "value", default = -5., type = "
float", help = "Plot ranking statistic CCDFs, etc., down to this value of the natural logarithm of the likelihood ratio (default = -10).")
74 parser.add_option("--output-dir", metavar = "output-dir", default = ".", help = "Write output to this directory (default = \".\").")
75 parser.add_option("--output-format", metavar = "extension", default = ".png", help = "Select output format by choosen the filename extension (default = \".png\").")
76 parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on
which the final output will reside.")
77 parser.add_option("--user-tag", metavar = "user-tag", default = "ALL", help = "Set the adjustable component of the description fields in the output filenames (default = \"ALL\").")
78 parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
79 options, filenames = parser.parse_args()
81 if options.database_cache is not None:
82 if options.database is None:
84 options.database += [CacheEntry(line).path for line in open(options.database_cache)]
86 if options.output_format not in (".png", ".pdf", ".svg"):
87 raise ValueError("invalid --output-format \"%s\"" % options.output_format)
89 options.user_tag = options.user_tag.upper()
91 return options, filenames
94 def load_distributions(filenames, verbose = False):
95 coinc_params_distributions, ranking_data, seglists = None, None, None
96 for n, filename in enumerate(filenames, 1):
98 print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
99 this_coinc_params_distributions, this_ranking_data, this_seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(filenames[0], contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = verbose))
100 if this_coinc_params_distributions is None and this_ranking_data is None:
101 raise ValueError("%s contains no parameter distribution data" % filename)
102 if coinc_params_distributions is None:
103 coinc_params_distributions = this_coinc_params_distributions
105 coinc_params_distributions += this_coinc_params_distributions
106 if ranking_data is None:
107 ranking_data = this_ranking_data
109 ranking_data += this_ranking_data
111 seglists = this_seglists
113 seglists |= this_seglists
114 if coinc_params_distributions is None and ranking_data is None:
115 raise ValueError("no parameter distribution data loaded")
116 # FIXME: hack to fake some segments so we can generate file names
117 # if the input files didn't provide segment information
119 seglists.extent_all()
121 seglists[None] = lsctables.segments.segmentlist([lsctables.segments.segment(0, 0)])
122 return coinc_params_distributions, ranking_data, seglists
125 def load_search_results(filenames, tmp_path = None, verbose = False):
129 background_ln_likelihood_ratios = {}
130 zerolag_ln_likelihood_ratios = {}
132 for n, filename in enumerate(filenames, 1):
134 print >>sys.stderr,
"%d/%d: %s" % (n, len(filenames), filename)
135 working_filename = dbtables.get_connection_filename(filename, tmp_path = tmp_path, verbose = verbose)
136 connection = sqlite3.connect(working_filename)
138 xmldoc = dbtables.get_xml(connection)
139 definer_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
141 for participating_instruments, ln_likelihood_ratio, is_background in connection.cursor().execute(
"""
144 coinc_event.likelihood,
151 time_slide.time_slide_id == coinc_event.time_slide_id
152 AND time_slide.offset != 0
156 JOIN coinc_inspiral ON (
157 coinc_inspiral.coinc_event_id == coinc_event.coinc_event_id
160 coinc_event.coinc_def_id == ?
162 participating_instruments = frozenset(lsctables.instrument_set_from_ifos(participating_instruments))
164 background_ln_likelihood_ratios.setdefault(participating_instruments, []).append(ln_likelihood_ratio)
166 zerolag_ln_likelihood_ratios.setdefault(participating_instruments, []).append(ln_likelihood_ratio)
169 dbtables.discard_connection_filename(filename, working_filename, verbose = verbose)
171 return background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios
179 options, filenames = parse_command_line()
187 coinc_params_distributions, ranking_data, seglists = load_distributions(filenames, verbose = options.verbose)
188 if coinc_params_distributions is not None:
189 coinc_params_distributions.finish(verbose = options.verbose)
190 if ranking_data is not None:
191 ranking_data.finish(verbose = options.verbose)
192 fapfar = far.FAPFAR(ranking_data, livetime = far.get_live_time(seglists))
197 background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios = load_search_results(options.database, tmp_path = options.tmp_space, verbose = options.verbose)
205 for instrument in (
"H1",
"L1",
"V1"):
206 for snr_chi_type in (
"background_pdf",
"injection_pdf",
"zero_lag_pdf",
"LR"):
207 fig = plotfar.plot_snr_chi_pdf(coinc_params_distributions, instrument, snr_chi_type, options.max_snr)
210 plotname = inspiral_pipe.
T050017_filename(instrument,
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_%s_SNRCHI2" % (options.user_tag, snr_chi_type.upper()), int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
212 print >>sys.stderr,
"writing %s" % plotname
213 fig.savefig(plotname)
216 # Trigger and event rates
217 fig = plotfar.plot_rates(coinc_params_distributions, ranking_data)
218 plotname = inspiral_pipe.
T050017_filename(
"H1L1V1",
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_RATES" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
220 print >>sys.stderr,
"writing %s" % plotname
221 fig.savefig(plotname)
225 for (instruments, horizon_distances) in sorted(coinc_params_distributions.snr_joint_pdf_cache.keys(), key = lambda (a, horizon_distances): sorted(horizon_distances)):
226 fig = plotfar.plot_snr_joint_pdf(coinc_params_distributions, instruments, horizon_distances, options.max_snr)
228 hd_dict = dict(horizon_distances)
229 plotname = inspiral_pipe.
T050017_filename(instruments,
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_SNR_PDF_%s" % (options.user_tag,
"_".join([
"%s_%s" % (k, hd_dict[k]) for k in sorted(hd_dict)]) ), int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
231 print >>sys.stderr,
"writing %s" % plotname
232 fig.savefig(plotname)
235 if ranking_data is not None:
236 for instruments, binnedarray in ranking_data.background_likelihood_pdfs.items():
237 fig = plotfar.plot_likelihood_ratio_pdf(ranking_data, instruments, (options.min_log_lambda, options.max_log_lambda),
"Noise", binnedarray_string =
"background_likelihood_pdfs")
238 plotname = inspiral_pipe.
T050017_filename(instruments or
"COMBINED",
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_PDF" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
240 print >>sys.stderr,
"writing %s" % plotname
241 fig.savefig(plotname)
243 ranking_stats = sum(zerolag_ln_likelihood_ratios.values(), []) if zerolag_ln_likelihood_ratios is not None else None
244 fig = plotfar.plot_likelihood_ratio_ccdf(fapfar, (options.min_log_lambda, options.max_log_lambda),
"Noise")
245 plotname = inspiral_pipe.
T050017_filename(
"NONE",
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_CCDF" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
247 print >>sys.stderr,
"writing %s" % plotname
248 fig.savefig(plotname)
252 # I am not sure what to do here.
255 for instruments, binnedarray in ranking_data.signal_likelihood_pdfs.items() if ranking_data is not None else ():
256 fig = plot_likelihood_ratio_pdf(instruments, binnedarray, (options.min_log_lambda, options.max_log_lambda),
"Signal")
257 plotname = inspiral_pipe.
T050017_filename(instruments or
"NONE",
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_signal_likelihood_ratio_pdf" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
259 print >>sys.stderr,
"writing %s" % plotname
260 fig.savefig(plotname)
262 # the interpolators won
't accept infinite co-ordinates, so
263 # have to discard the last bin
264 ranks = ranking_data.background_likelihood_rates[instruments].bins[0].upper()[:-1]
265 counts = ranking_data.background_likelihood_rates[instruments].array[:-1]
267 # cumulative distribution function and its complement.
268 # it's numerically better to recompute the ccdf by
269 # reversing the array of counts than trying to subtract the
271 ccdf = counts[::-1].cumsum()[::-1]
274 # cdf boundary condition: cdf = 1/e at the ranking
275 # statistic threshold so that
276 # self.far_from_rank(threshold) * livetime =
277 # observed count of events above threshold.
278 ccdf *= 1. - 1. / math.e
280 from scipy import interpolate
281 fig = plot_likelihood_ratio_ccdf(instruments, interpolate.interp1d(ranks, ccdf), None, (options.min_log_lambda, options.max_log_lambda),
"Signal")
282 plotname = inspiral_pipe.
T050017_filename(instruments or
"NONE",
"GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_signal_likelihood_ratio_ccdf" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
284 print >>sys.stderr,
"writing %s" % plotname
285 fig.savefig(plotname)