gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_inspiral_plot_background
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2013 Chad Hanna, Kipp Cannon
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 ## @file gstlal_inspiral_plot_background
20 # A program to plot the likelihood distributions in noise of a gstlal inspiral analysis
21 #
22 # ### Command line interface
23 #
24 # + `--database` [filename]: Retrieve search results from this database (optional). Can be given multiple times.
25 # + `--database-cache` [filename]: Retrieve search results from all databases in this LAL cache (optional). See lalapps_path2cache.
26 # + `--max-snr` [value] (float): Plot SNR PDFs up to this value of SNR (default = 200).
27 # + `--max-log-lambda` [value] (float): Plot ranking statistic CDFs, etc., up to this value of the natural logarithm of the likelihood ratio (default = 100).
28 # + `--min-log-lambda` [value] (float): Plot ranking statistic CDFs, etc., down to this value of the natural logarithm of the likelihood ratio (default = -5).
29 # + `--output-dir` [path]: Write output to this directory (default = ".").
30 # + `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
31 # + `--user-tag` [tag]: Set the adjustable component of the description fields in the output filenames (default = "ALL").
32 # + `--verbose`: Be verbose.
33 
34 import math
35 import matplotlib
36 matplotlib.rcParams.update({
37  "font.size": 10.0,
38  "axes.titlesize": 10.0,
39  "axes.labelsize": 10.0,
40  "xtick.labelsize": 8.0,
41  "ytick.labelsize": 8.0,
42  "legend.fontsize": 8.0,
43  "figure.dpi": 600,
44  "savefig.dpi": 600,
45  "text.usetex": True
46 })
47 from matplotlib import figure
48 from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
49 import numpy
50 from optparse import OptionParser
51 import sqlite3
52 import sys
53 
54 
55 from glue.lal import CacheEntry
56 from glue.ligolw import dbtables
57 from glue.ligolw import lsctables
58 from glue.ligolw import utils as ligolw_utils
59 from pylal import ligolw_thinca
60 
61 
62 from gstlal import far
63 from gstlal import plotfar
64 from gstlal import inspiral_pipe
65 
66 
67 def parse_command_line():
68  parser = OptionParser()
69  parser.add_option("-d", "--database", metavar = "filename", action = "append", help = "Retrieve search results from this database (optional). Can be given multiple times.")
70  parser.add_option("-c", "--database-cache", metavar = "filename", help = "Retrieve search results from all databases in this LAL cache (optional). See lalapps_path2cache.")
71  parser.add_option("--max-snr", metavar = "SNR", default = 200., type = "float", help = "Plot SNR PDFs up to this value of SNR (default = 200).")
72  parser.add_option("--max-log-lambda", metavar = "value", default = 100., type = "float", help = "Plot ranking statistic CCDFs, etc., up to this value of the natural logarithm of the likelihood ratio (default = 100).")
73  parser.add_option("--min-log-lambda", metavar = "value", default = -5., type = "float", help = "Plot ranking statistic CCDFs, etc., down to this value of the natural logarithm of the likelihood ratio (default = -10).")
74  parser.add_option("--output-dir", metavar = "output-dir", default = ".", help = "Write output to this directory (default = \".\").")
75  parser.add_option("--output-format", metavar = "extension", default = ".png", help = "Select output format by choosen the filename extension (default = \".png\").")
76  parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
77  parser.add_option("--user-tag", metavar = "user-tag", default = "ALL", help = "Set the adjustable component of the description fields in the output filenames (default = \"ALL\").")
78  parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
79  options, filenames = parser.parse_args()
80 
81  if options.database_cache is not None:
82  if options.database is None:
83  options.database = []
84  options.database += [CacheEntry(line).path for line in open(options.database_cache)]
85 
86  if options.output_format not in (".png", ".pdf", ".svg"):
87  raise ValueError("invalid --output-format \"%s\"" % options.output_format)
88 
89  options.user_tag = options.user_tag.upper()
90 
91  return options, filenames
92 
93 
94 def load_distributions(filenames, verbose = False):
95  coinc_params_distributions, ranking_data, seglists = None, None, None
96  for n, filename in enumerate(filenames, 1):
97  if verbose:
98  print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
99  this_coinc_params_distributions, this_ranking_data, this_seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(filenames[0], contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = verbose))
100  if this_coinc_params_distributions is None and this_ranking_data is None:
101  raise ValueError("%s contains no parameter distribution data" % filename)
102  if coinc_params_distributions is None:
103  coinc_params_distributions = this_coinc_params_distributions
104  else:
105  coinc_params_distributions += this_coinc_params_distributions
106  if ranking_data is None:
107  ranking_data = this_ranking_data
108  else:
109  ranking_data += this_ranking_data
110  if seglists is None:
111  seglists = this_seglists
112  else:
113  seglists |= this_seglists
114  if coinc_params_distributions is None and ranking_data is None:
115  raise ValueError("no parameter distribution data loaded")
116  # FIXME: hack to fake some segments so we can generate file names
117  # if the input files didn't provide segment information
118  try:
119  seglists.extent_all()
120  except ValueError:
121  seglists[None] = lsctables.segments.segmentlist([lsctables.segments.segment(0, 0)])
122  return coinc_params_distributions, ranking_data, seglists
123 
124 
125 def load_search_results(filenames, tmp_path = None, verbose = False):
126  if not filenames:
127  return None, None
128 
129  background_ln_likelihood_ratios = {}
130  zerolag_ln_likelihood_ratios = {}
131 
132  for n, filename in enumerate(filenames, 1):
133  if verbose:
134  print >>sys.stderr, "%d/%d: %s" % (n, len(filenames), filename)
135  working_filename = dbtables.get_connection_filename(filename, tmp_path = tmp_path, verbose = verbose)
136  connection = sqlite3.connect(working_filename)
137 
138  xmldoc = dbtables.get_xml(connection)
139  definer_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
140 
141  for participating_instruments, ln_likelihood_ratio, is_background in connection.cursor().execute("""
142 SELECT
143  coinc_inspiral.ifos,
144  coinc_event.likelihood,
145  EXISTS (
146  SELECT
147  *
148  FROM
149  time_slide
150  WHERE
151  time_slide.time_slide_id == coinc_event.time_slide_id
152  AND time_slide.offset != 0
153  )
154 FROM
155  coinc_event
156  JOIN coinc_inspiral ON (
157  coinc_inspiral.coinc_event_id == coinc_event.coinc_event_id
158  )
159 WHERE
160  coinc_event.coinc_def_id == ?
161  """, (definer_id,)):
162  participating_instruments = frozenset(lsctables.instrument_set_from_ifos(participating_instruments))
163  if is_background:
164  background_ln_likelihood_ratios.setdefault(participating_instruments, []).append(ln_likelihood_ratio)
165  else:
166  zerolag_ln_likelihood_ratios.setdefault(participating_instruments, []).append(ln_likelihood_ratio)
167 
168  connection.close()
169  dbtables.discard_connection_filename(filename, working_filename, verbose = verbose)
170 
171  return background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios
172 
173 
174 #
175 # command line
176 #
177 
178 
179 options, filenames = parse_command_line()
180 
181 
182 #
183 # load input
184 #
185 
186 
187 coinc_params_distributions, ranking_data, seglists = load_distributions(filenames, verbose = options.verbose)
188 if coinc_params_distributions is not None:
189  coinc_params_distributions.finish(verbose = options.verbose)
190 if ranking_data is not None:
191  ranking_data.finish(verbose = options.verbose)
192  fapfar = far.FAPFAR(ranking_data, livetime = far.get_live_time(seglists))
193 else:
194  fapfar = None
195 
196 
197 background_ln_likelihood_ratios, zerolag_ln_likelihood_ratios = load_search_results(options.database, tmp_path = options.tmp_space, verbose = options.verbose)
198 
199 
200 #
201 # plots
202 #
203 
204 # SNR and Chisquared
205 for instrument in ("H1","L1","V1"):
206  for snr_chi_type in ("background_pdf", "injection_pdf", "zero_lag_pdf", "LR"):
207  fig = plotfar.plot_snr_chi_pdf(coinc_params_distributions, instrument, snr_chi_type, options.max_snr)
208  if fig is None:
209  continue
210  plotname = inspiral_pipe.T050017_filename(instrument, "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_%s_SNRCHI2" % (options.user_tag, snr_chi_type.upper()), int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
211  if options.verbose:
212  print >>sys.stderr, "writing %s" % plotname
213  fig.savefig(plotname)
214 
215 
216 # Trigger and event rates
217 fig = plotfar.plot_rates(coinc_params_distributions, ranking_data)
218 plotname = inspiral_pipe.T050017_filename("H1L1V1", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_RATES" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
219 if options.verbose:
220  print >>sys.stderr, "writing %s" % plotname
221 fig.savefig(plotname)
222 
223 
224 # SNR PDFs
225 for (instruments, horizon_distances) in sorted(coinc_params_distributions.snr_joint_pdf_cache.keys(), key = lambda (a, horizon_distances): sorted(horizon_distances)):
226  fig = plotfar.plot_snr_joint_pdf(coinc_params_distributions, instruments, horizon_distances, options.max_snr)
227  if fig is not None:
228  hd_dict = dict(horizon_distances)
229  plotname = inspiral_pipe.T050017_filename(instruments, "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_SNR_PDF_%s" % (options.user_tag, "_".join(["%s_%s" % (k, hd_dict[k]) for k in sorted(hd_dict)]) ), int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
230  if options.verbose:
231  print >>sys.stderr, "writing %s" % plotname
232  fig.savefig(plotname)
233 
234 
235 if ranking_data is not None:
236  for instruments, binnedarray in ranking_data.background_likelihood_pdfs.items():
237  fig = plotfar.plot_likelihood_ratio_pdf(ranking_data, instruments, (options.min_log_lambda, options.max_log_lambda), "Noise", binnedarray_string = "background_likelihood_pdfs")
238  plotname = inspiral_pipe.T050017_filename(instruments or "COMBINED", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_PDF" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
239  if options.verbose:
240  print >>sys.stderr, "writing %s" % plotname
241  fig.savefig(plotname)
242 
243  ranking_stats = sum(zerolag_ln_likelihood_ratios.values(), []) if zerolag_ln_likelihood_ratios is not None else None
244  fig = plotfar.plot_likelihood_ratio_ccdf(fapfar, (options.min_log_lambda, options.max_log_lambda), "Noise")
245  plotname = inspiral_pipe.T050017_filename("NONE", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_NOISE_LIKELIHOOD_RATIO_CCDF" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
246  if options.verbose:
247  print >>sys.stderr, "writing %s" % plotname
248  fig.savefig(plotname)
249 
250 
251 # FIXME
252 # I am not sure what to do here.
253 sys.exit(0)
254 
255 for instruments, binnedarray in ranking_data.signal_likelihood_pdfs.items() if ranking_data is not None else ():
256  fig = plot_likelihood_ratio_pdf(instruments, binnedarray, (options.min_log_lambda, options.max_log_lambda), "Signal")
257  plotname = inspiral_pipe.T050017_filename(instruments or "NONE", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_signal_likelihood_ratio_pdf" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
258  if options.verbose:
259  print >>sys.stderr, "writing %s" % plotname
260  fig.savefig(plotname)
261 
262  # the interpolators won't accept infinite co-ordinates, so
263  # have to discard the last bin
264  ranks = ranking_data.background_likelihood_rates[instruments].bins[0].upper()[:-1]
265  counts = ranking_data.background_likelihood_rates[instruments].array[:-1]
266 
267  # cumulative distribution function and its complement.
268  # it's numerically better to recompute the ccdf by
269  # reversing the array of counts than trying to subtract the
270  # cdf from 1.
271  ccdf = counts[::-1].cumsum()[::-1]
272  ccdf /= ccdf[0]
273 
274  # cdf boundary condition: cdf = 1/e at the ranking
275  # statistic threshold so that
276  # self.far_from_rank(threshold) * livetime =
277  # observed count of events above threshold.
278  ccdf *= 1. - 1. / math.e
279 
280  from scipy import interpolate
281  fig = plot_likelihood_ratio_ccdf(instruments, interpolate.interp1d(ranks, ccdf), None, (options.min_log_lambda, options.max_log_lambda), "Signal")
282  plotname = inspiral_pipe.T050017_filename(instruments or "NONE", "GSTLAL_INSPIRAL_PLOT_BACKGROUND_%s_signal_likelihood_ratio_ccdf" % options.user_tag, int(seglists.extent_all()[0]), int(seglists.extent_all()[1]), options.output_format, path = options.output_dir)
283  if options.verbose:
284  print >>sys.stderr, "writing %s" % plotname
285  fig.savefig(plotname)