gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_compute_far_from_snr_chisq_histograms
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2011--2013 Kipp Cannon, Chad Hanna, Drew Keppel
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 ## @file
20 # Compute FAR and FAP distributions from the likelihood CCDFs.
21 #
22 # ### Command line interface
23 #
24 # + `--background-bins-file` [filename]: Set the name of the xml file containing the marginalized likelihood (required).
25 # + `--tmp-space` [dir]: Set the name of the tmp space if working with sqlite.
26 # + `--non-injection-db` [filename]: Provide the name of a database from a non-injection run. Can be given multiple times (default = []).
27 # + `--injection-db` [filename]: Provide the name of a database from an injection run. Can be given multiple times. Databases are assumed to be over the same time period as the non injection databases using the same templates. If not the results will be nonsense (default = []).
28 # + `--verbose`: Be verbose.
29 
30 #
31 # =============================================================================
32 #
33 # Preamble
34 #
35 # =============================================================================
36 #
37 
38 
39 from optparse import OptionParser
40 try:
41  import sqlite3
42 except ImportError:
43  # pre 2.5.x
44  from pysqlite2 import dbapi2 as sqlite3
45 sqlite3.enable_callback_tracebacks(True)
46 import sys
47 
48 
49 from glue.ligolw import ligolw
50 from glue.ligolw import dbtables
51 from glue.ligolw import lsctables
52 from glue.ligolw import utils as ligolw_utils
53 from glue.ligolw.utils import process as ligolw_process
54 from glue.ligolw.utils import search_summary as ligolw_search_summary
55 from pylal import ligolw_thinca
56 from gstlal import far
57 
58 
59 #
60 # =============================================================================
61 #
62 # Command Line
63 #
64 # =============================================================================
65 #
66 
67 
68 def parse_command_line():
69  parser = OptionParser()
70  parser.add_option("--background-bins-file", metavar = "filename", help = "Set the name of the xml file containing the marginalized likelihood (required).")
71  parser.add_option("--tmp-space", metavar = "dir", help = "Set the name of the tmp space if working with sqlite.")
72  parser.add_option("--non-injection-db", metavar = "filename", default = [], action = "append", help = "Provide the name of a database from a non-injection run. Can be given multiple times.")
73  parser.add_option("--injection-db", metavar = "filename", default = [], action = "append", help = "Provide the name of a database from an injection run. Can be given multiple times. Databases are assumed to be over the same time period as the non injection databases using the same templates. If not the results will be nonsense.")
74  parser.add_option("--force", "-f", action = "store_true", help = "Force script to reevaluate FARs and FAPs.")
75  parser.add_option("--verbose", "-v", action = "store_true", help = "Be verbose.")
76  options, filenames = parser.parse_args()
77 
78  if options.background_bins_file is None:
79  raise ValueError("must set --background-bins-file")
80 
81  if not options.non_injection_db + options.injection_db:
82  raise ValueError("must provide at least one database file to process")
83 
84  if filenames:
85  raise ValueError("unrecognized trailing arguments")
86 
87  return options, filenames
88 
89 
90 #
91 # =============================================================================
92 #
93 # Main
94 #
95 # =============================================================================
96 #
97 
98 
99 #
100 # Parse command line
101 #
102 
103 
104 options, filenames = parse_command_line()
105 
106 
107 #
108 # Retrieve distribution data
109 #
110 
111 
112 coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(options.background_bins_file, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose))
113 if coinc_params_distributions is None:
114  raise ValueError("\"%s\" does not contain event parameter PDFs" % options.background_bins_file)
115 if ranking_data is None:
116  raise ValueError("\"%s\" does not contain likelihood ratio PDFs" % options.background_bins_file)
117 
118 
119 #
120 # Count the number of above-threshold events
121 #
122 
123 
124 if options.verbose:
125  print >>sys.stderr, "beginning count of above-threshold events"
126 
127 for binnedarray in ranking_data.zero_lag_likelihood_rates.values():
128  binnedarray.array[:] = 0.
129 
130 for n, filename in enumerate(options.non_injection_db, start = 1):
131  #
132  # get working copy of database. do not use scratch space for this,
133  # query is very fast
134  #
135 
136  if options.verbose:
137  print >>sys.stderr, "%d/%d: %s" % (n, len(options.non_injection_db), filename)
138  working_filename = dbtables.get_connection_filename(filename, tmp_path = None, verbose = options.verbose)
139  connection = sqlite3.connect(working_filename)
140 
141  #
142  # update counts
143  #
144 
145  xmldoc = dbtables.get_xml(connection)
146  coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
147  xmldoc.unlink()
148  ranking_data.collect_zero_lag_rates(connection, coinc_def_id)
149 
150  #
151  # done
152  #
153 
154  connection.close()
155  dbtables.discard_connection_filename(filename, working_filename, verbose = options.verbose)
156 
157 
158 # FIXME: need to do this to get the combined PDFs populated. the XML file
159 # contains both bin counts and PDFs for all instrument sets but none for
160 # the "combined" set. the bin counts for the combined set are populated by
161 # the .from_xml() method but the PDFs are built in the .finish() method.
162 # because all the other PDFs come out of the file we normally would not
163 # need to invoke the .finish() method here at all. look into getting the
164 # combined PDFs built by the .from_xml() method as well. NOTE: that method
165 # can't just call .finish() itself because that's a huge waste of time when
166 # many files need to be read and summed.
167 ranking_data.finish(verbose = options.verbose)
168 
169 
170 #
171 # Initialize the FAP & FAR assignment machine
172 #
173 
174 
175 fapfar = far.FAPFAR(ranking_data, livetime = far.get_live_time(seglists))
176 
177 
178 #
179 # Iterate over databases
180 #
181 
182 
183 if options.verbose:
184  print >>sys.stderr, "assigning FAPs and FARs"
185 
186 for n, filename in enumerate(options.non_injection_db + options.injection_db, start = 1):
187  #
188  # get working copy of database
189  #
190 
191  if options.verbose:
192  print >>sys.stderr, "%d/%d: %s" % (n, len(options.non_injection_db + options.injection_db), filename)
193  if not options.force and sqlite3.connect(filename).cursor().execute("""SELECT EXISTS(SELECT * FROM process WHERE program == ?);""", (u"gstlal_compute_far_from_snr_chisq_histograms",)).fetchone()[0]:
194  if options.verbose:
195  print >>sys.stderr, "already processed, skipping"
196  continue
197  working_filename = dbtables.get_connection_filename(filename, tmp_path = options.tmp_space, verbose = options.verbose)
198  connection = sqlite3.connect(working_filename)
199 
200  #
201  # record our passage
202  #
203 
204  xmldoc = dbtables.get_xml(connection)
205  process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_compute_far_from_snr_chisq_histograms", {})
206 
207  #
208  # assign FAPs and FARs
209  #
210 
211  fapfar.assign_faps(connection)
212  fapfar.assign_fars(connection)
213 
214  #
215  # done, restore file to original location
216  #
217 
218  ligolw_process.set_process_end_time(process)
219  connection.cursor().execute("UPDATE process SET end_time = ? WHERE process_id == ?", (process.end_time, process.process_id))
220 
221  connection.commit()
222  connection.close()
223  dbtables.put_connection_filename(filename, working_filename, verbose = options.verbose)
224 
225 if options.verbose:
226  print >>sys.stderr, "FAP and FAR assignment complete"
227 
228 
229 #
230 # Rewrite parameter and ranking statistic distribution file but with
231 # zero-lag counts replaced with count-above-threshold.
232 #
233 
234 
235 xmldoc = ligolw.Document()
236 xmldoc.appendChild(ligolw.LIGO_LW())
237 process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_compute_far_from_snr_chisq_histograms", {})
238 search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
239 far.gen_likelihood_control_doc(xmldoc, process, coinc_params_distributions, ranking_data, seglists)
240 ligolw_process.set_process_end_time(process)
241 
242 outname = "post_%s" % options.background_bins_file
243 ligolw_utils.write_filename(xmldoc, outname, gz = outname.endswith(".gz"), verbose = options.verbose)
244 
245 if options.verbose:
246  print >>sys.stderr, "done"