3 # Copyright (C) 2011--2013 Kipp Cannon, Chad Hanna, Drew Keppel
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # Compute FAR and FAP distributions from the likelihood CCDFs.
22 # ### Command line interface
24 # + `--background-bins-file` [filename]: Set the name of the xml file containing the marginalized likelihood (required).
25 # + `--tmp-space` [dir]: Set the name of the tmp space if working with sqlite.
26 # + `--non-injection-db` [filename]: Provide the name of a database from a non-injection run. Can be given multiple times (default = []).
27 # + `--injection-db` [filename]: Provide the name of a database from an injection run. Can be given multiple times. Databases are assumed to be over the same time period as the non injection databases using the same templates. If not the results will be nonsense (default = []).
28 # + `--verbose`: Be verbose.
31 # =============================================================================
35 # =============================================================================
39 from optparse
import OptionParser
44 from pysqlite2
import dbapi2 as sqlite3
45 sqlite3.enable_callback_tracebacks(True)
49 from glue.ligolw
import ligolw
50 from glue.ligolw
import dbtables
51 from glue.ligolw
import lsctables
52 from glue.ligolw
import utils as ligolw_utils
53 from glue.ligolw.utils
import process as ligolw_process
54 from glue.ligolw.utils
import search_summary as ligolw_search_summary
55 from pylal
import ligolw_thinca
56 from gstlal
import far
60 # =============================================================================
64 # =============================================================================
68 def parse_command_line():
69 parser = OptionParser()
70 parser.add_option(
"--background-bins-file", metavar =
"filename", help =
"Set the name of the xml file containing the marginalized likelihood (required).")
71 parser.add_option(
"--tmp-space", metavar =
"dir", help =
"Set the name of the tmp space if working with sqlite.")
72 parser.add_option(
"--non-injection-db", metavar =
"filename", default = [], action =
"append", help =
"Provide the name of a database from a non-injection run. Can be given multiple times.")
73 parser.add_option(
"--injection-db", metavar =
"filename", default = [], action =
"append", help =
"Provide the name of a database from an injection run. Can be given multiple times. Databases are assumed to be over the same time period as the non injection databases using the same templates. If not the results will be nonsense.")
74 parser.add_option(
"--force",
"-f", action =
"store_true", help =
"Force script to reevaluate FARs and FAPs.")
75 parser.add_option(
"--verbose",
"-v", action =
"store_true", help =
"Be verbose.")
76 options, filenames = parser.parse_args()
78 if options.background_bins_file is None:
79 raise ValueError(
"must set --background-bins-file")
81 if not options.non_injection_db + options.injection_db:
82 raise ValueError(
"must provide at least one database file to process")
85 raise ValueError(
"unrecognized trailing arguments")
87 return options, filenames
91 # =============================================================================
95 # =============================================================================
104 options, filenames = parse_command_line()
108 # Retrieve distribution data
112 coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(options.background_bins_file, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler,
verbose = options.
verbose))
113 if coinc_params_distributions is None:
114 raise ValueError(
"\"%s\" does not contain event parameter PDFs" % options.background_bins_file)
115 if ranking_data is None:
116 raise ValueError(
"\"%s\" does not contain likelihood ratio PDFs" % options.background_bins_file)
120 # Count the number of above-threshold events
125 print >>sys.stderr,
"beginning count of above-threshold events"
127 for binnedarray in ranking_data.zero_lag_likelihood_rates.values():
128 binnedarray.array[:] = 0.
130 for n, filename in enumerate(options.non_injection_db, start = 1):
132 # get working copy of database. do not use scratch space for this,
137 print >>sys.stderr,
"%d/%d: %s" % (n, len(options.non_injection_db), filename)
138 working_filename = dbtables.get_connection_filename(filename, tmp_path = None,
verbose = options.
verbose)
139 connection = sqlite3.connect(working_filename)
145 xmldoc = dbtables.get_xml(connection)
146 coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
148 ranking_data.collect_zero_lag_rates(connection, coinc_def_id)
155 dbtables.discard_connection_filename(filename, working_filename,
verbose = options.
verbose)
158 # FIXME: need to do this to get the combined PDFs populated. the XML file
159 # contains both bin counts and PDFs for all instrument sets but none for
160 # the
"combined" set. the bin counts for the combined set are populated by
161 # the .from_xml() method but the PDFs are built in the .finish() method.
162 # because all the other PDFs come out of the file we normally would not
163 # need to invoke the .finish() method here at all. look into getting the
164 # combined PDFs built by the .from_xml() method as well. NOTE: that method
165 # can
't just call .finish() itself because that's a huge waste of time when
166 # many files need to be read and summed.
171 # Initialize the FAP & FAR assignment machine
175 fapfar = far.FAPFAR(ranking_data, livetime = far.get_live_time(seglists))
179 # Iterate over databases
184 print >>sys.stderr,
"assigning FAPs and FARs"
186 for n, filename in enumerate(options.non_injection_db + options.injection_db, start = 1):
188 # get working copy of database
192 print >>sys.stderr,
"%d/%d: %s" % (n, len(options.non_injection_db + options.injection_db), filename)
193 if not options.force and sqlite3.connect(filename).cursor().execute(
"""SELECT EXISTS(SELECT * FROM process WHERE program == ?);""", (u
"gstlal_compute_far_from_snr_chisq_histograms",)).fetchone()[0]:
195 print >>sys.stderr,
"already processed, skipping"
197 working_filename = dbtables.get_connection_filename(filename, tmp_path = options.tmp_space,
verbose = options.
verbose)
198 connection = sqlite3.connect(working_filename)
204 xmldoc = dbtables.get_xml(connection)
205 process = ligolw_process.register_to_xmldoc(xmldoc, u
"gstlal_compute_far_from_snr_chisq_histograms", {})
208 # assign FAPs and FARs
211 fapfar.assign_faps(connection)
212 fapfar.assign_fars(connection)
215 # done, restore file to original location
218 ligolw_process.set_process_end_time(process)
219 connection.cursor().execute(
"UPDATE process SET end_time = ? WHERE process_id == ?", (process.end_time, process.process_id))
223 dbtables.put_connection_filename(filename, working_filename, verbose = options.verbose)
226 print >>sys.stderr,
"FAP and FAR assignment complete"
230 # Rewrite parameter and ranking statistic distribution file but with
231 # zero-lag counts replaced with count-above-threshold.
235 xmldoc = ligolw.Document()
236 xmldoc.appendChild(ligolw.LIGO_LW())
237 process = ligolw_process.register_to_xmldoc(xmldoc, u
"gstlal_compute_far_from_snr_chisq_histograms", {})
238 search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
239 far.gen_likelihood_control_doc(xmldoc, process, coinc_params_distributions, ranking_data, seglists)
240 ligolw_process.set_process_end_time(process)
242 outname =
"post_%s" % options.background_bins_file
243 ligolw_utils.write_filename(xmldoc, outname, gz = outname.endswith(
".gz"),
verbose = options.verbose)
246 print >>sys.stderr,
"done"