gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_inspiral_calc_likelihood
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2010--2014 Kipp Cannon, Chad Hanna
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 ## @file
20 # A program to compute the likelhood ratios of inspiral triggers
21 #
22 # ### Command line interface
23 #
24 # + `--input-cache` [filename]: Also process the files named in this LAL cache. See lalapps_path2cache for information on how to produce a LAL cache file.
25 # + `--likelihood-url` [URL]: Set the name of the likelihood ratio data file to use. Can be given more than once. Filenames and URLs are accepted.
26 # + `--likelihood-cache` [filename]: Also load the likelihood ratio data files listsed in this LAL cache. See lalapps_path2cache for information on how to produce a LAL cache file.
27 # + `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
28 # + `--vetoes-name` [name]: Set the name of the segment lists to use as vetoes (default = do not apply vetoes).
29 # + `--verbose`: Be verbose.
30 # + `--write-rates` [filename]: Write combined event parameter histograms together with histograms of asssigned zero-lag ranking statistic values to this file.
31 
32 
33 #
34 # =============================================================================
35 #
36 # Preamble
37 #
38 # =============================================================================
39 #
40 
41 
42 from optparse import OptionParser
43 import sys
44 
45 
46 from glue import iterutils
47 from glue.lal import CacheEntry
48 from glue.text_progress_bar import ProgressBar
49 from glue.ligolw import ligolw
50 from glue.ligolw import lsctables
51 from glue.ligolw import utils as ligolw_utils
52 from glue.ligolw.utils import process as ligolw_process
53 from glue.ligolw.utils import search_summary as ligolw_search_summary
54 from glue.ligolw.utils import segments as ligolw_segments
55 from glue import segments
56 from pylal import ligolw_burca2
57 from pylal import ligolw_thinca
58 from pylal import snglcoinc
59 from gstlal import far
60 
61 
62 process_name = u"gstlal_inspiral_calc_likelihood"
63 
64 
65 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
66 __version__ = "git id %s" % "" # FIXME
67 __date__ = "" # FIXME
68 
69 
70 #
71 # =============================================================================
72 #
73 # Command Line
74 #
75 # =============================================================================
76 #
77 
78 
79 def parse_command_line():
80  parser = OptionParser(
81  version = "Name: %%prog\n%s" % "" # FIXME
82  )
83  parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache. See lalapps_path2cache for information on how to produce a LAL cache file.")
84  parser.add_option("-l", "--likelihood-url", metavar = "URL", action = "append", help = "Set the name of the likelihood ratio data file to use. Can be given more than once. Filenames and URLs are accepted.")
85  parser.add_option("--likelihood-cache", metavar = "filename", help = "Also load the likelihood ratio data files listsed in this LAL cache. See lalapps_path2cache for information on how to produce a LAL cache file.")
86  parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file. The database file will be worked on in this directory, and then moved to the final location when complete. This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
87  parser.add_option("--vetoes-name", metavar = "name", help = "Set the name of the segment lists to use as vetoes (default = do not apply vetoes).")
88  parser.add_option("-f", "--force", action = "store_true", help = "Force recomputation of likelihood values.")
89  parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
90  parser.add_option("-w", "--write-rates", metavar = "filename", help = "Write combined event parameter histograms together with histograms of asssigned zero-lag ranking statistic values to this file.")
91  options, filenames = parser.parse_args()
92 
93  paramdict = options.__dict__
94 
95  options.likelihood_urls = []
96  if options.likelihood_urls is not None:
97  options.likelihood_urls += options.likelihood_url
98  if options.likelihood_cache is not None:
99  options.likelihood_urls += [CacheEntry(line).url for line in open(options.likelihood_cache)]
100  if not options.likelihood_urls:
101  raise ValueError("no likelihood URLs specified")
102 
103  if options.input_cache:
104  filenames += [CacheEntry(line).path for line in open(options.input_cache)]
105 
106  return options, paramdict, filenames
107 
108 
109 #
110 # =============================================================================
111 #
112 # Support Funcs for Likelihood Ratio Code
113 #
114 # =============================================================================
115 #
116 
117 
118 def sngl_inspiral_veto_func(event, vetoseglists):
119  # return True if event should be *retained*
120  return event.ifo not in vetoseglists or event.get_end() not in vetoseglists[event.ifo]
121 
122 
123 #
124 # =============================================================================
125 #
126 # Main
127 #
128 # =============================================================================
129 #
130 
131 
132 #
133 # command line
134 #
135 
136 
137 options, paramdict, filenames = parse_command_line()
138 
139 
140 #
141 # load parameter distribution data
142 #
143 
144 
145 coincparamsdistributions = None
146 seglists = segments.segmentlistdict()
147 for n, likelihood_url in enumerate(options.likelihood_urls, start = 1):
148  if options.verbose:
149  print >>sys.stderr, "%d/%d:" % (n, len(options.likelihood_urls)),
150  xmldoc = ligolw_utils.load_url(likelihood_url, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose)
151  this_coincparamsdistributions, ignored, this_seglists = far.parse_likelihood_control_doc(xmldoc)
152  xmldoc.unlink()
153  if this_coincparamsdistributions is None:
154  raise ValueError("%s does not contain parameter distribution data" % likelihood_url)
155  if coincparamsdistributions is None:
156  coincparamsdistributions = this_coincparamsdistributions
157  else:
158  coincparamsdistributions += this_coincparamsdistributions
159  seglists |= this_seglists
160 if options.verbose:
161  print >>sys.stderr, "total livetime:\n\t%s" % ",\n\t".join("%s = %s s" % (instrument, str(abs(segs))) for instrument, segs in seglists.items())
162 
163 # compute the probability of instruments given signal
164 coincparamsdistributions.populate_prob_of_instruments_given_signal(segs = seglists, n = 1.0, verbose = options.verbose)
165 
166 # compute the instrument combination counts
167 coincparamsdistributions.add_instrument_combination_counts(segs = seglists, verbose = options.verbose)
168 
169 #
170 # rebuild event parameter PDFs (+= method has not constructed these
171 # correctly, and we might have added additional priors to the histograms),
172 # then initialize likeihood ratio evaluator
173 #
174 
175 
176 coincparamsdistributions.finish(verbose = options.verbose)
177 ln_likelihood_ratio_func = snglcoinc.LnLikelihoodRatio(coincparamsdistributions)
178 
179 
180 #
181 # initialize a RankingData object in which to collect ranking statistic
182 # histograms if requested
183 #
184 
185 
186 if options.write_rates is not None:
187  ranking_data = far.RankingData(None, seglists.keys())
188 else:
189  ranking_data = None
190 
191 
192 #
193 # iterate over files
194 #
195 
196 
197 failed = []
198 for n, filename in enumerate(filenames, 1):
199  #
200  # open the file. be lazy and use the content handler for the
201  # distribution data files because it's fine for this, too. if a
202  # file can't be loaded because of a filesystem failure or CRC
203  # failure, or whatever, try to do the rest of the files before
204  # exiting instead of crashing right away to reduce the time spent
205  # in rescue dags.
206  #
207 
208  if options.verbose:
209  print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
210  try:
211  xmldoc = ligolw_utils.load_filename(filename, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose)
212  except Exception as e:
213  if options.verbose:
214  print >>sys.stderr, "failed to load '%s': %s. trying to continue with remaining files" % (filename, str(e))
215  failed.append(filename)
216  continue
217 
218  if not options.force and ligolw_process.doc_includes_process(xmldoc, process_name):
219  if options.verbose:
220  print >>sys.stderr, "already processed, skipping"
221  xmldoc.unlink()
222  continue
223 
224  #
225  # summarize the database, and record our passage.
226  #
227 
228  try:
229  coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
230  except KeyError:
231  if options.verbose:
232  print >>sys.stderr, "document does not contain inspiral coincidences. skipping."
233  xmldoc.unlink()
234  continue
235 
236  process = ligolw_process.register_to_xmldoc(xmldoc, process_name, paramdict)
237  search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
238 
239  if options.verbose:
240  print >>sys.stderr, "indexing document ...",
241  sngl_inspiral_table_index = dict((row.event_id, row) for row in lsctables.SnglInspiralTable.get_table(xmldoc))
242  coinc_event_map_index = dict((row.coinc_event_id, []) for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id)
243  for row in lsctables.CoincMapTable.get_table(xmldoc):
244  if row.coinc_event_id not in coinc_event_map_index:
245  continue
246  coinc_event_map_index[row.coinc_event_id].append(sngl_inspiral_table_index[row.event_id])
247  del sngl_inspiral_table_index
248  coinc_inspiral_index = dict((row.coinc_event_id, row) for row in lsctables.CoincInspiralTable.get_table(xmldoc))
249 
250  offset_vectors = lsctables.TimeSlideTable.get_table(xmldoc).as_dict()
251 
252  if options.vetoes_name is not None:
253  vetoseglists = ligolw_segments.segmenttable_get_by_name(xmldoc, options.vetoes_name).coalesce()
254  else:
255  vetoseglists = segments.segmentlistdict()
256  if options.verbose:
257  print >>sys.stderr, "done"
258 
259  #
260  # run likelihood ratio calculation.
261  #
262 
263  ligolw_burca2.assign_likelihood_ratios_xml(
264  xmldoc = xmldoc,
265  coinc_def_id = coinc_def_id,
266  offset_vectors = offset_vectors,
267  vetoseglists = vetoseglists,
268  events_func = lambda _, coinc_event_id: coinc_event_map_index[coinc_event_id],
269  veto_func = sngl_inspiral_veto_func,
270  ln_likelihood_ratio_func = ln_likelihood_ratio_func,
271  likelihood_params_func = coincparamsdistributions.coinc_params,
272  verbose = options.verbose
273  )
274 
275  #
276  # collect ranking statistic values if that's what we're doing.
277  #
278 
279  if ranking_data is not None:
280  for row in lsctables.CoincTable.get_table(xmldoc):
281  if row.coinc_def_id != coinc_def_id:
282  continue
283  instruments = lsctables.instrument_set_from_ifos(coinc_inspiral_index[row.coinc_event_id].ifos)
284  ln_likelihood_ratio = row.likelihood
285  if any(offset_vectors[row.time_slide_id].values()):
286  ranking_data.background_likelihood_rates[frozenset(instruments)][ln_likelihood_ratio,] += 1.
287  else:
288  ranking_data.zero_lag_likelihood_rates[frozenset(instruments)][ln_likelihood_ratio,] += 1.
289 
290  #
291  # close out process metadata.
292  #
293 
294  ligolw_process.set_process_end_time(process)
295 
296  #
297  # clean up.
298  #
299 
300  ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = options.verbose)
301  xmldoc.unlink()
302 
303 
304 #
305 # crash if any input files were broken
306 #
307 
308 
309 if failed:
310  raise ValueError("%s could not be processed" % ", ".join("'%s'" % filename for filename in failed))
311 
312 
313 #
314 # finally write combined ranking data file if requested
315 #
316 
317 
318 if options.write_rates is not None:
319  xmldoc = ligolw.Document()
320  xmldoc.appendChild(ligolw.LIGO_LW())
321  process = ligolw_process.register_to_xmldoc(xmldoc, process_name, paramdict)
322  search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())
323  far.gen_likelihood_control_doc(xmldoc, process, coincparamsdistributions, ranking_data, seglists)
324  ligolw_utils.write_filename(xmldoc, options.write_rates, gz = (options.write_rates or "stdout").endswith(".gz"), verbose = options.verbose)