gstlal-inspiral  0.4.2
gstlal_inspiral_marginalize_likelihood
#!/usr/bin/env python
#
# Copyright (C) 2009-2013 Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

##@file gstlal_inspiral_marginalize_likelihood
# A program to marginalize the likelihood PDFs in noise across mass bins for a
# gstlal inspiral analysis.
#
# ### Command line interface
#
# + `--ignore-missing`: Ignore and skip missing input documents.
# + `--output` [filename]: Set the output file name (default = write to stdout).
# + `--likelihood-cache` [filename]: Set the cache file name from which to read likelihood files.
# + `--verbose`: Be verbose.
#
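# ### Example
#
# An illustrative invocation (the cache and output file names here are
# hypothetical):
#
#	gstlal_inspiral_marginalize_likelihood \
#		--likelihood-cache likelihood.cache \
#		--output marginalized_likelihood.xml.gz \
#		--verbose
#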
# ### Review status
#
# | Names                                       | Hash                                        | Date       |
# | ------------------------------------------- | ------------------------------------------- | ---------- |
# | Florent, Jolien, Kipp, Chad                 | ca54624dac65388bc8dfd0d8f5ed5d187fbcb99a    | 2015-05-14 |
#
# #### Action
# - none
#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import sys


from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from glue import segments
from glue import lal


from gstlal import far


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	parser = OptionParser()
	parser.add_option("--ignore-missing", action = "store_true", help = "Ignore and skip missing input documents.")
	parser.add_option("-o", "--output", metavar = "filename", help = "Set the output file name (default = write to stdout).")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Set the cache file name from which to read likelihood files.")
	parser.add_option("--verbose", action = "store_true", help = "Be verbose.")

	options, urls = parser.parse_args()

	if options.likelihood_cache:
		urls += [lal.CacheEntry(line).url for line in open(options.likelihood_cache)]
	if not urls and not options.ignore_missing:
		raise ValueError("no input documents")

	return options, urls

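
# NOTE: for reference, each line of the --likelihood-cache file is expected
# to be a LAL cache entry, i.e. five whitespace-delimited columns giving
# observatory, description, GPS start time, duration, and URL.  an
# illustrative (hypothetical) entry:
#
#	H1 GSTLAL_LIKELIHOOD 871147316 2048 file://localhost/path/0001_LIKELIHOOD.xml.gz
#
# lal.CacheEntry parses such a line, and its .url attribute supplies the
# document location appended to the input list above.
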
87 
88 #
89 # =============================================================================
90 #
91 # Main
92 #
93 # =============================================================================
94 #
95 
96 
97 #
98 # parse command line
99 #
100 
101 
102 options, urls = parse_command_line()
103 
104 
105 #
106 # initialize output document
107 #
108 
109 
110 xmldoc = ligolw.Document()
111 xmldoc.appendChild(ligolw.LIGO_LW())
112 process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_marginalize_likelihood", options.__dict__)
113 search_summary = ligolw_search_summary.append_search_summary(xmldoc, process)
114 
115 
#
# loop over input documents
#


distributions = None
ranking_data = None
seglists = segments.segmentlistdict()
for n, url in enumerate(urls, start = 1):
	#
	# load input document
	#

	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(urls)),
	try:
		in_xmldoc = ligolw_utils.load_url(url, verbose = options.verbose, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler)
	except IOError:
		# IOError is raised when an on-disk file is missing.
		# urllib2.URLError is raised when a URL cannot be loaded,
		# but this is subclassed from IOError so IOError will catch
		# those, too.
		if not options.ignore_missing:
			raise
		if options.verbose:
			print >>sys.stderr, "Could not load \"%s\" ... skipping as requested" % url
		continue

	#
	# compute weighted sum of ranking data PDFs
	#

	this_distributions, this_ranking_data, this_seglists = far.parse_likelihood_control_doc(in_xmldoc)
	in_xmldoc.unlink()

	if this_distributions is None:
		raise ValueError("\"%s\" contains no parameter PDF data" % url)
	if this_ranking_data is None:
		raise ValueError("\"%s\" contains no ranking statistic PDF data" % url)

	if distributions is None:
		distributions = this_distributions
	else:
		distributions += this_distributions
	if ranking_data is None:
		ranking_data = this_ranking_data
	else:
		ranking_data += this_ranking_data
	seglists |= this_seglists

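# NOTE: the "+=" operations above perform the marginalization:
# schematically, the combined PDF is the weighted sum
#
#	P(x) = sum_i w_i P_i(x)
#
# over the input documents.  (schematic only; the weights and the
# underlying bin-count arithmetic are defined by the far module's
# addition operators.)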


#
# regenerate event parameter PDFs.  the += method doesn't compute these
# correctly.
#
# NOTE: the FAP/FAR code doesn't actually use any of the information in
# the parameter PDF file.  that code does need to know the "count above
# threshold", but in the offline case that count isn't known until after the
# coincs are ranked (the files loaded here have the count of all coincs,
# not just coincs above the ranking stat FAP/FAR normalization threshold).
# the compute_far_from_snr_chisq_histograms program will count the
# above-threshold events itself, replace the coinc counts in the
# parameter PDF file with its counts, and write a new file.  we wouldn't
# need to load the parameter PDF files here at all except that the Farr et
# al. rate estimation code needs the marginalized numerator and denominator
# parameter PDFs.
#
# also regenerate the likelihood ratio (ranking data) PDFs from the combined
# bin counts.  this shouldn't be needed, but it's fast and maybe more
# accurate than relying on the weighted sum of the PDFs to have been
# numerically stable.
#


distributions.finish(verbose = options.verbose)
ranking_data.finish(verbose = options.verbose)


#
# write output document
#


distributions.process_id = ranking_data.process_id = process.process_id
far.gen_likelihood_control_doc(xmldoc, process, distributions, ranking_data, seglists)
search_summary.instruments = seglists.keys()
search_summary.in_segment = search_summary.out_segment = seglists.extent_all()
ligolw_process.set_process_end_time(process)
ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose)
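
# the marginalized document written above can be read back with the same
# helpers used for the inputs.  a minimal sketch (assumes the output was
# written to "marginalized_likelihood.xml.gz", a hypothetical name):
#
#	xmldoc = ligolw_utils.load_filename("marginalized_likelihood.xml.gz", contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = True)
#	distributions, ranking_data, seglists = far.parse_likelihood_control_doc(xmldoc)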