gstlal-inspiral  0.4.2
gstlal_inspiral_marginalize_likelihood
#!/usr/bin/env python
#
# Copyright (C) 2009-2013 Kipp Cannon, Chad Hanna, Drew Keppel
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

##@file gstlal_inspiral_marginalize_likelihood
# A program to marginalize the likelihood PDFs in noise across mass bins for a
# gstlal inspiral analysis.
#
# ### Command line interface
#
# + `--ignore-missing`: Ignore and skip missing input documents.
# + `--output` [filename]: Set the output file name (default = write to stdout).
# + `--likelihood-cache` [filename]: Set the cache file name from which to read likelihood files.
# + `--verbose`: Be verbose.
#
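# ### Example
#
# An illustrative invocation (the cache and output file names here are
# hypothetical):
#
#	gstlal_inspiral_marginalize_likelihood \
#		--likelihood-cache likelihood.cache \
#		--output marginalized_likelihood.xml.gz \
#		--verbose
#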
# ### Review status
#
# | Names                                       | Hash                                        | Date       |
# | ------------------------------------------- | ------------------------------------------- | ---------- |
# | Florent, Jolien, Kipp, Chad                 | ca54624dac65388bc8dfd0d8f5ed5d187fbcb99a    | 2015-05-14 |
#
# #### Action
# - none
#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import sys


from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from glue import segments
from glue import lal


from gstlal import far


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	parser = OptionParser()
	parser.add_option("--ignore-missing", action = "store_true", help = "Ignore and skip missing input documents.")
	parser.add_option("-o", "--output", metavar = "filename", help = "Set the output file name (default = write to stdout).")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Set the cache file name from which to read likelihood files.")
	parser.add_option("--verbose", action = "store_true", help = "Be verbose.")

	options, urls = parser.parse_args()

	if options.likelihood_cache:
		urls += [lal.CacheEntry(line).url for line in open(options.likelihood_cache)]
	if not urls and not options.ignore_missing:
		raise ValueError("no input documents")

	return options, urls

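
# NOTE: for reference, each line of the --likelihood-cache file is expected
# to be a LAL cache entry, i.e. five whitespace-delimited columns giving
# observatory, description, GPS start time, duration, and URL.  an
# illustrative (hypothetical) entry:
#
#	H1 GSTLAL_LIKELIHOOD 871147316 2048 file://localhost/path/0001_LIKELIHOOD.xml.gz
#
# lal.CacheEntry parses such a line, and its .url attribute supplies the
# document location appended to the input list above.
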
87 
88 #
89 # =============================================================================
90 #
91 # Main
92 #
93 # =============================================================================
94 #
95 
96 
97 #
98 # parse command line
99 #
100 
101 
102 options, urls = parse_command_line()
103 
104 
105 #
106 # initialize output document
107 #
108 
109 
110 xmldoc = ligolw.Document()
111 xmldoc.appendChild(ligolw.LIGO_LW())
112 process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_marginalize_likelihood", options.__dict__)
113 search_summary = ligolw_search_summary.append_search_summary(xmldoc, process)
114 
115 
#
# loop over input documents
#


distributions = None
ranking_data = None
seglists = segments.segmentlistdict()
for n, url in enumerate(urls, start = 1):
	#
	# load input document
	#

	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(urls)),
	try:
		in_xmldoc = ligolw_utils.load_url(url, verbose = options.verbose, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler)
	except IOError:
		# IOError is raised when an on-disk file is missing.
		# urllib2.URLError is raised when a URL cannot be loaded,
		# but this is subclassed from IOError so IOError will catch
		# those, too.
		if not options.ignore_missing:
			raise
		if options.verbose:
			print >>sys.stderr, "Could not load \"%s\" ... skipping as requested" % url
		continue

	#
	# compute weighted sum of ranking data PDFs
	#

	this_distributions, this_ranking_data, this_seglists = far.parse_likelihood_control_doc(in_xmldoc)
	in_xmldoc.unlink()

	if this_distributions is None:
		raise ValueError("\"%s\" contains no parameter PDF data" % url)
	if this_ranking_data is None:
		raise ValueError("\"%s\" contains no ranking statistic PDF data" % url)

	if distributions is None:
		distributions = this_distributions
	else:
		distributions += this_distributions
	if ranking_data is None:
		ranking_data = this_ranking_data
	else:
		ranking_data += this_ranking_data
	seglists |= this_seglists

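# NOTE: the "+=" operations above perform the marginalization:
# schematically, the combined PDF is the weighted sum
#
#	P(x) = sum_i w_i P_i(x)
#
# over the input documents.  (schematic only; the weights and the
# underlying bin-count arithmetic are defined by the far module's
# addition operators.)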


#
# regenerate event parameter PDFs.  the += method doesn't compute these
# correctly.
#
# NOTE: the FAP/FAR code doesn't actually use any of the information in
# the parameter PDF file.  that code does need to know the "count above
# threshold", but in the offline case that count isn't known until after the
# coincs are ranked (the files loaded here have the count of all coincs,
# not just coincs above the ranking stat FAP/FAR normalization threshold).
# the compute_far_from_snr_chisq_histograms program will count the
# above-threshold events itself, replace the coinc counts in the
# parameter PDF file with its counts, and write a new file.  we wouldn't
# need to load the parameter PDF files here at all except that the Farr et
# al. rate estimation code needs the marginalized numerator and denominator
# parameter PDFs.
#
# also regenerate the likelihood ratio (ranking data) PDFs from the combined
# bin counts.  this shouldn't be needed, but it's fast and maybe more
# accurate than relying on the weighted sum of the PDFs to have been
# numerically stable.
#


distributions.finish(verbose = options.verbose)
ranking_data.finish(verbose = options.verbose)


#
# write output document
#


distributions.process_id = ranking_data.process_id = process.process_id
far.gen_likelihood_control_doc(xmldoc, process, distributions, ranking_data, seglists)
search_summary.instruments = seglists.keys()
search_summary.in_segment = search_summary.out_segment = seglists.extent_all()
ligolw_process.set_process_end_time(process)
ligolw_utils.write_filename(xmldoc, options.output, gz = (options.output or "stdout").endswith(".gz"), verbose = options.verbose)
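
# the marginalized document written above can be read back with the same
# helpers used for the inputs.  a minimal sketch (assumes the output was
# written to "marginalized_likelihood.xml.gz", a hypothetical name):
#
#	xmldoc = ligolw_utils.load_filename("marginalized_likelihood.xml.gz", contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = True)
#	distributions, ranking_data, seglists = far.parse_likelihood_control_doc(xmldoc)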