gstlal-inspiral  0.4.2
far.py
1 # Copyright (C) 2011--2014 Kipp Cannon, Chad Hanna, Drew Keppel
2 # Copyright (C) 2013 Jacob Peoples
3 #
4 # This program is free software; you can redistribute it and/or modify it
5 # under the terms of the GNU General Public License as published by the
6 # Free Software Foundation; either version 2 of the License, or (at your
7 # option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 # Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 
18 ## @file
19 # The Python module implementing the false alarm probability and false alarm rate
20 #
21 # ### Review Status
22 #
23 # STATUS: reviewed with actions
24 #
25 # | Names | Hash | Date |
26 # | ------------------------------------------- | ------------------------------------------- | ---------- |
27 # | Hanna, Cannon, Meacher, Creighton J, Robinet, Sathyaprakash, Messick, Dent, Blackburn | 7fb5f008afa337a33a72e182d455fdd74aa7aa7a | 2014-11-05 |
28 #
29 # #### Action items
30 #
31 
32 # - Address the fixed SNR PDF using the median PSD, which could be pre-computed and stored on disk. (Store a database of SNR PDFs for a variety of horizon distances.)
33 # - The binning parameters are also hard-coded; could this be a problem?
34 # - The chi-squared binning hasn't been tuned to be a good representation of the PDFs; it could be improved in the future.
35 
36 ## @package far
37 
38 
39 #
40 # =============================================================================
41 #
42 # Preamble
43 #
44 # =============================================================================
45 #
46 
47 
48 import bisect
49 import copy
50 try:
51  from fpconst import NaN, NegInf, PosInf
52 except ImportError:
53  # fpconst is not part of the standard library and might not be
54  # available
55  NaN = float("nan")
56  NegInf = float("-inf")
57  PosInf = float("+inf")
58 import itertools
59 import math
60 import multiprocessing
61 import multiprocessing.queues
62 import numpy
63 import random
64 import warnings
65 from scipy import interpolate
66 from scipy import optimize
67 # FIXME remove this when the LDG upgrades scipy on the SL6 systems, Debian
68 # systems are already fine
69 try:
70  from scipy.optimize import curve_fit
71 except ImportError:
72  from gstlal.curve_fit import curve_fit
73 from scipy import stats
74 from scipy.special import ive
75 import sqlite3
76 sqlite3.enable_callback_tracebacks(True)
77 import sys
78 
79 
80 from glue import iterutils
81 from glue.ligolw import ligolw
82 from glue.ligolw import array as ligolw_array
83 from glue.ligolw import param as ligolw_param
84 from glue.ligolw import lsctables
85 from glue.ligolw import dbtables
86 from glue.ligolw import utils as ligolw_utils
87 from glue.ligolw.utils import search_summary as ligolw_search_summary
88 from glue.ligolw.utils import segments as ligolw_segments
89 from glue.segmentsUtils import vote
90 from glue.text_progress_bar import ProgressBar
91 import lal
92 import lalsimulation
93 from pylal import rate
94 from pylal import snglcoinc
95 
96 
97 #
98 # ============================================================================
99 #
100 # Non-central chisquared pdf
101 #
102 # ============================================================================
103 #
104 
105 #
106 # FIXME this is to work around a precision issue in scipy
107 # See: https://github.com/scipy/scipy/issues/1608
108 #
109 
110 def logiv(v, z):
111  # from Abramowitz and Stegun (9.7.1), if mu = 4 v^2, then for large
112  # z:
113  #
114  # Iv(z) ~= exp(z) / (\sqrt(2 pi z)) { 1 - (mu - 1)/(8 z) + (mu - 1)(mu - 9) / (2! (8 z)^2) - (mu - 1)(mu - 9)(mu - 25) / (3! (8 z)^3) ... }
115  # Iv(z) ~= exp(z) / (\sqrt(2 pi z)) { 1 + (mu - 1)/(8 z) [-1 + (mu - 9) / (2 (8 z)) [1 - (mu - 25) / (3 (8 z)) ... ]]}
116  # log Iv(z) ~= z - .5 log(2 pi z) + log1p((mu - 1)/(8 z) (-1 + (mu - 9)/(16 z) (1 - (mu - 25)/(24 z) ... )))
117 
118  with numpy.errstate(divide = "ignore"):
119  a = numpy.log(ive(v,z))
120 
121  # because this result will only be used for large z, to silence
122  # divide-by-0 complaints from inside log1p() when z is small we
123  # clip z to 1.
124  mu = 4. * v**2.
125  with numpy.errstate(divide = "ignore", invalid = "ignore"):
126  b = -math.log(2. * math.pi) / 2. - numpy.log(z) / 2.
127  z = numpy.clip(z, 1., PosInf)
128  b += numpy.log1p((mu - 1.) / (8. * z) * (-1. + (mu - 9.) / (16. * z) * (1. - (mu - 25.) / (24. * z))))
129 
130  return z + numpy.where(z < 1e8, a, b)
131 
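# Illustrative sanity check of the two regimes (not part of the module): for
# moderate z the scipy branch is used and should agree with a direct
# evaluation of the Bessel function; for z >= 1e8 the asymptotic branch takes
# over and stays finite where iv() itself would overflow.
#
#	>>> import math
#	>>> from scipy.special import iv
#	>>> abs(float(logiv(0., 10.)) - math.log(iv(0., 10.))) < 1e-9
#	True
#	>>> bool(numpy.isfinite(logiv(0., 1e9)))
#	True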
132 # See: http://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution
133 def ncx2logpdf(x, k, l):
134  return -math.log(2.) - (x+l)/2. + (k/4.-.5) * (numpy.log(x) - numpy.log(l)) + logiv(k/2.-1., numpy.sqrt(l * x))
135 
136 def ncx2pdf(x, k, l):
137  return numpy.exp(ncx2logpdf(x, k, l))
138 
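# For moderate arguments, where scipy's implementation has not yet hit the
# precision problem worked around above, the two should agree closely
# (illustrative check, not part of the module):
#
#	>>> from scipy import stats
#	>>> abs(float(ncx2logpdf(10., 2., 5.)) - stats.ncx2.logpdf(10., 2., 5.)) < 1e-9
#	True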
139 
140 #
141 # =============================================================================
142 #
143 # Binomial Stuff
144 #
145 # =============================================================================
146 #
147 
148 
149 def fap_after_trials(p, m):
150  """
151  Given the probability, p, that an event occurs, compute the
152  probability of at least one such event occurring after m independent
153  trials.
154 
155  The return value is 1 - (1 - p)^m computed avoiding round-off
156  errors and underflows. m must be positive but need not be an
157  integer.
158 
159  Example:
160 
161  >>> fap_after_trials(0.5, 1)
162  0.5
163  >>> fap_after_trials(0.066967008463192584, 10)
164  0.5
165  >>> fap_after_trials(0.0069075045629640984, 100)
166  0.5
167  >>> fap_after_trials(0.00069290700954747807, 1000)
168  0.5
169  >>> fap_after_trials(0.000069312315846428086, 10000)
170  0.5
171  >>> fap_after_trials(.000000006931471781576803, 100000000)
172  0.5
173  >>> fap_after_trials(0.000000000000000069314718055994534, 10000000000000000)
174  0.5
175  >>> "%.15g" % fap_after_trials(0.1, 21.854345326782834)
176  '0.9'
177  >>> "%.15g" % fap_after_trials(1e-17, 2.3025850929940458e+17)
178  '0.9'
179  >>> fap_after_trials(0.1, .2)
180  0.020851637639023216
181  """
182  # when m*p is not >> 1, we can use an expansion in powers of m*p
183  # that allows us to avoid having to compute differences of
184  # probabilities (which would lead to loss of precision when working
185  # with probabilities close to 0 or 1)
186  #
187  # 1 - (1 - p)^m = m p - (m^2 - m) p^2 / 2 +
188  # (m^3 - 3 m^2 + 2 m) p^3 / 6 -
189  # (m^4 - 6 m^3 + 11 m^2 - 6 m) p^4 / 24 + ...
190  #
191  # the coefficient of each power of p is a polynomial in m. if m <<
192  # 1, these polynomials can be approximated by their leading term in
193  # m
194  #
195  # 1 - (1 - p)^m ~= m p + m p^2 / 2 + m p^3 / 3 + m p^4 / 4 + ...
196  # = -m * log(1 - p)
197  #
198  # NOTE: the ratio of the coefficients of higher-order terms in the
199  # polynomials in m to that of the leading-order term grows with
200  # successive powers of p and eventually the higher-order terms will
201  # dominate. we assume that this does not happen until the power of
202  # p is sufficiently high that the entire term (polynomial in m and
203  # all) is negligible and the series has been terminated.
204  #
205  # if m is not << 1, then returning to the full expansion, starting
206  # at 0, the nth term in the series is
207  #
208  # (-1)^n * (m - 0) * (m - 1) * ... * (m - n) * p^(n + 1) / (n + 1)!
209  #
210  # if the (n-1)th term is X, the nth term in the series is
211  #
212  # X * (n - m) * p / (n + 1)
213  #
214  # this recursion relation allows us to compute successive terms
215  # without explicit evaluation of the full numerator and denominator
216  # expressions (each of which quickly overflow).
217  #
218  # for sufficiently large n the denominator dominates and the terms
219  # in the series become small and the sum eventually converges to a
220  # stable value (and if m is an integer the sum is exact in a finite
221  # number of terms). however, if m*p >> 1 it can take many terms
222  # before the series sum stabilizes, terms in the series initially
223  # grow large and alternate in sign and an accurate result can only
224  # be obtained through careful cancellation of the large values.
225  #
226  # for large m*p we write the expression as
227  #
228  # 1 - (1 - p)^m = 1 - exp(m log(1 - p))
229  #
230  # if p is small, log(1 - p) suffers from loss of precision but the
231  # Taylor expansion of log(1 - p) converges quickly
232  #
233  # m ln(1 - p) = -m p - m p^2 / 2 - m p^3 / 3 - ...
234  # = -m p * (1 + p / 2 + p^2 / 3 + ...)
235  #
236  # math libraries (including Python's) generally provide log1p(),
237  # which evaluates log(1 + p) accurately for small p. we rely on
238  # this function to provide results valid both in the small-p and
239  # not small-p regimes.
240  #
241  # if p = 1, log1p() complains about an overflow error. we trap
242  # these and return the hard-coded answer
243  #
244 
245  if m <= 0.:
246  raise ValueError("m = %g must be positive" % m)
247  if not (0. <= p <= 1.):
248  raise ValueError("p = %g must be between 0 and 1 inclusively" % p)
249 
250  if m * p < 4.:
251  #
252  # expansion of 1 - (1 - p)^m in powers of p
253  #
254 
255  if m < 1e-8:
256  #
257  # small m approximation
258  #
259 
260  try:
261  return -m * math.log1p(-p)
262  except OverflowError:
263  #
264  # p is too close to 1. result is 1
265  #
266 
267  return 1.
268 
269  #
270  # general m
271  #
272 
273  s = []
274  term = -1.
275  for n in itertools.count():
276  term *= (n - m) * p / (n + 1.)
277  s.append(term)
278  # 0th term is always positive
279  if abs(term) <= 1e-18 * s[0]:
280  s.reverse()
281  return sum(s)
282 
283  #
284  # compute result as 1 - exp(m * log(1 - p))
285  #
286 
287  try:
288  x = m * math.log1p(-p)
289  except OverflowError:
290  #
291  # p is very close to 1. we know p <= 1 because it's a
292  # probability, and we know that m*p >= 4 otherwise we
293  # wouldn't have followed this code path, and so because p
294  # is close to 1 and m is not small we can safely assume the
295  # answer is 1.
296  #
297 
298  return 1.
299 
300  if x > -0.69314718055994529:
301  #
302  # result is closer to 0 than to 1. use Taylor expansion
303  # for exp() with leading term removed to avoid having to
304  # subtract from 1 to get answer.
305  #
306  # 1 - exp x = -(x + x^2/2! + x^3/3! + ...)
307  #
308 
309  s = [x]
310  term = x
311  for n in itertools.count(2):
312  term *= x / n
313  s.append(term)
314  # 0th term is always negative
315  if abs(term) <= -1e-18 * s[0]:
316  s.reverse()
317  return -sum(s)
318 
319  return 1. - math.exp(x)
320 
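# Illustrative examples of the two code paths described in the comments above
# (values are approximate, not doctests):
#
#	fap_after_trials(0.5, 2)	# m*p < 4: series path, 1 - (1 - 0.5)**2 = 0.75
#	fap_after_trials(0.5, 10)	# m*p >= 4: 1 - exp(m log1p(-p)) = 1 - 0.5**10 ~= 0.999023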
321 
322 fap_after_trials_arr = numpy.frompyfunc(fap_after_trials, 2, 1)
323 
324 
325 def trials_from_faps(p0, p1):
326  """
327  Given the probability, p0, of an event occurring, and the
328  probability, p1, of at least one such event being observed after
329  some number of independent trials, solve for and return the number
330  of trials, m, that relates the two probabilities. The three
331  quantities are related by p1 = 1 - (1 - p0)^m. Generally the
332  return value is not an integer.
333 
334  See also fap_after_trials(). Note that if p0 is 0 or 1 then p1
335  must be 0 or 1 respectively, and in both cases m is undefined.
336  Otherwise if p1 is 1 then inf is returned.
337  """
338  assert 0 <= p0 <= 1 # p0 must be a valid probability
339  assert 0 <= p1 <= 1 # p1 must be a valid probability
340 
341  if p0 == 0 or p0 == 1:
342  assert p0 == p1 # require valid relationship
343  # but we still can't solve for m
344  raise ValueError("m undefined")
345  if p1 == 1:
346  return PosInf
347 
348  #
349  # m log(1 - p0) = log(1 - p1)
350  #
351 
352  return math.log1p(-p1) / math.log1p(-p0)
353 
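# Illustrative round trip (not a doctest): the returned trials factor undoes
# fap_after_trials(), e.g.
#
#	trials_from_faps(0.5, 0.75)	# -> 2.0, since 1 - (1 - 0.5)**2 = 0.75
#	fap_after_trials(0.5, trials_from_faps(0.5, 0.75))	# -> 0.75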
354 
355 #
356 # =============================================================================
357 #
358 # Parameter Distributions Book-Keeping Object
359 #
360 # =============================================================================
361 #
362 
363 
364 #
365 # Horizon distance record keeping
366 #
367 
368 
369 class NearestLeafTree(object):
370  """
371  A simple binary tree in which look-ups return the value of the
372  closest leaf. Only float objects are supported for the keys and
373  values. Look-ups raise KeyError if the tree is empty.
374 
375  Example:
376 
377  >>> x = NearestLeafTree()
378  >>> x[100.0] = 120.
379  >>> x[104.0] = 100.
380  >>> x[102.0] = 110.
381  >>> x[90.]
382  120.0
383  >>> x[100.999]
384  120.0
385  >>> x[101.001]
386  110.0
387  >>> x[200.]
388  100.0
389  >>> del x[104]
390  >>> x[200.]
391  110.0
392  >>> x.keys()
393  [100.0, 102.0]
394  >>> 102 in x
395  True
396  >>> 103 in x
397  False
398  >>> x.to_xml(u"H1").write()
399  <Array Type="real_8" Name="H1:nearestleaftree:array">
400  <Dim>2</Dim>
401  <Dim>2</Dim>
402  <Stream Delimiter=" " Type="Local">
403  100 102
404  120 110
405  </Stream>
406  </Array>
407  """
408  def __init__(self, items = ()):
409  """
410  Initialize a NearestLeafTree.
411 
412  Example:
413 
414  >>> x = NearestLeafTree()
415  >>> x = NearestLeafTree([(100., 120.), (104., 100.), (102., 110.)])
416  >>> y = {100.: 120., 104.: 100., 102.: 100.}
417  >>> x = NearestLeafTree(y.items())
418  """
419  self.tree = list(items)
420  self.tree.sort()
421 
422  def __setitem__(self, x, val):
423  """
424  Example:
425 
426  >>> x = NearestLeafTree()
427  >>> x[100.:200.] = 0.
428  >>> x[150.] = 1.
429  >>> x
430  NearestLeafTree([(100, 0), (150, 1), (200, 0)])
431  """
432  if type(x) is slice:
433  # replace all entries in the requested range of
434  # co-ordinates with two entries, each with the
435  # given value, one at the start of the range and
436  # one at the end of the range. thus, after this
437  # all queries within that range will return this
438  # value.
439  if x.step is not None:
440  raise ValueError("%s: step not supported" % repr(x))
441  if x.start is None:
442  if not self.tree:
443  raise IndexError("open-ended slice not supported with empty tree")
444  x = slice(self.minkey(), x.stop)
445  if x.stop is None:
446  if not self.tree:
447  raise IndexError("open-ended slice not supported with empty tree")
448  x = slice(x.start, self.maxkey())
449  if x.stop < x.start:
450  raise ValueError("%s: bounds out of order" % repr(x))
451  lo = bisect.bisect_left(self.tree, (x.start, NegInf))
452  hi = bisect.bisect_right(self.tree, (x.stop, PosInf))
453  self.tree[lo:hi] = ((x.start, val), (x.stop, val))
454  else:
455  # replace all entries having the same co-ordinate
456  # with this one
457  lo = bisect.bisect_left(self.tree, (x, NegInf))
458  hi = bisect.bisect_right(self.tree, (x, PosInf))
459  self.tree[lo:hi] = ((x, val),)
460 
461  def __getitem__(self, x):
462  if not self.tree:
463  raise KeyError(x)
464  if type(x) is slice:
465  raise ValueError("slices not supported")
466  hi = bisect.bisect_right(self.tree, (x, PosInf))
467  try:
468  x_hi, val_hi = self.tree[hi]
469  except IndexError:
470  x_hi, val_hi = self.tree[-1]
471  if hi == 0:
472  x_lo, val_lo = x_hi, val_hi
473  else:
474  x_lo, val_lo = self.tree[hi - 1]
475  # compute the average in a way that will be safe if x values are
476  # range-limited objects
477  return val_lo if x < x_lo + (x_hi - x_lo) / 2. else val_hi
478 
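# note on __getitem__()'s tie-break: a query exactly at the midpoint between
# two keys resolves to the larger key, e.g. with the keys from the class
# docstring (100. -> 120., 102. -> 110.) x[100.999] returns 120.0 while
# x[101.0] and x[101.001] both return 110.0.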
479  def __delitem__(self, x):
480  """
481  Example:
482 
483  >>> x = NearestLeafTree([(100., 0.), (150., 1.), (200., 0.)])
484  >>> del x[150.]
485  >>> x
486  NearestLeafTree([(100, 0), (200, 0)])
487  >>> del x[:]; x
488  NearestLeafTree([])
489  """
490  if type(x) is slice:
491  if x.step is not None:
492  raise ValueError("%s: step not supported" % repr(x))
493  if x.start is None:
494  if not self.tree:
495  # no-op
496  return
497  x = slice(self.minkey(), x.stop)
498  if x.stop is None:
499  if not self.tree:
500  # no-op
501  return
502  x = slice(x.start, self.maxkey())
503  if x.stop < x.start:
504  # no-op
505  return
506  lo = bisect.bisect_left(self.tree, (x.start, NegInf))
507  hi = bisect.bisect_right(self.tree, (x.stop, PosInf))
508  del self.tree[lo:hi]
509  elif not self.tree:
510  raise IndexError(x)
511  else:
512  lo = bisect.bisect_left(self.tree, (x, NegInf))
513  if self.tree[lo][0] != x:
514  raise IndexError(x)
515  del self.tree[lo]
516 
517  def __nonzero__(self):
518  return bool(self.tree)
519 
520  def __iadd__(self, other):
521  for x, val in other.tree:
522  self[x] = val
523  return self
524 
525  def keys(self):
526  return [x for x, val in self.tree]
527 
528  def values(self):
529  return [val for x, val in self.tree]
530 
531  def items(self):
532  return list(self.tree)
533 
534  def min(self):
535  """
536  Return the minimum value stored in the tree. This is O(n).
537  """
538  if not self.tree:
539  raise ValueError("empty tree")
540  return min(val for x, val in self.tree)
541 
542  def minkey(self):
543  """
544  Return the minimum key stored in the tree. This is O(1).
545  """
546  if not self.tree:
547  raise ValueError("empty tree")
548  return self.tree[0][0]
549 
550  def max(self):
551  """
552  Return the maximum value stored in the tree. This is O(n).
553  """
554  if not self.tree:
555  raise ValueError("empty tree")
556  return max(val for x, val in self.tree)
557 
558  def maxkey(self):
559  """
560  Return the maximum key stored in the tree. This is O(1).
561  """
562  if not self.tree:
563  raise ValueError("empty tree")
564  return self.tree[-1][0]
565 
566  def __contains__(self, x):
567  try:
568  return bool(self.tree) and self.tree[bisect.bisect_left(self.tree, (x, NegInf))][0] == x
569  except IndexError:
570  return False
571 
572  def __len__(self):
573  return len(self.tree)
574 
575  def __repr__(self):
576  return "NearestLeafTree([%s])" % ", ".join("(%g, %g)" % item for item in self.tree)
577 
578  @classmethod
579  def from_xml(cls, xml, name):
580  return cls(map(tuple, ligolw_array.get_array(xml, u"%s:nearestleaftree" % name).array[:]))
581 
582  def to_xml(self, name):
583  return ligolw_array.from_array(u"%s:nearestleaftree" % name, numpy.array(self.tree, dtype = "double"))
584 
585 
586 class HorizonHistories(dict):
587  def __iadd__(self, other):
588  for key, history in other.iteritems():
589  try:
590  self[key] += history
591  except KeyError:
592  self[key] = copy.deepcopy(history)
593  return self
594 
595  def minkey(self):
596  """
597  Return the minimum key stored in the trees.
598  """
599  minkeys = tuple(history.minkey() for history in self.values() if history)
600  if not minkeys:
601  raise ValueError("empty trees")
602  return min(minkeys)
603 
604  def maxkey(self):
605  """
606  Return the maximum key stored in the trees.
607  """
608  maxkeys = tuple(history.maxkey() for history in self.values() if history)
609  if not maxkeys:
610  raise ValueError("empty trees")
611  return max(maxkeys)
612 
613  def getdict(self, x):
614  return dict((key, value[x]) for key, value in self.iteritems())
615 
616  def randhorizons(self):
617  """
618  Generator yielding a sequence of random horizon distance
619  dictionaries chosen by drawing random times uniformly
620  distributed between the lowest and highest times recorded
621  in the history and returning the dictionary of horizon
622  distances for each of those times.
623  """
624  x_min = self.minkey()
625  x_max = self.maxkey()
626  getdict = self.getdict
627  rnd = random.uniform
628  while 1:
629  yield getdict(rnd(x_min, x_max))
630 
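# Illustrative usage of getdict()/randhorizons() (keys are GPS times in
# seconds; instruments and distances are made-up numbers):
#
#	h = HorizonHistories()
#	h["H1"] = NearestLeafTree([(1000000000., 110.), (1000000010., 120.)])
#	h["L1"] = NearestLeafTree([(1000000000., 90.)])
#	h.getdict(1000000001.)		# -> {"H1": 110.0, "L1": 90.0}
#	next(h.randhorizons())		# horizon dict at a random time in [minkey, maxkey]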
631  def all(self):
632  """
633  Returns a list of the unique sets of horizon distances
634  recorded in the histories.
635  """
636  # unique times for which a horizon distance measurement is
637  # available
638  all_x = set(x for value in self.values() for x in value.keys())
639 
640  # the unique horizon distances from those times, expressed
641  # as frozensets of instrument/distance pairs
642  result = set(frozenset(self.getdict(x).items()) for x in all_x)
643 
644  # return a list of the results converted back to
645  # dictionaries
646  return map(dict, result)
647 
648  @classmethod
649  def from_xml(cls, xml, name):
650  xml = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:horizonhistories" % name]
651  try:
652  xml, = xml
653  except ValueError:
654  raise ValueError("document must contain exactly 1 HorizonHistories named '%s'" % name)
655  keys = [elem.Name.replace(u":nearestleaftree", u"") for elem in xml.getElementsByTagName(ligolw.Array.tagName) if elem.hasAttribute(u"Name") and elem.Name.endswith(u":nearestleaftree")]
656  self = cls()
657  for key in keys:
658  self[key] = NearestLeafTree.from_xml(xml, key)
659  return self
660 
661  def to_xml(self, name):
662  xml = ligolw.LIGO_LW({u"Name": u"%s:horizonhistories" % name})
663  for key, value in self.items():
664  xml.appendChild(value.to_xml(key))
665  return xml
666 
667 
668 #
669 # Inspiral-specific CoincParamsDistributions sub-class
670 #
671 
672 
673 class CoincParams(dict):
674  # place-holder class to allow params dictionaries to carry
675  # attributes as well
676  __slots__ = ("horizons",)
677 
678 
679 class ThincaCoincParamsDistributions(snglcoinc.CoincParamsDistributions):
680  ligo_lw_name_suffix = u"gstlal_inspiral_coincparamsdistributions"
681 
682  instrument_categories = snglcoinc.InstrumentCategories()
683 
684  # range of SNRs covered by this object
685  # FIXME: must ensure lower boundary matches search threshold
686  snr_min = 4.
687 
688  # if two horizon distances, D1 and D2, satisfy
689  #
690  # | ln(D1 / D2) | <= log_distance_tolerance
691  #
692  # then they are considered to be equal for the purpose of recording
693  # horizon distance history, generating joint SNR PDFs, and so on.
694  # if the smaller of the two is < min_ratio * the larger then the
695  # smaller is treated as though it were 0.
696  # FIXME: is this choice of distance quantization appropriate?
697  @staticmethod
698  def quantize_horizon_distances(horizon_distances, log_distance_tolerance = PosInf, min_ratio = 0.1):
699  horizon_distance_norm = max(horizon_distances.values())
700  assert horizon_distance_norm != 0.
701  if math.isinf(log_distance_tolerance):
702  return dict((instrument, 1.) for instrument in horizon_distances)
703  min_distance = min_ratio * horizon_distance_norm
704  return dict((instrument, (0. if horizon_distance < min_distance else math.exp(round(math.log(horizon_distance / horizon_distance_norm) / log_distance_tolerance) * log_distance_tolerance))) for instrument, horizon_distance in horizon_distances.items())
705 
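# Worked example (illustrative): with log_distance_tolerance = math.log(2.)
# and the default min_ratio = 0.1,
#
#	quantize_horizon_distances({"H1": 120., "L1": 30., "V1": 5.}, log_distance_tolerance = math.log(2.))
#
# yields {"H1": 1.0, "L1": 0.25, "V1": 0.0}: each distance is re-expressed as
# a power of 2 relative to the largest (120), and V1, being below 10% of the
# largest, is treated as 0. with the default log_distance_tolerance = PosInf
# every instrument is simply mapped to 1.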
706  # binnings (filter func look-ups are initialized in .__init__())
707  binnings = {
708  "instruments": rate.NDBins((rate.LinearBins(0.5, instrument_categories.max() + 0.5, instrument_categories.max()),)),
709  "H1_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
710  "H2_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
711  "H1H2_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
712  "L1_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
713  "V1_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
714  "E1_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
715  "E2_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
716  "E3_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200))),
717  "E0_snr_chi": rate.NDBins((rate.ATanLogarithmicBins(3.6, 70., 260), rate.ATanLogarithmicBins(.001, 0.5, 200)))
718  }
719 
720  def __init__(self, *args, **kwargs):
721  super(ThincaCoincParamsDistributions, self).__init__(*args, **kwargs)
722  self.horizon_history = HorizonHistories()
723  self.pdf_from_rates_func = {
724  "instruments": self.pdf_from_rates_instruments,
725  "H1_snr_chi": self.pdf_from_rates_snrchi2,
726  "H2_snr_chi": self.pdf_from_rates_snrchi2,
727  "H1H2_snr_chi": self.pdf_from_rates_snrchi2,
728  "L1_snr_chi": self.pdf_from_rates_snrchi2,
729  "V1_snr_chi": self.pdf_from_rates_snrchi2,
730  "E1_snr_chi": self.pdf_from_rates_snrchi2,
731  "E2_snr_chi": self.pdf_from_rates_snrchi2,
732  "E3_snr_chi": self.pdf_from_rates_snrchi2,
733  "E0_snr_chi": self.pdf_from_rates_snrchi2,
734  }
735 
736  def __iadd__(self, other):
737  # NOTE: because we use custom PDF constructions, the stock
738  # .__iadd__() method for this class will not result in
739  # valid PDFs. the rates arrays *are* handled correctly by
740  # the .__iadd__() method, by fiat, so just remember to
741  # invoke .finish() to get the PDFs in shape afterwards
742  super(ThincaCoincParamsDistributions, self).__iadd__(other)
743  self.horizon_history += other.horizon_history
744  return self
745 
746  #
747  # class-level cache of pre-computed SNR joint PDFs. structure is like
748  #
749  # = {
750  # (frozenset(("H1", ...)), frozenset([("H1", horiz_dist), ("L1", horiz_dist), ...])): (InterpBinnedArray, BinnedArray, age),
751  # ...
752  # }
753  #
754  # at most max_cached_snr_joint_pdfs will be stored. if a PDF is
755  # required that cannot be found in the cache, and there is no more
756  # room, the oldest PDF (the one with the *smallest* age) is pop()ed
757  # to make room, and the new one added with its age set to the
758  # highest age in the cache + 1.
759  #
760  # 5**3 * 4: 5 quantizations in each of 3 instruments, 4 combos per
761  # quantized choice of horizon distances
762  #
763 
764  max_cached_snr_joint_pdfs = int(5**3 * 4)
765  snr_joint_pdf_cache = {}
766 
767  def get_snr_joint_pdf(self, instruments, horizon_distances, verbose = False):
768  #
769  # key for cache: two element tuple, first element is
770  # frozen set of instruments for which this is the PDF,
771  # second element is frozen set of (instrument, horizon
772  # distance) pairs for all instruments in the network.
773  # horizon distances are normalized to fractions of the
774  # largest among them and then the fractions are quantized to
775  # integer powers of a common factor
776  #
777 
778  key = frozenset(instruments), frozenset(self.quantize_horizon_distances(horizon_distances).items())
779 
780  #
781  # retrieve cached PDF, or build new one
782  #
783 
784  try:
785  pdf = self.snr_joint_pdf_cache[key][0]
786  except KeyError:
787  # no entries in cache for this instrument combo and
788  # set of horizon distances
789  if self.snr_joint_pdf_cache:
790  age = max(age for ignored, ignored, age in self.snr_joint_pdf_cache.values()) + 1
791  else:
792  age = 0
793  if verbose:
794  print >>sys.stderr, "For horizon distances %s" % ", ".join("%s = %.4g Mpc" % item for item in sorted(horizon_distances.items()))
795  progressbar = ProgressBar(text = "%s SNR PDF" % ", ".join(sorted(key[0])))
796  else:
797  progressbar = None
798  binnedarray = self.joint_pdf_of_snrs(key[0], dict(key[1]), progressbar = progressbar)
799  del progressbar
800  lnbinnedarray = binnedarray.copy()
801  with numpy.errstate(divide = "ignore"):
802  lnbinnedarray.array = numpy.log(lnbinnedarray.array)
803  pdf = rate.InterpBinnedArray(lnbinnedarray, fill_value = NegInf)
804  self.snr_joint_pdf_cache[key] = pdf, binnedarray, age
805  # if the cache is full, delete the entry with the
806  # smallest age
807  while len(self.snr_joint_pdf_cache) > self.max_cached_snr_joint_pdfs:
808  del self.snr_joint_pdf_cache[min((age, key) for key, (ignored, ignored, age) in self.snr_joint_pdf_cache.items())[1]]
809  if verbose:
810  print >>sys.stderr, "%d/%d slots in SNR PDF cache now in use" % (len(self.snr_joint_pdf_cache), self.max_cached_snr_joint_pdfs)
811  return pdf
812 
813  def coinc_params(self, events, offsetvector):
814  #
815  # NOTE: unlike the burst codes, this function is expected
816  # to work with single-instrument event lists as well, as
817  # its output is used to populate the single-instrument
818  # background bin counts.
819  #
820 
821  #
822  # 2D (snr, \chi^2) values. don't allow H1 and H2 to
823  # both contribute parameters to the same coinc. if both
824  # have participated, favour H1
825  #
826 
827  params = CoincParams(("%s_snr_chi" % event.ifo, (event.snr, event.chisq / event.snr**2)) for event in events)
828  if "H2_snr_chi" in params and "H1_snr_chi" in params:
829  del params["H2_snr_chi"]
830 
831  #
832  # instrument combination
833  #
834 
835  params["instruments"] = (ThincaCoincParamsDistributions.instrument_categories.category(event.ifo for event in events),)
836 
837  #
838  # record the horizon distances. pick one trigger at random
839  # to provide a timestamp and pull the horizon distances
840  # from our horizon distance history at that time. the
841  # horizon history is keyed by floating-point values (don't
842  # need nanosecond precision for this). NOTE: this is
843  # attached as a property instead of going into the
844  # dictionary to not confuse the stock lnP_noise(),
845  # lnP_signal(), and friends methods.
846  #
847 
848  params.horizons = self.horizon_history.getdict(float(events[0].get_end()))
849  # for instruments that provided triggers,
850  # use the trigger effective distance and
851  # SNR to provide the horizon distance.
852  # should be the same, but do this just in
853  # case the history isn't as complete as
854  # we'd like it to be
855  #
856  # FIXME: for now this is disabled until
857  # we figure out how to get itac's sigmasq
858  # property updated from the whitener
859  #params.horizons.update(dict((event.ifo, event.eff_distance * event.snr / 8.) for event in events))
860 
861  #
862  # done
863  #
864 
865  return params
866 
867  def lnP_signal(self, params):
868  # (instrument, snr) pairs sorted alphabetically by
869  # instrument name
870  snrs = sorted((name.split("_")[0], value[0]) for name, value in params.items() if name.endswith("_snr_chi"))
871  # retrieve the SNR PDF
872  snr_pdf = self.get_snr_joint_pdf((instrument for instrument, rho in snrs), params.horizons)
873  # evaluate it (snrs are alphabetical by instrument)
874  lnP_signal = snr_pdf(*tuple(rho for instrument, rho in snrs))
875 
876  # FIXME: P(instruments | signal) needs to depend on
877  # horizon distances. here we're assuming whatever
878  # populate_prob_of_instruments_given_signal() has set the
879  # probabilities to is OK. we probably need to cache these
880  # and save them in the XML file, too, like P(snrs | signal,
881  # instruments)
882  lnP_signal += super(ThincaCoincParamsDistributions, self).lnP_signal(params)
883 
884  # return logarithm of (.99 P(..|signal) + 0.01 P(..|noise))
885  # FIXME: investigate how to determine correct mixing ratio
886  lnP_noise = self.lnP_noise(params)
887  if math.isinf(lnP_noise) and math.isinf(lnP_signal):
888  if lnP_noise < 0. and lnP_signal < 0.:
889  return NegInf
890  if lnP_noise > 0. and lnP_signal > 0.:
891  return PosInf
892  lnP_signal += math.log(.99)
893  lnP_noise += math.log(0.01)
894  return max(lnP_signal, lnP_noise) + math.log1p(math.exp(-abs(lnP_signal - lnP_noise)))
895 
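# the final return statement above is the overflow-safe log-sum-exp identity
#
#	log(exp(a) + exp(b)) = max(a, b) + log1p(exp(-|a - b|))
#
# e.g. a = 0., b = -1. gives log(1. + math.exp(-1.)) = log1p(0.3679...) ~= 0.3133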
896  def add_background_prior(self, instruments, n = 1., transition = 23., verbose = False):
897  #
898  # populate snr,chi2 binnings with a slope to force
899  # higher-SNR events to be assessed as more significant
900  # when in the regime beyond the edge of measured or even
901  # extrapolated background.
902  #
903 
904  if verbose:
905  print >>sys.stderr, "adding tilt to (SNR, \\chi^2) background PDFs ..."
906  for instrument in instruments:
907  binarr = self.background_rates["%s_snr_chi" % instrument]
908  if verbose:
909  progressbar = ProgressBar(instrument, max = len(binarr.bins[0]))
910  else:
911  progressbar = None
912 
913  # will need to normalize results so need new
914  # storage
915  new_binarr = rate.BinnedArray(binarr.bins)
916  # don't compute this in the loop
917  transition_factor = transition**5. * math.exp(-transition**2. / 2.)
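# transition_factor is chosen so that the snr**-5 power-law tail added
# below equals the Gaussian-like term exp(-snr**2 / 2.) at snr =
# transition, so the two pieces join continuously there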
918  # iterate over all bins
919  dchi2_over_snr2s = new_binarr.bins[1].upper() - new_binarr.bins[1].lower()
920  dchi2_over_snr2s[numpy.isinf(dchi2_over_snr2s)] = 0.
921  for snr, dsnr in zip(new_binarr.bins[0].centres(), new_binarr.bins[0].upper() - new_binarr.bins[0].lower()):
922  # normalization is irrelevant. overall
923  # normalization will be imposed afterwards.
924  # tilt looks like expected behaviour for
925  # Gaussian noise + softer fall-off above
926  # some transition SNR to avoid zero-valued
927  # bins
928  #
929  # NOTE: expression split up so that if a
930  # numerical overflow or underflow occurs we
931  # can see which part of the expression was
932  # the problem from the traceback
933  if math.isinf(dsnr):
934  continue
935  p = math.exp(-snr**2. / 2.)
936  p += transition_factor / snr**5.
937  new_binarr[snr,:] += p * dsnr * dchi2_over_snr2s
938  if progressbar is not None:
939  progressbar.increment()
940  # zero everything below the search threshold and
941  # normalize what's left to the requested count.
942  # need to do the slicing ourselves to avoid zeroing
943  # the at-threshold bin
944  new_binarr.array[:new_binarr.bins[0][self.snr_min],:] = 0.
945  new_binarr.array *= n / new_binarr.array.sum()
946  # add to raw counts
947  self.background_rates["instruments"][self.instrument_categories.category([instrument]),] += n
948  binarr += new_binarr
949 
950  def add_instrument_combination_counts(self, segs, verbose = False):
951  #
952  # populate instrument combination binning. assume the
953  # single-instrument categories in the background rates
954  # instruments binning provide the background event counts
955  # for the segment lists provided. NOTE: we're using the
956  # counts in the single-instrument categories for input, and
957  # the output only goes into the 2-instrument and higher
958  # categories, so this procedure does not result in a loss
959  # of information and can be performed multiple times
960  # without altering the statistics of the input data.
961  #
962  # FIXME: we need to know the coincidence window to do this
963  # correctly. we assume 5ms. get the correct number.
964  #
965  # FIXME: should this be done in .finish()? but we'd need
966  # the segment lists
967 
968  if verbose:
969  print >>sys.stderr, "synthesizing background-like instrument combination probabilities ..."
970  coincsynth = snglcoinc.CoincSynthesizer(
971  eventlists = dict((instrument, self.background_rates["instruments"][self.instrument_categories.category([instrument]),]) for instrument in segs),
972  segmentlists = segs,
973  delta_t = 0.005
974  )
975  # assume the single-instrument events are being collected
976  # in several disjoint bins so that events from different
977  # instruments that occur at the same time but in different
978  # bins are not coincident. if there are M bins for each
979  # instrument, the probability that N events all occur in
980  # the same bin is (1/M)^(N-1). the number of bins, M, is
981  # therefore given by the (N-1)th root of the ratio of the
982  # predicted number of N-instrument coincs to the observed
983  # number of N-instrument coincs. use the average of M
984  # measured from all instrument combinations.
985  #
986  # finding M by comparing predicted to observed zero-lag
987  # counts assumes we are in a noise-dominated regime, i.e.,
988  # that the observed relative abundances of coincs are not
989  # significantly contaminated by signals. if signals are
990  # present in large numbers, and occur in different
991  # abundances than the noise events, averaging the apparent
992  # M over different instrument combinations helps to
993  # suppress the contamination. NOTE: the number of
994  # coincidence bins, M, should be very close to the number
995  # of templates (experience shows that it is not equal to
996  # the number of templates, though I don't know why).
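# for example (illustrative numbers): if coincsynth predicts 5000 random
# H1,L1 coincidences in the live time but only 50 are observed, then
# M ~= 5000 / 50 = 100 for that combination; for a 3-instrument
# combination the same suppression appears squared, so
# M ~= (predicted / observed)**(1. / 2.)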
997  livetime = get_live_time(segs)
998  N = 0
999  coincidence_bins = 0.
1000  for instruments in coincsynth.all_instrument_combos:
1001  predicted_count = coincsynth.rates[frozenset(instruments)] * livetime
1002  observed_count = self.zero_lag_rates["instruments"][self.instrument_categories.category(instruments),]
1003  if predicted_count > 0 and observed_count > 0:
1004  coincidence_bins += (predicted_count / observed_count)**(1. / (len(instruments) - 1))
1005  N += 1
1006  assert N > 0
1007  assert coincidence_bins > 0.
1008  coincidence_bins /= N
1009  if verbose:
1010  print >>sys.stderr, "\tthere seems to be %g effective disjoint coincidence bin(s)" % coincidence_bins
1011  assert coincidence_bins >= 1.
1012  # convert single-instrument event rates to rates/bin
1013  coincsynth.mu = dict((instrument, rate / coincidence_bins) for instrument, rate in coincsynth.mu.items())
1014  # now compute the expected coincidence rates/bin, then
1015  # multiply by the number of bins to get the expected
1016  # coincidence rates
1017  for instruments, count in coincsynth.mean_coinc_count.items():
1018  self.background_rates["instruments"][self.instrument_categories.category(instruments),] = count * coincidence_bins
1019 
1020 
1021  def add_foreground_snrchi_prior(self, instruments, n, prefactors_range, df, verbose = False):
1022  if verbose:
1023  print >>sys.stderr, "synthesizing signal-like (SNR, \\chi^2) distributions ..."
1024  pfs = numpy.linspace(prefactors_range[0], prefactors_range[1], 10)
1025  for instrument in instruments:
1026  binarr = self.injection_rates["%s_snr_chi" % instrument]
1027  if verbose:
1028  progressbar = ProgressBar(instrument, max = len(binarr.bins[0]))
1029  else:
1030  progressbar = None
1031 
1032  # will need to normalize results so need new
1033  # storage
1034  new_binarr = rate.BinnedArray(binarr.bins)
1035 
1036  # iterate over all bins
1037  chi2_over_snr2s = new_binarr.bins[1].centres()
1038  dchi2_over_snr2s = new_binarr.bins[1].upper() - new_binarr.bins[1].lower()
1039  dchi2_over_snr2s[numpy.isinf(dchi2_over_snr2s)] = 0.
1040  for snr, dsnr in zip(new_binarr.bins[0].centres(), new_binarr.bins[0].upper() - new_binarr.bins[0].lower()):
1041  if math.isinf(dsnr):
1042  continue
1043  # avoid computing things in loops
1044  snr2 = snr**2.
1045  dsnr_over_snr4 = dsnr / snr**4.
1046  # iterate over chi2 bins
1047  for chi2_over_snr2, dchi2_over_snr2 in zip(chi2_over_snr2s, dchi2_over_snr2s):
1048  chi2 = chi2_over_snr2 * snr2 * df # We record the reduced chi2
1049  with numpy.errstate(over = "ignore", divide = "ignore", invalid = "ignore"):
1050  v = ncx2pdf(chi2, df, pfs * snr2)
1051  # remove nans and infs, and barf if
1052  # we got a negative number
1053  v = v[numpy.isfinite(v)]
1054  assert (v >= 0.).all(), v
1055  # normalization is irrelevant,
1056  # final result will have over-all
1057  # normalization imposed
1058  p = v.sum() * dsnr_over_snr4 * dchi2_over_snr2
1059  assert p >= 0. and not (math.isinf(p) or math.isnan(p)), p
1060  new_binarr[snr, chi2_over_snr2] = p
1061  if progressbar is not None:
1062  progressbar.increment()
1063  # zero everything below the search threshold and
1064  # normalize what's left to the requested count.
1065  # need to do the slicing ourselves to avoid zeroing
1066  # the at-threshold bin
1067  new_binarr.array[:new_binarr.bins[0][self.snr_min],:] = 0.
1068  new_binarr.array *= n / new_binarr.array.sum()
1069  # add to raw counts
1070  binarr += new_binarr
1071 
1072  def populate_prob_of_instruments_given_signal(self, segs, n = 1., verbose = False):
1073  #
1074  # populate instrument combination binning
1075  #
1076 
1077  assert len(segs) > 1
1078  assert set(self.horizon_history) <= set(segs)
1079 
1080  # probability that a signal is detectable by each of the
1081  # instrument combinations
1082  P = P_instruments_given_signal(self.horizon_history)
1083 
1084  # multiply by probability that enough instruments are on to
1085  # form each of those combinations
1086  #
1087  # FIXME: if when an instrument is off it has its horizon
1088  # distance set to 0 in the horizon history, then this step
1089  # will not be needed because the marginalization over
1090  # horizon histories will already reflect the duty cycles.
1091  P_live = snglcoinc.CoincSynthesizer(segmentlists = segs).P_live
1092  for instruments in P:
1093  P[instruments] *= sum(sorted(p for on_instruments, p in P_live.items() if on_instruments >= instruments))
1094  # renormalize
1095  total = sum(sorted(P.values()))
1096  for instruments in P:
1097  P[instruments] /= total
1098  # populate binning from probabilities
1099  for instruments, p in P.items():
1100  self.injection_rates["instruments"][self.instrument_categories.category(instruments),] += n * p
1101 
1102  def _rebuild_interpolators(self):
1103  super(ThincaCoincParamsDistributions, self)._rebuild_interpolators()
1104 
1105  #
1106  # the instrument combination "interpolators" are
1107  # pass-throughs. we pre-evaluate a bunch of attribute,
1108  # dictionary, and method look-ups for speed
1109  #
1110 
1111  def mkinterp(binnedarray):
1112  with numpy.errstate(divide = "ignore"):
1113  # need to insert an element at the start to
1114  # get the binning indexes to map to the
1115  # correct locations in the array
1116  return numpy.hstack(([NaN], numpy.log(binnedarray.array))).__getitem__
1117  if "instruments" in self.background_pdf:
1118  self.background_lnpdf_interp["instruments"] = mkinterp(self.background_pdf["instruments"])
1119  if "instruments" in self.injection_pdf:
1120  self.injection_lnpdf_interp["instruments"] = mkinterp(self.injection_pdf["instruments"])
1121  if "instruments" in self.zero_lag_pdf:
1122  self.zero_lag_lnpdf_interp["instruments"] = mkinterp(self.zero_lag_pdf["instruments"])
1123 
1124  def pdf_from_rates_instruments(self, key, pdf_dict):
1125  # instrument combos are probabilities, not densities. be
1126  # sure the single-instrument categories are zeroed.
1127  binnedarray = pdf_dict[key]
1128  for category in self.instrument_categories.values():
1129  binnedarray[category,] = 0
1130  with numpy.errstate(invalid = "ignore"):
1131  binnedarray.array /= binnedarray.array.sum()
1132 
1133  def pdf_from_rates_snrchi2(self, key, pdf_dict, snr_kernel_width_at_8 = math.sqrt(2) / 4.0, sigma = 10.):
1134  # get the binned array we're going to process
1135  binnedarray = pdf_dict[key]
1136 
1137  # construct the density estimation kernel
1138  snr_bins = binnedarray.bins[0]
1139  snr_per_bin_at_8 = (snr_bins.upper() - snr_bins.lower())[snr_bins[8.]]
1140  snr_kernel_bins = snr_kernel_width_at_8 / snr_per_bin_at_8
1141  assert snr_kernel_bins >= 2.5, snr_kernel_bins # don't let the window get too small
1142  kernel = rate.gaussian_window(snr_kernel_bins, 5, sigma = sigma)
1143 
1144  # convolve with the bin count data
1145  rate.filter_array(binnedarray.array, kernel)
1146 
1147  # zero everything below the SNR cut-off. need to do the
1148  # slicing ourselves to avoid zeroing the at-threshold bin
1149  binnedarray.array[:binnedarray.bins[0][self.snr_min],:] = 0.
1150 
1151  # normalize what remains to be a valid PDF
1152  with numpy.errstate(invalid = "ignore"):
1153  binnedarray.to_pdf()
1154 
1155  # if this is the numerator, convert (rho, chi^2/rho^2) PDFs
1156  # into P(chi^2/rho^2 | rho). don't bother unless some
1157  # events of this type were recorded
1158  if pdf_dict is self.injection_pdf and not numpy.isnan(binnedarray.array).all():
1159  bin_sizes = binnedarray.bins[1].upper() - binnedarray.bins[1].lower()
1160  for i in xrange(binnedarray.array.shape[0]):
1161  nonzero = binnedarray.array[i] != 0
1162  if not nonzero.any():
1163  # PDF is 0 in this column. leave it
1164  # that way. result is not a valid
1165  # PDF, but we'll have to live with
1166  # it.
1167  continue
1168  norm = numpy.dot(numpy.compress(nonzero, binnedarray.array[i]), numpy.compress(nonzero, bin_sizes))
1169  assert not math.isnan(norm), "encountered impossible PDF: %s is non-zero in a bin with infinite volume" % key
1170  binnedarray.array[i] /= norm
1171 
1172  @classmethod
1173  def from_xml(cls, xml, name):
1174  self = super(ThincaCoincParamsDistributions, cls).from_xml(xml, name)
1175  xml = self.get_xml_root(xml, name)
1176  self.horizon_history = HorizonHistories.from_xml(xml, name)
1177  prefix = u"cached_snr_joint_pdf"
1178  for elem in [elem for elem in xml.childNodes if elem.Name.startswith(u"%s:" % prefix)]:
1179  key = ligolw_param.get_pyvalue(elem, u"key").strip().split(u";")
1180  key = frozenset(lsctables.instrument_set_from_ifos(key[0].strip())), frozenset((inst.strip(), float(dist.strip())) for inst, dist in (inst_dist.strip().split(u"=") for inst_dist in key[1].strip().split(u",")))
1181  binnedarray = rate.BinnedArray.from_xml(elem, prefix)
1182  if self.snr_joint_pdf_cache:
1183  age = max(age for ignored, ignored, age in self.snr_joint_pdf_cache.values()) + 1
1184  else:
1185  age = 0
1186  lnbinnedarray = binnedarray.copy()
1187  with numpy.errstate(divide = "ignore"):
1188  lnbinnedarray.array = numpy.log(lnbinnedarray.array)
1189  self.snr_joint_pdf_cache[key] = rate.InterpBinnedArray(lnbinnedarray, fill_value = NegInf), binnedarray, age
1190  while len(self.snr_joint_pdf_cache) > self.max_cached_snr_joint_pdfs:
1191  del self.snr_joint_pdf_cache[min((age, key) for key, (ignored, ignored, age) in self.snr_joint_pdf_cache.items())[1]]
1192  return self
1193 
1194  def to_xml(self, name):
1195  xml = super(ThincaCoincParamsDistributions, self).to_xml(name)
1196  xml.appendChild(self.horizon_history.to_xml(name))
1197  prefix = u"cached_snr_joint_pdf"
1198  for key, (ignored, binnedarray, ignored) in self.snr_joint_pdf_cache.items():
1199  elem = xml.appendChild(binnedarray.to_xml(prefix))
1200  elem.appendChild(ligolw_param.new_param(u"key", u"lstring", "%s;%s" % (lsctables.ifos_from_instrument_set(key[0]), u",".join(u"%s=%.17g" % inst_dist for inst_dist in sorted(key[1])))))
1201  return xml
1202 
1203  @property
1204  def count_above_threshold(self):
1205  """
1206  Dictionary mapping instrument combination (as a frozenset)
1207  to number of zero-lag coincs observed. An additional entry
1208  with key None stores the total.
1209  """
1210  count_above_threshold = dict((frozenset(self.instrument_categories.instruments(int(round(category)))), count) for category, count in zip(self.zero_lag_rates["instruments"].bins.centres()[0], self.zero_lag_rates["instruments"].array))
1211  count_above_threshold[None] = sum(sorted(count_above_threshold.values()))
1212  return count_above_threshold
1213 
1214  @count_above_threshold.setter
1215  def count_above_threshold(self, count_above_threshold):
1216  self.zero_lag_rates["instruments"].array[:] = 0.
1217  for instruments, count in count_above_threshold.items():
1218  if instruments is not None:
1219  self.zero_lag_rates["instruments"][self.instrument_categories.category(instruments),] = count
1220 
1221  @property
1222  def Pinstrument_noise(self):
1223  P = {}
1224  for category, p in zip(self.background_pdf["instruments"].bins.centres()[0], self.background_pdf["instruments"].array):
1225  instruments = frozenset(self.instrument_categories.instruments(int(round(category))))
1226  if len(instruments) < 2 or not p:
1227  continue
1228  P[instruments] = p
1229  return P
1230 
1231  @property
1232  def Pinstrument_signal(self):
1233  P = {}
1234  for category, p in zip(self.injection_pdf["instruments"].bins.centres()[0], self.injection_pdf["instruments"].array):
1235  instruments = frozenset(self.instrument_categories.instruments(int(round(category))))
1236  if len(instruments) < 2 or not p:
1237  continue
1238  P[instruments] = p
1239  return P
1240 
1241  def random_params(self, instruments):
1242  """
1243  Generator that yields an endless sequence of randomly
1244  generated parameter dictionaries for the given instruments.
1245  NOTE: the parameters will be within the domain of the
1246  respective binnings but are not drawn from the PDF stored
1247  in those binnings --- this is not an MCMC style sampler.
1248  The return value is a tuple, the first element of which is
1249  the random parameter dictionary and the second is the
1250  natural logarithm (up to an arbitrary constant) of the PDF
1251  from which the parameters have been drawn evaluated at the
1252  co-ordinates in the parameter dictionary.
1253 
1254  Example:
1255 
1256  >>> x = iter(ThincaCoincParamsDistributions().random_params(("H1", "L1", "V1")))
1257  >>> x.next()
1258 
1259  See also:
1260 
1261  random_sim_params()
1262 
1263  The sequence is suitable for input to the
1264  pylal.snglcoinc.LnLikelihoodRatio.samples() log likelihood
1265  ratio generator.
1266  """
1267  snr_slope = 0.8 / len(instruments)**3
1268 
1269  keys = tuple("%s_snr_chi" % instrument for instrument in instruments)
1270  base_params = {"instruments": (self.instrument_categories.category(instruments),)}
1271  horizongen = iter(self.horizon_history.randhorizons()).next
1272  # P(horizons) = 1/livetime
1273  log_P_horizons = -math.log(self.horizon_history.maxkey() - self.horizon_history.minkey())
1274  coordgens = tuple(iter(self.binnings[key].randcoord(ns = (snr_slope, 1.), domain = (slice(self.snr_min, None), slice(None, None)))).next for key in keys)
1275  while 1:
1276  seq = sum((coordgen() for coordgen in coordgens), ())
1277  params = CoincParams(zip(keys, seq[0::2]))
1278  params.update(base_params)
1279  params.horizons = horizongen()
1280  # NOTE: I think the result of this sum is, in
1281  # fact, correctly normalized, but nothing requires
1282  # it to be (only that it be correct up to an
1283  # unknown constant) and I've not checked that it is,
1284  # so the documentation doesn't promise that it is.
1285  yield params, sum(seq[1::2], log_P_horizons)
1286 
1287  def random_sim_params(self, sim, horizon_distance = None, snr_min = None, snr_efficiency = 1.0):
1288  """
1289  Generator that yields an endless sequence of randomly
1290  generated parameter dictionaries drawn from the
1291  distribution of parameters expected for the given
1292  injection, which is an instance of a SimInspiral table row
1293  object (see glue.ligolw.lsctables.SimInspiral for more
1294  information). The return value is a tuple, the first
1295  element of which is the random parameter dictionary and the
1296  second is 0.
1297 
1298  See also:
1299 
1300  random_params()
1301 
1302  The sequence is suitable for input to the
1303  pylal.snglcoinc.LnLikelihoodRatio.samples() log likelihood
1304  ratio generator.
1305 
1306  Bugs:
1307 
1308  The second element in each tuple in the sequence is merely
1309  a placeholder, not the natural logarithm of the PDF from
1310  which the sample has been drawn, as in the case of
1311  random_params(). Therefore, when used in combination with
1312  pylal.snglcoinc.LnLikelihoodRatio.samples(), the two
1313  probability densities computed and returned by that
1314  generator along with each log likelihood ratio value will
1315  simply be the probability densities of the signal and noise
1316  populations at that point in parameter space. They cannot
1317  be used to form an importance weighted sampler of the log
1318  likelihood ratios.
1319  """
1320  #
1321  # retrieve horizon distance from history if not given
1322  # explicitly. retrieve SNR threshold from class attribute
1323  # if not given explicitly
1324  #
1325 
1326  if horizon_distance is None:
1327  horizon_distance = self.horizon_history[float(sim.get_time_geocent())]
1328  if snr_min is None:
1329  snr_min = self.snr_min
1330 
1331  #
1332  # compute nominal SNRs
1333  #
1334  # FIXME: remove LIGOTimeGPS type cast when sim is ported
1335  # to swig bindings
1336  #
1337 
1338  cosi2 = math.cos(sim.inclination)**2.
1339  gmst = lal.GreenwichMeanSiderealTime(lal.LIGOTimeGPS(0, sim.get_time_geocent().ns()))
1340  snr_0 = {}
1341  for instrument, DH in horizon_distance.items():
1342  fp, fc = lal.ComputeDetAMResponse(lalsimulation.DetectorPrefixToLALDetector(str(instrument)).response, sim.longitude, sim.latitude, sim.polarization, gmst)
1343  snr_0[instrument] = snr_efficiency * 8. * DH * math.sqrt(fp**2. * (1. + cosi2)**2. / 4. + fc**2. * cosi2) / sim.distance
1344 
1345  #
1346  # construct SNR generators and, approximating the SNRs as
1347  # fixed at the nominal values, construct \chi^2 generators
1348  #
1349 
1350  def snr_gen(snr):
1351  rvs = stats.ncx2(2., snr**2.).rvs
1352  math_sqrt = math.sqrt
1353  while 1:
1354  yield math_sqrt(rvs())
1355 
1356  def chi2_over_snr2_gen(instrument, snr):
1357  rates_lnx = numpy.log(self.injection_rates["%s_snr_chi" % instrument].bins[1].centres())
1358  # FIXME: kinda broken for SNRs below self.snr_min
1359  rates_cdf = self.injection_rates["%s_snr_chi" % instrument][max(snr, self.snr_min),:].cumsum()
1360  # add a small tilt to break degeneracies then
1361  # normalize
1362  rates_cdf += numpy.linspace(0., 0.001 * rates_cdf[-1], len(rates_cdf))
1363  rates_cdf /= rates_cdf[-1]
1364  assert not numpy.isnan(rates_cdf).any()
1365 
1366  interp = interpolate.interp1d(rates_cdf, rates_lnx)
1367  math_exp = math.exp
1368  random_uniform = random.uniform
1369  while 1:
1370  yield math_exp(float(interp(random_uniform(0., 1.))))
1371 
1372  gens = dict(((instrument, "%s_snr_chi" % instrument), (iter(snr_gen(snr)).next, iter(chi2_over_snr2_gen(instrument, snr)).next)) for instrument, snr in snr_0.items())
1373 
1374  #
1375  # yield a sequence of randomly generated parameters for
1376  # this sim.
1377  #
1378 
1379  while 1:
1380  params = CoincParams()
1381  instruments = []
1382  for (instrument, key), (snr, chi2_over_snr2) in gens.items():
1383  snr = snr()
1384  if snr < snr_min:
1385  continue
1386  params[key] = snr, chi2_over_snr2()
1387  instruments.append(instrument)
1388  if len(instruments) < 2:
1389  continue
1390  params["instruments"] = (ThincaCoincParamsDistributions.instrument_categories.category(instruments),)
1391  params.horizons = horizon_distance
1392  yield params, 0.
1393 
1394 
1395  @classmethod
1396  def joint_pdf_of_snrs(cls, instruments, inst_horiz_mapping, n_samples = 80000, bins = rate.ATanLogarithmicBins(3.6, 120., 100), progressbar = None):
1397  """
1398  Return a BinnedArray representing the joint probability
1399  density of measuring a set of SNRs from a network of
1400  instruments. The inst_horiz_mapping is a dictionary
1401  mapping instrument name (e.g., "H1") to horizon distance
1402  (arbitrary units). n_samples is the number of lines over
1403  which to calculate the density in the SNR space. The axes
1404  of the PDF correspond to the instruments in alphabetical
1405  order.
1406  order.
  """
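# Illustrative call (horizon distances are made-up numbers):
#
#	pdf = ThincaCoincParamsDistributions.joint_pdf_of_snrs(
#		("H1", "L1"),
#		{"H1": 120., "L1": 100., "V1": 45.}
#	)
#
# the returned BinnedArray is two-dimensional with axes ordered ("H1",
# "L1"); V1 enters only through the requirement that the source not be
# visible above threshold in instruments outside the combination.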
1407  # get instrument names in alphabetical order
1408  instruments = sorted(instruments)
1409  # get horizon distances and responses in that same order
1410  DH_times_8 = 8. * numpy.array([inst_horiz_mapping[inst] for inst in instruments])
1411  resps = tuple(lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in instruments)
1412 
1413  # get horizon distances and responses of remaining
1414  # instruments (order doesn't matter as long as they're in
1415  # the same order)
1416  DH_times_8_other = 8. * numpy.array([dist for inst, dist in inst_horiz_mapping.items() if inst not in instruments])
1417  resps_other = tuple(lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in inst_horiz_mapping if inst not in instruments)
1418 
1419  # initialize the PDF array, and pre-construct the sequence
1420  # of snr,d(snr) tuples. since the last SNR bin probably
1421  # has infinite size, we remove it from the sequence
1422  # (meaning the PDF will be left 0 in that bin).
1423  pdf = rate.BinnedArray(rate.NDBins([bins] * len(instruments)))
1424  snr_sequence = rate.ATanLogarithmicBins(3.6, 120., 300)
1425  snr_snrlo_snrhi_sequence = numpy.array(zip(snr_sequence.centres(), snr_sequence.lower(), snr_sequence.upper())[:-1])
1426 
1427  # compute the SNR at which to begin iterations over bins
1428  assert type(cls.snr_min) is float
1429  snr_min = cls.snr_min - 3.0
1430  assert snr_min > 0.0
1431 
1432  # no-op if one of the instruments that must be able to
1433  # participate in a coinc is not on. the PDF that gets
1434  # returned is not properly normalized (it's all 0) but
1435  # that's the correct value everywhere except at SNR-->+inf
1436  # where the product of SNR * no sensitivity might be said
1437  # to give a non-zero contribution, who knows. anyway, for
1438  # finite SNR, 0 is the correct value.
1439  if DH_times_8.min() == 0.:
1440  return pdf
1441 
1442  # we select random uniformly-distributed right ascensions,
1443  # so there's no point in also choosing random GMSTs;  any
1444  # value is as good as any other
1445  gmst = 0.0
1446 
1447  # run the sampler the requested # of iterations. save some
1448  # symbols to avoid doing module attribute look-ups in the
1449  # loop
1450  if progressbar is not None:
1451  progressbar.max = n_samples
1452  acos = math.acos
1453  random_uniform = random.uniform
1454  twopi = 2. * math.pi
1455  pi_2 = math.pi / 2.
1456  xlal_am_resp = lal.ComputeDetAMResponse
1457  # FIXME: scipy.stats.rice.rvs broken on reference OS.
1458  # switch to it when we can rely on a new-enough scipy
1459  #rice_rvs = stats.rice.rvs # broken on reference OS
1460  rice_rvs = lambda x: numpy.sqrt(stats.ncx2.rvs(2., x**2.))
1461  for i in xrange(n_samples):
1462  # select random sky location and source orbital
1463  # plane inclination and choice of polarization
1464  theta = acos(random_uniform(-1., 1.))
1465  phi = random_uniform(0., twopi)
1466  psi = random_uniform(0., twopi)
1467  cosi2 = random_uniform(-1., 1.)**2.
1468 
1469  # F+^2 and Fx^2 for each instrument
1470  fpfc2 = numpy.array(tuple(xlal_am_resp(resp, phi, pi_2 - theta, psi, gmst) for resp in resps))**2.
1471  fpfc2_other = numpy.array(tuple(xlal_am_resp(resp, phi, pi_2 - theta, psi, gmst) for resp in resps_other))**2.
1472 
1473  # ratio of distance to inverse SNR for each instrument
1474  fpfc_factors = ((1. + cosi2)**2. / 4., cosi2)
1475  snr_times_D = DH_times_8 * numpy.dot(fpfc2, fpfc_factors)**0.5
1476 
1477  # snr * D in instrument whose SNR grows fastest
1478  # with decreasing D
1479  max_snr_times_D = snr_times_D.max()
1480 
1481  # snr_times_D.min() / snr_min = the furthest a
1482  # source can be and still be above snr_min in all
1483  # instruments involved. max_snr_times_D / that
1484  # distance = the SNR that distance corresponds to
1485  # in the instrument whose SNR grows fastest with
1486  # decreasing distance --- the SNR the source has in
1487  # the most sensitive instrument when visible to all
1488  # instruments in the combo
1489  try:
1490  start_index = snr_sequence[max_snr_times_D / (snr_times_D.min() / snr_min)]
1491  except ZeroDivisionError:
1492  # one of the instruments that must be able
1493  # to see the event is blind to it
1494  continue
1495 
1496  # min_D_other is minimum distance at which source
1497  # becomes visible in an instrument that isn't
1498  # involved. max_snr_times_D / min_D_other gives
1499  # the SNR in the most sensitive instrument at which
1500  # the source becomes visible to one of the
1501  # instruments not allowed to participate
1502  if len(DH_times_8_other):
1503  min_D_other = (DH_times_8_other * numpy.dot(fpfc2_other, fpfc_factors)**0.5).min() / cls.snr_min
1504  try:
1505  end_index = snr_sequence[max_snr_times_D / min_D_other] + 1
1506  except ZeroDivisionError:
1507  # all instruments that must not see
1508  # it are blind to it
1509  end_index = None
1510  else:
1511  # there are no other instruments
1512  end_index = None
1513 
1514  # if start_index >= end_index then any source close
1515  # enough to be visible in all the instruments that
1516  # must see it is already visible to one or more
1517  # instruments that must not. we don't need to check
1518  # for this explicitly: the for loop that comes next
1519  # will simply have no iterations.
1520 
1521  # iterate over the nominal SNRs (= noise-free SNR
1522  # in the most sensitive instrument) at which we
1523  # will add weight to the PDF. from the SNR in the
1524  # most sensitive instrument, the distance to the
1525  # source is:
1526  #
1527  # D = max_snr_times_D / snr
1528  #
1529  # and the (noise-free) SNRs in all instruments are:
1530  #
1531  # snr_times_D / D
1532  #
1533  # scipy's Rice-distributed RV code is used to
1534  # add the effect of background noise, converting
1535  # the noise-free SNRs into simulated observed SNRs
1536  #
1537  # number of sources b/w Dlo and Dhi:
1538  #
1539  # d count \propto D^2 |dD|
1540  # count \propto Dhi^3 - Dlo^3
1541  for D, Dhi, Dlo in max_snr_times_D / snr_snrlo_snrhi_sequence[start_index:end_index]:
1542  pdf[tuple(rice_rvs(snr_times_D / D))] += Dhi**3. - Dlo**3.
1543 
1544  if progressbar is not None:
1545  progressbar.increment()
1546  # check for divide-by-zeros that weren't caught
1547  assert numpy.isfinite(pdf.array).all()
1548 
1549  # convolve the samples with a Gaussian density estimation
1550  # kernel
1551  rate.filter_array(pdf.array, rate.gaussian_window(*(1.875,) * len(pdf.array.shape)))
1552  # protect against round-off in FFT convolution leading to
1553  # negative values in the PDF
1554  numpy.clip(pdf.array, 0., PosInf, pdf.array)
1555  # zero counts in bins that are below the trigger threshold.
1556  # have to convert SNRs to indexes ourselves and adjust so
1557  # that we don't zero the bin in which the SNR threshold
1558  # falls
1559  range_all = slice(None, None)
1560  range_low = slice(None, pdf.bins[0][cls.snr_min])
1561  for i in xrange(len(instruments)):
1562  slices = [range_all] * len(instruments)
1563  slices[i] = range_low
1564  pdf.array[slices] = 0.
1565  # convert bin counts to normalized PDF
1566  pdf.to_pdf()
1567  # done
1568  return pdf
1569 
1570 
1571 def P_instruments_given_signal(horizon_history, n_samples = 500000, min_distance = 0.):
1572  # FIXME: this function computes P(instruments | signal)
1573  # marginalized over time (i.e., marginalized over the history of
1574  # horizon distances). what we really want is to know P(instruments
1575  # | signal, horizon distances), that is we want to leave it
1576  # depending parametrically on the instantaneous horizon distances.
1577  # this function takes about 30 s to evaluate, so computing it on
1578  # the fly isn't practical and we'll require some sort of caching
1579  # scheme. unless somebody can figure out how to compute this
1580  # explicitly without resorting to Monte Carlo integration.
1581 
1582  if n_samples < 1:
1583  raise ValueError("n_samples=%d must be >= 1" % n_samples)
1584  if min_distance < 0.:
1585  raise ValueError("min_distance=%g must be >= 0" % min_distance)
1586 
1587  # get instrument names
1588  names = tuple(horizon_history)
1589  if not names:
1590  raise ValueError("horizon_history is empty")
1591  # get responses in that same order
1592  resps = [lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in names]
1593 
1594  # initialize output. dictionary mapping instrument combination to
1595  # probability (initially all 0).
1596  result = dict.fromkeys((frozenset(instruments) for n in xrange(2, len(names) + 1) for instruments in iterutils.choices(names, n)), 0.0)
1597 
1598  # we select random uniformly-distributed right ascensions, so
1599  # there's no point in also choosing random GMSTs;  any value is
1600  # as good as any other
1601  gmst = 0.0
1602 
1603  # function to spit out a choice of horizon distances drawn
1604  # uniformly in time
1605  rand_horizon_distances = iter(horizon_history.randhorizons()).next
1606 
1607  # in the loop, we'll need a sequence of integers to enumerate
1608  # instruments. construct it here to avoid doing it repeatedly in
1609  # the loop
1610  indexes = tuple(range(len(names)))
1611 
1612  # avoid attribute look-ups and arithmetic in the loop
1613  acos = math.acos
1614  numpy_array = numpy.array
1615  numpy_dot = numpy.dot
1616  numpy_sqrt = numpy.sqrt
1617  random_uniform = random.uniform
1618  twopi = 2. * math.pi
1619  pi_2 = math.pi / 2.
1620  xlal_am_resp = lal.ComputeDetAMResponse
1621 
1622  # loop as many times as requested
1623  for i in xrange(n_samples):
1624  # retrieve random horizon distances in the same order as
1625  # the instruments. note: rand_horizon_distances() is only
1626  # evaluated once in this expression. that's important
1627  DH = numpy_array(map(rand_horizon_distances().__getitem__, names))
1628 
1629  # select random sky location and source orbital plane
1630  # inclination and choice of polarization
1631  theta = acos(random_uniform(-1., 1.))
1632  phi = random_uniform(0., twopi)
1633  psi = random_uniform(0., twopi)
1634  cosi2 = random_uniform(-1., 1.)**2.
1635 
1636  # compute F+^2 and Fx^2 for each antenna from the sky
1637  # location and antenna responses
1638  fpfc2 = numpy_array(tuple(xlal_am_resp(resp, phi, pi_2 - theta, psi, gmst) for resp in resps))**2.
1639 
1640  # SNR * distance / 8 for each instrument (1/8 because
1641  # horizon distance is defined for an SNR of 8, and we've
1642  # omitted that factor for performance)
1643  snr_times_D_over_8 = DH * numpy_sqrt(numpy_dot(fpfc2, ((1. + cosi2)**2. / 4., cosi2)))
1644 
1645  # the volume visible to each instrument given the
1646  # requirement that a source be above the SNR threshold is
1647  #
1648  # V = [constant] * (8 * snr_times_D_over_8 / snr_threshold)**3.
1649  #
1650  # but in the end we'll only need ratios of these volumes so
1651  # we can omit the proportionality constant and we can also
1652  # omit the factor of (8 / snr_threshold)**3
1653  #
1654  # NOTE: noise-induced SNR fluctuations allow sources
1655  # slightly farther away than the nominal detection range
1656  # to be seen above the detection threshold with some
1657  # non-zero probability, and sources close enough to be
1658  # nominally detectable to be masked by noise and missed
1659  # with some non-zero probability.
1660  # accounting for this effect correctly shows it to provide
1661  # an additional multiplicative factor to the volume that
1662  # depends only on the SNR threshold. therefore, like all
1663  # the other factors common to all instruments, it too can
1664  # be ignored.
1665  V_at_snr_threshold = snr_times_D_over_8**3.
1666 
1667  # order[0] is index of instrument that can see sources the
1668  # farthest, order[1] is index of instrument that can see
1669  # sources the next farthest, etc.
1670  order = sorted(indexes, key = V_at_snr_threshold.__getitem__, reverse = True)
1671  ordered_names = tuple(names[i] for i in order)
1672 
1673  # instrument combination and volume of space (up to
1674  # irrelevant proportionality constant) visible to that
1675  # combination given the requirement that a source be above
1676  # the SNR threshold in that combination. sequence of
1677  # instrument combinations is left as a generator expression
1678  # for lazy evaluation
1679  instruments = (frozenset(ordered_names[:n]) for n in xrange(2, len(order) + 1))
1680  V = tuple(V_at_snr_threshold[i] for i in order[1:])
1681  if V[0] <= min_distance:
1682  # fewer than two instruments are on, so no
1683  # combination can see anything
1684  continue
1685 
1686  # for each instrument combination, probability that a
1687  # source visible to at least two instruments is visible to
1688  # that combination (here is where the proportionality
1689  # constant and factor of (8/snr_threshold)**3 drop out of
1690  # the calculation)
1691  P = tuple(x / V[0] for x in V)
1692 
1693  # accumulate result. p - pnext is the probability that a
1694  # source (that is visible to at least two instruments) is
1695  # visible to that combination of instruments and not any
1696  # other combination of instruments.
1697  for key, p, pnext in zip(instruments, P, P[1:] + (0.,)):
1698  result[key] += p - pnext
1699  for key in result:
1700  result[key] /= n_samples
1701 
1702  #
1703  # make sure it's normalized
1704  #
1705 
1706  total = sum(sorted(result.values()))
1707  assert abs(total - 1.) < 1e-13
1708  for key in result:
1709  result[key] /= total
1710 
1711  #
1712  # done
1713  #
1714 
1715  return result
1716 
1717 
1718 #
1719 # =============================================================================
1720 #
1721 # False Alarm Book-Keeping Object
1722 #
1723 # =============================================================================
1724 #
1725 
1726 
1727 def binned_log_likelihood_ratio_rates_from_samples(signal_rates, noise_rates, samples, nsamples):
1728  """
1729  Populate signal and noise BinnedArray histograms from a sequence of
1730  samples (which can be a generator). The first nsamples elements
1731  from the sequence are used. The samples must be a sequence of
1732  three-element tuples (or sequences) in which the first element is a
1733  value of the ranking statistic (the log likelihood ratio) and the second
1734  and third elements the natural logs of the probability densities of obtaining that
1735  value of the ranking statistic in the signal and noise populations
1736  respectively.
1737  """
1738  exp = math.exp
1739  isnan = math.isnan
1740  for ln_lamb, lnP_signal, lnP_noise in itertools.islice(samples, nsamples):
1741  if isnan(ln_lamb):
1742  raise ValueError("encountered NaN likelihood ratio")
1743  if isnan(lnP_signal) or isnan(lnP_noise):
1744  raise ValueError("encountered NaN signal or noise model probability densities")
1745  signal_rates[ln_lamb,] += exp(lnP_signal)
1746  noise_rates[ln_lamb,] += exp(lnP_noise)
1747  return signal_rates, noise_rates
1748 
1749 
1750 def binned_log_likelihood_ratio_rates_from_samples_wrapper(queue, *args, **kwargs):
1751  try:
1752  queue.put(binned_log_likelihood_ratio_rates_from_samples(*args, **kwargs))
1753  except:
1754  queue.put(None)
1755  raise
1756 
1757 
1758 #
1759 # Class to compute ranking statistic PDFs for background-like and
1760 # signal-like populations
1761 #
1762 # FIXME: this is really close to just being another subclass of
1763 # CoincParamsDistributions. consider the wisdom of rewriting it to be such
1764 #
1765 
1766 
1767 class RankingData(object):
1768  ligo_lw_name_suffix = u"gstlal_inspiral_rankingdata"
1769 
1770  #
1771  # likelihood ratio binning
1772  #
1773 
1774  binnings = {
1775  "ln_likelihood_ratio": rate.NDBins((rate.ATanBins(0., 110., 3000),))
1776  }
1777 
1778  filters = {
1779  "ln_likelihood_ratio": rate.gaussian_window(8.)
1780  }
1781 
1782  #
1783  # Threshold at which FAP & FAR normalization will occur
1784  #
1785 
1786  ln_likelihood_ratio_threshold = NegInf
1787 
1788 
1789  def __init__(self, coinc_params_distributions, instruments, process_id = None, nsamples = 1000000, verbose = False):
1790  self.background_likelihood_rates = {}
1791  self.background_likelihood_pdfs = {}
1792  self.signal_likelihood_rates = {}
1793  self.signal_likelihood_pdfs = {}
1794  self.zero_lag_likelihood_rates = {}
1795  self.zero_lag_likelihood_pdfs = {}
1796  self.process_id = process_id
1797 
1798  #
1799  # initialize binnings
1800  #
1801 
1802  instruments = tuple(instruments)
1803  for key in [frozenset(ifos) for n in range(2, len(instruments) + 1) for ifos in iterutils.choices(instruments, n)]:
1804  self.background_likelihood_rates[key] = rate.BinnedArray(self.binnings["ln_likelihood_ratio"])
1805  self.signal_likelihood_rates[key] = rate.BinnedArray(self.binnings["ln_likelihood_ratio"])
1806  self.zero_lag_likelihood_rates[key] = rate.BinnedArray(self.binnings["ln_likelihood_ratio"])
1807 
1808  #
1809  # bailout used by .from_xml() class method to get an
1810  # uninitialized instance
1811  #
1812 
1813  if coinc_params_distributions is None:
1814  return
1815 
1816  #
1817  # run importance-weighted random sampling to populate
1818  # binnings. one worker process per instrument combination
1819  #
1820 
1821  threads = []
1822  for key in self.background_likelihood_rates:
1823  if verbose:
1824  print >>sys.stderr, "computing ranking statistic PDFs for %s" % ", ".join(sorted(key))
1825  q = multiprocessing.queues.SimpleQueue()
1826  p = multiprocessing.Process(target = lambda: binned_log_likelihood_ratio_rates_from_samples_wrapper(
1827  q,
1828  self.signal_likelihood_rates[key],
1829  self.background_likelihood_rates[key],
1830  snglcoinc.LnLikelihoodRatio(coinc_params_distributions).samples(coinc_params_distributions.random_params(key)),
1831  nsamples = nsamples
1832  ))
1833  p.start()
1834  threads.append((p, q, key))
1835  while threads:
1836  p, q, key = threads.pop(0)
1837  self.signal_likelihood_rates[key], self.background_likelihood_rates[key] = q.get()
1838  p.join()
1839  if p.exitcode:
1840  raise Exception("sampling thread failed")
1841  if verbose:
1842  print >>sys.stderr, "done computing ranking statistic PDFs"
1843 
1844  #
1845  # propagate knowledge of the background event rates through
1846  # to the ranking statistic distributions. this information
1847  # is required so that when adding ranking statistic PDFs in
1848  # ._compute_combined_rates() or our .__iadd__() method
1849  # they are combined with the correct relative weights.
1850  # what we're doing here is making the total event count in
1851  # each background ranking statistic array equal to the
1852  # expected background coincidence event count for the
1853  # corresponding instrument combination.
1854  #
1855 
1856  for instruments, binnedarray in self.background_likelihood_rates.items():
1857  if binnedarray.array.any():
1858  binnedarray.array *= coinc_params_distributions.background_rates["instruments"][coinc_params_distributions.instrument_categories.category(instruments),] / binnedarray.array.sum()
1859 
1860  #
1861  # propagate instrument combination priors through to
1862  # ranking statistic histograms so that
1863  # ._compute_combined_rates() and .__iadd__() combine the
1864  # histograms with the correct weights.
1865  #
1866  # FIXME: need to also apply a weight that reflects the
1867  # probability of recovering a signal in the interval
1868  # spanned by the data these histograms reflect so that when
1869  # combining statistics from different intervals they are
1870  # summed with the correct weights.
1871  #
1872 
1873  for instruments, binnedarray in self.signal_likelihood_rates.items():
1874  if binnedarray.array.any():
1875  binnedarray.array *= coinc_params_distributions.injection_rates["instruments"][coinc_params_distributions.instrument_categories.category(instruments),] / binnedarray.array.sum()
1876 
1877  #
1878  # compute combined rates
1879  #
1880 
1881  self._compute_combined_rates()
1882 
1883  #
1884  # populate the ranking statistic PDF arrays from the counts
1885  #
1886 
1887  self.finish()
1888 
1889 
1890  def collect_zero_lag_rates(self, connection, coinc_def_id):
1891  for instruments, ln_likelihood_ratio in connection.cursor().execute("""
1892 SELECT
1893  coinc_inspiral.ifos,
1894  coinc_event.likelihood
1895 FROM
1896  coinc_inspiral
1897  JOIN coinc_event ON (
1898  coinc_event.coinc_event_id == coinc_inspiral.coinc_event_id
1899  )
1900 WHERE
1901  coinc_event.coinc_def_id == ?
1902  AND NOT EXISTS (
1903  SELECT
1904  *
1905  FROM
1906  time_slide
1907  WHERE
1908  time_slide.time_slide_id == coinc_event.time_slide_id
1909  AND time_slide.offset != 0
1910  )
1911 """, (coinc_def_id,)):
1912  assert ln_likelihood_ratio is not None, "null likelihood ratio encountered. probably coincs have not been ranked"
1913  self.zero_lag_likelihood_rates[frozenset(lsctables.instrument_set_from_ifos(instruments))][ln_likelihood_ratio,] += 1.
1914 
1915  #
1916  # update combined rates. NOTE: this recomputes all the
1917  # combined rates, not just the zero-lag combined rates.
1918  # it's safe to do this, but it might be found to be too
1919  # much of a performance hit every time one wants to update
1920  # the zero-lag rates. if it becomes a problem, this call
1921  # might need to be removed from this method so that it is
1922  # invoked explicitly on an as-needed basis
1923  #
1924 
1925  self._compute_combined_rates()
1926 
1927  def _compute_combined_rates(self):
1928  #
1929  # (re-)compute combined noise and signal rates
1930  #
1931 
1932  def compute_combined_rates(rates_dict):
1933  try:
1934  del rates_dict[None]
1935  except KeyError:
1936  pass
1937  total_rate = rates_dict.itervalues().next().copy()
1938  # FIXME: we don't bother checking that the
1939  # binnings are all compatible, we assume they were
1940  # all generated in our __init__() method and *are*
1941  # the same
1942  total_rate.array[:] = sum(binnedarray.array for binnedarray in rates_dict.values())
1943  rates_dict[None] = total_rate
1944 
1945  compute_combined_rates(self.background_likelihood_rates)
1946  compute_combined_rates(self.signal_likelihood_rates)
1947  compute_combined_rates(self.zero_lag_likelihood_rates)
1948 
1949  def finish(self, verbose = False):
1950  self.background_likelihood_pdfs.clear()
1951  self.signal_likelihood_pdfs.clear()
1952  self.zero_lag_likelihood_pdfs.clear()
1953  def build_pdf(binnedarray, filt):
1954  # copy counts into pdf array and smooth
1955  pdf = binnedarray.copy()
1956  rate.filter_array(pdf.array, filt)
1957  # zero the counts in the infinite-sized high bin so
1958  # the final PDF normalization ends up OK
1959  pdf.array[-1] = 0.
1960  # convert to normalized PDF
1961  pdf.to_pdf()
1962  return pdf
1963  if verbose:
1964  progressbar = ProgressBar(text = "Computing Log Lambda PDFs", max = len(self.background_likelihood_rates) + len(self.signal_likelihood_rates) + len(self.zero_lag_likelihood_rates))
1965  progressbar.show()
1966  else:
1967  progressbar = None
1968  for key, binnedarray in self.background_likelihood_rates.items():
1969  assert not numpy.isnan(binnedarray.array).any(), "%s noise model log likelihood ratio counts contain NaNs" % (key if key is not None else "combined")
1970  self.background_likelihood_pdfs[key] = build_pdf(binnedarray, self.filters["ln_likelihood_ratio"])
1971  if progressbar is not None:
1972  progressbar.increment()
1973  for key, binnedarray in self.signal_likelihood_rates.items():
1974  assert not numpy.isnan(binnedarray.array).any(), "%s signal model log likelihood ratio counts contain NaNs" % (key if key is not None else "combined")
1975  self.signal_likelihood_pdfs[key] = build_pdf(binnedarray, self.filters["ln_likelihood_ratio"])
1976  if progressbar is not None:
1977  progressbar.increment()
1978  for key, binnedarray in self.zero_lag_likelihood_rates.items():
1979  assert not numpy.isnan(binnedarray.array).any(), "%s zero lag log likelihood ratio counts contain NaNs" % (key if key is not None else "combined")
1980  self.zero_lag_likelihood_pdfs[key] = build_pdf(binnedarray, self.filters["ln_likelihood_ratio"])
1981  if progressbar is not None:
1982  progressbar.increment()
1983 
1984  def __iadd__(self, other):
1985  snglcoinc.CoincParamsDistributions.addbinnedarrays(self.background_likelihood_rates, other.background_likelihood_rates, self.background_likelihood_pdfs, other.background_likelihood_pdfs)
1986  snglcoinc.CoincParamsDistributions.addbinnedarrays(self.signal_likelihood_rates, other.signal_likelihood_rates, self.signal_likelihood_pdfs, other.signal_likelihood_pdfs)
1987  snglcoinc.CoincParamsDistributions.addbinnedarrays(self.zero_lag_likelihood_rates, other.zero_lag_likelihood_rates, self.zero_lag_likelihood_pdfs, other.zero_lag_likelihood_pdfs)
1988  return self
1989 
1990  @classmethod
1991  def from_xml(cls, xml, name):
1992  # find the root of the XML tree containing the
1993  # serialization of this object
1994  xml, = [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and elem.Name == u"%s:%s" % (name, cls.ligo_lw_name_suffix)]
1995 
1996  # create a mostly uninitialized instance
1997  self = cls(None, (), process_id = ligolw_param.get_pyvalue(xml, u"process_id"))
1998 
1999  # pull out the likelihood count and PDF arrays
2000  def reconstruct(xml, prefix, target_dict):
2001  for ba_elem in [elem for elem in xml.getElementsByTagName(ligolw.LIGO_LW.tagName) if elem.hasAttribute(u"Name") and ("_%s" % prefix) in elem.Name]:
2002  ifo_set = frozenset(lsctables.instrument_set_from_ifos(ba_elem.Name.split("_")[0]))
2003  target_dict[ifo_set] = rate.BinnedArray.from_xml(ba_elem, ba_elem.Name.split(":")[0])
2004  reconstruct(xml, u"background_likelihood_rate", self.background_likelihood_rates)
2005  reconstruct(xml, u"background_likelihood_pdf", self.background_likelihood_pdfs)
2006  reconstruct(xml, u"signal_likelihood_rate", self.signal_likelihood_rates)
2007  reconstruct(xml, u"signal_likelihood_pdf", self.signal_likelihood_pdfs)
2008  reconstruct(xml, u"zero_lag_likelihood_rate", self.zero_lag_likelihood_rates)
2009  reconstruct(xml, u"zero_lag_likelihood_pdf", self.zero_lag_likelihood_pdfs)
2010 
2011  assert set(self.background_likelihood_rates) == set(self.background_likelihood_pdfs)
2012  assert set(self.signal_likelihood_rates) == set(self.signal_likelihood_pdfs)
2013  assert set(self.zero_lag_likelihood_rates) == set(self.zero_lag_likelihood_pdfs)
2014  assert set(self.background_likelihood_rates) == set(self.signal_likelihood_rates)
2015  assert set(self.background_likelihood_rates) == set(self.zero_lag_likelihood_rates)
2016 
2017  self._compute_combined_rates()
2018 
2019  return self
2020 
2021  def to_xml(self, name):
2022  xml = ligolw.LIGO_LW({u"Name": u"%s:%s" % (name, self.ligo_lw_name_suffix)})
2023  xml.appendChild(ligolw_param.new_param(u"process_id", u"ilwd:char", self.process_id))
2024  def store(xml, prefix, source_dict):
2025  for key, binnedarray in source_dict.items():
2026  if key is not None:
2027  ifostr = lsctables.ifos_from_instrument_set(key).replace(",","")
2028  xml.appendChild(binnedarray.to_xml(u"%s_%s" % (ifostr, prefix)))
2029  store(xml, u"background_likelihood_rate", self.background_likelihood_rates)
2030  store(xml, u"background_likelihood_pdf", self.background_likelihood_pdfs)
2031  store(xml, u"signal_likelihood_rate", self.signal_likelihood_rates)
2032  store(xml, u"signal_likelihood_pdf", self.signal_likelihood_pdfs)
2033  store(xml, u"zero_lag_likelihood_rate", self.zero_lag_likelihood_rates)
2034  store(xml, u"zero_lag_likelihood_pdf", self.zero_lag_likelihood_pdfs)
2035 
2036  return xml
2037 
2038 
2039 #
2040 # Class to compute false-alarm probabilities and false-alarm rates from
2041 # ranking statistic PDFs
2042 #
2043 
2044 
2045 class FAPFAR(object):
2046  def __init__(self, ranking_stats, livetime = None):
2047  # none is OK, but then can only compute FAPs, not FARs
2048  self.livetime = livetime
2049 
2050  # pull out both the bg counts and the pdfs, we need 'em both
2051  bgcounts_ba = ranking_stats.background_likelihood_rates[None]
2052  bgpdf_ba = ranking_stats.background_likelihood_pdfs[None]
2053  # we also need the zero lag counts to build the extinction model
2054  zlagcounts_ba = ranking_stats.zero_lag_likelihood_rates[None]
2055 
2056  # safety checks
2057  assert not numpy.isnan(bgcounts_ba.array).any(), "log likelihood ratio rates contains NaNs"
2058  assert not (bgcounts_ba.array < 0.0).any(), "log likelihood ratio rate contains negative values"
2059  assert not numpy.isnan(bgpdf_ba.array).any(), "log likelihood ratio pdf contains NaNs"
2060  assert not (bgpdf_ba.array < 0.0).any(), "log likelihood ratio pdf contains negative values"
2061 
2062  # grab bins that are not infinite in size
2063  finite_bins = numpy.isfinite(bgcounts_ba.bins.volumes())
2064  ranks = bgcounts_ba.bins.upper()[0].compress(finite_bins)
2065  drank = bgcounts_ba.bins.volumes().compress(finite_bins)
2066 
2067  # whittle down the arrays of counts and pdfs
2068  bgcounts_ba_array = bgcounts_ba.array.compress(finite_bins)
2069  bgpdf_ba_array = bgpdf_ba.array.compress(finite_bins)
2070  zlagcounts_ba_array = zlagcounts_ba.array.compress(finite_bins)
2071 
2072  # get the extincted background PDF
2073  self.zero_lag_total_count = zlagcounts_ba_array.sum()
2074  extinct_bf_pdf = self.extinct(bgcounts_ba_array, bgpdf_ba_array, zlagcounts_ba_array, ranks)
2075 
2076  # cumulative distribution function and its complement.
2077  # it's numerically better to recompute the ccdf by
2078  # reversing the array of weights than trying to subtract
2079  # the cdf from 1.
2080  weights = extinct_bf_pdf * drank
2081  cdf = weights.cumsum()
2082  cdf /= cdf[-1]
2083  ccdf = weights[::-1].cumsum()[::-1]
2084  ccdf /= ccdf[0]
2085 
2086  # cdf boundary condition: cdf = 1/e at the ranking
2087  # statistic threshold so that self.far_from_rank(threshold)
2088  # * livetime = observed count of events above threshold.
2089  # FIXME this doesn't actually work.
2090  # FIXME not doing it doesn't actually work.
2091  # ccdf *= 1. - 1. / math.e
2092  # cdf *= 1. - 1. / math.e
2093  # cdf += 1. / math.e
2094 
2095  # last checks that the CDF and CCDF are OK
2096  assert not numpy.isnan(cdf).any(), "log likelihood ratio CDF contains NaNs"
2097  assert not numpy.isnan(ccdf).any(), "log likelihood ratio CCDF contains NaNs"
2098  assert ((0. <= cdf) & (cdf <= 1.)).all(), "log likelihood ratio CDF failed to be normalized"
2099  assert ((0. <= ccdf) & (ccdf <= 1.)).all(), "log likelihood ratio CCDF failed to be normalized"
2100  assert (abs(1. - (cdf[:-1] + ccdf[1:])) < 1e-12).all(), "log likelihood ratio CDF + CCDF != 1 (max error = %g)" % abs(1. - (cdf[:-1] + ccdf[1:])).max()
2101 
2102  # build interpolators
2103  self.cdf_interpolator = interpolate.interp1d(ranks, cdf)
2104  self.ccdf_interpolator = interpolate.interp1d(ranks, ccdf)
2105 
2106  # record min and max ranks so we know which end of the ccdf
2107  # to use when we're out of bounds
2108  self.minrank = min(ranks)
2109  self.maxrank = max(ranks)
2110 
2111  def extinct(self, bgcounts_ba_array, bgpdf_ba_array, zlagcounts_ba_array, ranks):
2112  # Generate arrays of complementary cumulative counts
2113  # for background events (monte carlo, pre clustering)
2114  # and zero lag events (observed, post clustering)
2115  zero_lag_compcumcount = zlagcounts_ba_array[::-1].cumsum()[::-1]
2116  bg_compcumcount = bgcounts_ba_array[::-1].cumsum()[::-1]
2117 
2118  # Fit for the number of preclustered, independent coincs by
2119  # only considering the observed counts safely in the bulk of
2120  # the distribution. Only do the fit above 10 counts and below
2121  # 10000; if there are too few coincidences for a reliable fit, emit a warning
2122  fit_min_rank = 1.
2123  fit_min_counts = min(10., self.zero_lag_total_count / 10. + 1)
2124  fit_max_counts = min(10000., self.zero_lag_total_count / 10. + 2) # the +2 guarantees that fit_max_counts > fit_min_counts
2125  rank_range = numpy.logical_and(ranks > fit_min_rank, numpy.logical_and(zero_lag_compcumcount < fit_max_counts, zero_lag_compcumcount > fit_min_counts))
2126  if self.zero_lag_total_count < 100.:
2127  warnings.warn("There are fewer than 100 coincidences; extinction effects on the background may not be accurately calculated, which will decrease the accuracy of the combined-instruments background estimation.")
2128  if zero_lag_compcumcount.compress(rank_range).size < 1:
2129  raise ValueError("not enough zero lag data to fit background")
2130 
2131  # Use curve fit to find the predicted total preclustering
2132  # count. First we need an interpolator of the counts
2133  obs_counts = interpolate.interp1d(ranks, bg_compcumcount)
2134  bg_pdf_interp = interpolate.interp1d(ranks, bgpdf_ba_array)
2135 
2136  def extincted_counts(x, N_ratio):
2137  out = max(zero_lag_compcumcount) * (1. - numpy.exp(-obs_counts(x) * N_ratio))
2138  out[~numpy.isfinite(out)] = 0.
2139  return out
2140 
2141  def extincted_pdf(x, N_ratio):
2142  out = N_ratio * numpy.exp(-obs_counts(x) * N_ratio) * bg_pdf_interp(x)
2143  out[~numpy.isfinite(out)] = 0.
2144  return out
2145 
2146  # Fit for the ratio of unclustered to clustered triggers.
2147  # Only fit N_ratio over the range of ranks decided above
2148  precluster_normalization, precluster_covariance_matrix = curve_fit(
2149  extincted_counts,
2150  ranks[rank_range],
2151  zero_lag_compcumcount.compress(rank_range),
2152  sigma = zero_lag_compcumcount.compress(rank_range)**.5,
2153  p0 = 1e-4
2154  )
2155 
2156  N_ratio = precluster_normalization[0]
2157 
2158  return extincted_pdf(ranks, N_ratio)
2159 
2160  def fap_from_rank(self, rank):
2161  # implements equation (8) from Phys. Rev. D 88, 024025.
2162  # arXiv:1209.0718
2163  rank = max(self.minrank, min(self.maxrank, rank))
2164  fap = float(self.ccdf_interpolator(rank))
2165  return fap_after_trials(fap, self.zero_lag_total_count)
2166 
2167  def far_from_rank(self, rank):
2168  # implements equation (B4) of Phys. Rev. D 88, 024025.
2169  # arXiv:1209.0718. the return value is divided by T to
2170  # convert events/experiment to events/second.
2171  assert self.livetime is not None, "cannot compute FAR without livetime"
2172  rank = max(self.minrank, min(self.maxrank, rank))
2173  # true-dismissal probability = 1 - single-event false-alarm
2174  # probability, the integral in equation (B4)
2175  tdp = float(self.cdf_interpolator(rank))
2176  try:
2177  log_tdp = math.log(tdp)
2178  except ValueError:
2179  # TDP = 0 --> FAR = +inf
2180  return PosInf
2181  if log_tdp >= -1e-9:
2182  # rare event: avoid underflow by using log1p(-FAP)
2183  log_tdp = math.log1p(-float(self.ccdf_interpolator(rank)))
2184  return self.zero_lag_total_count * -log_tdp / self.livetime
2185 
2186  def assign_faps(self, connection):
2187  # assign false-alarm probabilities
2188  # FIXME: choose a function name more likely to be unique?
2189  # FIXME: abusing the false_alarm_rate column; move to a
2190  # false_alarm_probability column?
2191  connection.create_function("fap", 1, self.fap_from_rank)
2192  connection.cursor().execute("""
2193 UPDATE
2194  coinc_inspiral
2195 SET
2196  false_alarm_rate = (
2197  SELECT
2198  fap(coinc_event.likelihood)
2199  FROM
2200  coinc_event
2201  WHERE
2202  coinc_event.coinc_event_id == coinc_inspiral.coinc_event_id
2203  )
2204 """)
2205 
2206  def assign_fars(self, connection):
2207  # assign false-alarm rates
2208  # FIXME: choose a function name more likely to be unique?
2209  connection.create_function("far", 1, self.far_from_rank)
2210  connection.cursor().execute("""
2211 UPDATE
2212  coinc_inspiral
2213 SET
2214  combined_far = (
2215  SELECT
2216  far(coinc_event.likelihood)
2217  FROM
2218  coinc_event
2219  WHERE
2220  coinc_event.coinc_event_id == coinc_inspiral.coinc_event_id
2221  )
2222 """)
2223 
2224 
2225 #
2226 # =============================================================================
2227 #
2228 # I/O
2229 #
2230 # =============================================================================
2231 #
2232 
2233 
2234 def gen_likelihood_control_doc(xmldoc, process, coinc_params_distributions, ranking_data, seglists, name = u"gstlal_inspiral_likelihood", comment = None):
2235  node = xmldoc.childNodes[-1]
2236  assert node.tagName == ligolw.LIGO_LW.tagName
2237 
2238  if coinc_params_distributions is not None:
2239  coinc_params_distributions.process_id = process.process_id
2240  node.appendChild(coinc_params_distributions.to_xml(name))
2241 
2242  if ranking_data is not None:
2243  ranking_data.process_id = process.process_id
2244  node.appendChild(ranking_data.to_xml(name))
2245 
2246  llwsegments = ligolw_segments.LigolwSegments(xmldoc)
2247  llwsegments.insert_from_segmentlistdict(seglists, u"%s:segments" % name, comment = comment)
2248  llwsegments.finalize(process)
2249 
2250  return xmldoc
2251 
2252 
2253 def parse_likelihood_control_doc(xmldoc, name = u"gstlal_inspiral_likelihood"):
2254  coinc_params_distributions = ranking_data = process_id = None
2255  try:
2256  coinc_params_distributions = ThincaCoincParamsDistributions.from_xml(xmldoc, name)
2257  except ValueError:
2258  pass
2259  else:
2260  process_id = coinc_params_distributions.process_id
2261  try:
2262  ranking_data = RankingData.from_xml(xmldoc, name)
2263  except ValueError:
2264  pass
2265  else:
2266  if process_id is None:
2267  process_id = ranking_data.process_id
2268  if coinc_params_distributions is None and ranking_data is None:
2269  raise ValueError("document does not contain likelihood ratio data")
2270  seglists = ligolw_segments.segmenttable_get_by_name(xmldoc, u"%s:segments" % name).coalesce()
2271  return coinc_params_distributions, ranking_data, seglists
2272 
2273 
2274 #
2275 # =============================================================================
2276 #
2277 # Other
2278 #
2279 # =============================================================================
2280 #
2281 
2282 
2283 def get_live_time(seglistdict, verbose = False):
2284  livetime = float(abs(vote((segs for instrument, segs in seglistdict.items() if instrument != "H2"), 2)))
2285  if verbose:
2286  print >> sys.stderr, "Livetime: %.3g s" % livetime
2287  return livetime
2288 
2289 
2290 def get_live_time_segs_from_search_summary_table(connection, program_name = "gstlal_inspiral"):
2291  xmldoc = dbtables.get_xml(connection)
2292  farsegs = ligolw_search_summary.segmentlistdict_fromsearchsummary(xmldoc, program_name).coalesce()
2293  xmldoc.unlink()
2294  return farsegs