gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
streamthinca.py
Go to the documentation of this file.
1 # Copyright (C) 2011--2013 Kipp Cannon, Chad Hanna, Drew Keppel
2 #
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation; either version 2 of the License, or (at your
6 # option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11 # Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License along
14 # with this program; if not, write to the Free Software Foundation, Inc.,
15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 
17 ## @file
18 # The python module to implement streaming coincidence
19 #
20 # ### Review Status
21 #
22 # STATUS: reviewed with actions
23 #
24 # | Names | Hash | Date |
25 # | ------------------------------------------- | ------------------------------------------- | ---------- |
26 # | Kipp Cannon, Chad Hanna, Jolien Creighton, Florent Robinet, B. Sathyaprakash, Duncan Meacher, T.G.G. Li | b8fef70a6bafa52e3e120a495ad0db22007caa20 | 2014-12-03 |
27 #
28 # #### Action items
29 #
30 # - L300+: Please document within the code that the FAR column is used to store FAP so that future developers don't get confused what that column represents
31 
32 #
33 # =============================================================================
34 #
35 # Preamble
36 #
37 # =============================================================================
38 #
39 
40 
41 import bisect
42 
43 
44 from glue import iterutils
45 from glue import segments
46 from glue.ligolw import lsctables
47 from pylal import ligolw_thinca
48 from pylal import snglcoinc
49 import lal
50 import time
51 
52 
53 #
54 # =============================================================================
55 #
56 # Configuration
57 #
58 # =============================================================================
59 #
60 
61 
62 #
63 # allowed instrument combinations (yes, hard-coded, just take off, eh)
64 #
65 
66 
67 allowed_instrument_combos = frozenset([frozenset(("H1", "H2", "L1")), frozenset(("H1", "L1", "V1")), frozenset(("H1", "L1")), frozenset(("H1", "V1")), frozenset(("L1", "V1")), frozenset(("H1H2", "L1")), frozenset(("H1H2", "L1", "V1")), frozenset(("E1", "E2")), frozenset(("E1", "E3")), frozenset(("E2", "E3")), frozenset(("E1", "E2", "E3"))])
68 
69 
70 #
71 # =============================================================================
72 #
73 # pylal.ligolw_thinca Customizations
74 #
75 # =============================================================================
76 #
77 
78 
79 #
80 # sngl_inspiral<-->sngl_inspiral comparison function
81 #
82 
83 
84 def event_comparefunc(event_a, offset_a, event_b, offset_b, light_travel_time, delta_t):
85  # NOTE: we also require the masses and spin of the two events to
86  # match, but the InspiralEventList class ensures that all event
87  # pairs that make it this far are from the same template so we
88  # don't need to explicitly test for that here.
89  return float(abs(event_a.get_end() + offset_a - event_b.get_end() - offset_b)) > light_travel_time + delta_t
90 
91 
92 #
93 # gstlal_inspiral's triggers cause a divide-by-zero error in the effective
94 # SNR method attached to the triggers, so we replace it with one that works
95 # for the duration of the ligolw_thinca() call. this is the function with
96 # which we replace it
97 #
98 
99 
100 def get_effective_snr(self, fac):
101  return self.snr
102 
103 
104 #
105 # InspiralEventList customization making use of the fact that we demand
106 # exact template co-incidence to increase performance. NOTE: the use of
107 # this class defeats ligolw_thinca()'s ability to apply veto segments. We
108 # don't use that feature in StreamThinca so this isn't a problem for us,
109 # but it's something to be aware of if this gets used somewhere else.
110 #
111 
112 
113 class InspiralEventList(ligolw_thinca.InspiralEventList):
114  @staticmethod
115  def template(event):
116  """
117  Returns an immutable hashable object (it can be used as a
118  dictionary key) uniquely identifying the template that
119  produced the given event.
120  """
121  return event.mass1, event.mass2, event.spin1x, event.spin1y, event.spin1z, event.spin2x, event.spin2y, event.spin2z
122 
123  def make_index(self):
124  self.index = {}
125  for event in self:
126  self.index.setdefault(self.template(event), []).append(event)
127  for events in self.index.values():
128  events.sort(lambda a, b: cmp(a.end_time, b.end_time) or cmp(a.end_time_ns, b.end_time_ns))
129 
130  def get_coincs(self, event_a, offset_a, light_travel_time, e_thinca_parameter, comparefunc):
131  #
132  # event_a's end time, with the time shift applied
133  #
134 
135  end = event_a.get_end() + offset_a - self.offset
136 
137  #
138  # all events sharing event_a's template
139  #
140 
141  try:
142  events = self.index[self.template(event_a)]
143  except KeyError:
144  # that template didn't produce any events in this
145  # instrument
146  return []
147 
148  #
149  # extract the subset of events from this list that pass
150  # coincidence with event_a (use bisection searches for the
151  # minimum and maximum allowed end times to quickly identify
152  # a subset of the full list)
153  #
154 
155  return [event_b for event_b in events[bisect.bisect_left(events, end - self.dt) : bisect.bisect_right(events, end + self.dt)] if not comparefunc(event_a, offset_a, event_b, self.offset, light_travel_time, e_thinca_parameter)]
156 
157 
158 #
159 # Replace the InspiralEventList class in ligolw_thinca with ours
160 #
161 
162 
163 ligolw_thinca.InspiralEventList = InspiralEventList
164 
165 
166 #
167 # =============================================================================
168 #
169 # StreamThinca
170 #
171 # =============================================================================
172 #
173 
174 
175 #
176 # on-the-fly thinca implementation
177 #
178 
179 
180 class StreamThinca(object):
181  def __init__(self, coincidence_threshold, thinca_interval = 50.0, sngls_snr_threshold = None):
182  self._xmldoc = None
183  self.thinca_interval = thinca_interval # seconds
184  self.last_coincs = {}
185  self.sngls_snr_threshold = sngls_snr_threshold
186  self.sngl_inspiral_table = None
187  self.ln_likelihood_func = None
188  self.ln_likelihood_params_func = None
189 
190  # the \Delta t window not including the light travel time
191  self.coincidence_threshold = coincidence_threshold
192 
193  # upper boundary of interval spanned by last invocation
194  self.last_boundary = -segments.infinity()
195 
196  # set of the event ids of triggers currently in ram that
197  # have already been used in coincidences
198  self.ids = set()
199 
200 
201  def set_coinc_params_distributions(self, coinc_params_distributions):
202  self.ln_likelihood_func = snglcoinc.LnLikelihoodRatio(coinc_params_distributions)
203  self.ln_likelihood_params_func = coinc_params_distributions.coinc_params
204  def del_coinc_params_distributions(self):
205  self.ln_likelihood_func = None
206  self.ln_likelihood_params_func = None
207  coinc_params_distributions = property(None, set_coinc_params_distributions, del_coinc_params_distributions, "ThincaCoincParamsDistributions instance with which to compute likelihood ratio values.")
208 
209 
210  def add_events(self, xmldoc, process_id, events, boundary, fapfar = None):
211  # invalidate the coinc extractor in case all that follows
212  # is a no-op
213  self.last_coincs = {}
214 
215  # we need our own copy of the sngl_inspiral table because
216  # we need a place to store a history of all the triggers,
217  # and a place we can run coincidence on them. when making
218  # our copy, we can't use table.new_from_template() because
219  # we need to ensure we have a Table subclass, not a DBTable
220  # subclass
221  if self.sngl_inspiral_table is None:
222  self.sngl_inspiral_table = lsctables.New(lsctables.SnglInspiralTable, lsctables.SnglInspiralTable.get_table(xmldoc).columnnames)
223  # so we can watch for it changing
224  assert self._xmldoc is None
225  self._xmldoc = xmldoc
226 
227  # convert the new row objects to the type required by
228  # ligolw_thinca(), and append to our sngl_inspiral table
229  for old_event in events:
230  new_event = ligolw_thinca.SnglInspiral()
231  for col in self.sngl_inspiral_table.columnnames:
232  setattr(new_event, col, getattr(old_event, col))
233  self.sngl_inspiral_table.append(new_event)
234 
235  # run coincidence, return non-coincident sngls. no-op if
236  # no new events
237  if not events:
238  return []
239  return self.run_coincidence(xmldoc, process_id, boundary, fapfar = fapfar)
240 
241 
242  def run_coincidence(self, xmldoc, process_id, boundary, fapfar = None):
243  # safety check
244  assert xmldoc is self._xmldoc
245 
246  # stay this far away from the boundaries of the available
247  # triggers
248  coincidence_back_off = max(abs(offset) for offset in lsctables.TimeSlideTable.get_table(xmldoc).getColumnByName("offset"))
249 
250  # check that we've accumulated thinca_interval seconds, and
251  # that .add_events() has been called with some events since
252  # the last flush
253  if self.last_boundary + self.thinca_interval > boundary - coincidence_back_off or self.sngl_inspiral_table is None:
254  return []
255 
256  # remove triggers that are too old to be useful from our
257  # internal sngl_inspiral table. save any that were never
258  # used in coincidences
259  discard_boundary = self.last_boundary - coincidence_back_off
260  noncoinc_sngls = [row for row in self.sngl_inspiral_table if row.get_end() < discard_boundary and row.event_id not in self.ids]
261  iterutils.inplace_filter(lambda row: row.get_end() >= discard_boundary, self.sngl_inspiral_table)
262 
263  # we need our own copies of these other tables because
264  # sometimes ligolw_thinca wants to modify the attributes of
265  # a row object after appending it to a table, which isn't
266  # possible if the tables are SQL-based. these do not store
267  # any state so we create them on the fly when needed
268  coinc_event_map_table = lsctables.New(lsctables.CoincMapTable)
269  coinc_event_table = lsctables.New(lsctables.CoincTable)
270  coinc_inspiral_table = lsctables.New(lsctables.CoincInspiralTable)
271 
272  # replace tables with our versions
273  real_sngl_inspiral_table = lsctables.SnglInspiralTable.get_table(xmldoc)
274  real_coinc_event_map_table = lsctables.CoincMapTable.get_table(xmldoc)
275  real_coinc_event_table = lsctables.CoincTable.get_table(xmldoc)
276  real_coinc_inspiral_table = lsctables.CoincInspiralTable.get_table(xmldoc)
277  xmldoc.childNodes[-1].replaceChild(self.sngl_inspiral_table, real_sngl_inspiral_table)
278  xmldoc.childNodes[-1].replaceChild(coinc_event_map_table, real_coinc_event_map_table)
279  xmldoc.childNodes[-1].replaceChild(coinc_event_table, real_coinc_event_table)
280  xmldoc.childNodes[-1].replaceChild(coinc_inspiral_table, real_coinc_inspiral_table)
281 
282  # synchronize our coinc_event table's ID generator with the
283  # ID generator attached to the database' table object
284  coinc_event_table.set_next_id(real_coinc_event_table.next_id)
285 
286  # define once-off ntuple_comparefunc() so we can pass the
287  # coincidence segment in as a default value for the seg
288  # keyword argument
289  def ntuple_comparefunc(events, offset_vector, seg = segments.segment(self.last_boundary, boundary)):
290  return frozenset(event.ifo for event in events) not in allowed_instrument_combos or ligolw_thinca.coinc_inspiral_end_time(events, offset_vector) not in seg
291 
292  # swap .get_effective_snr() method on trigger class
293  orig_get_effective_snr, ligolw_thinca.SnglInspiral.get_effective_snr = ligolw_thinca.SnglInspiral.get_effective_snr, get_effective_snr
294 
295  # find coincs
296  ligolw_thinca.ligolw_thinca(
297  xmldoc,
298  process_id = process_id,
299  coinc_definer_row = ligolw_thinca.InspiralCoincDef,
300  event_comparefunc = event_comparefunc,
301  thresholds = self.coincidence_threshold,
302  ntuple_comparefunc = ntuple_comparefunc,
303  likelihood_func = self.ln_likelihood_func,
304  likelihood_params_func = self.ln_likelihood_params_func,
305  # add 10% to coincidence window for safety + the
306  # light-crossing time for the Earth
307  max_dt = 1.1 * self.coincidence_threshold + 2. * lal.REARTH_SI / lal.C_SI
308  )
309 
310  # restore .get_effective_snr() method on trigger class
311  ligolw_thinca.SnglInspiral.get_effective_snr = orig_get_effective_snr
312 
313  # assign the FAP and FAR if provided with the data to do so
314  if fapfar is not None:
315  coinc_event_index = dict((row.coinc_event_id, row) for row in coinc_event_table)
316  gps_time_now = float(lal.UTCToGPS(time.gmtime()))
317  for coinc_inspiral_row in coinc_inspiral_table:
318  likelihood_ratio = coinc_event_index[coinc_inspiral_row.coinc_event_id].likelihood
319  coinc_inspiral_row.combined_far = fapfar.far_from_rank(likelihood_ratio)
320  # FIXME: add a proper column to store this in
321  coinc_inspiral_row.false_alarm_rate = fapfar.fap_from_rank(likelihood_ratio)
322 
323  # abuse minimum_duration column to store
324  # the latency. NOTE: this is nonsensical
325  # unless running live.
326  coinc_inspiral_row.minimum_duration = gps_time_now - float(coinc_inspiral_row.get_end())
327 
328  # construct a coinc extractor from the XML document while
329  # the tree still contains our internal table objects
330  self.last_coincs = ligolw_thinca.sngl_inspiral_coincs(xmldoc)
331 
332  # synchronize the database' coinc_event table's ID
333  # generator with ours
334  real_coinc_event_table.set_next_id(coinc_event_table.next_id)
335 
336  # put the original table objects back
337  xmldoc.childNodes[-1].replaceChild(real_sngl_inspiral_table, self.sngl_inspiral_table)
338  xmldoc.childNodes[-1].replaceChild(real_coinc_event_map_table, coinc_event_map_table)
339  xmldoc.childNodes[-1].replaceChild(real_coinc_event_table, coinc_event_table)
340  xmldoc.childNodes[-1].replaceChild(real_coinc_inspiral_table, coinc_inspiral_table)
341 
342  # copy triggers into real output document
343  if coinc_event_map_table:
344  # figure out the IDs of triggers that have been
345  # used in coincs for the first time, and update the
346  # set of IDs of all triggers that have been used in
347  # coincs
348  index = dict((row.event_id, row) for row in self.sngl_inspiral_table)
349  self.ids &= set(index)
350  newids = set(coinc_event_map_table.getColumnByName("event_id")) - self.ids
351  self.ids |= newids
352 
353  # copy rows into target tables.
354  for id in newids:
355  real_sngl_inspiral_table.append(index[id])
356  map(real_coinc_event_map_table.append, coinc_event_map_table)
357  map(real_coinc_event_table.append, coinc_event_table)
358  map(real_coinc_inspiral_table.append, coinc_inspiral_table)
359 
360  # save all sngls above the requested sngls SNR threshold
361  # (all snlgs that participated in coincs are already in the
362  # document, so only need to check for ones in the
363  # non-coincident sngls list for this iteration)
364  if self.sngls_snr_threshold is not None:
365  for event in noncoinc_sngls:
366  if event.snr >= self.sngls_snr_threshold:
367  real_sngl_inspiral_table.append(event)
368 
369  # record boundary
370  self.last_boundary = boundary
371 
372  # return non-coincident sngls
373  return noncoinc_sngls
374 
375 
376  def flush(self, xmldoc, process_id, fapfar = None):
377  # invalidate the coinc extractor in case run_coincidence()
378  # is a no-op.
379  self.last_coincs = {}
380 
381  # coincidence. don't bother unless .add_events() has been
382  # called since the last flush()
383  if self._xmldoc is not None:
384  noncoinc_sngls = self.run_coincidence(xmldoc, process_id, segments.infinity(), fapfar = fapfar)
385  else:
386  noncoinc_sngls = []
387 
388  # any event that hasn't been used in a coinc by now will
389  # never be
390  if self.sngl_inspiral_table is not None:
391  noncoinc_sngls.extend(row for row in self.sngl_inspiral_table if row.event_id not in self.ids)
392  self.sngl_inspiral_table.unlink()
393  self.sngl_inspiral_table = None
394  self.ids.clear()
395 
396  # last_boundary must be reset to -infinity so that it looks
397  # like a fresh copy of the stream thinca instance
398  self.last_boundary = -segments.infinity()
399 
400  # it's now safe to work with a different document
401  self._xmldoc = None
402 
403  # return non-coincident sngls
404  return noncoinc_sngls