gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
cbc_template_fir.py
Go to the documentation of this file.
1 # Copyright (C) 2009 LIGO Scientific Collaboration
2 #
3 # This program is free software; you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation; either version 2 of the License, or (at your
6 # option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful, but
9 # WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
11 # Public License for more details.
12 #
13 # You should have received a copy of the GNU General Public License along
14 # with this program; if not, write to the Free Software Foundation, Inc.,
15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 
17 ## @file
18 # The python module to implement SVD decomposed FIR filtering
19 #
20 # ### Review Status
21 #
22 # STATUS: reviewed with actions
23 #
24 # | Names | Hash | Date |
25 # | ------------------------------------------- | ------------------------------------------- | ---------- |
26 # | Florent, Sathya, Duncan Me, Jolien, Kipp, Chad | 7536db9d496be9a014559f4e273e1e856047bf71 | 2014-04-30 |
27 # | Florent, Surabhi, Tjonnie, Kent, Jolien, Kipp, Chad | 0d7301a22ad3346f1283d3a1917b67aa7ec1c1ab | 2015-05-12 |
28 #
29 # #### Action items
30 #
31 # - Consider changing the order of interpolation and smoothing the PSD
32 # - Remove Jolien's function and get the new flow from lalsimulation to use XLALSimInspiralChirpStartFrequencyBound() and friends
33 # - move sigma squared calculation somewhere and get them updated dynamically
34 # - possibly use ROM stuff, possibly use low-order polynomial approx computed on the fly from the template as it's generated
35 # - remove lefttukeywindow()
36 # - use template_bank_row.coa_phase == 0. in SimInspiralFD() call, make sure itac adjusts the phase it assigns to triggers from the template coa_phase
37 # - change "assumes fhigh" to "asserts fhigh"
38 # - move assert epoch_time into condition_imr_waveform(), should be assert -len(data) <= epoch_time * sample_rate < 0
39 #
40 ## @package cbc_template_fir
41 
42 #
43 # =============================================================================
44 #
45 # Preamble
46 #
47 # =============================================================================
48 #
49 
50 
51 import math
52 import cmath
53 import numpy
54 import scipy
55 import sys
56 
57 
58 from pylal import datatypes as laltypes
59 from pylal import lalfft
60 from pylal import spawaveform
61 import lal
62 import lalsimulation as lalsim
63 
64 
65 from gstlal.reference_psd import interpolate_psd, horizon_distance
66 
67 
68 from gstlal import templates
69 from gstlal import chirptime
70 
71 
72 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>, Chad Hanna <chad.hanna@ligo.org>, Drew Keppel <drew.keppel@ligo.org>"
73 __version__ = "FIXME"
74 __date__ = "FIXME"
75 
76 
77 #
78 # =============================================================================
79 #
80 # Inspiral Template Stuff
81 #
82 # =============================================================================
83 #
84 
85 
86 def tukeywindow(data, samps = 200.):
87  assert (len(data) >= 2 * samps) # make sure that the user is requesting something sane
88  tp = float(samps) / len(data)
89  return lal.CreateTukeyREAL8Window(len(data), tp).data.data
90 
91 
92 def generate_template(template_bank_row, approximant, sample_rate, duration, f_low, f_high, amporder = 0, order = 7, fwdplan = None, fworkspace = None):
93  """
94  Generate a single frequency-domain template, which
95  (1) is band-limited between f_low and f_high,
96  (2) has an IFFT which is duration seconds long and
97  (3) has an IFFT which is sampled at sample_rate Hz
98  """
99  if approximant in templates.gstlal_approximants:
100 
101  # FIXME use hcross somday?
102  # We don't here because it is not guaranteed to be orthogonal
103  # and we add orthogonal phase later
104 
105  hplus,hcross = lalsim.SimInspiralFD(
106  0., # phase
107  1.0 / duration, # sampling interval
108  lal.MSUN_SI * template_bank_row.mass1,
109  lal.MSUN_SI * template_bank_row.mass2,
110  template_bank_row.spin1x,
111  template_bank_row.spin1y,
112  template_bank_row.spin1z,
113  template_bank_row.spin2x,
114  template_bank_row.spin2y,
115  template_bank_row.spin2z,
116  f_low,
117  f_high,
118  0., #FIXME chosen until suitable default value for f_ref is defined
119  1.e6 * lal.PC_SI, # distance
120  0., # redshift
121  0., # inclination
122  0., # tidal deformability lambda 1
123  0., # tidal deformability lambda 2
124  None, # waveform flags
125  None, # Non GR params
126  amporder,
127  order,
128  lalsim.GetApproximantFromString(str(approximant))
129  )
130 
131  # NOTE assumes fhigh is the Nyquist frequency!!!
132  z = hplus.data.data
133  assert len(z) == int(round(sample_rate * duration))//2 +1
134 
135  else:
136  raise ValueError("Unsupported approximant given %s" % approximant)
137 
138  return laltypes.COMPLEX16FrequencySeries(
139  name = "template",
140  epoch = laltypes.LIGOTimeGPS(hplus.epoch.gpsSeconds, hplus.epoch.gpsNanoSeconds),
141  f0 = 0.0,
142  deltaF = 1.0 / duration,
143  sampleUnits = laltypes.LALUnit("strain"),
144  data = z
145  )
146 
147 def condition_imr_template(approximant, data, epoch_time, sample_rate_max, max_ringtime):
148  assert -len(data) / sample_rate_max <= epoch_time < 0.0, "Epoch returned follows a different convention"
149  # find the index for the peak sample using the epoch returned by
150  # the waveform generator
151  epoch_index = -int(epoch_time*sample_rate_max) - 1
152  # align the peaks according to an overestimate of max rinddown
153  # time for a given split bank
154  target_index = len(data)-1 - int(sample_rate_max * max_ringtime)
155  # rotate phase so that sample with peak amplitude is real
156  phase = numpy.arctan2(data[epoch_index].imag, data[epoch_index].real)
157  data *= numpy.exp(-1.j * phase)
158  data = numpy.roll(data, target_index-epoch_index)
159  # re-taper the ends of the waveform that got cyclically permuted
160  # around the ring
161  tukey_beta = 2. * abs(target_index - epoch_index) / float(len(data))
162  assert 0. <= tukey_beta <= 1., "waveform got rolled WAY too much"
163  data *= lal.CreateTukeyREAL8Window(len(data), tukey_beta).data.data
164  # done
165  return data, target_index
166 
167 
168 def compute_autocorrelation_mask( autocorrelation ):
169  '''
170  Given an autocorrelation time series, estimate the optimal
171  autocorrelation length to use and return a matrix which masks
172  out the unwanted elements. FIXME TODO for now just returns
173  ones
174  '''
175  return numpy.ones( autocorrelation.shape, dtype="int" )
176 
177 
178 def movingmedian(interval, window_size):
179  tmp = numpy.copy(interval)
180  for i in range(window_size, len(interval)-window_size):
181  tmp[i] = numpy.median(interval[i-window_size:i+window_size])
182  return tmp
183 
184 
185 def movingaverage(interval, window_size):
186  window = lal.CreateTukeyREAL8Window(window_size, 0.5).data.data
187  return numpy.convolve(interval, window, 'same')
188 
189 
190 def condition_psd(psd, newdeltaF, minfs = (35.0, 40.0), maxfs = (1800., 2048.), smoothing_frequency = 4.):
191  """
192  A function to condition the psd that will be used to whiten waveforms
193 
194  @param psd A real8 frequency series containing the psd
195  @param newdeltaF (Hz) The target delta F to interpolate to
196  @param minfs (Hz) The frequency boundaries over which to taper the spectrum to infinity. i.e., frequencies below the first item in the tuple will have an infinite spectrum, the second item in the tuple will not be changed. A taper from 0 to infinity is applied in between. The PSD is also tapered from 0.85 * Nyquist to Nyquist.
197  @param smoothing_frequency (Hz) The target frequency resolution after smoothing. Lines with bandwidths << smoothing_frequency are removed via a median calculation. Remaining features will be blurred out to this resolution.
198 
199  returns a conditioned psd
200  """
201 
202  #
203  # store the psd horizon before conditioning
204  #
205 
206  horizon_before = horizon_distance(psd, 1.4, 1.4, 8.0, minfs[1], maxfs[0])
207 
208  #
209  # interpolate to new \Delta f
210  #
211 
212  psd = interpolate_psd(psd, newdeltaF)
213 
214  #
215  # Smooth the psd
216  #
217 
218  psddata = psd.data
219  avgwindow = int(smoothing_frequency / newdeltaF)
220  psddata = movingmedian(psddata, avgwindow)
221  psddata = movingaverage(psddata, avgwindow)
222 
223  #
224  # Taper to infinity to turn this psd into an effective band pass filter
225  #
226 
227  kmin = int(minfs[0] / newdeltaF)
228  kmax = int(minfs[1] / newdeltaF)
229  psddata[:kmin] = float('Inf')
230  psddata[kmin:kmax] /= numpy.sin(numpy.arange(kmax-kmin) / (kmax-kmin-1.) * numpy.pi / 2.0)**4
231 
232  kmin = int(maxfs[0] / newdeltaF)
233  kmax = int(maxfs[1] / newdeltaF)
234  psddata[kmax:] = float('Inf')
235  psddata[kmin:kmax] /= numpy.cos(numpy.arange(kmax-kmin) / (kmax-kmin-1.) * numpy.pi / 2.0)**4
236 
237  psd.data = psddata
238 
239  #
240  # compute the psd horizon after conditioning and renormalize
241  #
242 
243  horizon_after = horizon_distance(psd, 1.4, 1.4, 8.0, minfs[1], maxfs[0])
244 
245  psddata = psd.data
246  psd.data = psddata * (horizon_after / horizon_before)**2
247 
248  #
249  # done
250  #
251 
252  return psd
253 
254 
255 def joliens_function(f, template_table):
256  """
257  A function to compute the padding needed to gaurantee well behaved waveforms
258 
259  @param f The target low frequency starting point
260  @param template_table The sngl_inspiral table containing all the template parameters for this bank
261 
262  Returns the extra padding in time (tx) and the new low frequency cut
263  off that should be used (f_new) as a tuple: (tx, f_new)
264  """
265  def _chirp_duration(f, m1, m2):
266  """
267  @returns the Newtonian chirp duration in seconds
268  @param f the starting frequency in Hertz
269  @param m1 mass of one component in solar masses
270  @param m2 mass of the other component in solar masses
271  """
272  G = 6.67e-11
273  c = 3e8
274  m1 *= 2e30 # convert to kg
275  m2 *= 2e30 # convert to kg
276  m1 *= G / c**3 # convert to s
277  m2 *= G / c**3 # convert to s
278  M = m1 + m2
279  nu = m1 * m2 / M**2
280  v = (numpy.pi * M * f)**(1.0/3.0)
281  return 5.0 * M / (256.0 * nu * v**8)
282 
283  def _chirp_start_frequency(t, m1, m2):
284  """
285  @returns the Newtonian chirp start frequency in Hertz
286  @param t the time before coalescence in seconds
287  @param m1 mass of one component in solar masses
288  @param m2 mass of the other component in solar masses
289  """
290  G = 6.67e-11
291  c = 3e8
292  m1 *= 2e30 # convert to kg
293  m2 *= 2e30 # convert to kg
294  m1 *= G / c**3 # convert to s
295  m2 *= G / c**3 # convert to s
296  M = m1 + m2
297  nu = m1 * m2 / M**2
298  theta = (nu * t / (5.0 * M))**(-1.0/8.0)
299  return theta**3 / (8.0 * numpy.pi * M)
300 
301  minmc = min(template_table.getColumnByName('mchirp'))
302  row = [t for t in template_table if t.mchirp == minmc][0]
303  m1, m2 = row.mass1, row.mass2
304  tc = _chirp_duration(f, m1, m2)
305  tx = .1 * tc + 1.0
306  f_new = _chirp_start_frequency(tx + tc, m1, m2)
307  return tx, f_new
308 
309 
310 def generate_templates(template_table, approximant, psd, f_low, time_slices, autocorrelation_length = None, verbose = False):
311  """!
312  Generate a bank of templates, which are
313  (1) broken up into time slice,
314  (2) down-sampled in each time slice and
315  (3) whitened with the given psd.
316  """
317  sample_rate_max = max(time_slices['rate'])
318  duration = max(time_slices['end'])
319  length_max = int(round(duration * sample_rate_max))
320 
321  # working f_low to actually use for generating the waveform
322  working_f_low_extra_time, working_f_low = joliens_function(f_low, template_table)
323 
324  # Add duration of PSD to template length for PSD ringing, round up to power of 2 count of samples
325  working_length = templates.ceil_pow_2(length_max + round(1./psd.deltaF * sample_rate_max))
326  working_duration = float(working_length) / sample_rate_max
327 
328  # Smooth the PSD and interpolate to required resolution
329  if psd is not None:
330  psd = condition_psd(psd, 1.0 / working_duration, minfs = (working_f_low, f_low), maxfs = (sample_rate_max / 2.0 * 0.90, sample_rate_max / 2.0))
331 
332  revplan = lalfft.XLALCreateReverseCOMPLEX16FFTPlan(working_length, 1)
333  fwdplan = lalfft.XLALCreateForwardREAL8FFTPlan(working_length, 1)
334  tseries = laltypes.COMPLEX16TimeSeries(
335  data = numpy.zeros((working_length,), dtype = "cdouble")
336  )
337  fworkspace = laltypes.COMPLEX16FrequencySeries(
338  name = "template",
339  epoch = laltypes.LIGOTimeGPS(0),
340  f0 = 0.0,
341  deltaF = 1.0 / working_duration,
342  data = numpy.zeros((working_length//2 + 1,), dtype = "cdouble")
343  )
344 
345  # Check parity of autocorrelation length
346  if autocorrelation_length is not None:
347  if not (autocorrelation_length % 2):
348  raise ValueError, "autocorrelation_length must be odd (got %d)" % autocorrelation_length
349  autocorrelation_bank = numpy.zeros((len(template_table), autocorrelation_length), dtype = "cdouble")
350  autocorrelation_mask = compute_autocorrelation_mask( autocorrelation_bank )
351  else:
352  autocorrelation_bank = None
353  autocorrelation_mask = None
354 
355  # Multiply by 2 * length of the number of sngl_inspiral rows to get the sine/cosine phases.
356  template_bank = [numpy.zeros((2 * len(template_table), int(round(rate*(end-begin)))), dtype = "double") for rate,begin,end in time_slices]
357 
358  # Store the original normalization of the waveform. After
359  # whitening, the waveforms are normalized. Use the sigmasq factors
360  # to get back the original waveform.
361  sigmasq = []
362 
363  # Generate each template, downsampling as we go to save memory
364  max_ringtime = max([chirptime.ringtime(row.mass1*lal.MSUN_SI + row.mass2*lal.MSUN_SI, chirptime.overestimate_j_from_chi(max(row.spin1z, row.spin2z))) for row in template_table])
365  for i, row in enumerate(template_table):
366  if verbose:
367  print >>sys.stderr, "generating template %d/%d: m1 = %g, m2 = %g, s1x = %g, s1y = %g, s1z = %g, s2x = %g, s2y = %g, s2z = %g" % (i + 1, len(template_table), row.mass1, row.mass2, row.spin1x, row.spin1y, row.spin1z, row.spin2x, row.spin2y, row.spin2z)
368 
369  #
370  # generate "cosine" component of frequency-domain template.
371  # waveform is generated for a canonical distance of 1 Mpc.
372  #
373 
374  fseries = generate_template(row, approximant, sample_rate_max, working_duration, f_low, sample_rate_max / 2., fwdplan = fwdplan, fworkspace = fworkspace)
375 
376  #
377  # whiten and add quadrature phase ("sine" component)
378  #
379 
380  if psd is not None:
381  lalfft.XLALWhitenCOMPLEX16FrequencySeries(fseries, psd)
382  fseries = templates.add_quadrature_phase(fseries, working_length)
383 
384  #
385  # compute time-domain autocorrelation function
386  #
387 
388  if autocorrelation_bank is not None:
389  autocorrelation = templates.normalized_autocorrelation(fseries, revplan).data
390  autocorrelation_bank[i, ::-1] = numpy.concatenate((autocorrelation[-(autocorrelation_length // 2):], autocorrelation[:(autocorrelation_length // 2 + 1)]))
391 
392  #
393  # transform template to time domain
394  #
395 
396  lalfft.XLALCOMPLEX16FreqTimeFFT(tseries, fseries, revplan)
397 
398  data = tseries.data
399  epoch_time = fseries.epoch.seconds + fseries.epoch.nanoseconds*1.e-9
400  #
401  # extract the portion to be used for filtering
402  #
403 
404 
405  #
406  # condition the template if necessary (e.g. line up IMR
407  # waveforms by peak amplitude)
408  #
409 
410  if approximant in templates.gstlal_IMR_approximants:
411  data, target_index = condition_imr_template(approximant, data, epoch_time, sample_rate_max, max_ringtime)
412  # record the new end times for the waveforms (since we performed the shifts)
413  row.set_end(laltypes.LIGOTimeGPS(float(target_index-(len(data) - 1.))/sample_rate_max))
414  else:
415  data *= tukeywindow(data, samps = 32)
416 
417  data = data[-length_max:]
418  #
419  # normalize so that inner product of template with itself
420  # is 2
421  #
422 
423  norm = abs(numpy.dot(data, numpy.conj(data)))
424  data *= cmath.sqrt(2 / norm)
425 
426  #
427  # sigmasq = 2 \sum_{k=0}^{N-1} |\tilde{h}_{k}|^2 / S_{k} \Delta f
428  #
429  # XLALWhitenCOMPLEX16FrequencySeries() computed
430  #
431  # \tilde{h}'_{k} = \sqrt{2 \Delta f} \tilde{h}_{k} / \sqrt{S_{k}}
432  #
433  # and XLALCOMPLEX16FreqTimeFFT() computed
434  #
435  # h'_{j} = \Delta f \sum_{k=0}^{N-1} exp(2\pi i j k / N) \tilde{h}'_{k}
436  #
437  # therefore, "norm" is
438  #
439  # \sum_{j} |h'_{j}|^{2} = (\Delta f)^{2} \sum_{j} \sum_{k=0}^{N-1} \sum_{k'=0}^{N-1} exp(2\pi i j (k-k') / N) \tilde{h}'_{k} \tilde{h}'^{*}_{k'}
440  # = (\Delta f)^{2} \sum_{k=0}^{N-1} \sum_{k'=0}^{N-1} \tilde{h}'_{k} \tilde{h}'^{*}_{k'} \sum_{j} exp(2\pi i j (k-k') / N)
441  # = (\Delta f)^{2} N \sum_{k=0}^{N-1} |\tilde{h}'_{k}|^{2}
442  # = (\Delta f)^{2} N 2 \Delta f \sum_{k=0}^{N-1} |\tilde{h}_{k}|^{2} / S_{k}
443  # = (\Delta f)^{2} N sigmasq
444  #
445  # and \Delta f = 1 / (N \Delta t), so "norm" is
446  #
447  # \sum_{j} |h'_{j}|^{2} = 1 / (N \Delta t^2) sigmasq
448  #
449  # therefore
450  #
451  # sigmasq = norm * N * (\Delta t)^2
452  #
453 
454  sigmasq.append(norm * len(data) / sample_rate_max**2.)
455 
456  #
457  # copy real and imaginary parts into adjacent (real-valued)
458  # rows of template bank
459  #
460 
461  for j, time_slice in enumerate(time_slices):
462  # start and end times are measured *backwards* from
463  # template end; subtract from n to convert to
464  # start and end index; end:start is the slice to
465  # extract, but there's also an amount added equal
466  # to 1 less than the stride that I can't explain
467  # but probaby has something to do with the reversal
468  # of the open/closed boundary conditions through
469  # all of this (argh! Chad!)
470  stride = int(round(sample_rate_max / time_slice['rate']))
471  begin_index = length_max - int(round(time_slice['begin'] * sample_rate_max)) + stride - 1
472  end_index = length_max - int(round(time_slice['end'] * sample_rate_max)) + stride - 1
473  # make sure the rates are commensurate
474  assert stride * time_slice['rate'] == sample_rate_max
475 
476  # extract every stride-th sample. we multiply by
477  # \sqrt{stride} to maintain inner product
478  # normalization so that the templates still appear
479  # to be unit vectors at the reduced sample rate.
480  # note that the svd returns unit basis vectors
481  # regardless so this factor has no effect on the
482  # normalization of the basis vectors used for
483  # filtering but it ensures that the chifacs values
484  # have the correct relative normalization.
485  template_bank[j][(2*i+0),:] = data.real[end_index:begin_index:stride] * math.sqrt(stride)
486  template_bank[j][(2*i+1),:] = data.imag[end_index:begin_index:stride] * math.sqrt(stride)
487 
488  return template_bank, autocorrelation_bank, autocorrelation_mask, sigmasq, psd
489 
490 
491 def decompose_templates(template_bank, tolerance, identity = False):
492  #
493  # sum-of-squares for each template (row).
494  #
495 
496  chifacs = (template_bank * template_bank).sum(1)
497 
498  #
499  # this turns this function into a no-op: the output "basis
500  # vectors" are exactly the input templates and the reconstruction
501  # matrix is absent (triggers pipeline construction code to omit
502  # matrixmixer element)
503  #
504 
505  if identity:
506  return template_bank, None, None, chifacs
507 
508  #
509  # adjust tolerance according to local norm
510  #
511 
512  tolerance = 1 - (1 - tolerance) / chifacs.max()
513 
514  #
515  # S.V.D.
516  #
517 
518  U, s, Vh = spawaveform.svd(template_bank.T,mod=True,inplace=True)
519 
520  #
521  # determine component count
522  #
523 
524  residual = numpy.sqrt((s * s).cumsum() / numpy.dot(s, s))
525  # FIXME in an ad hoc way force at least 6 principle components
526  n = max(min(residual.searchsorted(tolerance) + 1, len(s)), 6)
527 
528  #
529  # clip decomposition, pre-multiply Vh by s
530  #
531 
532  U = U[:,:n]
533  Vh = numpy.dot(numpy.diag(s), Vh)[:n,:]
534  s = s[:n]
535 
536  #
537  # renormalize the truncated SVD approximation of these template
538  # waveform slices making sure their squares still add up to chifacs.
539  # This is done by renormalizing the sum of the square of the
540  # singular value weighted reconstruction coefficients associated with
541  # each template.
542  #
543 
544  V2 = (Vh * Vh).sum(0)
545  for idx,v2 in enumerate(V2):
546  Vh[:, idx] *= numpy.sqrt(chifacs[idx] / v2)
547 
548  #
549  # done.
550  #
551 
552  return U.T, s, Vh, chifacs