gstlal  0.8.1
 All Classes Namespaces Files Functions Variables Pages
reference_psd.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2010--2013 Kipp Cannon, Chad Hanna, Leo Singer
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 
20 #
21 # =============================================================================
22 #
23 # Preamble
24 #
25 # =============================================================================
26 #
27 
28 
29 import math
30 import numpy
31 import os
32 import scipy
33 import scipy.fftpack
34 from scipy import interpolate
35 import sys
36 import signal
37 import warnings
38 
39 
40 import pygtk
41 pygtk.require("2.0")
42 import gobject
43 gobject.threads_init()
44 import pygst
45 pygst.require("0.10")
46 import gst
47 
48 
49 from glue.ligolw import utils
50 import lal
51 from pylal import datatypes as laltypes
52 from pylal import series as lalseries
53 
54 
55 from gstlal import datasource
56 from gstlal import pipeparts
57 from gstlal import pipeio
58 from gstlal import simplehandler
59 
60 
61 ## @file
62 # module for reference psds
63 #
64 # ### Review Status
65 #
66 # | Names | Hash | Date |
67 # | ------------------------------------------- | ------------------------------------------- | ---------- |
68 # | Florent, Sathya, Duncan Me., Jolien, Kipp, Chad | b3ef077fe87b597578000f140e4aa780f3a227aa | 2014-05-01 |
69 #
70 #
71 # #### Action items
72 #
73 # - Make graphs of code and compare with gstreamer graphs
74 # - Link spectrum movie from DCC
75 # - Consider exposing the average samples property
76 # - Check FIR kernel normalization norm
77 
78 
79 ## @package python.reference_psd
80 # the reference_psd module
81 
82 
83 #
84 # =============================================================================
85 #
86 # PSD Measurement
87 #
88 # =============================================================================
89 #
90 
91 
92 #
93 # pipeline handler for PSD measurement
94 #
95 
96 
98  def __init__(self, *args, **kwargs):
99  self.psd = None
100  simplehandler.Handler.__init__(self, *args, **kwargs)
101 
102  def do_on_message(self, bus, message):
103  if message.type == gst.MESSAGE_ELEMENT and message.structure.get_name() == "spectrum":
104  self.psd = pipeio.parse_spectrum_message(message)
105  return True
106  return False
107 
108 
109 #
110 # measure_psd()
111 #
112 
113 ## A pipeline to measure a PSD
114 #
115 # @dot
116 # digraph G {
117 # // graph properties
118 #
119 # rankdir=LR;
120 # compound=true;
121 # node [shape=record fontsize=10 fontname="Verdana"];
122 # edge [fontsize=8 fontname="Verdana"];
123 #
124 # // nodes
125 #
126 # "mkbasicsrc()" [URL="\ref datasource.mkbasicsrc()"];
127 # capsfilter1 [URL="\ref pipeparts.mkcapsfilter()"];
128 # resample [URL="\ref pipeparts.mkresample()"];
129 # capsfilter2 [URL="\ref pipeparts.mkcapsfilter()"];
130 # queue [URL="\ref pipeparts.mkqueue()"];
131 # whiten [URL="\ref pipeparts.mkwhiten()"];
132 # fakesink [URL="\ref pipeparts.mkfakesink()"];
133 #
134 # "mkbasicsrc()" -> capsfilter1 -> resample -> capsfilter2 -> queue -> whiten -> fakesink;
135 # }
136 # @enddot
137 def measure_psd(gw_data_source_info, instrument, rate, psd_fft_length = 8, verbose = False):
138  #
139  # 8 FFT-lengths is just a ball-parky estimate of how much data is
140  # needed for a good PSD, this isn't a requirement of the code (the
141  # code requires a minimum of 1)
142  #
143 
144  if gw_data_source_info.seg is not None and float(abs(gw_data_source_info.seg)) < 8 * psd_fft_length:
145  raise ValueError("segment %s too short" % str(gw_data_source_info.seg))
146 
147  #
148  # build pipeline
149  #
150 
151  if verbose:
152  print >>sys.stderr, "measuring PSD in segment %s" % str(gw_data_source_info.seg)
153  print >>sys.stderr, "building pipeline ..."
154  mainloop = gobject.MainLoop()
155  pipeline = gst.Pipeline("psd")
156  handler = PSDHandler(mainloop, pipeline)
157 
158  head = datasource.mkbasicsrc(pipeline, gw_data_source_info, instrument, verbose = verbose)
159  head = pipeparts.mkcapsfilter(pipeline, head, "audio/x-raw-float, rate=[%d,MAX]" % rate) # disallow upsampling
160  head = pipeparts.mkresample(pipeline, head, quality = 9)
161  head = pipeparts.mkcapsfilter(pipeline, head, "audio/x-raw-float, rate=%d" % rate)
162  head = pipeparts.mkqueue(pipeline, head, max_size_buffers = 8)
163  if gw_data_source_info.seg is not None:
164  average_samples = int(round(float(abs(gw_data_source_info.seg)) / (psd_fft_length / 2.) - 1.))
165  else:
166  #FIXME maybe let the user specify this
167  average_samples = 64
168  head = pipeparts.mkwhiten(pipeline, head, psd_mode = 0, zero_pad = 0, fft_length = psd_fft_length, average_samples = average_samples, median_samples = 7)
169  pipeparts.mkfakesink(pipeline, head)
170 
171  #
172  # setup signal handler to shutdown pipeline for live data
173  #
174 
175  if gw_data_source_info.data_source in ("lvshm", "framexmit"):# FIXME what about nds online?
177 
178  #
179  # process segment
180  #
181 
182  if verbose:
183  print >>sys.stderr, "putting pipeline into playing state ..."
184  if pipeline.set_state(gst.STATE_PLAYING) == gst.STATE_CHANGE_FAILURE:
185  raise RuntimeError("pipeline failed to enter PLAYING state")
186  if verbose:
187  print >>sys.stderr, "running pipeline ..."
188  mainloop.run()
189 
190  #
191  # done
192  #
193 
194  if verbose:
195  print >>sys.stderr, "PSD measurement complete"
196  return handler.psd
197 
198 
199 def read_psd_xmldoc(xmldoc):
200  import warnings
201  warnings.warn("gstlal.reference_psd.read_psd_xmldoc() is deprecated, use pylal.series.read_psd_xmldoc() instead.", DeprecationWarning)
202  return lalseries.read_psd_xmldoc(xmldoc)
203 
204 
205 def make_psd_xmldoc(psddict, xmldoc = None):
206  import warnings
207  warnings.warn("gstlal.reference_psd.make_psd_xmldoc() is deprecated, use pylal.series.make_psd_xmldoc() instead.", DeprecationWarning)
208  return lalseries.make_psd_xmldoc(psddict, xmldoc = xmldoc)
209 
210 
211 def write_psd_fileobj(fileobj, psddict, gz = False, trap_signals = None):
212  """
213  Wrapper around make_psd_xmldoc() to write the XML document directly
214  to a Python file object.
215  """
216  utils.write_fileobj(lalseries.make_psd_xmldoc(psddict), fileobj, gz = gz, trap_signals = trap_signals)
217 
218 
219 def write_psd(filename, psddict, verbose = False, trap_signals = None):
220  """
221  Wrapper around make_psd_xmldoc() to write the XML document directly
222  to a named file.
223  """
224  utils.write_filename(lalseries.make_psd_xmldoc(psddict), filename, gz = (filename or "stdout").endswith(".gz"), verbose = verbose, trap_signals = trap_signals)
225 
226 
227 #
228 # =============================================================================
229 #
230 # PSD Utilities
231 #
232 # =============================================================================
233 #
234 
235 
236 def horizon_distance(psd, m1, m2, snr, f_min, f_max = None, inspiral_spectrum = None):
237  """
238  Compute horizon distance, the distance at which an optimally
239  oriented inspiral would be seen to have the given SNR. m1 and m2
240  are in solar mass units. f_min and f_max are in Hz. psd is a
241  REAL8FrequencySeries object containing the strain spectral density
242  function in the LAL normalization convention. The return value is
243  in Mpc.
244 
245  The horizon distance is determined using an integral whose upper
246  bound is the smaller of f_max (if supplied), the highest frequency
247  in the PSD, or the ISCO frequency.
248 
249  If inspiral_spectrum is not None, it should be a two-element list.
250  The first element will be replaced with an array of frequency
251  values, and the second element will be replaced with an array of
252  spectrum values giving the amplitude of an inspiral spectrum with
253  the given SNR. The spectrum is normalized so that the SNR is
254 
255  SNR^2 = \int (inspiral_spectrum / psd) df
256 
257  That is, the ratio of the inspiral spectrum to the PSD gives the
258  density of SNR^2.
259  """
260  #
261  # obtain PSD data, set default f_max if not supplied
262  #
263 
264  Sn = psd.data
265  assert len(Sn) > 0
266 
267  if f_max is None:
268  f_max = psd.f0 + (len(Sn) - 1) * psd.deltaF
269  elif f_max > psd.f0 + (len(Sn) - 1) * psd.deltaF:
270  warnings.warn("f_max clipped to Nyquist frequency", UserWarning)
271  f_max = psd.f0 + (len(Sn) - 1) * psd.deltaF
272 
273  #
274  # clip to ISCO. see (4) in arXiv:1003.2481
275  #
276 
277  f_isco = lal.C_SI**3 / (6**(3. / 2.) * math.pi * lal.G_SI * (m1 + m2) * lal.MSUN_SI)
278  f_max = min(f_max, f_isco)
279  assert psd.f0 <= f_isco
280  assert psd.f0 <= f_min <= f_isco
281  assert f_min <= f_max
282 
283  #
284  # convert f_min and f_max to indexes and extract data vectors for
285  # SNR integral
286  #
287 
288  k_min = int(round((f_min - psd.f0) / psd.deltaF))
289  k_max = int(round((f_max - psd.f0) / psd.deltaF))
290 
291  f = psd.f0 + numpy.arange(k_min, k_max + 1) * psd.deltaF
292  Sn = Sn[k_min : k_max + 1]
293 
294  #
295  # |h(f)|^2 for source at D = 1 m. see (5) in arXiv:1003.2481
296  #
297 
298  mu = (m1 * m2) / (m1 + m2)
299  mchirp = mu**(3. / 5.) * (m1 + m2)**(2. / 5.)
300 
301  inspiral = (5 * math.pi / (24 * lal.C_SI**3)) * (lal.G_SI * mchirp * lal.MSUN_SI)**(5. / 3.) * (math.pi * f)**(-7. / 3.)
302 
303  #
304  # SNR for source at D = 1 m <--> D in m for source w/ SNR = 1. see
305  # (3) in arXiv:1003.2481
306  #
307 
308  D_at_snr_1 = math.sqrt(4 * (inspiral / Sn).sum() * psd.deltaF)
309 
310  #
311  # scale inspiral spectrum by distance to achieve desired SNR
312  #
313 
314  if inspiral_spectrum is not None:
315  inspiral_spectrum[0] = f
316  inspiral_spectrum[1] = 4 * inspiral / (D_at_snr_1 / snr)**2
317 
318  #
319  # D in Mpc for source with desired SNR
320  #
321 
322  return D_at_snr_1 / snr / (1e6 * lal.PC_SI)
323 
324 
325 def effective_distance_factor(inclination, fp, fc):
326  """
327  Returns the ratio of effective distance to physical distance for
328  compact binary mergers. Inclination is the orbital inclination of
329  the system in radians, fp and fc are the F+ and Fx antenna factors.
330  See lal.ComputeDetAMResponse() for a function to compute antenna
331  factors. The effective distance is given by
332 
333  Deff = effective_distance_factor * D
334 
335  See Equation (4.3) of arXiv:0705.1514.
336  """
337  cos2i = math.cos(inclination)**2
338  return 1.0 / math.sqrt(fp**2 * (1+cos2i)**2 / 4 + fc**2 * cos2i)
339 
340 
342  """
343  Compute an acausal finite impulse-response filter kernel from a power
344  spectral density conforming to the LAL normalization convention,
345  such that if zero-mean unit-variance Gaussian random noise is fed
346  into an FIR filter using the kernel the filter's output will
347  possess the given PSD. The PSD must be provided as a
348  REAL8FrequencySeries object (see
349  pylal.xlal.datatypes.real8frequencyseries).
350 
351  The return value is the tuple (kernel, latency, sample rate). The
352  kernel is a numpy array containing the filter kernel, the latency
353  is the filter latency in samples and the sample rate is in Hz. The
354  kernel and latency can be used, for example, with gstreamer's stock
355  audiofirfilter element.
356  """
357  #
358  # this could be relaxed with some work
359  #
360 
361  assert psd.f0 == 0.0
362 
363  #
364  # extract the PSD bins and determine sample rate for kernel
365  #
366 
367  data = psd.data / 2
368  sample_rate = 2 * int(round(psd.f0 + len(data) * psd.deltaF))
369 
370  #
371  # remove LAL normalization
372  #
373 
374  data *= sample_rate
375 
376  #
377  # compute the FIR kernel. it always has an odd number of samples
378  # and no DC offset.
379  #
380 
381  data[0] = data[-1] = 0.0
382  try:
383  kernel = scipy.fftpack.idct(numpy.sqrt(data), type = 1) / (2 * len(data) - 1)
384  kernel = numpy.hstack((kernel[::-1], kernel[1:]))
385  except AttributeError:
386  # this computer's scipy.fftpack is missing idct()
387  # repack data: data[0], data[1], 0, data[2], 0, ....
388  tmp = numpy.zeros((2 * len(data) - 1,), dtype = data.dtype)
389  tmp[0] = data[0]
390  tmp[1::2] = data[1:]
391  data = tmp
392  del tmp
393  kernel = scipy.fftpack.irfft(numpy.sqrt(data))
394  kernel = numpy.roll(kernel, (len(kernel) - 1) / 2)
395 
396  #
397  # apply a Tukey window whose flat bit is 50% of the kernel.
398  # preserve the FIR kernel's square magnitude
399  #
400 
401  norm_before = numpy.dot(kernel, kernel)
402  kernel *= lal.CreateTukeyREAL8Window(len(kernel), .5).data.data
403  kernel *= math.sqrt(norm_before / numpy.dot(kernel, kernel))
404 
405  #
406  # the kernel's latency
407  #
408 
409  latency = (len(kernel) - 1) / 2
410 
411  #
412  # done
413  #
414 
415  return kernel, latency, sample_rate
416 
417 
419  """
420  Compute an acausal finite impulse-response filter kernel from a power
421  spectral density conforming to the LAL normalization convention,
422  such that if colored Gaussian random noise with the given PSD is fed
423  into an FIR filter using the kernel the filter's output will
424  be zero-mean unit-variance Gaussian random noise. The PSD must be
425  provided as a REAL8FrequencySeries object (see
426  pylal.xlal.datatypes.real8frequencyseries).
427 
428  The phase response of this filter is 0, just like whitening done in
429  the frequency domain.
430 
431  The return value is the tuple (kernel, latency, sample rate). The
432  kernel is a numpy array containing the filter kernel, the latency
433  is the filter latency in samples and the sample rate is in Hz. The
434  kernel and latency can be used, for example, with gstreamer's stock
435  audiofirfilter element.
436  """
437  #
438  # this could be relaxed with some work
439  #
440 
441  assert psd.f0 == 0.0
442 
443  #
444  # extract the PSD bins and determine sample rate for kernel
445  #
446 
447  data = psd.data / 2
448  sample_rate = 2 * int(round(psd.f0 + len(data) * psd.deltaF))
449 
450  #
451  # remove LAL normalization
452  #
453 
454  data *= sample_rate
455 
456  #
457  # compute the FIR kernel. it always has an odd number of samples
458  # and no DC offset.
459  #
460 
461  data[0] = data[-1] = 0.0
462  data_nonzeros = (data != 0.)
463  data[data_nonzeros] = 1./data[data_nonzeros]
464  try:
465  kernel = scipy.fftpack.idct(numpy.sqrt(data), type = 1) / (2 * len(data) - 1)
466  kernel = numpy.hstack((kernel[::-1], kernel[1:]))
467  except AttributeError:
468  # this computer's scipy.fftpack is missing idct()
469  # repack data: data[0], data[1], 0, data[2], 0, ....
470  tmp = numpy.zeros((2 * len(data) - 1,), dtype = data.dtype)
471  tmp[0] = data[0]
472  tmp[1::2] = data[1:]
473  data = tmp
474  del tmp
475  kernel = scipy.fftpack.irfft(numpy.sqrt(data))
476  kernel = numpy.roll(kernel, (len(kernel) - 1) / 2)
477 
478  #
479  # apply a Tukey window whose flat bit is 50% of the kernel.
480  # preserve the FIR kernel's square magnitude
481  #
482 
483  norm_before = numpy.dot(kernel, kernel)
484  kernel *= lal.CreateTukeyREAL8Window(len(kernel), .5).data.data
485  kernel *= math.sqrt(norm_before / numpy.dot(kernel, kernel))
486 
487  #
488  # the kernel's latency
489  #
490 
491  latency = (len(kernel) - 1) / 2
492 
493  #
494  # done
495  #
496 
497  return kernel, latency, sample_rate
498 
499 
501  """
502  Compute the minimum-phase response filter (zero latency) associated with a
503  linear-phase response filter (latency equal to half the filter length).
504 
505  From "Design of Optimal Minimum-Phase Digital FIR Filters Using
506  Discrete Hilbert Transforms", IEEE Trans. Signal Processing, vol. 48,
507  pp. 1491-1495, May 2000.
508 
509  The return value is the tuple (kernel, phase response). The kernel is
510  a numpy array containing the filter kernel. The kernel can be used,
511  for example, with gstreamer's stock audiofirfilter element.
512  """
513  #
514  # compute abs of FFT of kernel
515  #
516 
517  absX = abs(scipy.fftpack.fft(linear_phase_kernel))
518 
519  #
520  # compute the cepstrum of the kernel
521  # (i.e., the iFFT of the log of the abs of the FFT of the kernel)
522  #
523 
524  cepstrum = scipy.fftpack.ifft(scipy.log(absX))
525 
526  #
527  # compute sgn
528  #
529 
530  sgn = scipy.ones(len(linear_phase_kernel))
531  sgn[0] = 0.
532  sgn[(len(sgn)+1)/2] = 0.
533  sgn[(len(sgn)+1)/2:] *= -1.
534 
535  #
536  # compute theta
537  #
538 
539  theta = -1.j * scipy.fftpack.fft(sgn * cepstrum)
540 
541  #
542  # compute minimum phase kernel
543  #
544 
545  minimum_phase_kernel = scipy.real(scipy.fftpack.ifft(absX * scipy.exp(1.j * theta)))
546 
547  #
548  # this kernel needs to be reversed to follow conventions used with the
549  # audiofirfilter and lal_firbank elements
550  #
551 
552  minimum_phase_kernel = minimum_phase_kernel[-1::-1]
553 
554  #
555  # done
556  #
557 
558  return minimum_phase_kernel, -theta
559 
560 
561 def interpolate_psd(psd, deltaF):
562  #
563  # no-op? and disallow resolution reduction
564  #
565 
566  if deltaF == psd.deltaF:
567  return psd
568  assert deltaF < psd.deltaF
569 
570  #
571  # interpolate PSD by clipping/zero-padding time-domain impulse
572  # response of equivalent whitening filter
573  #
574 
575  #from scipy import fftpack
576  #psd_data = psd.data
577  #x = numpy.zeros((len(psd_data) * 2 - 2,), dtype = "double")
578  #psd_data = numpy.where(psd_data, psd_data, float("inf"))
579  #x[0] = 1 / psd_data[0]**.5
580  #x[1::2] = 1 / psd_data[1:]**.5
581  #x = fftpack.irfft(x)
582  #if deltaF < psd.deltaF:
583  # x *= numpy.cos(numpy.arange(len(x)) * math.pi / (len(x) + 1))**2
584  # x = numpy.concatenate((x[:(len(x) / 2)], numpy.zeros((int(round(len(x) * psd.deltaF / deltaF)) - len(x),), dtype = "double"), x[(len(x) / 2):]))
585  #else:
586  # x = numpy.concatenate((x[:(int(round(len(x) * psd.deltaF / deltaF)) / 2)], x[-(int(round(len(x) * psd.deltaF / deltaF)) / 2):]))
587  # x *= numpy.cos(numpy.arange(len(x)) * math.pi / (len(x) + 1))**2
588  #x = 1 / fftpack.rfft(x)**2
589  #psd_data = numpy.concatenate(([x[0]], x[1::2]))
590 
591  #
592  # interpolate PSD with linear interpolator
593  #
594 
595  #psd_data = psd.data
596  #f = psd.f0 + numpy.arange(len(psd_data)) * psd.deltaF
597  #interp = interpolate.interp1d(f, psd_data, bounds_error = False)
598  #f = psd.f0 + numpy.arange(round(len(psd_data) * psd.deltaF / deltaF)) * deltaF
599  #psd_data = interp(f)
600 
601  #
602  # interpolate log(PSD) with cubic spline. note that the PSD is
603  # clipped at 1e-300 to prevent nan's in the interpolator (which
604  # doesn't seem to like the occasional sample being -inf)
605  #
606 
607  psd_data = psd.data
608  psd_data = numpy.where(psd_data, psd_data, 1e-300)
609  f = psd.f0 + numpy.arange(len(psd_data)) * psd.deltaF
610  interp = interpolate.splrep(f, numpy.log(psd_data), s = 0)
611  f = psd.f0 + numpy.arange(round((len(psd_data) - 1) * psd.deltaF / deltaF) + 1) * deltaF
612  psd_data = numpy.exp(interpolate.splev(f, interp, der = 0))
613 
614  #
615  # return result
616  #
617 
618  return laltypes.REAL8FrequencySeries(
619  name = psd.name,
620  epoch = psd.epoch,
621  f0 = psd.f0,
622  deltaF = deltaF,
623  sampleUnits = psd.sampleUnits,
624  data = psd_data
625  )
626 
627 
628 def movingmedian(psd, window_size):
629  """
630  Assumes that the underlying PSD doesn't have variance, i.e., that there
631  is no median / mean correction factor required
632  """
633  data = psd.data
634  datacopy = numpy.copy(psd.data)
635  for i in range(window_size, len(data)-window_size):
636  datacopy[i] = numpy.median(data[i-window_size:i+window_size])
637  return laltypes.REAL8FrequencySeries(
638  name = psd.name,
639  epoch = psd.epoch,
640  f0 = psd.f0,
641  deltaF = psd.deltaF,
642  sampleUnits = psd.sampleUnits,
643  data = datacopy
644  )
645 
646 
647 def polyfit(psd, minsample, maxsample, order, verbose = False):
648  # f / f_min between f_min and f_max, i.e. f[0] here is 1
649  f = numpy.arange(maxsample - minsample) * psd.deltaF + 1
650  data = psd.data[minsample:maxsample]
651 
652  logf = numpy.linspace(numpy.log(f[0]), numpy.log(f[-1]), 100000)
653  interp = interpolate.interp1d(numpy.log(f), numpy.log(data))
654  data = interp(logf)
655  p = numpy.poly1d(numpy.polyfit(logf, data, order))
656  if verbose:
657  print >> sys.stderr, "\nFit polynomial is: \n\nlog(PSD) = \n", p, "\n\nwhere x = f / f_min\n"
658  data = numpy.exp(p(numpy.log(f)))
659  olddata = psd.data
660  olddata[minsample:maxsample] = data
661  return laltypes.REAL8FrequencySeries(
662  name = psd.name,
663  epoch = psd.epoch,
664  f0 = psd.f0,
665  deltaF = psd.deltaF,
666  sampleUnits = psd.sampleUnits,
667  data = olddata
668  )