gstlal  0.8.1
 All Classes Namespaces Files Functions Variables Pages
gstlal_fake_frames
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2011 Kipp Cannon, Chad Hanna, Drew Keppel
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 from optparse import OptionParser
20 import os
21 import numpy
22 import sys
23 
24 import pygtk
25 pygtk.require("2.0")
26 import gobject
27 gobject.threads_init()
28 import pygst
29 pygst.require("0.10")
30 import gst
31 
32 from gstlal import pipeparts
33 from gstlal import reference_psd
34 from gstlal import simplehandler
35 from gstlal import datasource
36 from gstlal import multirate_datasource
37 from glue.ligolw import ligolw
38 from glue.ligolw import array as ligolw_array
39 from glue.ligolw import param as ligolw_param
40 from glue.ligolw import utils as ligolw_utils
41 from pylal.datatypes import LIGOTimeGPS
42 from pylal import series as lalseries
43 
44 class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
45  pass
46 ligolw_array.use_in(LIGOLWContentHandler)
47 ligolw_param.use_in(LIGOLWContentHandler)
48 
49 ## @file gstlal_fake_frames
50 # This program will make fake data in a variety of ways; see gstlal_fake_frames for help and usage
51 #
52 # This program will make fake data in a variety of ways. Its input is anything
53 # supported by datasource.py. You can additionally whiten the data or apply a
54 # frequency domain coloring filter. It writes its output to frame files.
55 #
56 # #### Overview graph of the pipeline ####
57 #
58 # - gray boxes are optional and depend on the command line given
59 #
60 # @dot
61 # digraph G {
62 # // graph properties
63 #
64 # rankdir=LR;
65 # compound=true;
66 # node [shape=record fontsize=10 fontname="Verdana"];
67 # edge [fontsize=8 fontname="Verdana"];
68 #
69 # // nodes
70 #
71 # "mkbasicsrc()" [URL="\ref datasource.mkbasicsrc()"];
72 # "whitened_multirate_src()" [label="whitened_multirate_src()", URL="\ref multirate_datasource.mkwhitened_multirate_src()", style=filled, color=lightgrey];
73 # capsfilter [style=filled, color=lightgrey, URL="\ref pipeparts.mkcapsfilter()"];
74 # resample [style=filled, color=lightgrey, URL="\ref pipeparts.mkresample()"];
75 # audioamplify [style=filled, color=lightgrey, URL="\ref pipeparts.mkaudioamplify()", label="audioamplify \n if --data-source=white \n in order to have unit variance \n after resampling"];
76 # lal_shift[style=filled, color=lightgrey, URL="\ref pipeparts.mkshift()", label="lal_shift \n [iff --shift provided]"];
77 # progressreport1 [URL="\ref pipeparts.mkprogressreport()", label="progressreport \n [iff --verbose provided]"];
78 # progressreport2 [style=filled, color=lightgrey, URL="\ref pipeparts.mkprogressreport()", label="progressreport \n [iff --verbose provided]"];
79 # lal_firbank [style=filled, color=lightgrey, URL="\ref pipeparts.mkfirbank()", label="lal_firbank \n [iff --color-psd provided]"];
80 # taginject [URL="\ref pipeparts.mktaginject()"];
81 # lal_simulation [style=filled, color=lightgrey, URL="\ref pipeparts.mkinjections()", label="lal_simulation \n [iff --injections provided]"];
82 # framecppchannelmux [URL="\ref pipeparts.mkframecppchannelmux()"];
83 # framecppfilesink [URL="\ref pipeparts.mkframecppfilesink()"];
84 #
85 # // connect it up
86 #
87 # "mkbasicsrc()" -> progressreport1-> lal_shift -> progressreport2;
88 # progressreport2 -> "whitened_multirate_src()" [label="if --whiten given"];
89 # "whitened_multirate_src()" -> lal_firbank;
90 # progressreport2 -> resample [label="if --whiten not given"];
91 # resample -> capsfilter;
92 # capsfilter -> audioamplify
93 # audioamplify -> lal_firbank;
94 # lal_firbank -> taginject -> lal_simulation -> framecppchannelmux -> framecppfilesink;
95 # }
96 # @enddot
97 #
98 #
99 # ### Usage cases ###
100 #
101 #
102 # -# Making initial LIGO colored noise, Advanced LIGO noise, etc for different
103 # instruments. See datasource.append_options() for other choices
104 #
105 # $ gstlal_fake_frames --data-source=LIGO --channel-name=H1=FAKE-STRAIN --frame-type=H1_FAKE --gps-start-time=900000000 --gps-end-time=900005000 --output-path=testframes --verbose
106 #
107 # $ gstlal_fake_frames --data-source=AdvLIGO --channel-name=L1=FAKE-STRAIN --frame-type=L1_FAKE --gps-start-time=900000000 --gps-end-time=900005000 --output-path=testframes --verbose
108 #
109 #
110 # -# Custom colored noise.
111 #
112 # Obviously it is not practical to code up every possible noise curve to
113 # simulate as a custom data source. However, it is possible to provide your
114 # own custom noise curve as an ASCII file with frequency in one column and
115 # strain/Hz in the second. e.g., early advanced ligo noise curve found <a
116 # href=http://www.lsc-group.phys.uwm.edu/cgit/gstlal/plain/gstlal/share/v1_early_asd.txt>here</a>.
117 # You will need to convert ASD text files to PSD xml files using
118 # gstlal_psd_xml_from_asd_txt first.
119 #
120 # $ gstlal_fake_frames --data-source=white --color-psd v1psd.xml.gz --channel-name=V1=FAKE-STRAIN --frame-type=V1_FAKE --gps-start-time=900000000 --gps-end-time=900005000 --output-path=testframes --verbose
121 #
122 #
123 # -# Recoloring existing frame data
124 #
125 # This procedure is very similar to the above except that instead of
126 # using white noise to drive the fake frame generation, we start with
127 # real frame data and whiten it. Recoloring can be thought of as first
128 # whitening data and then applying a coloring filter. You must first
129 # make a reference PSD of the data with e.g. gstlal_reference_psd. You
130 # will need to make a frame cache of the frames to recolor.
131 #
132 # gstlal_fake_frames --whiten-reference-psd reference_psd.xml.gz --color-psd recolor_psd.xml.gz --data-source frames --output-path /path/to/output --output-channel-name h_16384Hz --gps-start-time 966384031 --frame-type T1300121_V1_EARLY_RECOLORED_V2 --gps-end-time 966389031 --frame-duration 16 --frames-per-file 256 --frame-cache frame.cache --channel-name=V1=h_16384Hz --verbose
133 #
134 #
135 # -# Creating injections into silence (assumes you have an injection xml file from e.g. lalapps_inspinj)
136 #
137 # gstlal_fake_frames --data-source silence --output-path /path/to/output --gps-start-time 966384031 --frame-type V1_INJECTIONS --gps-end-time 966389031 --frame-duration 16 --frames-per-file 256 --verbose --channel-name=V1=INJECTIONS --injections injections.xml
138 #
139 #
140 # -# Other things are certainly possible, please add some!
141 #
142 # ### Command line options ###
143 #
144 # See datasource.append_options() for common command line options shared among different programs
145 #
146 # + `--shift` [int] (ns): Number of nanoseconds, \f$\tau\f$ to delay (negative) or advance (positive) the input time series \f$x\f$ relative to the output time series \f$y\f$. \f$ y(t) = x(t+\tau)\f$
147 # + `--sample-rate` [int] (Hz): Sample rate at which to generate the data, should be less than or equal to the sample rate of the measured psds provided. Default = 16384 Hz, max 16384 Hz.
148 # + `--whiten-reference-psd` [file name]: Set the name of psd xml file to whiten the data with.
149 # + `--whiten-track-psd` []: Calculate PSD from input data and track with time.
150 # + `--color-psd` [file name]: Set the name of psd xml file to color the data with
151 # + `--output-path` [file path]: Path to output frame files. Default = "."
152 # + `--output-channel-name` [string]: The name of the channel in the output frames. The default is the same as the channel name.
153 # + `--frame-type` [string]: Frame type, required.
154 # + `--frame-duration` [int] (s): Set the duration of the output frames. The duration of the frame file will be multiplied by --frames-per-file. Default is 16s.
155 # + `--frames-per-file` [int]: Set the number of frames per file. Default is 256.
156 # + `--verbose` []: Be verbose.
157 #
158 # ### Debug
159 #
160 # It is possible to check the pipeline graph by interupting the running process
161 # with ctrl+c if you have the GST_DEBUG_DUMP_DOT_DIR evironment set. A dot
162 # graph will be written to gstlal_fake_frames. Here is an example:
163 #
164 # $ GST_DEBUG_DUMP_DOT_DIR="." gstlal_fake_frames --data-source=LIGO --channel-name=H1=FAKE-STRAIN --frame-type=H1_FAKE --gps-start-time=900000000 --gps-end-time=900005000 --output-path=testframes --verbose
165 #
166 # You should see:
167 #
168 # Wrote pipeline to ./gstlal_fake_frames_PLAYING.dot
169 #
170 # After Ctrl+c you should see:
171 #
172 # ^C*** SIG 2 attempting graceful shutdown (this might take several minutes) ... ***
173 # Wrote pipeline to ./gstlal_fake_frames.dot
174 #
175 # You can then turn the pipeline graph into an image with dot, e.g.,
176 #
177 # dot -Tpng gstlal_fake_frames_PLAYING.dot > test.png
178 #
179 # ### Review
180 #
181 #
182 # @cond
183 
184 ##
185 # Return the number of digits in a number
186 #
187 def digits(num):
188  return int(numpy.ceil(numpy.log10(float(num))))
189 
190 
191 ##
192 # Extract and validate the command line options
193 #
194 def parse_command_line():
195  parser = OptionParser(description = __doc__)
196 
197  #
198  # Append data source options
199  #
200 
201  datasource.append_options(parser)
202 
203  #
204  # Append program specific options
205  #
206 
207  parser.add_option("--shift", metavar = "ns", help = "Number of nanoseconds to delay (negative) or advance (positive) the time stream", type = "int")
208  parser.add_option("--sample-rate", metavar = "Hz", default = 16384, type = "int", help = "Sample rate at which to generate the data, should be less than or equal to the sample rate of the measured psds provided, default = 16384 Hz, max 16384 Hz")
209  parser.add_option("--whiten-reference-psd", metavar = "name", help = "Set the name of psd xml file to whiten the data with")
210  parser.add_option("--whiten-track-psd", action = "store_true", help = "Calculate PSD from input data and track with time.")
211  parser.add_option("--color-psd", metavar = "name", help = "Set the name of psd xml file to color the data with")
212  parser.add_option("--output-path", metavar = "name", default = ".", help = "Path to output frame files (default = \".\").")
213  parser.add_option("--output-channel-name", metavar = "name", help = "The name of the channel in the output frames. The default is the same as the channel name")
214  parser.add_option("--frame-type", metavar = "name", help = "Frame type, required")
215  parser.add_option("--frame-duration", metavar = "s", default = 16, type = "int", help = "Set the duration of the output frames. The duration of the frame file will be multiplied by --frames-per-file. Default: 16s")
216  parser.add_option("--frames-per-file", metavar = "n", default = 256, type = "int", help = "Set the number of frames per file. Default: 256")
217  parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose (optional).")
218 
219  #
220  # Parse options
221  #
222 
223  options, filenames = parser.parse_args()
224 
225  if options.sample_rate > 16384:
226  raise ValueError("--sample-rate must be <= 16384")
227 
228  if options.frame_type is None:
229  raise ValueError("--frame-type is required")
230 
231  return options, filenames
232 
233 
234 #
235 # Main
236 #
237 
238 options, filenames = parse_command_line()
239 
240 ## Parse the command line options into a python.datasource.GWDataSourceInfo class instance
241 gw_data_source = datasource.GWDataSourceInfo(options)
242 
243 ## Assume instrument is the first and only key of the python.datasource.GWDataSourceInfo.channel_dict
244 instrument = gw_data_source.channel_dict.keys()[0]
245 
246 # disable progress reports if not verbose
247 if not options.verbose:
248  pipeparts.mkprogressreport = lambda pipeline, src, *args: src
249 
250 # set default output channel if not set by user
251 if options.output_channel_name is None:
252  options.output_channel_name = gw_data_source.channel_dict[instrument]
253 
254 # do not do injections in datasource.mkbasicsrc(), we will do them later
255 injections, gw_data_source.injection_filename = options.injections, None
256 
257 ## Setup the pipeline
258 pipeline = gst.Pipeline(os.path.split(sys.argv[0])[1])
259 
260 ## Main loop
261 mainloop = gobject.MainLoop()
262 
263 ## An instance of the python.simplehandler.Handler class
264 handler = simplehandler.Handler(mainloop, pipeline)
265 
266 ## 1) Set the pipeline head to basic input from datasource.mkbasicsrc()
267 head = datasource.mkbasicsrc(pipeline, gw_data_source, instrument, verbose = options.verbose)
268 
269 ## 2) Set the pipeline head to be verbose with pipeparts.mkprogressreport()
270 head = pipeparts.mkprogressreport(pipeline, head, "frames")
271 
272 if options.shift is not None:
273  ## 3) Set the pipeline head to apply a time shift if requested with a pipeparts.mkshift() element
274  head = pipeparts.mkshift(pipeline, head, shift = options.shift)
275 
276  ## 4) Set the pipeline head to be verbose with a pipeparts.mkprogressreport() element
277  head = pipeparts.mkprogressreport(pipeline, head, "frames_shifted")
278 
279 if options.whiten_reference_psd:
280  ## If whitening read the PSD
281  wpsd = lalseries.read_psd_xmldoc(ligolw_utils.load_filename(options.whiten_reference_psd, verbose = options.verbose, contenthandler = LIGOLWContentHandler))[instrument]
282 else:
283  ## else set wpsd to None
284  wpsd = None
285 
286 if options.whiten_reference_psd or options.whiten_track_psd:
287  ## 5) Set the pipeline head to a whitened data stream if requested using a multirate_datasource.mkwhitened_multirate_src()
288  head = multirate_datasource.mkwhitened_multirate_src(pipeline, head, [options.sample_rate], instrument, psd = wpsd, seekevent = gw_data_source.seekevent, track_psd = options.whiten_track_psd)[options.sample_rate]
289 else:
290  ## 6) Otherwise simply add a pipeparts.mkcapsfilter() and pipeparts.mkresample()
291  head = pipeparts.mkcapsfilter(pipeline, pipeparts.mkresample(pipeline, head, quality = 9), "audio/x-raw-float, rate=%d" % options.sample_rate)
292  # FIXME this is a bit hacky, should datasource.mkbasicsrc be patched to change the sample rate?
293  # FIXME don't hardcode sample rate (though this is what datasource generates for all fake data, period
294  # Renormalize if datasource is "white"
295  if options.data_source == "white":
296  head = pipeparts.mkaudioamplify(pipeline, head, 1.0 / (pipeparts.audioresample_variance_gain(9, 16384, options.sample_rate))**.5)
297 
298 # Apply a coloring filter
299 if options.color_psd:
300 
301  ## read coloring psd file and convert to an FIR filter
302  rpsd = lalseries.read_psd_xmldoc(ligolw_utils.load_filename(options.color_psd, verbose = options.verbose, contenthandler = LIGOLWContentHandler))[instrument]
303 
304  ## Calculate the maximum sample rate
305  max_sample = int(round(1.0 / rpsd.deltaF * options.sample_rate / 2.0)) + 1
306 
307  # Truncate to requested output sample rate, if it is higher than the psd provides an assert will fail later
308  rpsd.data = rpsd.data[:max_sample]
309 
310  # create the coloring FIR kernel from reference_psd.psd_to_fir_kernel()
311  fir_matrix, latency, measured_sample_rate = reference_psd.psd_to_fir_kernel(rpsd)
312 
313  ## 7) Set the pipeline head to a pipeparts.mkfirbank() recoloring filter
314  head = pipeparts.mkfirbank(pipeline, head, latency = latency, fir_matrix = [fir_matrix], block_stride = 32 * options.sample_rate)
315 
316 
317 ## Set the tags for the output data
318 tagstr = "units=strain,channel-name=%s,instrument=%s" % (options.output_channel_name, instrument)
319 
320 ## 8) Put the units back to strain before writing to frames. Additionally, override the output channel name if provided from the command line
321 head = pipeparts.mktaginject(pipeline, head, tagstr)
322 
323 
324 if injections is not None:
325  ## 9) Do injections into the final fake data
326  head = pipeparts.mkinjections(pipeline, head, injections)
327 
328 if not os.path.isdir(options.output_path):
329  try:
330  os.makedirs(options.output_path)
331  except:
332  print >> sys.stderr, "Unable to make directory ", options.output_path
333  pass
334 else:
335  print "Target output directory already exists."
336 
337 ## 10) create frames
338 head = pipeparts.mkframecppchannelmux(pipeline, {"%s:%s" % (instrument, options.output_channel_name): head}, frame_duration = options.frame_duration, frames_per_file = options.frames_per_file)
339 
340 ## 11) Write the frames to disk
341 head = pipeparts.mkframecppfilesink(pipeline, head, frame_type = options.frame_type)
342 
343 # Put O(100) frames in each directory
344 head.connect("notify::timestamp", pipeparts.framecpp_filesink_ldas_path_handler, (options.output_path, digits(options.gps_start_time) - digits(options.frame_duration * options.frames_per_file * 100)))
345 
346 # Run it
347 if pipeline.set_state(gst.STATE_PLAYING) == gst.STATE_CHANGE_FAILURE:
348  raise RuntimeError("pipeline failed to enter PLAYING state")
349 
350 ## Debugging output
351 if "GST_DEBUG_DUMP_DOT_DIR" in os.environ:
352  pipeparts.write_dump_dot(pipeline, "%s_PLAYING" % pipeline.get_name(), verbose = True)
353 
354  ## Setup a signal handler to intercept SIGINT in order to write the pipeline graph at ctrl+C before cleanly shutting down
355  class SigHandler(simplehandler.OneTimeSignalHandler):
356  def do_on_call(self, signum, frame):
357  pipeparts.write_dump_dot(pipeline, "%s_SIGINT" % pipeline.get_name(), verbose = True)
358 
359  sighandler = SigHandler(pipeline)
360 
361 mainloop.run()
362 
363 ## @endcond