gstlal  0.8.1
 All Classes Namespaces Files Functions Variables Pages
gstlal_fake_frames_pipe
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2011 Chad Hanna
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 ## @file
20 # An HTCondor DAG generator to recolor frame data
21 
22 """
23 This program makes a dag to recolor frames
24 """
25 
26 __author__ = 'Chad Hanna <chad.hanna@ligo.org>'
27 
28 
29 ##############################################################################
30 # import the standard library modules
31 import sys, os, copy, math
32 import subprocess, socket, tempfile
33 
34 ##############################################################################
35 # import the modules we need to build the pipeline
36 from glue import iterutils
37 from glue import pipeline
38 from glue import lal
39 from glue.ligolw import lsctables
40 from glue import segments
41 from glue.ligolw import ligolw
42 import glue.ligolw.utils as utils
43 import glue.ligolw.utils.segments as ligolw_segments
44 from optparse import OptionParser
45 from gstlal import datasource
46 from gstlal import dagparts
47 
class ContentHandler(ligolw.LIGOLWContentHandler):
    """
    LIGO light-weight XML content handler used when loading the frame
    segments file.  It adds nothing to ligolw.LIGOLWContentHandler itself;
    it exists so that lsctables.use_in() below can be applied to a class
    private to this program.
    """


# register the lsctables table definitions with the handler
lsctables.use_in(ContentHandler)
51 
52 #
53 # Classes for generating reference psds
54 #
55 
class gstlal_reference_psd_job(pipeline.CondorDAGJob):
    """
    Condor job description for the gstlal_reference_psd executable.

    The job runs in the vanilla universe, sets the condor commands
    getenv, requirements and environment, writes its submit file to
    <tag_base>.sub and sends stdout/stderr to files under logs/.
    """
    def __init__(self, executable=dagparts.which('gstlal_reference_psd'), tag_base='gstlal_reference_psd'):
        """
        executable -- program to run.  The default is looked up with
            dagparts.which() once, when this class is defined.
        tag_base -- prefix of the submit file and log file names.
        """
        self.__prog__ = 'gstlal_reference_psd'
        self.__universe = 'vanilla'
        self.__executable = executable
        pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
        self.tag_base = tag_base

        condor_commands = (
            ('getenv', 'True'),
            ('requirements', 'Memory > 1999'),  #FIXME is this enough?
            ('environment', "KMP_LIBRARY=serial;MKL_SERIAL=yes"),
        )
        for command, value in condor_commands:
            self.add_condor_cmd(command, value)

        self.set_sub_file(tag_base + '.sub')
        # stdout and stderr share everything but the extension
        log_stem = 'logs/' + tag_base + '-$(macroid)-$(process)'
        self.set_stdout_file(log_stem + '.out')
        self.set_stderr_file(log_stem + '.err')
74 
75 
class gstlal_median_psd_job(pipeline.CondorDAGJob):
    """
    Condor job description for the gstlal_ninja_median_of_psds executable.

    The job runs in the vanilla universe, sets the condor commands
    getenv and environment, writes its submit file to <tag_base>.sub and
    sends stdout/stderr to files under logs/.
    """
    def __init__(self, executable=dagparts.which('gstlal_ninja_median_of_psds'), tag_base='gstlal_ninja_median_of_psds'):
        """
        executable -- program to run.  The default is looked up with
            dagparts.which() once, when this class is defined.
        tag_base -- prefix of the submit file and log file names.
        """
        self.__prog__ = 'gstlal_ninja_median_of_psds'
        self.__universe = 'vanilla'
        self.__executable = executable
        pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
        self.tag_base = tag_base

        condor_commands = (
            ('getenv', 'True'),
            ('environment', "KMP_LIBRARY=serial;MKL_SERIAL=yes"),
        )
        for command, value in condor_commands:
            self.add_condor_cmd(command, value)

        self.set_sub_file(tag_base + '.sub')
        # stdout and stderr share everything but the extension
        log_stem = 'logs/' + tag_base + '-$(macroid)-$(process)'
        self.set_stdout_file(log_stem + '.out')
        self.set_stderr_file(log_stem + '.err')
93 
94 
class gstlal_ninja_smooth_reference_psd_job(pipeline.CondorDAGJob):
    """
    Condor job description for the gstlal_ninja_smooth_reference_psd
    executable.

    The job runs in the vanilla universe, sets the condor commands
    getenv and environment, writes its submit file to <tag_base>.sub and
    sends stdout/stderr to files under logs/.
    """
    def __init__(self, executable=dagparts.which('gstlal_ninja_smooth_reference_psd'), tag_base='gstlal_ninja_smooth_reference_psd'):
        """
        executable -- program to run.  The default is looked up with
            dagparts.which() once, when this class is defined.
        tag_base -- prefix of the submit file and log file names.
        """
        self.__prog__ = 'gstlal_ninja_smooth_reference_psd'
        self.__universe = 'vanilla'
        self.__executable = executable
        pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
        self.tag_base = tag_base

        condor_commands = (
            ('getenv', 'True'),
            ('environment', "KMP_LIBRARY=serial;MKL_SERIAL=yes"),
        )
        for command, value in condor_commands:
            self.add_condor_cmd(command, value)

        self.set_sub_file(tag_base + '.sub')
        # stdout and stderr share everything but the extension
        log_stem = 'logs/' + tag_base + '-$(macroid)-$(process)'
        self.set_stdout_file(log_stem + '.out')
        self.set_stderr_file(log_stem + '.err')
112 
113 
class gstlal_reference_psd_node(pipeline.CondorDAGNode):
    """
    A gstlal_reference_psd node.

    Measures a PSD from frame data for one instrument over one GPS
    interval, writes it to <cwd>/<instrument>-<start>-<end>-reference_psd.xml.gz,
    records that file in dag.output_cache and adds itself to the DAG.
    The output file name is available as self.output_name.
    """
    def __init__(self, job, dag, frame_cache, gps_start_time, gps_end_time, instrument, channel, injections=None, p_node=None):
        """
        job -- the condor job this node belongs to
        dag -- DAG to add the node to; its output_cache list is appended to
        frame_cache -- value for --frame-cache
        gps_start_time, gps_end_time -- integer GPS bounds of the interval
        instrument, channel -- combined into --channel-name=INSTRUMENT=CHANNEL
        injections -- optional value for --injections (skipped when false)
        p_node -- optional iterable of parent nodes (default: no parents)
        """
        pipeline.CondorDAGNode.__init__(self, job)
        self.add_var_opt("frame-cache", frame_cache)
        self.add_var_opt("gps-start-time", gps_start_time)
        self.add_var_opt("gps-end-time", gps_end_time)
        self.add_var_opt("data-source", "frames")
        self.add_var_arg("--channel-name=%s=%s" % (instrument, channel))
        if injections:
            self.add_var_opt("injections", injections)

        output_name = self.output_name = '%s/%s-%d-%d-reference_psd.xml.gz' % (os.getcwd(), instrument, gps_start_time, gps_end_time)
        self.add_var_opt("write-psd", output_name)

        # output_name is absolute (it starts with the cwd), so it must follow
        # the host directly; an extra "/" would yield a "//..." path in the URL
        url = "file://localhost%s" % (output_name,)
        dag.output_cache.append(lal.CacheEntry(instrument, "-", segments.segment(gps_start_time, gps_end_time), url))

        # p_node defaults to None rather than a shared mutable list
        for p in p_node or ():
            self.add_parent(p)
        dag.add_node(self)
135 
136 
class gstlal_ninja_smooth_reference_psd_node(pipeline.CondorDAGNode):
    """
    A gstlal_ninja_smooth_reference_psd node.

    Takes one PSD file as input and writes to an output file whose name is
    the input name with 'reference_psd' replaced by
    'smoothed_reference_psd'.  The output file name is available as
    self.output_name.  The node adds itself to the DAG.
    """
    def __init__(self, job, dag, instrument, input_psd, p_node=None):
        """
        job -- the condor job this node belongs to
        dag -- DAG to add the node to
        instrument -- value for --instrument
        input_psd -- name of the PSD file to read (--input-psd)
        p_node -- optional iterable of parent nodes (default: no parents)
        """
        pipeline.CondorDAGNode.__init__(self, job)
        #FIXME shouldn't be hardcoding stuff like this
        output_name = self.output_name = input_psd.replace('reference_psd', 'smoothed_reference_psd')
        self.add_var_opt("instrument", instrument)
        self.add_var_opt("input-psd", input_psd)
        self.add_var_opt("output-psd", output_name)
        # p_node defaults to None rather than a shared mutable list
        for p in p_node or ():
            self.add_parent(p)
        dag.add_node(self)
152 
153 
class gstlal_median_psd_node(pipeline.CondorDAGNode):
    """
    A gstlal_median_psd node.

    Passes a list of PSD files as file arguments to the job and names the
    output file with --output-name.  The output file name is available as
    self.output_name.  The node adds itself to the DAG.
    """
    def __init__(self, job, dag, instrument, input_psds, output, p_node=None):
        """
        job -- the condor job this node belongs to
        dag -- DAG to add the node to
        instrument -- value for --instrument
        input_psds -- iterable of PSD file names, each added as a file argument
        output -- name of the output file (--output-name)
        p_node -- optional iterable of parent nodes (default: no parents)
        """
        pipeline.CondorDAGNode.__init__(self, job)
        output_name = self.output_name = output
        self.add_var_opt("instrument", instrument)
        self.add_var_opt("output-name", output_name)
        for psd in input_psds:
            self.add_file_arg(psd)
        # p_node defaults to None rather than a shared mutable list
        for p in p_node or ():
            self.add_parent(p)
        dag.add_node(self)
170 
171 
172 #
173 # classes for generating recolored frames
174 #
175 
class gstlal_fake_frames_job(pipeline.CondorDAGJob):
    """
    Condor job description for the gstlal_fake_frames executable.

    The job runs in the vanilla universe, sets the condor commands
    getenv, requirements and environment, writes its submit file to
    <tag_base>.sub and sends stdout/stderr to files under logs/.
    """
    def __init__(self, executable=dagparts.which('gstlal_fake_frames'), tag_base='gstlal_fake_frames'):
        """
        executable -- program to run.  The default is looked up with
            dagparts.which() once, when this class is defined.
        tag_base -- prefix of the submit file and log file names.
        """
        self.__prog__ = 'gstlal_fake_frames'
        self.__universe = 'vanilla'
        self.__executable = executable
        pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
        self.tag_base = tag_base

        condor_commands = (
            ('getenv', 'True'),
            ('requirements', 'Memory > 1999'),  #FIXME is this enough?
            ('environment', "KMP_LIBRARY=serial;MKL_SERIAL=yes"),
        )
        for command, value in condor_commands:
            self.add_condor_cmd(command, value)

        self.set_sub_file(tag_base + '.sub')
        # stdout and stderr share everything but the extension
        log_stem = 'logs/' + tag_base + '-$(macroid)-$(process)'
        self.set_stdout_file(log_stem + '.out')
        self.set_stderr_file(log_stem + '.err')
194 
195 
class gstlal_fake_frames_node(pipeline.CondorDAGNode):
    """
    A gstlal_fake_frames node.

    Builds the command line for one gstlal_fake_frames run over a single
    GPS interval of one instrument and adds the node to the DAG.
    """
    def __init__(self, job, dag, frame_cache, gps_start_time, gps_end_time, channel, reference_psd, color_psd, sample_rate, injections=None, output_channel_name=None, duration=4096, output_path=None, frame_type=None, shift=None, whiten_track_psd=False, frames_per_file=1, p_node=None, instrument=None):
        """
        job -- the condor job this node belongs to
        dag -- DAG to add the node to
        frame_cache -- value for --frame-cache
        gps_start_time, gps_end_time -- GPS bounds of the interval
        channel -- input channel, used in --channel-name=INSTRUMENT=CHANNEL
        reference_psd -- value for --whiten-reference-psd (skipped when None);
            also the value of --whiten-track-psd when whiten_track_psd is set
        color_psd -- value for --color-psd (skipped when None)
        sample_rate -- value for --sample-rate
        injections -- optional value for --injections
        output_channel_name -- optional value for --output-channel-name
        duration -- value for --frame-duration
        output_path -- optional value for --output-path
        frame_type -- optional value for --frame-type
        shift -- optional value for --shift (skipped when false, e.g. 0)
        whiten_track_psd -- when true, add --whiten-track-psd
        frames_per_file -- value for --frames-per-file
        p_node -- optional iterable of parent nodes (default: no parents)
        instrument -- instrument used in --channel-name.  When omitted the
            module-level variable ``instrument`` is used, which is what this
            class implicitly relied on before the keyword existed.

        Raises ValueError when no instrument can be determined.
        """
        if instrument is None:
            # Backward compatibility: the constructor used to read the
            # global loop variable ``instrument`` without declaring it.
            instrument = globals().get("instrument")
            if instrument is None:
                raise ValueError("gstlal_fake_frames_node requires an instrument")

        pipeline.CondorDAGNode.__init__(self, job)
        self.add_var_opt("frame-cache", frame_cache)
        self.add_var_opt("gps-start-time", gps_start_time)
        self.add_var_opt("gps-end-time", gps_end_time)
        self.add_var_opt("data-source", "frames")
        self.add_var_arg("--channel-name=%s=%s" % (instrument, channel))
        # NOTE(review): glue.pipeline's add_var_opt is understood to pass
        # str(value) to the job, so a None here would reach the executable
        # as the literal text "None" -- confirm.  Unset values are skipped.
        if reference_psd is not None:
            self.add_var_opt("whiten-reference-psd", reference_psd)
        if color_psd is not None:
            self.add_var_opt("color-psd", color_psd)
        self.add_var_opt("sample-rate", sample_rate)
        if injections is not None:
            self.add_var_opt("injections", injections)
        if output_channel_name is not None:
            self.add_var_opt("output-channel-name", output_channel_name)
        self.add_var_opt("frame-duration", duration)
        if output_path is not None:
            self.add_var_opt("output-path", output_path)
        if frame_type is not None:
            self.add_var_opt("frame-type", frame_type)
        if whiten_track_psd:
            self.add_var_opt("whiten-track-psd", reference_psd)
        if shift:
            self.add_var_opt("shift", shift)
        self.add_var_opt("frames-per-file", frames_per_file)
        # p_node defaults to None rather than a shared mutable list
        for p in p_node or ():
            self.add_parent(p)
        dag.add_node(self)
226 
227 
def choosesegs(seglists, min_segment_length):
    """
    Drop short segments from every segment list in seglists, in place.

    seglists -- mapping of instrument to a list of segments; each value is
        replaced by a new segments.segmentlist
    min_segment_length -- only segments whose abs() is strictly greater
        than this are kept

    Returns None.
    """
    # iterate over a snapshot of the keys; only existing keys are reassigned
    for instrument in list(seglists):
        long_enough = [
            segments.segment(seg)
            for seg in seglists[instrument]
            if abs(seg) > min_segment_length
        ]
        seglists[instrument] = segments.segmentlist(long_enough)
235 
236 
def parse_command_line():
    """
    Parse and validate the command line.

    Returns the tuple (options, inchannels, outchannels, outpaths,
    frametypes, filenames) where the four dictionaries are keyed by
    instrument and built from the repeated IFO=VALUE options, and
    filenames holds the remaining positional arguments.

    Raises ValueError when a required option is missing or when
    --frame-type, --channel-name and --output-channel-name do not name the
    same set of instruments.
    """
    parser = OptionParser(description = __doc__)

    parser.add_option("--frame-cache", metavar = "filename", help = "Set the name of the LAL cache listing the LIGO-Virgo .gwf frame files (optional)")
    parser.add_option("--injections", metavar = "filename", help = "Set the name of the LIGO light-weight XML file from which to load injections (optional).")
    parser.add_option("--channel-name", metavar = "name", action = "append", help = "Set the name of the channels to process. Can be given multiple times as --channel-name=IFO=CHANNEL-NAME")
    parser.add_option("--frame-segments-file", metavar = "filename", help = "Set the name of the LIGO light-weight XML file from which to load frame segments. Required")
    parser.add_option("--frame-segments-name", metavar = "name", help = "Set the name of the segments to extract from the segment tables. Required")

    parser.add_option("--min-segment-length", metavar = "SECONDS", help = "Set the minimum segment length to process (required)", type="float")
    parser.add_option("--shift", metavar = "NANOSECONDS", help = "Number of nanoseconds to delay (negative) or advance (positive) the time stream", type = "int")
    parser.add_option("--sample-rate", metavar = "HZ", default = 16384, type = "int", help = "Sample rate at which to generate the data, should be less than or equal to the sample rate of the measured psds provided, default = 16384 Hz")
    parser.add_option("--whiten-type", metavar="psdperseg|medianofpsdperseg|FILE", help = "Whiten whatever data is coming out of datasource either from the data or from a fixed reference psd if a file is given")
    parser.add_option("--whiten-track-psd", action = "store_true", help = "Calculate PSD from input data and track with time.")
    parser.add_option("--color-psd", metavar = "FILE", help = "Set the name of psd xml file to color the data with")
    parser.add_option("--output-path", metavar = "IFO=PATH", action = "append", help = "Set the instrument dependent output path for frames, defaults to current working directory. eg H1=/path/to/H1/frames. Can be given more than once.")
    parser.add_option("--output-channel-name", metavar = "IFO=NAME", action="append", help = "The name of the channel in the output frames. The default is the same as the channel name. can be given more than once. Required ")
    parser.add_option("--frame-type", metavar = "IFO=TYPE", action = "append", help = "Set the instrument dependent frame type, H1=TYPE. Can be given more than once and is required for each instrument processed.")
    parser.add_option("--frame-duration", metavar = "SECONDS", default = 16, type = "int", help = "Set the duration of the output frames. The duration of the frame file will be multiplied by --frames-per-file. Default: 16s")
    parser.add_option("--frames-per-file", metavar = "INT", default = 256, type = "int", help = "Set the number of frames per file. Default: 256")
    parser.add_option("--verbose", action = "store_true", help = "Be verbose")

    options, filenames = parser.parse_args()

    # channel_name and output_channel_name are converted to dictionaries
    # below, so report them as missing here instead of failing later on None
    required = ("min_segment_length", "channel_name", "output_channel_name", "frame_type", "frame_segments_file", "frame_segments_name")
    fail = ""
    for option in required:
        if getattr(options, option) is None:
            fail += "must provide option %s\n" % (option)
    if fail:
        raise ValueError(fail)

    inchannels = datasource.channel_dict_from_channel_list(options.channel_name)
    outchannels = datasource.channel_dict_from_channel_list(options.output_channel_name)
    frametypes = datasource.channel_dict_from_channel_list(options.frame_type)
    # --output-path is optional: optparse leaves it as None when it is not
    # given, so hand the helper an empty list in that case.
    # NOTE(review): assumes channel_dict_from_channel_list iterates its
    # argument and returns an empty dict for an empty list -- confirm.
    outpaths = datasource.channel_dict_from_channel_list(options.output_path or [])

    if not (set(frametypes) == set(inchannels) == set(outchannels)):
        raise ValueError('--frame-type, --channel-name and --output-channel-name must contain same instruments')

    return options, inchannels, outchannels, outpaths, frametypes, filenames
277 
278 
#
# Build the DAG
#

options, inchannels, outchannels, outpaths, frametypes, filenames = parse_command_line()

# The job classes send stdout/stderr to files under logs/.  An existing
# directory is fine; any other failure to create it is reported.
try:
    os.mkdir("logs")
except OSError:
    if not os.path.isdir("logs"):
        raise

dag = dagparts.CondorDAG("gstlal_fake_frames_pipe")

# load the requested segments and discard the ones that are too short
seglists = ligolw_segments.segmenttable_get_by_name(utils.load_filename(options.frame_segments_file, verbose = options.verbose, contenthandler = ContentHandler), options.frame_segments_name).coalesce()
choosesegs(seglists, options.min_segment_length)

psdJob = gstlal_reference_psd_job()
smoothJob = gstlal_ninja_smooth_reference_psd_job()
medianJob = gstlal_median_psd_job()
colorJob = gstlal_fake_frames_job()

smoothnode = {}
mediannode = {}
# parents of the recoloring nodes, per instrument
p_node = dict((i, []) for i in seglists)

# psd[instrument] ends up as one of:
#   a dict mapping segment -> PSD file name   (psdperseg)
#   a single PSD file name                    (medianofpsdperseg or FILE)
#   None                                      (no --whiten-type given)
if options.whiten_type in ("psdperseg", "medianofpsdperseg"):
    psd = {}
    for instrument, seglist in seglists.items():
        smoothnode[instrument] = {}
        psd[instrument] = {}
        for seg in seglist:
            #FIXME if there are segments without frame caches this will barf
            psdnode = gstlal_reference_psd_node(psdJob, dag, options.frame_cache, int(seg[0]), int(seg[1]), instrument, inchannels[instrument], injections=None, p_node=[])
            smoothnode[instrument][seg] = gstlal_ninja_smooth_reference_psd_node(smoothJob, dag, instrument, psdnode.output_name, p_node=[psdnode])
            if options.whiten_type == "psdperseg":
                psd[instrument][seg] = smoothnode[instrument][seg].output_name

        # the median node is a child of every smoothing node, and the
        # recoloring nodes of this instrument are children of the median node
        smooth_nodes = list(smoothnode[instrument].values())
        mediannode[instrument] = gstlal_median_psd_node(medianJob, dag, instrument, [v.output_name for v in smooth_nodes], "%s_median_psd.xml.gz" % instrument, p_node=smooth_nodes)
        p_node[instrument] = [mediannode[instrument]]
        if options.whiten_type == "medianofpsdperseg":
            psd[instrument] = mediannode[instrument].output_name

elif options.whiten_type is not None:
    # --whiten-type=FILE: the recoloring nodes receive the PSD as a file
    # name, so hand the same file to every instrument.  (This branch used to
    # reference the undefined names lalseries and options.whiten_reference_psd.)
    psd = dict((i, options.whiten_type) for i in seglists)
else:
    psd = dict((i, None) for i in seglists)

# NOTE: the loop variable must stay named "instrument" at module level;
# gstlal_fake_frames_node has historically read it as a global.
for instrument, seglist in seglists.items():
    output_path = outpaths.get(instrument)
    for seg in seglist:
        reference_psd = psd[instrument]
        if isinstance(reference_psd, dict):
            # one PSD per segment
            reference_psd = reference_psd[seg]
        gstlal_fake_frames_node(colorJob, dag, options.frame_cache, int(seg[0]), int(seg[1]), inchannels[instrument], reference_psd, color_psd=options.color_psd, sample_rate = options.sample_rate, injections=options.injections, output_channel_name = outchannels[instrument], output_path = output_path, duration = options.frame_duration, frame_type = frametypes[instrument], shift = options.shift, whiten_track_psd = options.whiten_track_psd, frames_per_file = options.frames_per_file, p_node=p_node[instrument])

dag.write_sub_files()
dag.write_dag()
dag.write_script()
dag.write_cache()