3 # Copyright (C) 2011-2014 Chad Hanna
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 ## @file gstlal_inspiral_pipe
20 # The offline gstlal inspiral workflow generator; Use to make HTCondor DAGs to run CBC workflows
23 # It is rare that you would invoke this program in a standalone mode. Usually the inputs are complicated and best automated via a Makefile, e.g., Makefile.triggers_example
25 # ### Command line options
27 # See datasource.append_options() for generic options
29 # + `--psd-fft-length` [int] (s): FFT length, default 16s.
30 # + `--reference-psd: Don't measure PSDs, use this one instead
31 # + `--overlap [int]: Set the factor that describes the overlap of the sub banks, must be even!
32 # + `--autocorrelation-length [int]: The minimum number of samples to use for auto-chisquared, default 201 should be odd
33 # + `--samples-min [int]: The minimum number of samples to use for time slices default 1024
34 # + `--samples-max-256 [int]: The maximum number of samples to use for time slices with frequencies above 256Hz, default 1024
35 # + `--samples-max-64 [int]: The maximum number of samples to use for time slices with frequencies above 64Hz, default 2048
36 # + `--samples-max [int]: The maximum number of samples to use for time slices with frequencies below 64Hz, default 4096
37 # + `--bank-cache [file names]: Set the bank cache files in format H1=H1.cache,H2=H2.cache, etc..
38 # + `--tolerance [float]: set the SVD tolerance, default 0.9999
39 # + `--flow [float]: set the low frequency cutoff, default 40 (Hz)
40 # + `--identity-transform: Use identity transform, i.e. no SVD
41 # + `--vetoes [filename]: Set the veto xml file.
42 # + `--time-slide-file [filename]: Set the time slide table xml file
43 # + `--web-dir [filename]: Set the web directory like /home/USER/public_html
44 # + `--fir-stride [int] (s): Set the duration of the fft output blocks, default 8
45 # + `--control-peak-time [int] (s): Set the peak finding time for the control signal, default 8
46 # + `--coincidence-threshold [int] (s): Set the coincidence window in seconds (default = 0.005). The light-travel time between instruments will be added automatically in the coincidence test.
47 # + `--num-banks [str]: The number of parallel subbanks per gstlal_inspiral job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.
48 # + `--max-inspiral-jobs [int]: Set the maximum number of gstlal_inspiral jobs to run simultaneously, default no constraint.
49 # + `--ht-gate-threshold [float]: set a threshold on whitened h(t) to veto glitches
50 # + `--inspiral-executable [str]: Options gstlal_inspiral | gstlal_iir_inspiral, default gstlal_inspiral
51 # + `--blind-injections [filename]: Set the name of an injection file that will be added to the data without saving the sim_inspiral table or otherwise processing the data differently. Has the effect of having hidden signals in the input data. Separate injection runs using the --injections option will still occur.
52 # + `--far-injections` [filename]: Injection files with injections too far away to be seen that are not filtered. Must be 1:1 with --injections if given at all. See https://www.lsc-group.phys.uwm.edu/ligovirgo/cbcnote/NSBH/MdcInjections/MDC1 for example.
53 # + `--verbose: Be verbose
# ### Diagram of the HTCondor workflow produced
56 # @dotfile trigger_pipe.dot
60 This program makes a dag to run gstlal_inspiral offline
63 __author__ =
'Chad Hanna <chad.hanna@ligo.org>'
65 ##############################################################################
66 # import standard modules and append the lalapps prefix to the python path
67 import sys, os, copy, math, stat
68 import subprocess, socket, tempfile
71 ##############################################################################
72 # import the modules we need to build the pipeline
73 from glue
import iterutils
76 from glue
import segments
77 from glue.ligolw
import ligolw
78 from glue.ligolw
import array as ligolw_array
79 from glue.ligolw
import lsctables
80 from glue.ligolw
import param as ligolw_param
81 import glue.ligolw.utils as ligolw_utils
82 import glue.ligolw.utils.segments as ligolw_segments
83 from optparse
import OptionParser
84 from gstlal
import inspiral, inspiral_pipe
85 from gstlal
import dagparts as gstlaldagparts
86 from pylal
import series as lalseries
88 from pylal.datatypes
import LIGOTimeGPS
89 from gstlal
import datasource
class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
	"""Content handler used when parsing LIGO_LW XML documents in this program."""
	pass
# register the table/array/param parsers with the content handler
ligolw_array.use_in(LIGOLWContentHandler)
lsctables.use_in(LIGOLWContentHandler)
ligolw_param.use_in(LIGOLWContentHandler)
def sim_tag_from_inj_file(injections):
	"""Turn an injection file name into a tag string, e.g. "bns-inj.xml.gz" -> "bns_inj".

	Returns None when injections is None (the non-injection run).
	"""
	if injections is None:
		return None
	return injections.replace('.xml', '').replace('.gz', '').replace('-','_')
def get_max_length(bank_cache, verbose = False):
	"""Return the longest template duration (s) found in the banks of one cache file.

	Only the first cache in bank_cache is inspected; the banks are assumed
	identical across instruments — NOTE(review): confirm this assumption holds.
	"""
	max_time = 0
	cache = lal.Cache.fromfilenames([bank_cache.values()[0]])
	for f in cache.pfnlist():
		xmldoc = ligolw_utils.load_filename(f, verbose = verbose, contenthandler = LIGOLWContentHandler)
		sngls = lsctables.SnglInspiralTable.get_table(xmldoc)
		max_time = max(max_time, max(sngls.get_column('template_duration')))
	return max_time
def chunks(l, n):
	"""Yield successive length-n slices of sequence l (last slice may be shorter)."""
	for i in range(0, len(l), n):
		yield l[i:i + n]
124 "Flatten one level of nesting"
125 return list(itertools.chain.from_iterable(lst))
# get a dictionary of all the disjoint 2+ detector combination segments
def analysis_segments(analyzable_instruments_set, allsegs, boundary_seg, max_template_length):
	"""Build a segmentlistdict keyed by frozenset of instruments, holding the
	padded, chopped-up segments to analyze for each 2+ detector combination.

	H1H2-only and H2L1-only times are never analyzed.
	"""
	segsdict = segments.segmentlistdict()
	# 512 seconds for the whitener to settle + the maximum template_length
	start_pad = 512 + max_template_length
	# Chosen so that the overlap is only a ~5% hit in run time for long segments...
	segment_length = int(20 * start_pad)
	for n in range(2, 1 + len(analyzable_instruments_set)):
		for ifo_combos in iterutils.choices(list(analyzable_instruments_set), n):
			# never analyze H1H2 or H2L1 times
			if set(ifo_combos) == set(('H1', 'H2')) or set(ifo_combos) == set(('L1', 'H2')):
				print >> sys.stderr, "not analyzing: ", ifo_combos, " only time"
				continue
			# times when exactly this combination (and no others) is on
			segsdict[frozenset(ifo_combos)] = allsegs.intersection(ifo_combos) - allsegs.union(analyzable_instruments_set - set(ifo_combos))
			segsdict[frozenset(ifo_combos)] &= segments.segmentlist([boundary_seg])
			segsdict[frozenset(ifo_combos)] = segsdict[frozenset(ifo_combos)].protract(start_pad) #FIXME don't hard code
			segsdict[frozenset(ifo_combos)] = gstlaldagparts.breakupsegs(segsdict[frozenset(ifo_combos)], segment_length, start_pad) #FIXME don't hardcode
			if not segsdict[frozenset(ifo_combos)]:
				del segsdict[frozenset(ifo_combos)]
	return segsdict
def psd_node_gen(refPSDJob, dag, parent_nodes, segsdict, channel_dict, options):
	"""Add one reference-PSD measurement job per (instrument combo, segment).

	Returns a dict mapping (ifos, seg) -> the generic_node created for it.
	"""
	psd_nodes = {}
	for ifos in segsdict:
		# only pass the channels for the instruments in this combination
		this_channel_dict = dict((k, channel_dict[k]) for k in ifos if k in channel_dict)
		for seg in segsdict[ifos]:
			psd_nodes[(ifos, seg)] = \
				inspiral_pipe.generic_node(refPSDJob, dag, parent_nodes = parent_nodes,
					opts = {"gps-start-time":seg[0].seconds,
						"gps-end-time":seg[1].seconds,
						"data-source":"frames",
						"channel-name":datasource.pipeline_channel_list_from_channel_dict(this_channel_dict, ifos = ifos),
						"psd-fft-length":options.psd_fft_length,
						"frame-segments-name": options.frame_segments_name},
					input_files = {"frame-cache":options.frame_cache,
						"frame-segments-file":options.frame_segments_file},
					output_files = {"write-psd":inspiral_pipe.T050017_filename(ifos, "REFERENCE_PSD", seg[0].seconds, seg[1].seconds, '.xml.gz', path = refPSDJob.output_path)}
					)
	return psd_nodes
def svd_node_gen(svdJob, dag, parent_nodes, psd, bank_groups, options, seg):
	"""Add one SVD-bank job per (instrument, bank group).

	Returns a dict mapping ifo -> list of svd nodes, in bank-group order.
	"""
	svd_nodes = {}
	for i, bank_group in enumerate(bank_groups):
		for ifo, files in bank_group.items():
			# First sort out the clipleft, clipright options
			clipleft = []
			clipright = []
			ids = []
			for n, f in enumerate(files):
				# handle template bank clipping
				if (n == 0) and (i == 0):
					# first bank of the first group: nothing to the left to overlap with
					clipleft.append(0)
				else:
					clipleft.append(options.overlap / 2)
				if (i == len(bank_groups) - 1) and (n == len(files) - 1):
					# last bank of the last group: nothing to the right to overlap with
					clipright.append(0)
				else:
					clipright.append(options.overlap / 2)
				ids.append("%d_%d" % (i, n))
			svd_bank_name = inspiral_pipe.T050017_filename(ifo, '%d_SVD' % (i,), seg[0].seconds, seg[1].seconds, '.xml.gz', path = svdJob.output_path)
			svd_nodes.setdefault(ifo, []).append(
				inspiral_pipe.generic_node(svdJob, dag,
					parent_nodes = parent_nodes,
					opts = {"svd-tolerance":options.tolerance,
						"flow":options.flow,
						"clipleft":clipleft,
						"clipright":clipright,
						"samples-min":options.samples_min,
						"samples-max-256":options.samples_max_256,
						"samples-max-64":options.samples_max_64,
						"samples-max":options.samples_max,
						"autocorrelation-length":options.autocorrelation_length,
						"bank-id":ids,
						"identity-transform":options.identity_transform,
						"snr-threshold":4.0,
						"ortho-gate-fap":0.5},
					input_files = {"template-bank":files,
						"reference-psd":psd},
					output_files = {"write-svd":svd_bank_name}
					)
				)
	return svd_nodes
def create_svd_bank_strings(svd_nodes, instruments = None):
	"""Build the comma-separated "IFO:svd-file" strings gstlal_inspiral expects.

	One output string per svd node index; instruments, if given, restricts
	which ifos are included.
	"""
	# FIXME assume that the number of svd nodes is the same per ifo, a good assumption though
	outstrings = []
	for i in range(len(list(svd_nodes.values())[0])):
		svd_bank_string = ""
		for ifo in svd_nodes:
			if instruments is not None and ifo not in instruments:
				continue
			svd_bank_string += "%s:%s," % (ifo, svd_nodes[ifo][i].output_files["write-svd"])
		svd_bank_string = svd_bank_string.strip(",")
		outstrings.append(svd_bank_string)
	return outstrings
def inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict):
	"""Add the gstlal_inspiral trigger-generation jobs (non-injection and one
	set per injection file) for every (instrument combo, segment).

	Returns a dict mapping (ifos, sim_tag or None) -> {seg: [nodes, ...]}.
	"""
	inspiral_nodes = {}
	for ifos in segsdict:

		# setup dictionaries to hold the inspiral nodes
		inspiral_nodes[(ifos, None)] = {}
		for injections in options.injections:
			inspiral_nodes[(ifos, sim_tag_from_inj_file(injections))] = {}

		# FIXME choose better splitting?
		# NOTE(review): the original chunk size was lost in transcription; 10 is assumed — confirm
		numchunks = 10

		for seg in segsdict[ifos]:

			# only use a channel dict with the relevant channels
			this_channel_dict = dict((k, channel_dict[k]) for k in ifos if k in channel_dict)

			# get the svd bank strings
			svd_bank_strings_full = create_svd_bank_strings(svd_nodes, instruments = this_channel_dict.keys())

			for chunk_counter, svd_bank_strings in enumerate(chunks(svd_bank_strings_full, numchunks)):
				output_names = [inspiral_pipe.T050017_filename(ifos, '%d_LLOID' % (i + numchunks * chunk_counter,), seg[0].seconds, seg[1].seconds, '.xml.gz', path = gstlalInspiralJob.output_path) for i, s in enumerate(svd_bank_strings)]
				dist_stat_names = [inspiral_pipe.T050017_filename(ifos, '%d_DIST_STATS' % (i + numchunks * chunk_counter,), seg[0].seconds, seg[1].seconds, '.xml.gz', path = gstlalInspiralJob.output_path) for i, s in enumerate(svd_bank_strings)]

				# FIXME do better with svd node parents?
				noninjnode = inspiral_pipe.generic_node(gstlalInspiralJob, dag, parent_nodes = sum(svd_nodes.values(), []),
					opts = {"psd-fft-length":options.psd_fft_length,
						"ht-gate-threshold":options.ht_gate_threshold,
						"frame-segments-name":options.frame_segments_name,
						"gps-start-time":seg[0].seconds,
						"gps-end-time":seg[1].seconds,
						"channel-name":datasource.pipeline_channel_list_from_channel_dict(this_channel_dict),
						"svd-bank":svd_bank_strings,
						"tmp-space":inspiral_pipe.condor_scratch_space(),
						"track-psd":"",
						"control-peak-time":options.control_peak_time,
						"coincidence-threshold":options.coincidence_threshold,
						"fir-stride":options.fir_stride,
						"data-source":"frames",
						"local-frame-caching":""},
					input_files = {"time-slide-file":options.time_slide_file,
						"frame-cache":options.frame_cache,
						"frame-segments-file":options.frame_segments_file,
						"reference-psd":psd_nodes[(ifos, seg)].output_files["write-psd"],
						"blind-injections":options.blind_injections,
						"veto-segments-file":options.vetoes},
					output_files = {"output":output_names,
						"likelihood-file":dist_stat_names}
					)
				# Set a post script to check for file integrity
				noninjnode.set_post_script("gzip_test.sh")
				noninjnode.add_post_script_arg(" ".join(output_names + dist_stat_names))
				# impose a priority to help with depth first submission
				noninjnode.set_priority(chunk_counter)
				inspiral_nodes[(ifos, None)].setdefault(seg, []).append(noninjnode)

			for chunk_counter, svd_bank_strings in enumerate(chunks(svd_bank_strings_full, numchunks)):
				for injections in options.injections:
					sim_name = sim_tag_from_inj_file(injections)
					output_names = [inspiral_pipe.T050017_filename(ifos, '%d_LLOID_%s' % (i + numchunks * chunk_counter, sim_name), seg[0].seconds, seg[1].seconds, '.xml.gz', path = gstlalInspiralInjJob.output_path) for i, s in enumerate(svd_bank_strings)]
					dist_stat_names = [inspiral_pipe.T050017_filename(ifos, '%d_DIST_STATS_%s' % (i + numchunks * chunk_counter, sim_name), seg[0].seconds, seg[1].seconds, '.xml.gz', path = gstlalInspiralInjJob.output_path) for i, s in enumerate(svd_bank_strings)]

					# setup injection node
					injnode = inspiral_pipe.generic_node(gstlalInspiralInjJob, dag, parent_nodes = sum(svd_nodes.values(), []),
						opts = {"psd-fft-length":options.psd_fft_length,
							"ht-gate-threshold":options.ht_gate_threshold,
							"frame-segments-name":options.frame_segments_name,
							"gps-start-time":seg[0].seconds,
							"gps-end-time":seg[1].seconds,
							"channel-name":datasource.pipeline_channel_list_from_channel_dict(this_channel_dict),
							"svd-bank":svd_bank_strings,
							"tmp-space":inspiral_pipe.condor_scratch_space(),
							"track-psd":"",
							"control-peak-time":options.control_peak_time,
							"coincidence-threshold":options.coincidence_threshold,
							"fir-stride":options.fir_stride,
							"data-source":"frames",
							"local-frame-caching":""},
						input_files = {"frame-cache":options.frame_cache,
							"frame-segments-file":options.frame_segments_file,
							"reference-psd":psd_nodes[(ifos, seg)].output_files["write-psd"],
							"veto-segments-file":options.vetoes,
							"injections": injections},
						output_files = {"output":output_names,
							"likelihood-file":dist_stat_names}
						)
					# Set a post script to check for file integrity
					injnode.set_post_script("gzip_test.sh")
					injnode.add_post_script_arg(" ".join(output_names))
					# impose a priority to help with depth first submission
					injnode.set_priority(chunk_counter)
					inspiral_nodes[(ifos, sim_name)].setdefault(seg, []).append(injnode)

	return inspiral_nodes
def adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict):
	"""Reorganize the inspiral job outputs by mass-space chunk.

	Returns (lloid_output, lloid_diststats): lloid_output maps sim tag (or
	None) -> {(chunk, index): [(file, [parent nodes]), ...]}; lloid_diststats
	maps (chunk, index) -> [likelihood files] from the non-injection jobs.
	"""
	# first get the previous output in a usable form
	lloid_output = {}
	for inj in options.injections + [None]:
		lloid_output[sim_tag_from_inj_file(inj)] = {}
	lloid_diststats = {}
	for ifos in segsdict:
		for seg in segsdict[ifos]:
			# iterate over the mass space chunks for each segment
			for j, node in enumerate(inspiral_nodes[(ifos, None)][seg]):
				len_out_files = len(node.output_files["output"])
				for i, f in enumerate(node.output_files["output"]):
					# Store the output files and the node for use as a parent dependency
					lloid_output[None].setdefault((j, i), []).append((f, [node]))
				for i, f in enumerate(node.output_files["likelihood-file"]):
					lloid_diststats.setdefault((j, i), []).append(f)
				for inj in options.injections:
					# NOTE This assumes that injection jobs
					# and non injection jobs are 1:1 in
					# terms of the mass space they cover,
					# e.g., that the chunks are the same!
					injnode = inspiral_nodes[(ifos, sim_tag_from_inj_file(inj))][seg][j]
					for i, f in enumerate(injnode.output_files["output"]):
						# Store the output files and the node and injnode for use as a parent dependencies
						lloid_output[sim_tag_from_inj_file(inj)].setdefault((j, i), []).append((f, [node, injnode]))

	return lloid_output, lloid_diststats
def rank_and_merge(dag, createPriorDistStatsJob, calcRankPDFsJob, calcLikelihoodJob, calcLikelihoodJobInj, lalappsRunSqliteJob, toSqliteJob, inspiral_nodes, lloid_output, lloid_diststats, segsdict, options, boundary_seg, instrument_set, snrpdfnode):
	"""Add the prior/rank-PDF/likelihood-assignment jobs, then the
	cluster-and-merge sqlite jobs, per sub bank and per injection run.

	Returns (rankpdf_nodes, outnodes) where outnodes maps sim tag (or None)
	-> list of final clustered-database nodes.
	"""
	likelihood_nodes = {}
	rankpdf_nodes = []
	priornodes = []
	outnodes = {}
	instruments = "".join(sorted(instrument_set))

	# first non-injections
	for n, (outputs, diststats) in enumerate((lloid_output[None][key], lloid_diststats[key]) for key in sorted(lloid_output[None].keys())):
		inputs = [o[0] for o in outputs]
		parents = []
		[parents.extend(o[1]) for o in outputs]
		# FIXME we keep this here in case we someday want to have a
		# mass bin dependent prior, but it really doesn't matter for
		# the time being
		priornode = inspiral_pipe.generic_node(createPriorDistStatsJob, dag,
			parent_nodes = parents,
			opts = {"instrument":instrument_set, "synthesize-injection-count":10000000, "background-prior":1},
			output_files = {"write-likelihood":inspiral_pipe.T050017_filename(instruments, '%d_CREATE_PRIOR_DIST_STATS' % (n,), boundary_seg[0].seconds, boundary_seg[1].seconds, '.xml.gz', path = createPriorDistStatsJob.output_path)}
			)
		calcranknode = inspiral_pipe.generic_node(calcRankPDFsJob, dag,
			parent_nodes = [priornode, snrpdfnode],
			input_files = {"":diststats + [priornode.output_files["write-likelihood"], snrpdfnode.output_files["write-likelihood"]]}, #FIXME is this right, do I just add the output of the calc prior job?
			output_files = {"output":inspiral_pipe.T050017_filename(instruments, '%d_CALC_RANK_PDFS' % (n,), boundary_seg[0].seconds, boundary_seg[1].seconds, '.xml.gz', path = calcRankPDFsJob.output_path)}
			)
		priornodes.append(priornode)
		rankpdf_nodes.append(calcranknode)

		# Break up the likelihood jobs into chunks to process fewer files, e.g, 25
		likelihood_nodes.setdefault(None, []).append(
			[inspiral_pipe.generic_node(calcLikelihoodJob, dag,
				parent_nodes = [priornode, snrpdfnode] + parents, # add parents here in case a gstlal inspiral job's trigger file is corrupted - then we can just mark that job as not done and this job will rerun.
				opts = {"tmp-space":inspiral_pipe.condor_scratch_space()},
				input_files = {"likelihood-url":diststats +
					[priornode.output_files["write-likelihood"],
					snrpdfnode.output_files["write-likelihood"]], "":chunked_inputs}
				) for chunked_inputs in chunks(inputs, 25)]
			)

	# then injections
	for inj in options.injections:
		for n, (outputs, diststats) in enumerate((lloid_output[sim_tag_from_inj_file(inj)][key], lloid_diststats[key]) for key in sorted(lloid_output[None].keys())):
			inputs = [o[0] for o in outputs]
			parents = []
			[parents.extend(o[1]) for o in outputs]
			# Break up the likelihood jobs into chunks to process fewer files, e.g., 25
			likelihood_nodes.setdefault(sim_tag_from_inj_file(inj), []).append(
				[inspiral_pipe.generic_node(calcLikelihoodJobInj, dag,
					parent_nodes = parents + [priornodes[n], snrpdfnode],
					opts = {"tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"likelihood-url":diststats +
						[priornodes[n].output_files["write-likelihood"],
						snrpdfnode.output_files["write-likelihood"]], "":chunked_inputs}
					) for chunked_inputs in chunks(inputs, 25)]
				)

	# after assigning the likelihoods cluster and merge by sub bank and whether or not it was an injection run
	# 10 at a time irrespective of the sub bank they came from so the jobs take a bit longer to run
	files_to_group = 10
	for subbank, (inj, nodes) in enumerate(likelihood_nodes.items()):
		# Flatten the nodes for this sub bank
		nodes = flatten(nodes)
		merge_nodes = []
		# Flatten the input/output files from calc_likelihood
		inputs = flatten([node.input_files[""] for node in nodes])
		if inj is None:
			for n in range(0, len(inputs), files_to_group):
				merge_nodes.append(inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = nodes,
					opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":inputs[n:n+files_to_group]}
					))
			# Merging all the dbs from the same sub bank
			for subbank, inputs in enumerate([node.input_files[""] for node in nodes]):
				db = inspiral_pipe.T050017_filename(instruments, '%04d_LLOID' % (subbank,), int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')
				sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = merge_nodes,
					opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":inputs},
					output_files = {"database":db}
					)
				sqlitenode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
					opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":db}
					)
				outnodes.setdefault(None, []).append(sqlitenode)
		else:
			for n in range(0, len(inputs), files_to_group):
				merge_nodes.append(inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = nodes,
					opts = {"sql-file":options.injection_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":inputs[n:n+files_to_group]}
					))
			# Merging all the dbs from the same sub bank and injection run
			for subbank, inputs in enumerate([node.input_files[""] for node in nodes]):
				injdb = inspiral_pipe.T050017_filename(instruments, '%04d_LLOID_%s' % (subbank, sim_tag_from_inj_file(inj)), int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')
				sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = merge_nodes,
					opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":inputs},
					output_files = {"database":injdb}
					)
				sqlitenode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
					opts = {"sql-file":options.injection_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
					input_files = {"":injdb}
					)
				outnodes.setdefault(sim_tag_from_inj_file(inj), []).append(sqlitenode)

	return rankpdf_nodes, outnodes
def finalize_runs(dag, lalappsRunSqliteJob, toXMLJob, ligolwInspinjFindJob, toSqliteJob, innodes, options, instruments):
	"""Merge + cluster the per-sub-bank databases into the final ALL_LLOID
	databases (non-injection, and one per injection run with injection
	finding applied).

	NOTE(review): relies on the module-level boundary_seg global — confirm it
	is defined before this is called.

	Returns (injdbs, noninjdb, outnodes).
	"""
	if options.vetoes is None:
		vetoes = []
	else:
		vetoes = [options.vetoes]

	chunk_nodes = []
	# Process the chirp mass bins in chunks to paralellize the merging process
	for chunk, dbs in enumerate(chunks([node.input_files[""] for node in innodes[None]], 20)):
		# Merge the final non injection database into chunks
		noninjdb = inspiral_pipe.T050017_filename(instruments, 'ALL_LLOID_CHUNK_%d' % chunk, int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')
		sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = innodes[None],
			opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
			input_files = {"": dbs},
			output_files = {"database":noninjdb}
			)
		# cluster the final non injection database
		noninjsqlitenode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
			opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
			input_files = {"":noninjdb}
			)
		chunk_nodes.append(noninjsqlitenode)

	# Merge the final non injection database
	noninjdb = inspiral_pipe.T050017_filename(instruments, 'ALL_LLOID', int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')
	sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = chunk_nodes,
		opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
		input_files = {"": ([node.input_files[""] for node in chunk_nodes] + vetoes + [options.frame_segments_file])},
		output_files = {"database":noninjdb}
		)
	# cluster the final non injection database
	noninjsqlitenode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
		opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
		input_files = {"":noninjdb}
		)

	outnodes = [noninjsqlitenode]
	injdbs = []

	for injections, far_injections in zip(options.injections, options.far_injections):

		# extract only the nodes that were used for injections
		thisinjnodes = innodes[sim_tag_from_inj_file(injections)]

		chunk_nodes = []
		for chunk, dbs in enumerate(chunks([node.input_files[""] for node in thisinjnodes], 20)):

			# Setup the chunk output names, etc.
			injdb = inspiral_pipe.T050017_filename(instruments, 'ALL_LLOID_CHUNK_%d_%s' % (chunk, sim_tag_from_inj_file(injections)), int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')

			sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = thisinjnodes,
				opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
				input_files = {"":dbs},
				output_files = {"database":injdb}
				)
			clusternode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
				opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
				input_files = {"":injdb}
				)
			chunk_nodes.append(clusternode)

		# Setup the final output names, etc.
		injdb = inspiral_pipe.T050017_filename(instruments, 'ALL_LLOID_%s' % sim_tag_from_inj_file(injections), int(boundary_seg[0]), int(boundary_seg[1]), '.sqlite')
		injdbs.append(injdb)
		injxml = os.path.splitext(injdb)[0] + ".xml.gz"

		# If there are injections that are too far away to be seen in a separate file, add them now.
		if far_injections is not None:
			xml_input = [injxml] + [far_injections]
		else:
			xml_input = injxml

		sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = chunk_nodes,
			opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
			input_files = {"": ([node.input_files[""] for node in chunk_nodes] + vetoes + [options.frame_segments_file, injections])},
			output_files = {"database":injdb}
			)
		clusternode = inspiral_pipe.generic_node(lalappsRunSqliteJob, dag, parent_nodes = [sqlitenode],
			opts = {"sql-file":options.cluster_sql_file, "tmp-space":inspiral_pipe.condor_scratch_space()},
			input_files = {"":injdb}
			)
		clusternode = inspiral_pipe.generic_node(toXMLJob, dag, parent_nodes = [clusternode],
			opts = {"tmp-space":inspiral_pipe.condor_scratch_space()},
			output_files = {"extract":injxml},
			input_files = {"database":injdb}
			)
		inspinjnode = inspiral_pipe.generic_node(ligolwInspinjFindJob, dag, parent_nodes = [clusternode],
			opts = {"time-window":0.9},
			input_files = {"":injxml}
			)
		sqlitenode = inspiral_pipe.generic_node(toSqliteJob, dag, parent_nodes = [inspinjnode],
			opts = {"replace":"", "tmp-space":inspiral_pipe.condor_scratch_space()},
			output_files = {"database":injdb},
			input_files = {"":xml_input}
			)
		outnodes.append(sqlitenode)

	return injdbs, noninjdb, outnodes
def compute_FAP(marginalizeJob, gstlalInspiralComputeFarFromSnrChisqHistogramsJob, dag, rankpdf_nodes, injdbs, noninjdb, final_sqlite_nodes):
	"""Add the likelihood-marginalization jobs and the final FAP/FAR
	assignment job; returns the FAR node.
	"""
	# compute FAPs and FARs
	# split up the marginalization into groups of 10
	margnum = 10
	margin = [node.output_files["output"] for node in rankpdf_nodes]
	margout = []
	margnodes = []
	for i, n in enumerate(range(0, len(margin), margnum)):
		margout.append("%d_marginalized_likelihood.xml.gz" % (i,))
		margnodes.append(inspiral_pipe.generic_node(marginalizeJob, dag, parent_nodes = final_sqlite_nodes + rankpdf_nodes,
			output_files = {"output":margout[-1]},
			input_files = {"":margin[n:n+margnum]}
			))
	# marginalize the groups together into the final likelihood file
	margnode = inspiral_pipe.generic_node(marginalizeJob, dag, parent_nodes = margnodes,
		output_files = {"output":"marginalized_likelihood.xml.gz"},
		input_files = {"":margout}
		)
	farnode = inspiral_pipe.generic_node(gstlalInspiralComputeFarFromSnrChisqHistogramsJob, dag, parent_nodes = [margnode],
		opts = {"tmp-space":inspiral_pipe.condor_scratch_space()},
		input_files = {"background-bins-file":"marginalized_likelihood.xml.gz", "injection-db":injdbs, "non-injection-db":noninjdb}
		)
	return farnode
def parse_command_line():
	"""Parse and validate the command line; returns (options, filenames).

	Raises ValueError for an odd --overlap, a missing --bank-cache, or a
	--far-injections list that is not 1:1 with --injections.
	"""
	parser = OptionParser(description = __doc__)

	# generic data source options
	datasource.append_options(parser)
	parser.add_option("--psd-fft-length", metavar = "s", default = 16, type = "int", help = "FFT length, default 16s")
	parser.add_option("--reference-psd", help = "Don't measure PSDs, use this one instead")

	# SVD bank construction options
	parser.add_option("--overlap", metavar = "num", type = "int", default = 0, help = "set the factor that describes the overlap of the sub banks, must be even!")
	parser.add_option("--autocorrelation-length", type = "int", default = 201, help = "The minimum number of samples to use for auto-chisquared, default 201 should be odd")
	parser.add_option("--samples-min", type = "int", default = 1024, help = "The minimum number of samples to use for time slices default 1024")
	parser.add_option("--samples-max-256", type = "int", default = 1024, help = "The maximum number of samples to use for time slices with frequencies above 256Hz, default 1024")
	parser.add_option("--samples-max-64", type = "int", default = 2048, help = "The maximum number of samples to use for time slices with frequencies above 64Hz, default 2048")
	parser.add_option("--samples-max", type = "int", default = 4096, help = "The maximum number of samples to use for time slices with frequencies below 64Hz, default 4096")
	parser.add_option("--bank-cache", metavar = "filenames", help = "Set the bank cache files in format H1=H1.cache,H2=H2.cache, etc..")
	parser.add_option("--tolerance", metavar = "float", type = "float", default = 0.9999, help = "set the SVD tolerance, default 0.9999")
	parser.add_option("--flow", metavar = "num", type = "float", default = 40, help = "set the low frequency cutoff, default 40 (Hz)")
	parser.add_option("--identity-transform", action = "store_true", help = "Use identity transform, i.e. no SVD")

	# trigger generation options
	parser.add_option("--vetoes", metavar = "filename", help = "Set the veto xml file.")
	parser.add_option("--time-slide-file", metavar = "filename", help = "Set the time slide table xml file")
	parser.add_option("--web-dir", metavar = "directory", help = "Set the web directory like /home/USER/public_html")
	parser.add_option("--fir-stride", type = "int", metavar = "secs", default = 8, help = "Set the duration of the fft output blocks, default 8")
	parser.add_option("--control-peak-time", type = "int", default = 8, metavar = "secs", help = "Set the peak finding time for the control signal, default 8")
	parser.add_option("--coincidence-threshold", metavar = "value", type = "float", default = 0.005, help = "Set the coincidence window in seconds (default = 0.005). The light-travel time between instruments will be added automatically in the coincidence test.")
	parser.add_option("--num-banks", metavar = "str", help = "The number of parallel subbanks per gstlal_inspiral job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.")
	parser.add_option("--max-inspiral-jobs", type = "int", metavar = "jobs", help = "Set the maximum number of gstlal_inspiral jobs to run simultaneously, default no constraint.")
	parser.add_option("--ht-gate-threshold", type = "float", help = "set a threshold on whitened h(t) to veto glitches")
	parser.add_option("--inspiral-executable", default = "gstlal_inspiral", help = "Options gstlal_inspiral | gstlal_iir_inspiral, default gstlal_inspiral")
	parser.add_option("--blind-injections", metavar = "filename", help = "Set the name of an injection file that will be added to the data without saving the sim_inspiral table or otherwise processing the data differently. Has the effect of having hidden signals in the input data. Separate injection runs using the --injections option will still occur.")
	parser.add_option("--far-injections", action = "append", help = "Injection files with injections too far away to be seen and are not filtered. Must be 1:1 with --injections if given at all. See https://www.lsc-group.phys.uwm.edu/ligovirgo/cbcnote/NSBH/MdcInjections/MDC1 for example.")
	parser.add_option("--verbose", action = "store_true", help = "Be verbose")

	# Override the datasource injection option
	parser.remove_option("--injections")
	parser.add_option("--injections", action = "append", help = "append injection files to analyze")

	options, filenames = parser.parse_args()

	options.num_banks = [int(v) for v in options.num_banks.split(",")]

	if options.overlap % 2:
		raise ValueError("overlap must be even")

	fail = ""
	for option in ("bank_cache",):
		if getattr(options, option) is None:
			fail += "must provide option %s\n" % (option)
	if fail:
		raise ValueError(fail)

	if options.far_injections is not None and len(options.injections) != len(options.far_injections):
		raise ValueError("number of injection files and far injection files must be equal")
	if options.far_injections is None:
		# pad with Nones so later zip(options.injections, options.far_injections) works
		options.far_injections = [None for inj in options.injections]

	#FIXME a hack to find the sql paths
	share_path = os.path.split(inspiral_pipe.which('gstlal_inspiral'))[0].replace('bin', 'share/gstlal')
	options.cluster_sql_file = os.path.join(share_path, 'simplify_and_cluster.sql')
	options.injection_sql_file = os.path.join(share_path, 'inj_simplify_and_cluster.sql')

	return options, filenames
options, filenames = parse_command_line()

# Build the data-source description from the command line and pull out the
# pieces the rest of the workflow needs.
detectors = datasource.GWDataSourceInfo(options)
channel_dict = detectors.channel_dict
# NOTE(review): bank_cache is bound in a portion of the file lost from this
# chunk (presumably parsed from options.bank_cache) -- confirm upstream.
instruments = "".join(sorted(bank_cache.keys()))
instrument_set = bank_cache.keys()
boundary_seg = detectors.seg
dag = inspiral_pipe.DAG("trigger_pipe")

# Optionally throttle how many gstlal_inspiral jobs condor runs at once.
if options.max_inspiral_jobs is not None:
	dag.add_maxjobs_category("INSPIRAL", options.max_inspiral_jobs)

#
# Make an xml integrity checker
#

# Use a context manager so the script is flushed and closed before chmod runs
# (the original left the file handle open).
with open("gzip_test.sh", "w") as f:
	f.write("#!/bin/bash\nsleep 60\ngzip --test $@")
# world read/execute, owner write: this script is invoked by condor jobs
os.chmod("gzip_test.sh", stat.S_IRUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH | stat.S_IWUSR)
#
# setup the job classes
#

# One generic_job per executable; condor resource requests are attached here.
refPSDJob = inspiral_pipe.generic_job("gstlal_reference_psd", condor_commands = {"request_memory": "2GB", "request_cpus": "2"})
medianPSDJob = inspiral_pipe.generic_job("gstlal_median_of_psds", condor_commands = {"request_memory": "2GB"})
svdJob = inspiral_pipe.generic_job("gstlal_svd_bank", condor_commands = {"request_memory": "2GB"})
horizonJob = inspiral_pipe.generic_job("gstlal_plot_psd_horizon", condor_commands = {"request_memory": "2GB"})
# The filtering jobs are the heavy ones: 15 GB / 8 cores each.
gstlalInspiralJob = inspiral_pipe.generic_job(options.inspiral_executable, condor_commands = {"request_memory": "15GB", "request_cpus": "8"})
gstlalInspiralInjJob = inspiral_pipe.generic_job(options.inspiral_executable, tag_base = "gstlal_inspiral_inj", condor_commands = {"request_memory": "15GB", "request_cpus": "8"})
createPriorDistStatsJob = inspiral_pipe.generic_job("gstlal_inspiral_create_prior_diststats", condor_commands = {"request_memory": "2GB"})
calcRankPDFsJob = inspiral_pipe.generic_job("gstlal_inspiral_calc_rank_pdfs", condor_commands = {"request_memory": "2GB", "request_cpus": "3"})
calcLikelihoodJob = inspiral_pipe.generic_job("gstlal_inspiral_calc_likelihood", condor_commands = {"request_memory": "2GB"})
calcLikelihoodJobInj = inspiral_pipe.generic_job("gstlal_inspiral_calc_likelihood", tag_base = 'gstlal_inspiral_calc_likelihood_inj', condor_commands = {"request_memory": "2GB"})
gstlalInspiralComputeFarFromSnrChisqHistogramsJob = inspiral_pipe.generic_job("gstlal_compute_far_from_snr_chisq_histograms", condor_commands = {"request_memory": "2GB"})
ligolwInspinjFindJob = inspiral_pipe.generic_job("ligolw_inspinjfind", condor_commands = {"request_memory": "2GB"})
# ligolw_sqlite is used in both directions; distinguish the sub files by tag.
toSqliteJob = inspiral_pipe.generic_job("ligolw_sqlite", tag_base = "ligolw_sqlite_from_xml", condor_commands = {"request_memory": "2GB"})
toXMLJob = inspiral_pipe.generic_job("ligolw_sqlite", tag_base = "ligolw_sqlite_to_xml", condor_commands = {"request_memory": "2GB"})
lalappsRunSqliteJob = inspiral_pipe.generic_job("lalapps_run_sqlite", condor_commands = {"request_memory": "2GB"})
plotSummaryJob = inspiral_pipe.generic_job("gstlal_inspiral_plotsummary", condor_commands = {"request_memory": "2GB"})
plotIndividualInjectionsSummaryJob = inspiral_pipe.generic_job("gstlal_inspiral_plotsummary", tag_base = "gstlal_inspiral_plotsummary_inj", condor_commands = {"request_memory": "2GB"})
plotSensitivityJob = inspiral_pipe.generic_job("gstlal_inspiral_plot_sensitivity", condor_commands = {"request_memory": "2GB"})
openpageJob = inspiral_pipe.generic_job("gstlal_inspiral_summary_page", tag_base = 'gstlal_inspiral_summary_page_open', condor_commands = {"request_memory": "2GB"})
pageJob = inspiral_pipe.generic_job("gstlal_inspiral_summary_page", condor_commands = {"request_memory": "2GB"})
marginalizeJob = inspiral_pipe.generic_job("gstlal_inspiral_marginalize_likelihood", condor_commands = {"request_memory": "2GB"})
plotbackgroundJob = inspiral_pipe.generic_job("gstlal_inspiral_plot_background", condor_commands = {"request_memory": "2GB"})
#
# Get the analysis segments
#

segsdict = analysis_segments(set(bank_cache.keys()), detectors.frame_segments, boundary_seg, get_max_length(bank_cache))

# NOTE(review): the "median_psd_node =" assignment target and the "else:" of
# this branch were lost from the damaged source and are reconstructed here
# from how the names are used below -- confirm against upstream.
if options.reference_psd is None:
	# Compute the PSDs for each segment
	psd_nodes = psd_node_gen(refPSDJob, dag, [], segsdict, channel_dict, options)

	# plot the horizon distance
	inspiral_pipe.generic_node(horizonJob, dag,
		parent_nodes = psd_nodes.values(),
		input_files = {"": [node.output_files["write-psd"] for node in psd_nodes.values()]},
		output_files = {"": inspiral_pipe.T050017_filename(instruments, "HORIZON", boundary_seg[0].seconds, boundary_seg[1].seconds, '.png', path = output_dir)}
	)

	# compute the median PSD
	median_psd_node = inspiral_pipe.generic_node(medianPSDJob, dag,
		parent_nodes = psd_nodes.values(),
		input_files = {"": [node.output_files["write-psd"] for node in psd_nodes.values()]},
		output_files = {"output-name": inspiral_pipe.T050017_filename(instruments, "REFERENCE_PSD", boundary_seg[0].seconds, boundary_seg[1].seconds, '.xml.gz', path = medianPSDJob.output_path)}
	)

	ref_psd = median_psd_node.output_files["output-name"]
	ref_psd_parent_nodes = [median_psd_node]

	# NOTE: compute just the SNR pdf cache here, set other features to 0
	snrpdfnode = inspiral_pipe.generic_node(createPriorDistStatsJob, dag,
		parent_nodes = ref_psd_parent_nodes,
		opts = {"instrument": instrument_set, "synthesize-injection-count": 0, "background-prior": 0},
		input_files = {"": ref_psd},
		output_files = {"write-likelihood": inspiral_pipe.T050017_filename(instruments, 'SNR_PDFS_CREATE_PRIOR_DIST_STATS', boundary_seg[0].seconds, boundary_seg[1].seconds, '.xml.gz', path = createPriorDistStatsJob.output_path)}
	)
else:
	ref_psd = lalseries.read_psd_xmldoc(ligolw_utils.load_filename(options.reference_psd, verbose = options.verbose, contenthandler = LIGOLWContentHandler))

	# NOTE: compute just the SNR pdf cache here, set other features to 0
	# NOTE: This will likely result in downstream codes needing to compute
	# more SNR PDFS, since in this codepath only an average spectrum is
	# available
	snrpdfnode = inspiral_pipe.generic_node(createPriorDistStatsJob, dag,
		opts = {"instrument": instrument_set, "synthesize-injection-count": 0, "background-prior": 0},
		input_files = {"": options.reference_psd},
		output_files = {"write-likelihood": inspiral_pipe.T050017_filename(instruments, 'SNR_PDFS_CREATE_PRIOR_DIST_STATS', boundary_seg[0].seconds, boundary_seg[1].seconds, '.xml.gz', path = createPriorDistStatsJob.output_path)}
	)
	# no PSD-measurement jobs to wait for in this codepath
	ref_psd_parent_nodes = []
# Build the SVD banks from the reference PSD and the grouped template banks.
svd_nodes = svd_node_gen(svdJob, dag, ref_psd_parent_nodes, ref_psd, inspiral_pipe.build_bank_groups(bank_cache, options.num_banks), options, boundary_seg)

#
# Inspiral jobs by segment
#

inspiral_nodes = inspiral_node_gen(gstlalInspiralJob, gstlalInspiralInjJob, dag, svd_nodes, segsdict, options, channel_dict)

#
# Adapt the output of the gstlal_inspiral jobs to be suitable for the remainder of this analysis
#

lloid_output, lloid_diststats = adapt_gstlal_inspiral_output(inspiral_nodes, options, segsdict)

#
# Setup likelihood jobs, clustering and merging
#

rankpdf_nodes, outnodes = rank_and_merge(dag, createPriorDistStatsJob, calcRankPDFsJob, calcLikelihoodJob, calcLikelihoodJobInj, lalappsRunSqliteJob, toSqliteJob, inspiral_nodes, lloid_output, lloid_diststats, segsdict, options, boundary_seg, instrument_set, snrpdfnode)

#
# after all of the likelihood ranking and preclustering is finished put
# everything into single databases based on the injection file (or lack
# thereof)
#

injdbs, noninjdb, final_sqlite_nodes = finalize_runs(dag, lalappsRunSqliteJob, toXMLJob, ligolwInspinjFindJob, toSqliteJob, outnodes, options, instruments)

# Compute false-alarm probabilities once everything is merged.
farnode = compute_FAP(marginalizeJob, gstlalInspiralComputeFarFromSnrChisqHistogramsJob, dag, rankpdf_nodes, injdbs, noninjdb, final_sqlite_nodes)
#
# Make summary plots
#

# NOTE(review): the "plotnodes = []" initialization and the "for injdb in
# injdbs:" loop headers fell in gaps of the damaged source; they are
# reconstructed here from how plotnodes and injdb are used -- confirm
# against upstream.
plotnodes = []

# summary plots over all injection runs combined
plotnodes.append(inspiral_pipe.generic_node(plotSummaryJob, dag, parent_nodes = [farnode],
	opts = {"segments-name": options.frame_segments_name, "tmp-space": inspiral_pipe.condor_scratch_space(), "user-tag": "ALL_LLOID_COMBINED", "output-dir": output_dir},
	input_files = {"": [noninjdb] + injdbs}
))

# one summary-plot job per injection run; the user tag is derived from the
# injection database's T050017 file name
for injdb in injdbs:
	plotnodes.append(inspiral_pipe.generic_node(plotIndividualInjectionsSummaryJob, dag, parent_nodes = [farnode],
		opts = {"segments-name": options.frame_segments_name, "tmp-space": inspiral_pipe.condor_scratch_space(), "user-tag": injdb.replace(".sqlite", "").split("-")[1], "plot-group": 1, "output-dir": output_dir},
		input_files = {"": [noninjdb] + [injdb]}
	))

# make sensitivity plots
plotnodes.append(inspiral_pipe.generic_node(plotSensitivityJob, dag, parent_nodes = [farnode],
	opts = {"user-tag": "ALL_LLOID_COMBINED", "output-dir": output_dir, "tmp-space": inspiral_pipe.condor_scratch_space(), "veto-segments-name": "vetoes", "bin-by-total-mass": "", "bin-by-mass1-mass2": "", "bin-by-chirp-mass": "", "bin-by-mass-ratio": "", "bin-by-aligned-spin": "", "include-play": "", "dist-bins": 200, "data-segments-name": "datasegments"},
	input_files = {"zero-lag-database": noninjdb, "": injdbs}
))

# per-injection-run sensitivity plots
for injdb in injdbs:
	plotnodes.append(inspiral_pipe.generic_node(plotSensitivityJob, dag, parent_nodes = [farnode],
		opts = {"user-tag": injdb.replace(".sqlite", "").split("-")[1], "output-dir": output_dir, "tmp-space": inspiral_pipe.condor_scratch_space(), "veto-segments-name": "vetoes", "bin-by-total-mass": "", "bin-by-mass1-mass2": "", "bin-by-chirp-mass": "", "bin-by-mass-ratio": "", "bin-by-aligned-spin": "", "include-play": "", "dist-bins": 200, "data-segments-name": "datasegments"},
		input_files = {"zero-lag-database": noninjdb, "": injdb}
	))

# make background plots
plotnodes.append(inspiral_pipe.generic_node(plotbackgroundJob, dag, parent_nodes = [farnode],
	opts = {"user-tag": "ALL_LLOID_COMBINED", "output-dir": output_dir},
	input_files = {"": "post_marginalized_likelihood.xml.gz", "database": noninjdb}
))
#
# Summary pages: one open-box and one closed-box, built once every plot job
# has finished.  The output user tags mirror those used for the plots above.
#

user_tags = ["ALL_LLOID_COMBINED"] + [injdb.replace(".sqlite", "").split("-")[1] for injdb in injdbs]

inspiral_pipe.generic_node(openpageJob, dag, parent_nodes = plotnodes,
	opts = {"title": "gstlal-%d-%d-open-box" % (int(boundary_seg[0]), int(boundary_seg[1])),
		"webserver-dir": options.web_dir,
		"glob-path": output_dir,
		"output-user-tag": user_tags,
		"open-box": ""}
)

inspiral_pipe.generic_node(pageJob, dag, parent_nodes = plotnodes,
	opts = {"title": "gstlal-%d-%d-closed-box" % (int(boundary_seg[0]), int(boundary_seg[1])),
		"webserver-dir": options.web_dir,
		"glob-path": output_dir,
		"output-user-tag": user_tags}
)

dag.write_sub_files()