gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_inspiral_svd_bank_pipe
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2010 Kipp Cannon, Chad Hanna, Drew Keppel
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 """
20 This program makes a dag to generate svd banks
21 """
22 
23 __author__ = 'Chad Hanna <chad.hanna@ligo.org>'
24 
25 #
26 # import standard modules and append the lalapps prefix to the python path
27 #
28 
29 import sys, os, copy, math
30 import subprocess, socket, tempfile
31 
32 #
33 # import the modules we need to build the pipeline
34 #
35 
36 from glue import iterutils
37 from glue import pipeline
38 from glue import lal
39 from glue.ligolw import lsctables
40 from glue import segments
41 from optparse import OptionParser
42 from gstlal import inspiral_pipe
43 from gstlal import far
44 
45 ## @file gstlal_inspiral_svd_bank_pipe
46 # This program will make a HTCondor DAG to automate the creation of svd bank files; see gstlal_inspiral_svd_bank_pipe for more information
47 #
48 #
49 # ### Graph of the condor DAG
50 #
51 # @dot
52 # digraph G {
53 # // graph properties
54 #
55 # rankdir=LR;
56 # compound=true;
57 # node [shape=record fontsize=10 fontname="Verdana"];
58 # edge [fontsize=8 fontname="Verdana"];
59 #
60 # // nodes
61 #
62 # "gstlal_svd_bank" [URL="\ref gstlal_svd_bank"];
63 # }
64 # @enddot
65 #
66 # This DAG implements only a single job type; gstlal_svd_bank
67 #
68 # ### Usage cases
69 #
70 # - Typical usage case for H1
71 #
72 # $ gstlal_inspiral_svd_bank_pipe --autocorrelation-length 351 --instrument H1 --reference-psd reference_psd.xml --bank-cache H1_split_bank.cache --overlap 10 --flow 15 --num-banks 1 --output-name H1_bank
73 #
74 # - Please add more!
75 #
76 # ### Command line options
77 #
78 # + `--instrument` [ifo]: set the name of the instrument, required
79 # + `--reference-psd` [file]: Set the name of the reference psd file, required
80 # + `--bank-cache` [file]: Set the name of the bank cache, required
81 # + `--overlap` [int]: Set the factor that describes the overlap of the sub banks, must be even!
82 # + `--identity-transform`: Turn off the SVD and use the identity reconstruction matrix
83 # + `--autocorrelation-length` [int]: The number of samples to use for auto-chisquared, default 201; should be odd
84 # + `--samples-min` [int]: The minimum number of samples to use for time slices, default 1024
85 # + `--samples-max-256` [int]: The maximum number of samples to use for time slices with frequencies above 256Hz, default 1024
86 # + `--samples-max-64` [int]: The maximum number of samples to use for time slices with frequencies between 64Hz and 256 Hz, default 2048
87 # + `--samples-max` [int]: The maximum number of samples to use for time slices with frequencies below 64Hz, default 4096
88 # + `--tolerance` [float]: Set the SVD tolerance, default 0.9995
89 # + `--flow` [float]: Set the low frequency cutoff, default 40 (Hz)
90 # + `--num-banks` [str]: The number of banks per job. Can be given as a list like 1,2,3,4; then it will split up the bank cache into N groups with M banks each.
91 # + `--output-name` [file]: Set the base name of the output, required
92 # + `--verbose`: Be verbose.
93 #
94 # ### Review Status
95 #
96 # | Reviewers | git hash | date |
97 # | ----------------------------------- | --------------------- | ------------- |
98 # | Florent, Duncan Me., Jolien, Kipp, Chad | 004cf84d7e980eca0c7fd228c6d49cad959649c7 | 2014-04-29 |
99 #
100 
def parse_command_line():
    """
    Parse and validate the command line.

    Returns a tuple (options, filenames): the optparse options object,
    with options.num_banks converted from its comma-separated string
    form into a list of ints, and the list of leftover positional
    arguments.

    Raises ValueError if a required option is missing, if --overlap is
    odd, or if an entry of --num-banks is not an integer.
    """
    parser = OptionParser()
    parser.add_option("--instrument", help = "set the name of the instrument, required")
    parser.add_option("--reference-psd", metavar = "file", help = "Set the name of the reference psd file, required")
    parser.add_option("--bank-cache", metavar = "file", help = "Set the name of the bank cache, required")
    parser.add_option("--overlap", metavar = "num", type = "int", default = 0, help = "set the factor that describes the overlap of the sub banks, must be even!")
    parser.add_option("--identity-transform", default = False, action = "store_true", help = "turn off the SVD and use the identity reconstruction matrix")
    parser.add_option("--autocorrelation-length", type = "int", default = 201, help = "The number of samples to use for auto-chisquared, default 201 should be odd")
    parser.add_option("--samples-min", type = "int", default = 1024, help = "The minimum number of samples to use for time slices default 1024")
    parser.add_option("--samples-max-256", type = "int", default = 1024, help = "The maximum number of samples to use for time slices with frequencies above 256Hz, default 1024")
    # help text corrected: this option covers the band between the
    # --samples-max (below 64Hz) and --samples-max-256 (above 256Hz) ranges
    parser.add_option("--samples-max-64", type = "int", default = 2048, help = "The maximum number of samples to use for time slices with frequencies between 64Hz and 256Hz, default 2048")
    parser.add_option("--samples-max", type = "int", default = 4096, help = "The maximum number of samples to use for time slices with frequencies below 64Hz, default 4096")
    parser.add_option("--tolerance", metavar = "float", type = "float", default = 0.9995, help = "set the SVD tolerance, default 0.9995")
    parser.add_option("--flow", metavar = "num", type = "float", default = 40, help = "set the low frequency cutoff, default 40 (Hz)")
    parser.add_option("--output-name", help = "set the base name of the output, required")
    parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
    parser.add_option("--num-banks", metavar = "str", help = "the number of banks per job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.")
    options, filenames = parser.parse_args()

    # Fail early, with a message naming the option, instead of letting a
    # None value surface later as an unrelated error (--num-banks used
    # to die with AttributeError on None.split).
    required = ("instrument", "reference_psd", "bank_cache", "output_name", "num_banks")
    missing = ["--%s" % name.replace("_", "-") for name in required if getattr(options, name) is None]
    if missing:
        raise ValueError("missing required option(s): %s" % ", ".join(missing))

    if options.overlap % 2:
        raise ValueError("overlap must be even")

    # "1,2,3" -> [1, 2, 3]; int() raises ValueError on a malformed entry
    options.num_banks = [int(s) for s in options.num_banks.split(",")]

    return options, filenames
126 
127 
128 #
129 # get input arguments
130 #
131 
132 
options, filenames = parse_command_line()
ifo = options.instrument
ref_psd = options.reference_psd


#
# Do some setup
#


# Create the "logs" directory.  An already-existing directory is fine,
# but any other failure (permissions, a regular file named "logs", ...)
# is re-raised instead of being silently swallowed.
try:
    os.mkdir("logs")
except OSError:
    if not os.path.isdir("logs"):
        raise
dag = inspiral_pipe.DAG(options.output_name)
svdJob = inspiral_pipe.generic_job("gstlal_svd_bank", tag_base = "gstlal_svd_bank_%s" % ifo, condor_commands = {"request_memory":"1999"})
# Assumes cache is sorted by chirpmass or whatever the SVD sorting algorithm that was chosen
# Read one cache entry per line, closing the file once all paths are collected.
with open(options.bank_cache) as cache_file:
    files = [lal.CacheEntry(line).path for line in cache_file]
151 
152 
153 #
154 # loop over files to set up svd bank jobs
155 #
156 
# Split the bank files into groups; one gstlal_svd_bank node is created
# per group.
groups = list(inspiral_pipe.group(files, options.num_banks))
# Number of templates clipped from each side of a sub bank.  Floor
# division keeps this an int under true division as well; the overlap
# has already been checked to be even, so nothing is lost.
half_overlap = options.overlap // 2
# Seed so that the first bank id handed out is 1.
bank_ids = [0]
for i, f in enumerate(groups):
    # handle the edges by not clipping so you retain the template bank as intended.
    clipleft = [half_overlap] * len(f)
    clipright = [half_overlap] * len(f)
    if i == 0:
        clipleft[0] = 0
    if i == len(groups) - 1:
        clipright[-1] = 0
    # Bank ids continue from the last id of the previous group.  Build a
    # real list so the value is the same kind of object on every Python
    # version.
    bank_ids = list(range(bank_ids[-1] + 1, bank_ids[-1] + 1 + len(f)))
    svd_bank_name = inspiral_pipe.T050017_filename(ifo, "GSTLAL_SVD_BANK_%d" % i, 0, 0, ".xml.gz", path = svdJob.output_path)
    svd_bank_name = os.path.join(os.getcwd(), svd_bank_name)
    # Record the output file in the DAG's output cache.
    dag.output_cache.append(lal.CacheEntry(ifo, "GSTLAL_SVD_BANK_%d" % i, segments.segment(0, 0), "file://localhost%s" % (svd_bank_name,)))

    svdNode = inspiral_pipe.generic_node(svdJob, dag, [],
        opts = {"snr-threshold":far.ThincaCoincParamsDistributions.snr_min,
            "flow":options.flow,
            "svd-tolerance":options.tolerance,
            "ortho-gate-fap":0.5,
            "samples-min":options.samples_min,
            "samples-max":options.samples_max,
            "samples-max-64":options.samples_max_64,
            "samples-max-256":options.samples_max_256,
            "clipleft":clipleft,
            "clipright":clipright,
            "autocorrelation-length":options.autocorrelation_length,
            "bank-id":bank_ids
            },
        input_files = {
            "template-bank":f,
            "reference-psd":ref_psd
            },
        output_files = {
            "write-svd-bank":svd_bank_name
            }
        )
    if options.identity_transform:
        svdNode.add_var_arg("--identity-transform")
196 
197 
198 #
199 # Write out the dag files
200 #
201 
202 
# Write the DAG's outputs in a fixed order: the sub files, the dag, the
# script and finally the cache.  Each writer is looked up only when its
# turn comes, exactly as with four consecutive calls.
for writer_name in ("write_sub_files", "write_dag", "write_script", "write_cache"):
    getattr(dag, writer_name)()