gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_bank_splitter
1 #! /usr/bin/env python
2 #
3 # Copyright (C) 2012 Stephen Privitera
4 # Copyright (C) 2011-2014 Chad Hanna
5 # Copyright (C) 2010 Melissa Frei
6 #
7 # This program is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by the
9 # Free Software Foundation; either version 2 of the License, or (at your
10 # option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
15 # Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21 
22 import itertools
23 import math
24 import os
25 import sys
26 from optparse import OptionParser
27 from pylal import spawaveform
28 from glue.ligolw import ligolw
29 from glue.ligolw import lsctables
30 from glue.ligolw import utils as ligolw_utils
31 from glue.ligolw.utils import process as ligolw_process
32 from lal import MSUN_SI
33 from glue import lal as gluelal
34 from pylal.datatypes import LIGOTimeGPS
35 from gstlal import templates
36 from gstlal import inspiral_pipe
37 from gstlal import chirptime
38 
39 ## @file gstlal_bank_splitter
40 #
41 # This program splits template banks into sub banks suitable for singular value decomposition; see gstlal_bank_splitter for more information
42 #
43 # ### Usage examples
44 #
45 # - split up bank file for H1; sort by mchirp; add final frequency and specify a maximum frequency
46 #
47 # $ gstlal_bank_splitter --overlap 10 --instrument H1 --n 100 --sort-by mchirp --add-f-final --max-f-final 2048 H1-TMPLTBANK-871147516-2048.xml
48 #
49 # - Please add more!
50 #
51 # ### Command line interface
52 #
53 # + `--output-path` [path]: Set the path to the directory where output files will be written. Default is "."
54 # + `--output-cache` [file]: Set the file name for the output cache.
55 # + `--n` [count] (int): Set the number of templates per output file (required). It will be rounded to make all sub banks approximately the same size.
56 # + `--overlap` [count] (int): Overlap the templates in each file by this amount, must be even.
57 # + `--sort-by` [column]: Select the template sort order column (required).
58 # + `--add-f-final`: Select whether to add f_final to the bank.
59 # + `--max-f-final` [max final freq] (float): Max f_final to populate table with; if f_final > max, use max.
60 # + `--instrument` [ifo]: Override the instrument, required
61 # + `--bank-program` [name]: Select name of the program used to generate the template bank (default: tmpltbank).
62 # + `--verbose`: Be verbose.
63 # + `--approximant` [string]: Must specify an approximant
64 # + `--f-low` [frequency] (floate): Lower frequency cutoff
65 # + `--group-by-chi`: group templates into chi bins 0.1 in chi - helps with SVD.
66 #
67 # ### Review status
68 #
69 # Compared original bank with the split banks. Verified that they are the same, e.g., add sub bank files into test.xml.gz and run (except that lalapps_tmpltbank adds redundant templates):
70 #
71 # ligolw_print -t sngl_inspiral -c mass1 -c mass2 ../H1-TMPLTBANK-871147516-2048.xml | sort -u | wc
72 # ligolw_print -t sngl_inspiral -c mass1 -c mass2 test.xml.gz | sort -u | wc
73 #
74 # | Names | Hash | Date |
75 # | ------------------------------------------- | ------------------------------------------- | ---------- |
76 # | Florent, Sathya, Duncan Me., Jolien, Kipp, Chad | 7536db9d496be9a014559f4e273e1e856047bf71 | 2014-04-28 |
77 #
78 # #### Action
79 #
80 # - Consider cleanup once additional bank programs are used and perhaps have additional metadata
81 #
82 
83 class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
84  pass
85 lsctables.use_in(LIGOLWContentHandler)
86 
87 
88 def group_templates(templates, n, overlap = 0):
89  """
90  break up the template table into sub tables of length n with overlap
91  overlap. n must be less than the number of templates and overlap must be less
92  than n
93  """
94  if n >= len(templates):
95  yield templates
96  else:
97  n = len(templates) / round(len(templates) / float(n))
98  assert n >= 1
99  for i in itertools.count():
100  start = int(round(i * n)) - overlap // 2
101  end = int(round((i + 1) * n)) + overlap // 2
102  yield templates[max(start, 0):end]
103  if end >= len(templates):
104  break
105 
106 
107 def parse_command_line():
108  parser = OptionParser()
109  parser.add_option("--output-path", metavar = "path", default = ".", help = "Set the path to the directory where output files will be written. Default is \".\".")
110  parser.add_option("--output-cache", metavar = "file", help = "Set the file name for the output cache.")
111  parser.add_option("--n", metavar = "count", type = "int", help = "Set the number of templates per output file (required). It will be rounded to make all sub banks approximately the same size.")
112  parser.add_option("--overlap", default = 0, metavar = "count", type = "int", help = "overlap the templates in each file by this amount, must be even")
113  parser.add_option("--sort-by", metavar = "column", default="mchirp", help = "Select the template sort column, default mchirp")
114  parser.add_option("--add-f-final", action = "store_true", help = "Select whether to add f_final to the bank.")
115  parser.add_option("--max-f-final", metavar = "float", type="float", help = "Max f_final to populate table with; if f_final over mx, use max.")
116  parser.add_option("--instrument", metavar = "ifo", type="string", help = "override the instrument, required")
117  parser.add_option("--bank-program", metavar = "name", default = "tmpltbank", type="string", help = "Select name of the program used to generate the template bank (default: tmpltbank).")
118  parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
119  parser.add_option("--approximant", type = "string", help = "Must specify an approximant")
120  parser.add_option("--f-low", type = "float", metavar = "frequency", help = "Lower frequency cutoff")
121  parser.add_option("--group-by-chi", action = "store_true", help = "group templates into chi bins 0.1 in chi - helps with SVD.")
122  options, filenames = parser.parse_args()
123 
124  required_options = ("n", "instrument", "sort_by", "output_cache", "approximant")
125  missing_options = [option for option in required_options if getattr(options, option) is None]
126  if missing_options:
127  raise ValueError, "missing required option(s) %s" % ", ".join("--%s" % option.replace("_", "-") for option in missing_options)
128 
129  if options.overlap % 2:
130  raise ValueError("overlap must be even")
131 
132  return options, filenames
133 
134 options, filenames = parse_command_line()
135 output_cache_file = open(options.output_cache, "w")
136 bank_count = 0
137 
138 # Hacky way to bin by chi
139 def chikey(chi, group_by_chi = options.group_by_chi):
140  if group_by_chi:
141  return math.floor(chi * 10) / 10.
142  else:
143  return None
144 
145 outputrows = []
146 
147 for filename in filenames:
148  xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose, contenthandler = LIGOLWContentHandler)
149  sngl_inspiral_table = lsctables.SnglInspiralTable.get_table(xmldoc)
150 
151  if options.add_f_final:
152  if options.f_low is None:
153  flow, = ligolw_process.get_process_params(xmldoc, options.bank_program, "--flow") + ligolw_process.get_process_params(xmldoc, options.bank_program, "--low-frequency-cutoff") + ligolw_process.get_process_params(xmldoc, options.bank_program, "--f-low")
154  else:
155  flow = options.f_low
156  for row in sngl_inspiral_table:
157  # Find the total spin magnitudes
158  spin1, spin2 = (row.spin1x**2 + row.spin1y**2 + row.spin1z**2)**.5, (row.spin2x**2 + row.spin2y**2 + row.spin2z**2)**.5
159  # Chirptime uses SI
160  m1_SI, m2_SI = MSUN_SI * row.mass1, MSUN_SI * row.mass2
161 
162  if options.approximant in templates.gstlal_IMR_approximants:
163  # make sure to go a factor of 2 above the ringdown frequency for safety
164  row.f_final = 2 * chirptime.ringf(m1_SI + m2_SI, chirptime.overestimate_j_from_chi(max(spin1, spin2)))
165  else:
166  # otherwise choose a suitable high frequency
167  # NOTE not SI
168  row.f_final = spawaveform.ffinal(row.mass1, row.mass2, 'bkl_isco')
169 
170  # Override the high frequency with the max if appropriate
171  if options.max_f_final and (row.f_final > options.max_f_final):
172  row.f_final = options.max_f_final
173 
174  # Record the conservative template duration
175  row.template_duration = chirptime.imr_time(flow, m1_SI, m2_SI, spin1, spin2, f_max = row.f_final)
176 
177 
178  for row in sngl_inspiral_table:
179  row.ifo = options.instrument
180 
181  # just to make sure it is set
182  for row in sngl_inspiral_table:
183  row.mtotal = row.mass1 + row.mass2
184 
185  # Bin by Chi, has no effect if option is not specified, i.e. there is only one bin.
186  chidict = {}
187  [chidict.setdefault(chikey(spawaveform.computechi(row.mass1, row.mass2, row.spin1z, row.spin2z)), []).append(row) for row in sngl_inspiral_table]
188 
189  for chi in chidict:
190  chirows = chidict[chi]
191 
192  # store the process params
193  process = ligolw_process.register_to_xmldoc(xmldoc, program = "gstlal_bank_splitter", paramdict = options.__dict__, comment = "Split bank into smaller banks for SVD")
194 
195  def sort_func(row, column = options.sort_by):
196  return getattr(row, column)
197 
198  chirows.sort(key=sort_func)
199 
200 
201  for rows in group_templates(chirows, options.n, options.overlap):
202  assert len(rows) >= options.n/2, "There are too few templates in this chi interval. Requested %d: have %d" % (options.n, len(rows))
203  outputrows.append((rows[0], rows))
204 
205 
206 # One last sort now that the templates have been grouped
207 def sort_func((row, rows), column = options.sort_by):
208  return getattr(row, column)
209 
210 outputrows.sort(key=sort_func)
211 
212 for bank_count, (row, rows) in enumerate(outputrows):
213  sngl_inspiral_table[:] = rows
214  output = inspiral_pipe.T050017_filename(options.instrument, "GSTLAL_SPLIT_BANK_%04d" % bank_count, 0, 0, ".xml.gz", path = options.output_path)
215  output_cache_file.write("%s\n" % gluelal.CacheEntry.from_T050017("file://localhost%s" % os.path.abspath(output)))
216  ligolw_utils.write_filename(xmldoc, output, gz = output.endswith('gz'), verbose = options.verbose)
217 
218 output_cache_file.close()