gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_llcbcnode
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2012 Chad Hanna
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 import math
20 import bisect
21 import cgi
22 import cgitb
23 import os
24 os.environ["MPLCONFIGDIR"] = "/tmp"
25 import sys
26 import matplotlib
27 matplotlib.use('Agg')
28 from matplotlib import figure
29 from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
30 import matplotlib.pyplot as plt
31 import numpy
32 cgitb.enable()
33 form = cgi.FieldStorage()
34 import lal
35 from glue.ligolw import ligolw
36 from glue.ligolw import array as ligolw_array
37 from glue.ligolw import lsctables
38 from glue.ligolw import param as ligolw_param
39 from glue.ligolw import utils as ligolw_utils
40 from pylal import series as lalseries
41 from gstlal import far
42 from gstlal import reference_psd
43 import copy
44 import StringIO
45 import base64
46 import urlparse
47 
48 class LIGOLWContentHandler(ligolw.LIGOLWContentHandler):
49  pass
50 ligolw_array.use_in(LIGOLWContentHandler)
51 ligolw_param.use_in(LIGOLWContentHandler)
52 lsctables.use_in(LIGOLWContentHandler)
53 
54 ## @file gstlal_llcbcnode
55 # This program will monitor the output of a single job in the gstlal inspiral
56 # low latency analysis; See gstlal_llcbcnode for help and usage.
57 
58 ## @package gstlal_llcbcnode
59 #
60 # This program is designed to be placed in the cgi-bin directory of the user's
61 # public_html directory on the cluster that is running the gstlal inspiral low
62 # latency analysis
63 #
64 # ## USAGE:
65 # This program is never meant to be executed by a user, but rather on a
66 # webserver via a url such as:
67 #
68 # https://ldas-jobs.ligo.caltech.edu/~gstlalcbc/cgi-bin/gstlal_llcbcnode?dir=/path/to/analysis/dir/&id=<jobid>&url=/path/to/likelihood.xml
69 #
70 # e.g.,
71 #
72 # https://ldas-jobs.ligo.caltech.edu/~gstlalcbc/cgi-bin/gstlal_llcbcnode?dir=/mnt/qfs3/gstlalcbc/engineering/5/bns_trigs_40Hz&id=0009&url=/mnt/qfs3/gstlalcbc/engineering/5/bns_trigs_40Hz/0009_likelihood.xml
73 #
74 #
75 # ## Interpretation of the output page
76 #
77 # ### Trigger stats
78 #
79 # \image html gstlal_llcbcnode01.png
80 #
81 # - The leftmost plot is trigger latency vs time. It should be flat. If it is
82 # rising as a function of time that indicates that the job may be falling
83 # behind. Occasional spikes are expected
84 #
85 # - The next plot is a trigger latency histogram over the entire duration of
86 # the run.
87 #
88 # - The next plot is a SNR vs time. It should be relatively flat and below 10
89 # for well behaved noise. An occasional spike might be caused from a signal or
90 # a glitch, but long term features indicate poor data quality
91 #
92 # - The rightmost plot is the maximum RAM usage history. It can only ever go
93 # up, but it is a problem if it nears the maximum RAM available on the machine.
94 #
95 # ### Live time
96 #
97 # \image html gstlal_llcbcnode02.png
98 #
99 # These per IFO pie charts indicate time when the detectors were on (white) off (gray) and times when the data was missing (MIA) blue. The blue fraction should be << 1%
100 #
101 # ### PSDs
102 #
103 # \image html gstlal_llcbcnode03.png
104 #
105 # These represent the instantaneous PSD estimates of the running job. The horizon distance is also computed.
106 #
107 # ### SNR / Chi-squared stats
108 #
109 # \image html gstlal_llcbcnode04.png
110 #
111 # These represent the instantaneous, cumulative SNR/chi-squared statistics for the job as well as the likelihood ranking plot.
112 #
113 
114 
115 def to_png_image(matplotlib_figure = None):
116  # matplotlib_figure is needed because plot_likelihood_ratio_pdf
117  # generates plots using matplotlib.figure while other plotting
118  # functions use matplotlib.pyplot
119  f = StringIO.StringIO()
120  if matplotlib_figure is None:
121  plt.savefig(f, format="png")
122  else:
123  matplotlib_figure.savefig(f, format="png")
124  print '<img src="data:image/png;base64,',base64.b64encode(f.getvalue()),'"></img>'
125  f.close()
126 
127 def plot(dataurl, plottype, xlabel = "", ylabel = "", title = "", textbox = None):
128  fig = plt.figure(figsize=(5,3.5),)
129  fig.patch.set_alpha(0.0)
130  h = fig.add_subplot(111, axisbg = 'k')
131  plt.subplots_adjust(bottom = 0.2, left = .16)
132  plt.grid(color=(0.1,0.4,0.5), linewidth=2)
133  try:
134  data = numpy.loadtxt(dataurl)
135  except IOError:
136  print "Data could not be loaded"
137  return
138  except ValueError:
139  print "<h2>Data is mis formatted, perhaps the webserver returned an error</h2>"
140  return
141 
142  if plottype == "history":
143  h.semilogy(data[:,0] - data[-1,0], data[:,1], 'w', alpha=0.75, linewidth=2)
144  plt.ylim([min(data[:,1]), max(data[:,1])])
145  locs = [min(data[:,1]), numpy.median(data[:,1]), max(data[:,1])]
146  labels = ['%.2g' % lab for lab in locs]
147  plt.yticks(locs, labels)
148  elif plottype == "plot":
149  h.plot(data[:,0], data[:,1], 'w', alpha=0.75, linewidth=2)
150  elif plottype == "hist":
151  from scipy.interpolate import interp1d
152  x = numpy.linspace(data[0,0], data[-1,0], 100)
153  f = interp1d(data[:,0], data[:,1], kind='linear')
154  y = f(x)
155  y[y<0] = 0.
156  h.fill_between(x, y, alpha=0.75, linewidth=2, facecolor="w", color="w")
157  else:
158  raise ValueError(plottype)
159 
160  if textbox is not None:
161  plt.figtext(0.5, 0.2, textbox, color="w")
162 
163  plt.xlabel(xlabel)
164  plt.ylabel(ylabel)
165  plt.title(title)
166  plt.savefig(sys.stdout, format="svg")
167 
168 
169 def ceil10(x):
170  return 10**math.ceil(math.log10(x))
171 
172 def floor10(x):
173  return 10**math.floor(math.log10(x))
174 
175 
176 def psdplot(instrument, psd, fmin = 10., fmax = 2048.):
177  fig = plt.figure(figsize=(5,3.5),)
178  fig.patch.set_alpha(0.0)
179  h = fig.add_subplot(111, axisbg = 'k')
180  plt.subplots_adjust(bottom = 0.2, left = .16)
181  plt.grid(color=(0.1,0.4,0.5), linewidth=2)
182 
183  y = psd.data
184  f = numpy.linspace(psd.f0, len(y) * psd.deltaF, len(y)) + psd.f0
185 
186  print len(y), len(f)
187 
188  h.loglog(f, y, 'w', alpha=0.75, linewidth=2)
189 
190  plt.figtext(0.5, 0.2, "Horizon %.0f Mpc" % reference_psd.horizon_distance(psd, m1 = 1.4, m2 = 1.4, snr = 8., f_min = 30.), color="w")
191 
192  imin = bisect.bisect_left(f, fmin)
193  imax = bisect.bisect_right(f, fmax)
194  ymax = ceil10(y[imin:imax].max())
195  ymin = max(floor10(y[imin:imax].min()), ymax / 1e6)
196  plt.xlim((fmin, fmax))
197  #plt.ylim((ymin, ymax))
198  plt.ylim((1e-50, 1e-40))
199 
200  plt.xlabel("Frequency (Hz)")
201  plt.ylabel("Power (strain^2/Hz)")
202  plt.title(instrument)
203  plt.savefig(sys.stdout, format="svg")
204 
205 
206 def livetime_plot(disconturl, livetimeurl, xlabel = "", ylabel = "", title = ""):
207  fig = plt.figure(figsize=(5,3),)
208  fig.patch.set_alpha(0.0)
209  h = fig.add_subplot(111, axisbg = 'k', aspect='equal')
210  plt.subplots_adjust(bottom = 0, left = .25, top = 1, right = .75)
211  plt.grid(color="w")
212 
213  try:
214  livetimedata = numpy.loadtxt(livetimeurl)
215  discontdata = numpy.loadtxt(disconturl)
216  except IOError:
217  print "Data could not be loaded"
218  return
219 
220  dt = livetimedata[2]
221  lt = livetimedata[1]
222  discont = discontdata[1]
223  # FIXME Hack to adjust for high sample rate L1 and H1 state vector
224  if "V1" not in title:
225  dt /= 16
226  lt /= 16
227  discont /= 16
228  data = [dt, lt, discont]
229  explode = [0.0, 0, 0.15]
230  labels = ["OFF : %g (s)" % dt, "ON : %g (s)" % lt, "MIA : %g (s)" % discont]
231 
232  h.pie(data, shadow=True, explode = explode, labels = labels, autopct='%1.1f%%', colors = ('0.5', '1.0', (0.7, 0.7, 1.)))
233 
234  plt.xlabel(xlabel)
235  plt.ylabel(ylabel)
236  plt.title(title)
237  plt.savefig(sys.stdout, format="svg")
238 
239 # FIXME plot_likelihood_ratio_pdf is copied (with slight modifications) from
240 # gstlal_inspiral_plot_background. We considered moving plotting functions from
241 # gstlal_inspiral_plot_background to a python package plot_background.py and
242 # importing it instead of repeating the function definitions, but this is the
243 # only function we're using in here (for now) so making a seperate file doesn't
244 # make sense at the moment
245 def plot_likelihood_ratio_pdf(instruments, pdf, (xlo, xhi), tag, zerolag_pdf, size_inches):
246  fig = figure.Figure()
247  FigureCanvas(fig)
248  fig.set_size_inches(size_inches)
249  axes = fig.gca()
250  axes.set_position((.15, .14, .84, .76))
251  axes.semilogy(pdf.bins[0].centres(), pdf.array, color = "k", label="Background")
252  if zerolag_pdf is not None:
253  axes.semilogy(zerolag_pdf.bins[0].centres(), zerolag_pdf.array, color = "k", linestyle = "--", label="Zero-lag")
254  axes.grid(which = "both")
255  if instruments is None:
256  axes.set_title(r"%s Log Likelihood Ratio PDF" % tag)
257  else:
258  axes.set_title(r"%s %s Log Likelihood Ratio PDF" % (", ".join(sorted(instruments)), tag))
259  # FIXME latex text throws a warning about matplotlib missing fonts and
260  # then doesn't display latex symbols correctly
261  #axes.set_xlabel(r"$\ln \Lambda$")
262  #axes.set_ylabel(r"$P(\ln \Lambda | \mathrm{%s})$" % tag.lower())
263  # FIXME ylabel assumes all plots have noise tag
264  axes.set_xlabel("ln Lambda")
265  axes.set_ylabel("P(ln Lambda | n)")
266  yhi = pdf[xlo:xhi,].max()
267  ylo = pdf[xlo:xhi,].min()
268  if zerolag_pdf is not None:
269  yhi = max(yhi, zerolag_pdf[xlo:xhi,].max())
270  ylo = min(ylo, zerolag_pdf[xlo:xhi,].min())
271  ylo = max(yhi * 1e-40, ylo)
272  axes.set_ylim((10**math.floor(math.log10(ylo) - .5), 10**math.ceil(math.log10(yhi) + .5)))
273  axes.set_xlim((xlo, xhi))
274  axes.legend()
275  return fig
276 
277 
278 if "dir" not in form:
279  raise ValueError("must specify dir")
280 if "id" not in form:
281  raise ValueError("must specify id")
282 
283 baseurl = '%s/%s' % (form.getvalue("dir"), form.getvalue("id"))
284 
285 print >>sys.stdout, 'Cache-Control: no-cache, must-revalidate'
286 print >>sys.stdout, 'Expires: Mon, 26 Jul 1997 05:00:00 GMT'
287 print >>sys.stdout, 'Content-type: text/html\r\n'
288 
289 print """
290 <html>
291 <head>
292 <meta http-equiv="Pragma" content="no-cache">
293 <meta http-equiv="Expires" content="-1">
294 <meta http-equiv="CACHE-CONTROL" content="NO-CACHE">
295 <meta http-equiv="refresh" content="300">
296  <link rel="stylesheet" href="//code.jquery.com/ui/1.10.0/themes/base/jquery-ui.css" />
297  <script src="//code.jquery.com/jquery-1.8.3.js"></script>
298  <script src="//code.jquery.com/ui/1.10.0/jquery-ui.js"></script>
299  <script type="text/javascript"> $(function() {
300  $("#accordion").accordion({
301  });
302 
303  });</script>
304 </head>
305 <body>
306 <img src="../lcbo.jpg"> <font size=10>Low-latency Compact Binary Online %d </font><hr><br>
307 """ % int(lal.GPSTimeNow())
308 
309 print '<div id="accordion">'
310 
311 print '<h1>Trigger stats</h1><div>'
312 plot(baseurl+'/latency_history.txt', "history", ylabel = "Latency (s)", xlabel = "Time since last trigger (s)")
313 plot(baseurl+'/latency_histogram.txt', "hist", ylabel = "Count", xlabel = "Latency (s)")
314 plot(baseurl+'/snr_history.txt', "history", ylabel = "SNR", xlabel = "Time since last trigger (s)")
315 plot(baseurl+'/ram_history.txt', "history", ylabel = "RAM (GB)", xlabel = "Time before now (s)")
316 print "</div>"
317 
318 # Pie table
319 print "<h1> Live time </h1><div>"
320 for ifo in ("H1", "L1", "V1"):
321  try:
322  livetime_plot(baseurl+'/%s/strain_add_drop.txt' % ifo, baseurl+'/%s/state_vector_on_off_gap.txt' % ifo, title = ifo)
323  except:
324  print "could not get livetime data for ", ifo
325 print"</div>"
326 
327 # Psd table
328 print "<h1>PSDs</h1><div>"
329 try:
330  psds = lalseries.read_psd_xmldoc(ligolw_utils.load_url("%s/psds.xml" % baseurl, contenthandler = LIGOLWContentHandler))
331 except IOError:
332  print "Data could not be loaded"
333 except ValueError:
334  print "<h2>Data is mis formatted, perhaps the webserver returned an error</h2>"
335 else:
336  for instrument, psd in sorted(psds.items()):
337  psdplot(instrument, psd)
338 print"</div>"
339 
340 # likelihoods
341 path = form.getlist("url")[0]
342 
343 coinc_params_distributions, ranking_data, seglists = far.parse_likelihood_control_doc(ligolw_utils.load_filename(path, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler))
344 
345 print >> sys.stderr, "smooth" in form
346 
347 if "smooth" in form:
348  counts = coinc_params_distributions.background_pdf
349  inj = coinc_params_distributions.injection_pdf
350 else:
351  counts = coinc_params_distributions.background_rates
352  inj = coinc_params_distributions.injection_rates
353 
354 bgcol = (224/255.,224/255.,224/255.)
355 
356 likely = copy.deepcopy(inj)
357 for i, ifo in enumerate(['H1','L1', 'V1']):
358  print "<h1>%s SNR / CHISQ stats</h1>" % ifo
359  print "<div>"
360  likely[ifo+"_snr_chi"].array /= counts[ifo+"_snr_chi"].array
361  for name, obj in (("background", counts), ("injections", inj), ("likelihood", likely)):
362  fig = plt.figure(figsize=(6,5), facecolor = 'g')
363  fig.patch.set_alpha(0.0)
364  #plt.gray()
365  H1 = obj[ifo+"_snr_chi"].array
366  snr = obj[ifo+"_snr_chi"].bins[0].centres()[1:-1]
367  chi = obj[ifo+"_snr_chi"].bins[1].centres()[1:-1]
368  chi[0] = 0 # not inf
369  ax = plt.subplot(111)
370  plt.pcolormesh(snr, chi, numpy.log10(H1.T +1)[1:-1,1:-1])
371  ax.set_yscale('log')
372  plt.colorbar()
373  plt.xlabel('SNR')
374  plt.ylabel('reduced chi^2 / SNR^2')
375  plt.ylim([.001, .5])
376  plt.xlim([4,100])
377  ax.set_xscale('log')
378  plt.title('%s: %s log base 10 (number + 1)' % (ifo, name))
379  plt.grid(color=(0.1,0.4,0.5), linewidth=2)
380  to_png_image()
381  print "</div>"
382 
383 print "<h1> Likelihood Ratio PDFs </h1>"
384 print "<div>"
385 
386 # Calculuate combined pdfs
387 ranking_data.finish()
388 for instruments, binnedarray in ranking_data.background_likelihood_pdfs.items() if ranking_data is not None else ():
389  fig = plot_likelihood_ratio_pdf(instruments, binnedarray, (-5.,100.), "Noise", ranking_data.zero_lag_likelihood_pdfs[instruments], (6,5))
390  to_png_image(matplotlib_figure = fig)
391 print "</div>"
392 
393 #FIXME don't hard code CIT values
394 node = urlparse.urlparse(open("%s_registry.txt" % baseurl).readline()).netloc.split(":")[0]
395 print "<h1> Cluster node health<h1>"
396 print "<div>"
397 print "<img src=https://ldas-gridmon.ligo.caltech.edu/ganglia/graph.php?r=hour&z=xlarge&h=%s.cluster.ldas.cit&m=load_one&s=by+name&mc=2&g=cpu_report&c=Nodes/>" % node
398 print "<img src=https://ldas-gridmon.ligo.caltech.edu/ganglia/graph.php?r=hour&z=xlarge&h=%s.cluster.ldas.cit&m=load_one&s=by+name&mc=2&g=mem_report&c=Nodes/>" %node
399 print"</div>"
400 print "</body>"