gstlal-inspiral  0.4.2
 All Classes Namespaces Files Functions Variables Pages
gstlal_llcbcsummary
1 #!/usr/bin/env python
2 #
3 # Copyright (C) 2011 Chad Hanna
4 #
5 # This program is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by the
7 # Free Software Foundation; either version 2 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 # Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 
19 import sys
20 import cgi
21 import cgitb
22 import os
23 os.environ["MPLCONFIGDIR"] = "/tmp"
24 import matplotlib
25 matplotlib.use('Agg')
26 import numpy
27 import matplotlib.pyplot as plt
28 import time
29 import StringIO
30 import base64
31 from urlparse import urlparse
32 cgitb.enable()
33 form = cgi.FieldStorage()
34 
35 ## @file gstlal_llcbcsummary
36 # This program will monitor the output of a gstlal inspiral low latency
37 # analysis; See gstlal_llcbcsummary for help and usage.
38 
39 ## @package gstlal_llcbcsummary
40 #
41 # This program is designed to be placed in the cgi-bin directory of the user's
42 # public_html directory on the cluster that is running the gstlal inspiral low
43 # latency analysis
44 #
45 # ## USAGE:
46 # This program is never meant to be executed by a user, but rather on a
47 # webserver via a url such as:
48 #
49 # https://hostname.domain/path/to/cgi-bin/gstlal_llcbcsummary?id=<start>,<stop>&dir=/path/to/analysis/directory
50 #
51 # e.g.,
52 #
53 # https://ldas-jobs.ligo.caltech.edu/~gstlalcbc/cgi-bin/gstlal_llcbcsummary?id=0001,0010&dir=/home/gstlalcbc/engineering/5/bns_trigs_40Hz
54 #
55 # <start> and <stop> are the 4 digit id numbers corresponding to the first and
56 # last job, respectively.
57 #
58 # ## Interpretation of the output page
59 #
60 # General remarks. Red bars indicate that no data is available for the job in
61 # question. If you are sure the url is correct, this could mean that a job has
62 # failed. Each bar contains the node and job number.
63 #
64 # ### Header information
65 #
66 # \image html gstlal_llcbcsummary01.png
67 #
68 # This displays the current ifos involved ("H1L1V1") as well as the current GPS
69 # time and local time
70 #
71 # ### Latency
72 #
73 # \image html gstlal_llcbcsummary02.png
74 #
75 # This plot indicates the time since the last event (gray) as well as the
76 # latency of the last event (white). If things are behaving well the latency
77 # should be ~60 seconds and the time since last event should be no more than
78 # ~300 seconds.
79 #
80 # #### Reasons that this might not be the case:
81 #
82 # -# At least two detectors are not functioning
83 # -# The gstlal inspiral jobs are in a periodic "update" mode where they pause to
84 # update background statistics. This should only last for a few minutes every
85 # 4 hours and should not cause more than a few hundred seconds of delay
86 # -# Something more serious is wrong, e.g. the jobs are not able to keep up with the data.
87 #
88 # ### SNR
89 #
90 # \image html gstlal_llcbcsummary03.png
91 #
92 # This plot indicates the coincident SNR (root-sum-squares) of the last 1000
93 # events. The median SNR is white and the maximum SNR is gray. The median SNR
94 # should typically be small (< 8), however the maximum SNR might be large due
95 # to the presence of a glitch or a signal.
96 #
97 #
98 # ### Livetime plots
99 #
100 # \image html gstlal_llcbcsummary04.png
101 #
102 # These IFO specific plots can be used to infer the amount of up-time,
103 # down-time and periods of lost data for a given job. This is *only* since the
104 # job has been running. Thus it is an indicator of how long the job has been
105 # running without crashing. Ideally jobs would only be restarted during
106 # maintenance periods. If the livetime is low for one or more jobs it could
107 # indicate instability that is causing crashes
108 #
109 # ### Template Parameters
110 #
111 # \image html gstlal_llcbcsummary05.png
112 #
113 # \image html gstlal_llcbcsummary06.png
114 #
115 # These plots give the chirp mass and template duration bins of the given job.
116 #
117 # ### RAM usage
118 #
119 # \image html gstlal_llcbcsummary07.png
120 #
121 # This plot gives the maximum RAM usage seen over the duration of the job. If
122 # it is near the limit of the resource it might indicate that the jobs are
123 # "swapping" and could be related to problems such as falling behind.
124 #
125 # ### Links to more information about a given job
126 #
127 # \image html gstlal_llcbcsummary08.png
128 #
129 # To drill down into specific information of a job click on the links at the
130 # bottom of the screen. These will bring up pages created by gstlal_llcbcnode.
131 # Please see gstlal_llcbcnode for more information
132 
def now():
    """Return the current time in (approximate) GPS seconds as a float.

    The value is the Unix clock minus a fixed offset of 315964785 s.

    NOTE(review): the offset looks like the GPS epoch expressed in Unix
    time minus a fixed number of leap seconds, so the result would drift
    from true GPS time whenever a leap second is added -- confirm.
    """
    # FIXME use pylal when available
    unix_to_gps_offset = 315964785
    unix_seconds = time.time()
    return unix_seconds - unix_to_gps_offset
136 
def to_png_image():
    """Print the current matplotlib figure to stdout as an inline PNG.

    The figure is rendered into an in-memory buffer, base64 encoded and
    emitted as a single ``<img>`` tag carrying a ``data:`` URI.  The
    figure is closed afterwards so that a request producing many plots
    does not keep every figure alive until the process exits.
    """
    buf = StringIO.StringIO()
    try:
        plt.savefig(buf, format="png")
        encoded = base64.b64encode(buf.getvalue())
    finally:
        # Release both the buffer and the figure even if rendering failed.
        buf.close()
        plt.close()
    # Build the tag as one string: a comma separated print statement would
    # insert spaces around the base64 payload inside the data URI.
    print('<img src="data:image/png;base64,%s"></img>' % (encoded,))
142 
def read_registry(dir, dataurl, ids):
    """Map each job id to the network location found in its registry file.

    For every id the file ``<dir>/<id><dataurl>`` is opened, its first line
    is parsed as a URL and the ``netloc`` part is stored.

    Parameters:
        dir:     directory containing the registry files
        dataurl: suffix appended to the id to form the file name
        ids:     iterable of job id strings

    Returns:
        dict of id -> netloc string; the value is "" when the file cannot
        be opened or read (deliberate best effort, such jobs are simply
        shown without a node name).
    """
    nodedict = {}
    for job_id in ids:
        path = '%s/%s%s' % (dir, job_id, dataurl)
        try:
            # The context manager closes the file even if readline() fails.
            with open(path, "r") as registry_file:
                nodedict[job_id] = urlparse(registry_file.readline()).netloc
        except IOError:
            nodedict[job_id] = ""
    return nodedict
154 
155 
def load_data(directory, idrange, type):
    """Load ``<directory>/<id>/<type>.txt`` for every id in ``idrange``.

    Parameters:
        directory: analysis directory holding one sub-directory per job
        idrange:   sequence of job id strings
        type:      base name (without ``.txt``) of the file to load

    Returns:
        (found, missed): two dicts keyed by the *position* of the id in
        ``idrange``.  ``found`` holds a 2-D numpy array per readable file
        (a single row or single value is promoted to 2-D); ``missed``
        holds an empty array for every file that is absent, unparsable
        or contains no data.
    """
    found = {}
    missed = {}
    for index, job_id in enumerate(idrange):
        fname = "%s/%s/%s.txt" % (directory, job_id, type)
        try:
            table = numpy.loadtxt(fname)
        except (IOError, ValueError):
            missed[index] = numpy.array([])
            continue
        if table.size == 0:
            # A file without any rows carries no information; reporting it
            # as found would make the plotting code index into an empty
            # array and fail.
            missed[index] = numpy.array([])
        else:
            found[index] = numpy.atleast_2d(table)
    return found, missed
170 
def setup_plot():
    """Create a wide figure with a transparent frame and one black axes.

    Returns:
        (fig, h): the new figure and its single subplot axes.
    """
    fig = plt.figure(figsize=(20,5),)
    fig.patch.set_alpha(0.0)
    h = fig.add_subplot(111)
    # The ``axisbg`` keyword was removed from newer matplotlib releases in
    # favour of ``facecolor``; set the background through whichever setter
    # the installed version provides so both old and new releases work.
    if hasattr(h, "set_facecolor"):
        h.set_facecolor('k')
    else:
        h.set_axis_bgcolor('k')
    plt.subplots_adjust(left = .062, right = 0.98, bottom = 0.3)
    return fig, h
177 
def finish_plot(ids, registry, ylim, title=''):
    """Decorate the current plot and emit it through to_png_image().

    Parameters:
        ids:      job ids, one x tick per id, in order
        registry: dict of id -> node name used in the x tick labels
        ylim:     (low, high) limits of the y axis; the plotted values are
                  base-10 logarithms, so the y tick labels show 10**value
        title:    plot title
    """
    n_jobs = len(ids)
    plt.grid(color=(0.1,0.4,0.5), linewidth=2)

    # x axis: one rotated "id : node" label per job
    x_labels = []
    for job in ids:
        x_labels.append("%s : %s " % (job, registry[job]))
    plt.xticks(numpy.arange(n_jobs)+.3, x_labels, rotation=90, fontsize = 10)
    plt.xlim([0, n_jobs])

    # y axis: eight evenly spaced ticks labelled with the un-logged value
    plt.ylim(ylim)
    y_positions = numpy.linspace(ylim[0], ylim[1], 8)
    y_labels = ["%.1e" % (10.**position,) for position in y_positions]
    plt.yticks(y_positions, y_labels, fontsize = 14)

    plt.title(title, fontsize = 18)
    to_png_image()
190 
def plot_latency(found, missed, ids, registry):
    """Plot, per job, the latency of and the time since the last event.

    Uses the last row of each array in ``found``: column 1 is plotted as
    the latency and now() minus column 0 as the time since the last event
    (both as log10).  Jobs in ``missed`` get a red bar of full height.
    """
    fig, axes = setup_plot()

    job_slots = list(found.keys())
    last_latency = numpy.array([found[slot][-1,1] for slot in job_slots])
    latency_y = numpy.log10(last_latency)
    last_event_time = numpy.array([found[slot][-1,0] for slot in job_slots])
    time_y = numpy.log10(now() - last_event_time)

    try:
        max_y = max(time_y.max(), latency_y.max())
    except ValueError:
        # nothing was found, the arrays are empty
        max_y = 1

    missing_slots = list(missed.keys())
    missing_height = numpy.ones(len(missing_slots)) * max_y

    axes.bar(missing_slots, missing_height, color='r', alpha=0.9, linewidth=2)
    axes.bar(job_slots, latency_y, color='w', alpha=0.9, linewidth=2)
    axes.bar(job_slots, time_y, color='w', alpha=0.7, linewidth=2)
    finish_plot(ids, registry, [0, max_y], 'Time (s) since last event (gray) and latency (white)')
208 
def plot_snr(found, missed, ids, registry):
    """Plot, per job, the maximum and the median of column 1 of its data.

    Both quantities are drawn as log10 values; the y axis starts at
    log10(5.5).  Jobs in ``missed`` get a red bar of full height.
    """
    fig, axes = setup_plot()

    job_slots = list(found.keys())
    loudest = [found[slot][:,1].max() for slot in job_slots]
    typical = [numpy.median(found[slot][:,1]) for slot in job_slots]
    maxsnr_y = numpy.log10(numpy.array(loudest))
    mediansnr_y = numpy.log10(numpy.array(typical))

    try:
        max_y = max(maxsnr_y)
    except ValueError:
        # nothing was found, the array is empty
        max_y = 1

    missing_slots = list(missed.keys())
    missing_height = numpy.ones(len(missing_slots)) * max_y

    axes.bar(missing_slots, missing_height, color='r', alpha=0.9, linewidth=2)
    axes.bar(job_slots, mediansnr_y, color='w', alpha=0.9, linewidth=2)
    axes.bar(job_slots, maxsnr_y, color='w', alpha=0.7, linewidth=2)
    finish_plot(ids, registry, [numpy.log10(5.5), max_y], 'SNR of last 1000 events: max (gray) and median (white)')
227 
def plot_livetime(found, missed, ids, registry, ifo):
    """Plot, per job, the on / off / gap values of one detector.

    Columns 1, 2 and 3 of the first row of each array in ``found`` are
    plotted (as log10) as on, off and gap respectively.  Jobs in
    ``missed`` get a red bar of full height.  ``ifo`` selects the
    rescaling below and is shown in the title.
    """
    fig, axes = setup_plot()

    job_slots = list(found.keys())

    def log_of_column(col):
        # Handle log of 0 by setting it to max of (actual value, 1)
        clamped = [max(found[slot][0][col], 1) for slot in job_slots]
        return numpy.log10(numpy.array(clamped))

    on_y = log_of_column(1)
    off_y = log_of_column(2)
    gap_y = log_of_column(3)

    # FIXME Hack: every ifo except V1 is scaled down by a factor of 16.
    # NOTE(review): the original remark attributes this to a "high sample
    # rate L1 and V1 state vector", which does not match the condition
    # below -- confirm which detectors are meant.
    if ifo != "V1":
        rescale = numpy.log10(16)
        on_y -= rescale
        off_y -= rescale
        gap_y -= rescale

    if len(job_slots) > 0:
        max_y = max(on_y.max(), off_y.max(), gap_y.max())
        min_y = min(on_y.min(), off_y.min(), gap_y.min())
    else:
        max_y, min_y = 1, 0

    missing_slots = list(missed.keys())
    missing_height = numpy.ones(len(missing_slots)) * max_y

    axes.bar(missing_slots, missing_height, color='r', alpha=0.9, linewidth=2)
    axes.bar(job_slots, off_y, color='w', alpha=0.7, linewidth=2)
    axes.bar(job_slots, gap_y, color='b', alpha=0.5, linewidth=2)
    axes.bar(job_slots, on_y, color='w', alpha=0.5, linewidth=2)
    finish_plot(ids, registry, [min_y*.9, max_y], '%s Up time (gray) Down time (white) Dropped time (blue)' % (ifo,))
257 
def plot_ram(found, missed, ids, registry):
    """Plot, per job, column 1 of the first row of its data as RAM usage.

    This is the single-column plot for column 1 with a fixed title, so the
    work is handed to plot_single_col().
    """
    plot_single_col(found, missed, ids, registry, col = 1, title = 'RAM usage GB')
274 
def plot_single_col(found, missed, ids, registry, col = 0, title = ''):
    """Plot, per job, log10 of one column of the first row of its data.

    Parameters:
        found, missed: dicts as returned by load_data()
        ids, registry: passed through to finish_plot() for the x labels
        col:           column of the first row to plot
        title:         plot title

    Jobs in ``missed`` get a red bar of full height.
    """
    fig, axes = setup_plot()

    job_slots = list(found.keys())
    raw_values = [found[slot][0][col] for slot in job_slots]
    found_y = numpy.log10(numpy.array(raw_values))

    try:
        max_y = max(found_y)
        min_y = min(found_y)
    except ValueError:
        # nothing was found, the array is empty
        max_y = 1
        min_y = 0

    missing_slots = list(missed.keys())
    missing_height = numpy.ones(len(missing_slots)) * max_y

    axes.bar(missing_slots, missing_height, color='r', alpha=0.9, linewidth=2)
    axes.bar(job_slots, found_y, color='w', alpha=0.9, linewidth=2)
    finish_plot(ids, registry, [0.9 * min_y, max_y], title)
291 
292 
def get_ids(form):
    """Expand the ``id`` form value into a list of zero padded job ids.

    ``id`` is either ``<start>,<stop>`` (inclusive range) or a single
    number, which is treated as a range containing just that job.

    Parameters:
        form: object with a ``getvalue(name)`` method (the CGI field storage)

    Returns:
        list of id strings formatted with ``%04d``; empty when start > stop.

    Raises:
        ValueError: if a part of the value is not an integer.
    """
    bounds = [int(part) for part in form.getvalue("id").split(",")]
    first = bounds[0]
    # A lone id used to fail with IndexError; use it as both ends instead.
    last = bounds[1] if len(bounds) > 1 else first
    #FIXME relies on 4 digit ids
    return ['%04d' % (job,) for job in range(first, last + 1)]
298 
#
# CGI entry point: validate the request, then stream the summary page to
# stdout.  Everything read from the query string is untrusted, so it is
# escaped before being written into HTML and URL-encoded before being
# placed in a link.
#

# Only this section needs URL encoding, so the (Python 2) module is
# imported here rather than at the top of the file.
import urllib

# All three parameters are dereferenced below; fail with a clear message
# instead of an AttributeError on None.
for required in ("dir", "id", "ifos"):
    if required not in form:
        raise ValueError("must specify %s" % (required,))

ids = get_ids(form)
if not ids:
    # ids[0] and ids[-1] are needed for the SNR history link below
    raise ValueError("id range is empty")
directory = form.getvalue("dir")
ifos = form.getvalue("ifos").split(",")
reg = read_registry(directory, "_registry.txt", ids)


def _html(text):
    """Escape text for HTML content or a double-quoted attribute value."""
    return cgi.escape(text, True)


# Header
print('Cache-Control: no-cache, must-revalidate')
print('Expires: Mon, 26 Jul 1997 05:00:00 GMT')
print('Content-type: text/html\r\n')

# HTML preamble
print("""
<html>
<head>
<meta http-equiv="Pragma" content="no-cache">
<meta http-equiv="Expires" content="-1">
<meta http-equiv="CACHE-CONTROL" content="NO-CACHE">
<meta http-equiv="refresh" content="300">
 <link rel="stylesheet" href="//code.jquery.com/ui/1.10.0/themes/base/jquery-ui.css" />
 <script src="//code.jquery.com/jquery-1.8.3.js"></script>
 <script src="//code.jquery.com/ui/1.10.0/jquery-ui.js"></script>
 <script type="text/javascript"> $(function() {
 $("#accordion").accordion({
 });

 });</script>
</head>
<body>
""")

# title
print("""
<font size=10><img src="http://www.lsc-group.phys.uwm.edu/cgit/gstlal/plain/gstlal/doc/gstlal.png">gstlal_inspiral online </font><font size=6 color=#707070><b><right>%s: %d - %s </right></b><br></font><hr><br>
""" % (_html("".join(sorted(ifos))), int(now()), time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime())))

print('<div id="accordion">')

#
# latency history
#

print("<h1>Latency</h1>")
print("<div id='canvaslatency'>")
found, missed = load_data(directory, ids, "latency_history")
plot_latency(found, missed, ids, reg)
print("</div>")

#
# snr history
#

print("<h1>SNR</h1>")
print("<div id='canvassnr'>")
found, missed = load_data(directory, ids, "snr_history")
plot_snr(found, missed, ids, reg)
print("</div>")

#
# live time by ifo
#

for ifo in ifos:
    print("<h1>Livetime for %s</h1>" % (_html(ifo),))
    # double quoted attribute: _html() escapes double quotes
    print('<div id="%scanvastime">' % (_html(ifo),))
    found, missed = load_data(directory, ids, "%s/state_vector_on_off_gap" % (ifo,))
    plot_livetime(found, missed, ids, reg, ifo)
    print("</div>")

#
# Template Duration and Chirp Mass both come from the bank file; load once
#

bank_found, bank_missed = load_data(directory, ids, "bank")

print("<h1>Template Duration</h1>")
print("<div id='canvasdur'>")
plot_single_col(bank_found, bank_missed, ids, reg, 1, "Template Duration (s)")
print("</div>")

print("<h1>Chirp Mass</h1>")
print("<div id='canvasmchirp'>")
plot_single_col(bank_found, bank_missed, ids, reg, 2, "Chirp Mass")
print("</div>")

#
# RAM
#

print("<h1>RAM</h1>")
print("<div id='canvasram'>")
found, missed = load_data(directory, ids, "ram_history")
plot_ram(found, missed, ids, reg)
print("</div>")

#
# SNR history image rendered by a sibling CGI program
#

print("<h1>SNR History</h1>")
print("<div id='canvassnrhistory'>")
history_query = urllib.urlencode([("dir", directory), ("id", "%s,%s" % (ids[0], ids[-1]))])
print('<img src="gstlal_llsnrhistory?%s"></img>' % (_html(history_query),))
print("</div>")

# end of accordion
print("</div>")

# links at bottom
print("<h3>Node summary info:</h3><hr>")
for job_id in ids:
    url = os.path.join(directory, "%s/likelihood.xml" % (job_id,))
    node_query = urllib.urlencode([("dir", directory), ("id", job_id), ("url", url)])
    print('<font size=5><a target="_blank" href="gstlal_llcbcnode?%s"> %s </a></font>' % (_html(node_query), job_id))

print("</body>")
print("</html>")
417