1
2
3
4
5
6
7
8
9 import sqlite3
10
11 import sys
12 import re
13 import os
14 import bisect
15 import copy
16 import time
17 import pdb
18
19 from glue.ligolw import dbtables
20 from glue.ligolw import lsctables
21 from glue.ligolw import ilwd
22 from glue import git_version
23
24 __author__ = "Collin Capano <cdcapano@physics.syr.edu>"
25 __version__ = git_version.verbose_msg
26
27 """
28 A collection of utilities to assist in carrying out operations on a SQLite
29 database containing lsctables.
30 """
31
32
33
34
35
36
37
38
39
41 """
42 SQLite doesn't have a tuple type built-in. This can be frustrating if one
43 needs to compare values from multiple columns when doing queries. For example,
44 if one wanted to do something like:
45
46 connection.cursor().execute('''
47 SELECT *
48 FROM a
49 WHERE (a.val1, a.val2) IN (
50 SELECT (b.val1, b.val2)
51 FROM b)
52 ''')
53
54 an error would be raised.
55
56 This function tries to alleviate the problem by giving the ability to concatenate
57 results from multiple columns into a single colon-separated string. These strings can then be
58 compared directly. So, in the above example, one would do:
59
60 from pylal import ligolw_sqlutils as sqlutils
61 connection.create_function("concatenate", 2, sqlutils.concatenate)
62 connection.cursor().execute('''
63 SELECT *
64 FROM a
65 WHERE concatenate(a.val1, a.val2) IN (
66 SELECT concatenate(b.val1, b.val2)
67 FROM b)
68 ''')
69
70 Note that the create_function method must be called first with the number of
71 values that will be passed to concatenate before using it in any query.
72 """
73 return ':'.join([str(val) for val in args])
74
76 """
77 This class builds on the concatenate method to allow string concatenation
78 across multiple columns and rows. These strings can then be compared in
79 SQLite. For example, if one wanted to match ids from two different tables that share
80 the same values, one would do:
81
82 from pylal import ligolw_sqlutils as sqlutils
83 connection.create_aggregate("agg_concatenate", 2, sqlutils.aggregate_concatenate)
84 connection.cursor().execute('''
85 SELECT a.id, b.id
86 FROM a, b
87 WHERE
88 (
89 SELECT agg_concatenate(a.val1, a.val2)
90 FROM a
91 GROUP BY id
92 ORDER BY a.val1, a.val2 ASC
93 ) == (
94 SELECT agg_concatenate(b.val1, b.val2)
95 FROM b
96 GROUP BY id
97 ORDER BY b.val1, b.val2 ASC
98 )
99 ''')
100
101 In the strings that are created, rows are separated by ",", columns by ":".
102
103 Note that the create_aggregate method must be called first with the number of
104 values that will be passed to aggregate_concatenate before using it in any query.
105 """
108 - def step(self, *args):
109 self.result = ','.join([ self.result, concatenate(*args) ])
111 return self.result.lstrip(',')
112
114 """
115 Strips and checks that there are no newlines, tabs, spaces, or semi-colons in the given option.
116 This should be used for options that will be plugged into sqlite statements to
117 protect against injection attacks. If lower is set on, will also make all letters lower-case
118 in the option.
119
120 @option: option from config parser to validate
121 @lower: if set to True, will make all letters lower-case in the option
122 """
123 option = option.strip()
124 if lower:
125 option = option.lower()
126
127 if re.search(r'\n|\t| |;', option) is not None:
128 raise ValueError, "option %s contains illegal characters" % option
129
130 return option
131
132
134
    def __init__( self, table_name, table_param, param_ranges_opt, verbose = False ):
        """
        Parse --param-ranges option. Creates self.param which is the table_name and
        the table_param appended together (with a '.') and self.param_ranges, which is
        a list of tuples that give the lower parameter value, whether it is an open or
        closed boundary, and the same for the upper parameter. For example, if
        table_name is coinc_inspiral, table_param is mchirp and param_ranges_opt
        is '[2,8);[8,17]' will get:
        self.param = 'coinc_inspiral.mchirp'
        self.param_ranges =
            [ ( ('>=',2.0), ('<',8.0) ),
              ( ('>=',8.0), ('<=', 17.0) ) ]

        @table_name: Name of coinc_table in which the desired parameter is a column.
        @table_param: Parameter in the table on which to separate rows.
        @param_ranges_opt: string from the --param-ranges option. Param-ranges must
         follow these format rules:
            * A '(' or ')' implies an open boundary, a '[' or ']' a closed boundary.
            * To specify multiple ranges, separate each range by a ';'.
            * To specify equal to a single value, just specify the value, e.g., '2.3'
            * To specify not-equal to a single value, put a ! in front of the value,
              e.g., '!2.3'.
        @verbose: be verbose
        """
        if verbose:
            print >> sys.stderr, "Parsing param-ranges..."

        self.param = None
        self.param_ranges = []

        # check that table_name and table_param contain no illegal characters;
        # both values get spliced directly into SQL statements, so this guards
        # against injection attacks
        table_name = validate_option( table_name )
        if re.search(r'\n|\t|DROP|DELETE', table_param) is not None:
            raise ValueError, r'param-name cannot have "\n","\t", "DROP", or "DELETE" in it'
        table_param = table_param.strip()

        # prepend the table name if the param isn't already qualified with it
        if table_param.find( table_name+'.' ) == -1:
            table_param = '.'.join([ table_name, table_param ])

        self.param = table_param

        # multiple ranges are separated by ';'
        ranges = param_ranges_opt.split(';')

        for this_range in ranges:

            # if no bracket or parenthesis appears, this entry is a single
            # value ('x' or '!x'), not an interval
            if re.search('\]|\[|\)|\(', this_range) is None:
                this_range = this_range.strip()

                # a leading '!' means "not equal to"; otherwise test equality
                if this_range.startswith('!'):
                    btest = '!='
                    param = this_range.lstrip('!')
                else:
                    btest = '=='
                    param = this_range

                # values that can't be cast to float are kept as strings
                try:
                    param = float(param)
                except ValueError:
                    pass
                # single-value entries are stored as a 1-tuple: ((test, value),)
                self.param_ranges.append( ((btest, param),) )

            else:
                # get lower boundary: '[' is closed (>=), '(' is open (>)
                lowerparam = this_range.split(',')[0].strip()
                if lowerparam.find('[') != -1:
                    lowerbndry = '>='
                    lowerparam = float( lowerparam.lstrip('[') )
                elif lowerparam.find('(') != -1:
                    lowerbndry = '>'
                    lowerparam = float( lowerparam.lstrip('(') )
                else:
                    raise ValueError, "Parameter range %s not formatted correctly" % this_range

                # get upper boundary: ']' is closed (<=), ')' is open (<)
                upperparam = this_range.split(',')[1].strip()
                if upperparam.find(']') != -1:
                    upperbndry = '<='
                    upperparam = float( upperparam.rstrip(']') )
                elif upperparam.find(')') != -1:
                    upperbndry = '<'
                    upperparam = float( upperparam.rstrip(')') )
                else:
                    raise ValueError, "Parameter range %s not formatted correctly" % this_range

                # interval entries are stored as
                # ((lower test, lower value), (upper test, upper value))
                self.param_ranges.append( ((lowerbndry, lowerparam), (upperbndry, upperparam)) )
224
227
228
230 return self.param_ranges
231
232
234 """
235 Converts param_ranges into a list of strings that can be used in
236 a SQLite WHERE clause. For example, if table_name is coinc_inspiral,
237 table_param is mchirp and param_ranges_opt is '[2,8);[8,17]' the
238 elements in the returned list will be:
239 ['coinc_inspiral.mchirp >= 2.0 AND coinc_inspiral.mchirp < 8.0',
240 'coinc_inspiral.mchirp >= 8.0 AND coinc_inspiral.mchirp <= 17.0']
241 """
242 self.param_filters = []
243
244 for range in self.param_ranges:
245 if len(range) == 1:
246 btest = range[0][0]
247 param = range[0][1]
248 if isinstance(param, str):
249 param = param.join(['"','"'])
250 else:
251 param = str(param)
252 self.param_filters.append( ' '.join([ self.param, btest, param ]) )
253 else:
254 lowerbndry = range[0][0]
255 lowerparam = str( range[0][1] )
256 upperbndry = range[1][0]
257 upperparam = str( range[1][1] )
258 self.param_filters.append( ' '.join([ '(', self.param, lowerbndry, lowerparam,
259 'AND', self.param, upperbndry, upperparam, ')' ]) )
260
261 return self.param_filters
262
263
265 """
266 Takes in a value and returns a number corresponding to
267 which value param_range it falls in.
268 """
269 for n, range in enumerate(self.param_ranges):
270
271 if len(range) == 1:
272 btest = range[0][0]
273 param = range[0][1]
274 if btest == '==' and param == param_value:
275 return n
276 if btest == '!=' and param != param_value:
277 return n
278 else:
279
280 lowerbndry = range[0][0]
281 lowerparam = range[0][1]
282 upperbndry = range[1][0]
283 upperparam = range[1][1]
284
285
286
287 if ((lowerbndry, upperbndry) == ('>=', '<=')) and \
288 (param_value >= lowerparam and param_value <= upperparam):
289 return n
290
291 if ((lowerbndry, upperbndry) == ('>', '<=')) and \
292 (param_value > lowerparam and param_value <= upperparam):
293 return n
294
295 if ((lowerbndry, upperbndry) == ('>=', '<')) and \
296 (param_value >= lowerparam and param_value < upperparam):
297 return n
298
299 if ((lowerbndry, upperbndry) == ('>', '<')) and \
300 (param_value > lowerparam and param_value < upperparam):
301 return n
302
303
304
305 return None
306
308 """
309 Takes in a group number as returned by group_by_param_range
310 and returns a string representing that group.
311 """
312 this_range = self.param_ranges[group_num]
313 if len(this_range) > 1:
314 range_str = '%s%.2f,%.2f%s' % (
315 this_range[0][0] == '>=' and '[' or this_range[0][0] == '>' and '(',
316 float(this_range[0][1]),
317 float(this_range[1][1]),
318 this_range[1][0] == '<=' and ']' or this_range[1][0] == '<' and ')'
319 )
320 else:
321 range_str = '%s %.2f' % ( this_range[0][0], this_range[0][1] )
322
323 return range_str
324
325
327
    def __init__( self, coincs_opt, verbose = False):
        """
        Parses --exclude-coincs and --include-coincs options. The class doesn't
        care whether it's --include or --exclude; it just takes in the input and
        creates self.coinc_types, which is a dictionary of coinc types in which the
        keys are the type of time and the values are the coincidence type. For
        example, if either --include-coincs or --exclude-coincs is set to
        "[h2,l1 in h1,h2,l1]" self.coinc_types will be:
            coinc_types[ frozenset(H1,H2,L1) ] = set(H2,L1)

        @coincs_opt: the input from either --exclude-coincs or --include-coincs.
         This input must follow these format rules (the following can be copied
         into the help message for --include/exclude-coincs opts):
            * Coinc-types and detector time must be separated by
              an ' in '. When specifying a coinc_type or detector
              time, detectors and/or ifos must be separated by
              commas, e.g. 'H1,L1' not 'H1L1'.
            * To specify multiple coinc-types in one type of time,
              separate each coinc-type by a '+', e.g.,
              '[H1,H2 + H2,L1 in H1,H2,L1]'.
            * To specify all the coincs in a detector time
              or a specific coinc-type in all times, use 'ALL'. E.g.,
              to exclude/include all H1,H2 triggers, use '[H1,H2 in ALL]'
              or to exclude/include all H2,L1 time use '[ALL in H2,L1]'.
            * To specify multiple exclusions, separate each
              bracket by a ';'.
            * Neither the order of the instruments nor the case of the
              letters matter. So if your pinky is broken and you're
              dyslexic you can type '[h2,h1 in all]' without a
              problem.
        @verbose: be verbose.
        """

        if verbose:
            print >> sys.stderr, "Parsing coinc options..."

        self.coinc_types = {}

        # multiple rules are separated by ';'; surrounding brackets are cosmetic
        for rule in coincs_opt.split(';'):
            rule = rule.strip().lstrip('[').rstrip(']').upper()

            # each rule must contain exactly one 'IN' separating the coinc
            # types from the on-instruments time
            if len(rule.split('IN')) != 2:
                raise ValueError, "Must seperate coinc. types and on-instruments by 'in'"
            [ coinc_instruments, instruments_on ] = rule.split('IN')
            instruments_on = instruments_on.strip()
            coinc_instruments = coinc_instruments.strip()

            # normalize the on-instruments into a frozenset so it can serve as
            # a dictionary key regardless of the order typed on the command line
            if instruments_on != 'ALL':
                instruments_on = frozenset(sorted([instrument.strip() for instrument in instruments_on.split(',')]))
                if instruments_on not in self.coinc_types:
                    # sanity-check the instrument list before storing it
                    if len(instruments_on) <= 1:
                        raise ValueError, "Must delimit instruments by commas."
                    for instrument in instruments_on:
                        if len(instrument.split(' ')) > 1:
                            raise ValueError, "Instrument names cannot have spaces in them."
                    self.coinc_types[ instruments_on ] = []
            elif 'ALL' not in self.coinc_types:
                self.coinc_types[ 'ALL' ] = []

            # parse the coinc types for this time; multiple coinc types are
            # joined by '+'
            if coinc_instruments != 'ALL':
                for coinc_instset in coinc_instruments.split('+'):
                    coinc_instset = set(sorted( instrument.strip() for instrument in coinc_instset.split(',') ))
                    if coinc_instset not in self.coinc_types[ instruments_on ]:
                        # sanity-check this coinc type before storing it
                        if len(coinc_instset) <= 1:
                            raise ValueError, "Must delimit instruments by commas."
                        for instrument in coinc_instset:
                            if len(instrument.split(' ')) > 1:
                                raise ValueError, "Instrument names cannot have spaces in them."
                        self.coinc_types[ instruments_on ].append( coinc_instset )
            else:
                # 'ALL' coinc types in this time
                self.coinc_types[ instruments_on ] = ['ALL']
407
408
410 return self.coinc_types
411
412
414 """
415 Converts self.coinc_types to a list of strings that can be used
416 in a SQLite WHERE clause to filter coincs by coinc_type,
417 by coinc_instruments (which is stored in the given coinc_instruments_table)
418 in instruments_on (which is stored in the experiment table).
419 """
420 self.coinc_filters = []
421
422
423 from glue.ligolw.lsctables import ifos_from_instrument_set
424
425 coinc_instruments_table = validate_option( coinc_instruments_table )
426
427
428 for instruments_on in self.coinc_types:
429 this_coincfilter = ''
430 if instruments_on != 'ALL':
431 this_coincfilter = ''.join([
432 'experiment.instruments == "', ifos_from_instrument_set(instruments_on), '"' ])
433
434
435
436 if 'ALL' not in self.coinc_types[ instruments_on ]:
437 this_coincfilter = ' '.join([ this_coincfilter, 'AND (' ])
438 for coinc_instruments in self.coinc_types[ instruments_on ]:
439 this_coincfilter = ''.join([ this_coincfilter,
440 ' %s.ifos == "' % coinc_instruments_table, ifos_from_instrument_set(coinc_instruments), '"', ' OR' ])
441
442 this_coincfilter = this_coincfilter.rstrip('OR') + ')'
443
444 elif instruments_on == 'ALL' and 'ALL' not in self.coinc_types[ instruments_on ]:
445 for coinc_instruments in self.coinc_types[ instruments_on ]:
446 this_coincfilter = ''.join([ this_coincfilter,
447 ' %s.ifos == "' % coinc_instruments_table, ifos_from_instrument_set(coinc_instruments), '"', ' OR' ])
448
449 this_coincfilter = this_coincfilter.rstrip('OR')
450
451 self.coinc_filters.append( ''.join([ '(', this_coincfilter, ')' ]) )
452
453 return self.coinc_filters
454
455
456
def del_rows_from_table( connection, del_table, del_table_id, join_conditions, del_filters = None, save_filters = None, verbose = False ):
    """
    Deletes triggers from any specified table in the del_table option.

    @connection: DBTables connection to a database
    @del_table: any coinc_table (coinc_inspiral, sngl_inspiral, coinc_event,
     etc.) from which to delete triggers.
    @del_table_id: name of ID column in the del_table on which will be deleting
     triggers.
    @join_conditions: SQLite string that draws connections between different
     coinc_tables. Must be of format 'JOIN table1 ON table1-link-to-other-table
     JOIN table2 ON table2-link', etc.
    @del_filters: list of filters. Triggers that fall within will be deleted.
    @save_filters: list of filters. Triggers that fall within will NOT be deleted.
    @verbose: be verbose.

    NOTE: Save filters will override del_filters if they overlap. For example,
    say the del filter specifies H1,H2 triggers in H1,H2,L1 time and save filters
    are for triggers with mchirp between 2 and 8. Then all triggers with chirp
    mass between 2 and 8 will be saved, even if they are H1,H2. All other H1,H2
    triggers will be deleted. What this means is if you want to do a global
    delete -- say you want to delete all H1,H2 triggers -- do not specify a
    save_filter that overlaps with it.
    """
    # fully qualify the id column with its table name
    del_table_id = '.'.join([ del_table, del_table_id ])

    # construct the WHERE clause: rows matching any del_filter are candidates
    # for deletion unless they also match any save_filter
    where_clause = ''
    if del_filters:
        # loop variable renamed from 'filter' so the builtin isn't shadowed
        del_filters = [ ''.join([ '(', this_filter, ')' ]) for this_filter in del_filters ]
        del_filters = ' OR '.join( del_filters )
        where_clause = ' '.join([ 'WHERE (', del_filters, ')' ])
    if save_filters:
        save_filters = [ ''.join([ '(', this_filter, ')' ]) for this_filter in save_filters ]
        save_filters = ' OR '.join( save_filters )
        if not del_filters:
            where_clause = ' '.join([ 'WHERE NOT (', save_filters, ')' ])
        else:
            where_clause = ' '.join([ where_clause, 'AND NOT (', save_filters, ')' ])

    # sys.stderr.write is used instead of the py2-only "print >>" so this
    # function stays runnable under both Python 2 and 3
    if not where_clause:
        sys.stderr.write('''WARNING: No filters specified in delete statement.
        Deleting all rows from %s\n''' % del_table)
    elif verbose:
        sys.stderr.write("Deleting rows from %s table %s...\n" % (del_table, where_clause))

    # delete via an IN-subquery so that join_conditions can pull in filters
    # that reference other tables
    sqlquery = ' '.join([
        'DELETE FROM', del_table,
        'WHERE', del_table_id, 'IN (',
            'SELECT', del_table_id,
            'FROM', del_table, join_conditions,
            where_clause, ')' ])
    connection.cursor().execute( sqlquery )
    connection.commit()
511
512
514 """
515 Gets the names of tables that are in the database.
516 """
517 sqlquery = 'SELECT name FROM sqlite_master WHERE type == "table"'
518 return connection.cursor().execute(sqlquery).fetchall()
519
521 """
522 Remove empty space and defragment the database.
523 """
524 if vacuum:
525 if verbose:
526 print >> sys.stderr, "Start vacuuming database"
527 connection.execute("VACUUM")
528 if verbose:
529 print >> sys.stderr, "\tVacuuming database finished\n"
530
532 """
533 Gets the column names from a table and returns them as a list.
534 """
535 sqlquery = ''.join(['PRAGMA table_info(', table_name, ')' ])
536 column_names = [ name[1] for name in connection.cursor().execute( sqlquery).fetchall() ]
537
538 return column_names
539
541 """
542 Get all index names and associated SQL CREATE statements associated with
543 tables in table_names. Return a list of (idx_name, sql) tuples.
544 """
545
546 sqlquery = """
547 SELECT name, tbl_name, sql
548 FROM sqlite_master
549 WHERE type = 'index' AND sql != 'None'
550 """
551 indices = []
552 for idx_name, tbl_name, sql in connection.execute(sqlquery):
553 if tbl_name in table_names:
554 indices.append( (idx_name, sql) )
555
556 return indices
557
558
560 """
561 Converts durations stored in the experiment_summary_table from seconds
562 to other units of time.
563
564 @duration: duration to convert; assumed to be a float or long in seconds
565 @convert_to: the unit to convert to. Options are:
566 's': to seconds - will just divide by 1.
567 This can be useful if need to convert
568 the duration from a long int to a float.
569 'min': to minutes - will divide by 60.
570 'hr': to hours - will divide by 3600.
571 'days': to days - will divide by 86400.
572 'yr': to years - will divide by 31557600.
573 This is the Julian year, which is the
574 accepted astronomical year
575 """
576 if not duration:
577 return 0.
578 if convert_to == 's':
579 return duration / 1.
580 elif convert_to == 'min':
581 return duration / 60.
582 elif convert_to == 'hr':
583 return duration / 3600.
584 elif convert_to == 'days':
585 return duration / 86400.
586 elif convert_to == 'yr':
587 return duration / 31557600.
588 else:
589 raise ValueError, "Unrecognized unit."
590
592 """
593 Gets the next available id in the specified id_column in the specified table.
594 """
595 sqlquery = ' '.join(['SELECT', id_column, 'FROM', table ])
596 ids = dict([ [int(ilwd.ilwdchar(this_id)), ilwd.ilwdchar(this_id)] for (this_id,) in connection.cursor().execute(sqlquery)])
597 if ids == {}:
598 new_id = ilwd.ilwdchar(':'.join([table, id_column, '0']))
599 else:
600 new_id = ids[ max(ids.keys()) ] + 1
601
602 return new_id
603
605 """
606 Increments the given id by nsteps.
607 """
608 next_id = last_id.split(':')
609 next_id[-1] = str( int(next_id[-1]) + nsteps )
610 return ':'.join(next_id)
611
614
616 """
617 This class stores information about the foreground and background in a
618 database for making calculation of uncombined fars and combined fars quick
619 and efficient.
620
621 bkg_stats groups triggers by experiment_id, ifos, and param_group
622 (param_group is an arbitrary integer representing the param bin, e.g.,
623 mchirp [3.48,7.4), to which a trigger belongs; if no binning is done, then
624 it is 0 for all triggers). It stores ALL the triggers in all the time
625 slides (except zero-lag) within that group.
626
627 sngl_slide_stats groups triggers by experiment_id, experiment_summ_id, ifos, and
628 param_group. It therefore groups all triggers within each time slide
629 separately. It is used to subtract triggers within the same slide when calculating
630 uncombined fars for the background. Therefore, it only stores slide triggers;
631 for any zero-lag datatype sngl_slide_stats is just an empty list.
632
633 datatypes maps the list of datatypes for an experiment to the corresponding
634 experiment_summ_ids:
635 datatypes[experiment_id][datatype] = [esid1, esid2, etc.]
636
637 frg_durs stores the duration for each experiment_summ_id. Its keys are
638     [experiment_id][experiment_summ_id].
639
640 bkg_durs stores the background duration for each time-slide and zero-lag,
641 i.e., for each experiment_summ_id. This is the sum of all other slide
642 datatypes sharing the same experiment_id except for the given slide.
643
644 max_bkg_fars stores the maximum background fars of all the categories
645 within each time slide. It's keys are (experiment_summ_id, ifo_group).
646 The maximum background far is just the total number of triggers within a
647 category divided by the background duration for that time slide.
648 If opts.combine_fars is set to across_all a category is defined by the
649 param bin in which a trigger exists and the ifos that took part in the
650 trigger. So, if there are three param bins and we've excluded H2,L1 triggers
651 in H1,H2,L1 time, then there are 6 categories for H1,H2,L1 time: three param
652     bins each for H1,L1 and H1,H2,L1 coincident triggers. Thus, ifo_group will
653 be set to "ALL_IFOS" and there will be 6 max_bkg_fars stored for each
654 experiment_summ_id in triple time.
655 If opts.combine_fars is set to across_param_only, then a category is
656 defined only by the param bins; ifo coincidences are treated as
657 separate experiments. Thus, ifo_group will be set to whatever
658 coinc. trigger we are considering and there will only be 3 max_bkg_fars
659 stored for that entry.
660
661 zero_lag_ids stores the esid of all zero-lag "slides" of an experiment.
662 zero_lag_ids[ experiment_id ] = [experiment_summ_id1, experiment_summ_id2, etc.]
663 """
665 self.bkg_stats = {}
666 self.sngl_slide_stats = {}
667 self.datatypes = {}
668 self.frg_durs = {}
669 self.bkg_durs = {}
670 self.max_bkg_fars = {}
671 self.zero_lag_ids = {}
672
673 - def add_to_bkg_stats(self, experiment_id, experiment_summ_id, ifos, param_group, stat):
674 """
675 Adds a stat to bkg_stats and sngl_slide_stats. What stat is added is determined on the command
676 line by the ranking-stat option.
677 """
678
679 if (experiment_id, ifos, param_group) not in self.bkg_stats:
680 self.bkg_stats[(experiment_id, ifos, param_group)] = []
681 if (experiment_id, experiment_summ_id, ifos, param_group) not in self.sngl_slide_stats:
682 self.sngl_slide_stats[(experiment_id, experiment_summ_id, ifos, param_group)] = []
683
684 if not ( experiment_id in self.zero_lag_ids and experiment_summ_id in self.zero_lag_ids[experiment_id] ):
685 self.bkg_stats[(experiment_id, ifos, param_group)].append( stat )
686 self.sngl_slide_stats[(experiment_id, experiment_summ_id, ifos, param_group)].append(stat)
687
689 """
690 Sorts each list in bkg_stats and sngl_slide_stats from smallest to largest value.
691 """
692 for thislist in self.bkg_stats.values():
693 thislist.sort()
694 for thislist in self.sngl_slide_stats.values():
695 thislist.sort()
696
698 """
699 Stores the experiment_summ_id associated with each datatype.
700 """
701 if experiment_id not in self.datatypes:
702 self.datatypes[experiment_id] = {}
703 if datatype not in self.datatypes[experiment_id]:
704 self.datatypes[experiment_id][datatype] = []
705 self.datatypes[experiment_id][datatype].append(experiment_summ_id)
706
708 """
709 Retrieve the datatype for a given experiment_summ_id.
710 """
711 for eid in self.datatypes:
712 for datatype, esid_list in self.datatypes[eid].items():
713 if experiment_summ_id in esid_list:
714 return datatype
715
717 """
718 Adds a zero_lag_id to the zero_lag_ids dictionary.
719 """
720 if experiment_id not in self.zero_lag_ids:
721 self.zero_lag_ids[experiment_id] = []
722 self.zero_lag_ids[experiment_id].append(zero_lag_esid)
723
725 """
726 Adds a duration to frg_durs.
727 """
728 if experiment_id not in self.frg_durs:
729 self.frg_durs[experiment_id] = {}
730 self.frg_durs[experiment_id][experiment_summ_id] = duration
731
733 """
734 Sums the background durs for each time-slide (experiment_summ_id).
735 """
736 for eid, durs_dict in self.frg_durs.items():
737 culled_durs_dict = copy.deepcopy(durs_dict)
738 if eid in self.zero_lag_ids:
739 for frgd_esid in self.zero_lag_ids[eid]:
740 del culled_durs_dict[frgd_esid]
741 tot_dur = sum( culled_durs_dict.values() )
742 for esid, duration in durs_dict.items():
743 if esid in culled_durs_dict.keys():
744 self.bkg_durs[esid] = tot_dur - duration
745 else:
746 self.bkg_durs[esid] = tot_dur
747
749 """
750         Adds a max_bkg_far to the appropriate list; lists are grouped by
751 experiment_summ_id and ifo_group. If one wants to combined fars across
752 param_bins and coincident_ifos (as was done in the low-mass S51yr and
753 12-18 month analyses), ifo_group should be set to "ALL_IFOS".
754 """
755 if (experiment_summ_id, ifo_group) not in self.max_bkg_fars:
756 self.max_bkg_fars[(experiment_summ_id, ifo_group)] = []
757 self.max_bkg_fars[(experiment_summ_id, ifo_group)].append(max_bkg_far)
758
760 """
761 Sorts the max_bkg_fars lists from smallest to highest values.
762 """
763 for thislist in self.max_bkg_fars.values():
764 thislist.sort()
765
767 """
768 Calculates the uncombined false alarm rate for a trigger by counting
769 the number of background triggers in the same category as it that have
770 a stat value greater than or equal to the trigger's stat value and
771 dividing by the background duration for that slide.
772 To do this quickly, bisect.bisect_left is used (see python
773 documentation for more info) on the bkg_stats list. Since bkg_stats
774 contains all the triggers in all the slides for some experiment_id,
775 this will result in counting the triggers that are in the same slide
776 (given by the esid) as the trigger we are considering (except for zero-lag).
777 To correct for this, the trigger's place in it's sngl_slide_stats list is
778 subtracted from this value. The "background" considered for some trigger is
779 therefore all the triggers sharing the same experiment_id, excluding
780 zero-lag triggers and triggers in the same time-slide as the trigger. This
781 means that uncombined far for non-zero-lag triggers will use one less time
782 slide than zero-lag triggers.
783 """
784 return (\
785 ( len(self.bkg_stats[(eid, ifos, param_group)]) - bisect.bisect_left(self.bkg_stats[(eid, ifos, param_group)], stat) ) \
786 - \
787 ( len(self.sngl_slide_stats[(eid, esid, ifos, param_group)]) - bisect.bisect_left(self.sngl_slide_stats[(eid, esid, ifos, param_group)], stat) ) \
788 ) / self.bkg_durs[esid]
789
791 """
792 Same as calc_ufar_by_max, except that the uncombined far is calculated
793 by counting background triggers that have a stat value less than or
794 equal to the given stat. (Done by using bisect.bisect_right as opposed to
795 len(list) - bisect.bisect_left).
796         Note: if stat is 0, will just return 0. This is because a 0 when calculating
797         FARs by minimum value is equivalent to inf. when calculating FARs by maximum
798 value.
799 """
800 if stat == 0.:
801 return stat
802
803 return ( \
804 bisect.bisect_right(self.bkg_stats[(eid, ifos, param_group)], stat) \
805 - \
806 bisect.bisect_right(self.sngl_slide_stats[(eid, esid, ifos, param_group)], stat) \
807 ) / self.bkg_durs[esid]
808
809 - def calc_cfar( self, esid, ifo_group, ufar ):
810 """
811 Calculates the combined far for the given uncombined far (ufar). This
812 is defined as the ufar times the number of categories that are active
813 at that point plus the sum of the max_bkg_fars of all the categories
814 that are inactive. Whether or not a category is "active" is determined
815 by it's max_bkg_far. If the given ufar is greater than some max_bkg_far,
816 then the category which that max_bkg_far represents is considered
817 inactive. If the given ufar is less than some max_bkg_far, then
818 the category is considered active.
819 """
820 return \
821 (len( self.max_bkg_fars[(esid, ifo_group)] ) - bisect.bisect_left( self.max_bkg_fars[(esid,ifo_group)], ufar ))*ufar \
822 + sum([self.max_bkg_fars[(esid,ifo_group)][ii] for ii in range(bisect.bisect_left( self.max_bkg_fars[(esid,ifo_group)], ufar))])
823
825 """
826 Class to return a rank for stats.
827 """
828 - def __init__(self, table, ranking_stat, rank_by):
829 """
830 @table: table containing the stats to rank
831 @ranking_stat: stat in table that wish to rank
832 @rank_by: should be either "ASC" or "DESC"
833 """
834 self.stats = []
835 self.table = table
836 self.ranking_stat = ranking_stat
837 self.rank_by = rank_by
838
840 """
841 Gets top stats from database for later ranking
842 @connection: connection to a sqlite database
843 @limit: put a limit on the number of stats to rank
844 @filter: apply a filter (i.e., a SQLite WHERE clause).
845 Note: If the filter uses colums from tables other than
846 self.table, must include the join conditions as well
847 """
848 if limit is not None:
849 limit = "LIMIT " + str(limit)
850 else:
851 limit = ''
852 sqlquery = ''.join(["""
853 SELECT
854 """, self.ranking_stat, """
855 FROM
856 """, self.table, """
857 """, filter, """
858 ORDER BY """, self.ranking_stat, ' ', self.rank_by, """
859 """, limit ])
860 self.stats = [stat[0] for stat in connection.cursor().execute(sqlquery).fetchall()]
861 self.stats.sort()
862
864 if self.rank_by == "ASC":
865 return bisect.bisect_left(self.stats, this_stat) + 1
866 else:
867 return len(self.stats) - bisect.bisect_right(self.stats, this_stat) + 1
868
869
def get_col_type(table_name, col_name, default = 'lstring'):
    """
    Attempts to get column type from lsctables.py for the given table name and
    column name. If the table doesn't exist in lsctables or the column doesn't
    exist in the lsctables definition of the table, returns the default type.

    @table_name: name of the table to look up in lsctables.TableByName
    @col_name: name of the column whose type is desired
    @default: type string returned when the table or column is unknown
    """
    # test membership on the mappings directly instead of materializing
    # .keys() lists just to do an 'in' check
    if table_name in lsctables.TableByName and col_name in lsctables.TableByName[table_name].validcolumns:
        return lsctables.TableByName[table_name].validcolumns[col_name]
    else:
        return default
880
899
900
901
902
903
904
905
906
907
908
909
910
966
1045
1046
1048 """
1049 Create a map between process_ids so duplicate entries in numerous ligolw_xml
1050 tables can be removed without a loss of information. The tables the _pidmap_
1051 is used to simplify are:
1052
1053 -- process, process_params
1054 -- filter, summ_value, search_summary, search_summvars
1055 -- sim_inspiral, sim_ringdown
1056 -- time_slide
1057 -- update the sim_proc_ids in the experiment_summary table
1058 """
1059 if verbose:
1060 print >> sys.stdout, "\nCreate a map of process_ids for removal of duplicates"
1061
1062
1063 connection.create_function("concat_5cols", 5, concatenate)
1064
1065
1066 cursor = connection.cursor()
1067
1068 sqlscript = """
1069 CREATE INDEX pp_pivp_idx ON process_params (process_id, value, param);
1070
1071 CREATE TEMP TABLE proc_params AS
1072 SELECT
1073 process.process_id AS proc_id,
1074 process.program AS program,
1075 group_concat(pp_table.value) AS value,
1076 group_concat(pp_table.param) AS params,
1077 concat_5cols(process.start_time, process.end_time, process.username,
1078 process.node, process.version) AS process_info
1079 FROM
1080 process
1081 LEFT JOIN process_params AS pp_table ON (
1082 pp_table.process_id == process.process_id)
1083 GROUP BY proc_id;
1084
1085 UPDATE
1086 proc_params
1087 SET
1088 value = 'NULL'
1089 WHERE
1090 value is null OR value = '';
1091
1092 UPDATE
1093 proc_params
1094 SET
1095 params = 'NULL'
1096 WHERE
1097 params is null OR params = '';
1098 UPDATE
1099 proc_params
1100 SET
1101 process_info = 'NULL'
1102 WHERE
1103 process_info is null OR process_info = '';
1104 UPDATE
1105 proc_params
1106 SET
1107 program = 'NULL'
1108 WHERE
1109 program is null OR program = '';
1110
1111 CREATE TEMP TABLE proc_params_map AS
1112 SELECT
1113 MIN(proc_id) AS proc_id,
1114 program,
1115 value,
1116 params,
1117 process_info
1118 FROM
1119 proc_params
1120 GROUP BY program, value, params, process_info;
1121
1122 DROP INDEX pp_pivp_idx;
1123 CREATE INDEX proc_params_idx ON proc_params (program, value, params, process_info);
1124 CREATE INDEX proc_params_map_idx ON proc_params_map (program, value, params, process_info);
1125
1126 CREATE TEMP TABLE _pidmap_ AS
1127 SELECT
1128 old_pp_table.proc_id AS old_pid,
1129 MIN(new_pp_table.proc_id) AS new_pid,
1130 old_pp_table.program AS program
1131 FROM
1132 proc_params AS old_pp_table
1133 JOIN proc_params_map AS new_pp_table ON (
1134 old_pp_table.value == new_pp_table.value
1135 AND old_pp_table.process_info == new_pp_table.process_info
1136 AND old_pp_table.params == new_pp_table.params
1137 AND old_pp_table.program == new_pp_table.program)
1138 GROUP BY old_pid;
1139
1140 CREATE INDEX _pidmap_idx ON _pidmap_ (old_pid);
1141
1142 DROP INDEX proc_params_idx;
1143 DROP INDEX proc_params_map_idx;
1144 DROP TABLE proc_params;
1145 DROP TABLE proc_params_map;
1146 """
1147 if debug:
1148 print >> sys.stderr, sqlscript
1149 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1150
1151 cursor.executescript( sqlscript )
1152
1153 connection.commit()
1154 cursor.close()
1155 if debug:
1156 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1157
1158
1160 """
1161 Make a tuple of the process_ids from the temp table '_pidmap_' that need to
1162 be updated for a given list of programs.
1163 """
1164
1165 sqlquery = """
1166 SELECT DISTINCT old_pid
1167 FROM _pidmap_
1168 WHERE new_pid != old_pid """
1169 if not programs:
1170
1171 process_ids = zip( *cursor.execute(sqlquery).fetchall() )
1172 else:
1173 process_ids = []
1174 sqlquery += """
1175 AND program = :1
1176 """
1177
1178 for program in programs:
1179 validate_option(program)
1180 if (program,) in cursor.execute('SELECT DISTINCT program FROM _pidmap_'):
1181 process_ids += zip( *cursor.execute(sqlquery, (program,)).fetchall() )
1182
1183 return process_ids
1184
1185
1187 """
1188 Delete duplicate information in summary tables according to the process_ids.
1189 tbl_name: filter, summ_value, search_summary, search_summvars
1190 """
1191
1192
1193 cursor = connection.cursor()
1194
1195
1196 old_pids = get_pids_to_update(cursor, ['inspiral','ringdown', 'gstlal_inspiral'])
1197
1198
1199 all_tables = zip(*get_tables_in_database(connection))[0]
1200 table_names = ['filter','summ_value','search_summary','search_summvars']
1201
1202 if old_pids and set(table_names) & set(all_tables):
1203 if verbose:
1204 print >> sys.stdout, "\nDelete redundant rows in available summary tables"
1205
1206
1207 relevant_indices = get_user_created_indices(connection, table_names)
1208
1209 for idx, sql in relevant_indices:
1210 validate_option(idx)
1211 cursor.execute('DROP INDEX %s' % idx)
1212
1213 sqlscript = ""
1214
1215 if 'filter' in all_tables:
1216 sqlscript += """
1217 DELETE FROM filter
1218 WHERE process_id NOT IN (
1219 SELECT DISTINCT new_pid
1220 FROM _pidmap_
1221 WHERE program = 'inspiral' OR program = 'ringdown' ); """
1222
1223 if 'summ_value' in all_tables:
1224 sqlscript += """
1225 DELETE FROM summ_value
1226 WHERE process_id NOT IN (
1227 SELECT DISTINCT new_pid
1228 FROM _pidmap_
1229 WHERE program = 'inspiral' OR program = 'ringdown' ); """
1230
1231 if 'search_summary' in all_tables:
1232 sqlscript += """
1233 DELETE FROM search_summary
1234 WHERE process_id NOT IN (
1235 SELECT DISTINCT new_pid
1236 FROM _pidmap_
1237 WHERE program = 'inspiral' OR program = 'ringdown' ); """
1238
1239 if 'search_summvars' in all_tables:
1240 sqlscript += """
1241 DELETE FROM search_summvars
1242 WHERE process_id NOT IN (
1243 SELECT DISTINCT new_pid
1244 FROM _pidmap_
1245 WHERE program = 'inspiral' OR program = 'ringdown' ); """
1246 if debug:
1247 print >> sys.stderr, sqlscript
1248 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1249
1250 cursor.executescript( sqlscript )
1251
1252 connection.commit()
1253
1254 if debug:
1255 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1256
1257
1258 for idx, sql in relevant_indices:
1259 cursor.execute(sql)
1260 connection.commit()
1261
1262 if debug:
1263 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
1264
1265 else:
1266 if verbose:
1267 if not set(table_names) & set(all_tables):
1268 print >> sys.stdout, "This database lacks all filtering related summary tables."
1269 else:
1270 print >> sys.stdout, "The search & filter tables lack duplicates."
1271 cursor.close()
1272
1273
1275 """
1276 Update process_ids for events in the sngl_inspiral &/or sngl_ringdown table.
1277 This function does *not* remove duplicate events that have different
1278 event_ids.
1279 """
1280
1281
1282 cursor = connection.cursor()
1283
1284
1285 old_pids = get_pids_to_update(cursor, ['inspiral','ringdown'])
1286
1287
1288 all_tables = zip(*get_tables_in_database(connection))[0]
1289 table_names = ['sngl_inspiral','sngl_ringdown']
1290
1291 if old_pids and set(table_names) & set(all_tables):
1292 if verbose:
1293 print >> sys.stdout, "\nUpdate process_ids in the sngl-ifo trigger tables"
1294
1295
1296 relevant_indices = get_user_created_indices(connection, table_names)
1297
1298 for idx, sql in relevant_indices:
1299 validate_option(idx)
1300 cursor.execute('DROP INDEX %s' % idx)
1301
1302 sqlscript = ""
1303
1304 if 'sngl_inspiral' in all_tables:
1305 sqlscript += """
1306 UPDATE sngl_inspiral
1307 SET process_id = (
1308 SELECT new_pid
1309 FROM _pidmap_
1310 WHERE process_id == old_pid
1311 AND program = 'inspiral'); """
1312
1313 if 'sngl_ringdown' in all_tables:
1314 sqlscript += """
1315 UPDATE sngl_ringdown
1316 SET process_id = (
1317 SELECT new_pid
1318 FROM _pidmap_
1319 WHERE process_id == old_pid
1320 AND program = 'ringdown'); """
1321
1322 if debug:
1323 print >> sys.stderr, sqlscript
1324 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1325
1326 cursor.executescript( sqlscript )
1327
1328 connection.commit()
1329 if debug:
1330 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1331
1332 for idx, sql in relevant_indices:
1333 cursor.execute(sql)
1334 connection.commit()
1335
1336 if debug:
1337 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
1338 else:
1339 if verbose:
1340 if not set(table_names) & set(all_tables):
1341 print >> sys.stdout, "This database lacks a sngl_inspiral &/or sngl_ringdown table."
1342 else:
1343 print >> sys.stdout, "The sngl_inspiral & sngl_ringdown tables lack duplicates."
1344 cursor.close()
1345
1346
1348 """
1349 Delete duplicate rows in the process & process params table.
1350 The temp table _pidmap_ created by the get_process_info function is dropped.
1351 """
1352 if verbose:
1353 print >> sys.stdout, "\nDeleting redundant rows in the process & process_params tables"
1354
1355
1356 cursor = connection.cursor()
1357
1358
1359 old_pids = get_pids_to_update(cursor, [])
1360
1361 if old_pids:
1362
1363 table_names = ['process','process_params']
1364 relevant_indices = get_user_created_indices(connection, table_names)
1365
1366 for idx, sql in relevant_indices:
1367 validate_option(idx)
1368 cursor.execute('DROP INDEX %s' % idx)
1369
1370 sqlscript = """
1371 -- Remove redundant process rows
1372 DELETE FROM process
1373 WHERE process_id IN (
1374 SELECT DISTINCT old_pid
1375 FROM _pidmap_
1376 WHERE old_pid != new_pid );
1377 DELETE FROM process_params
1378 WHERE process_id IN (
1379 SELECT DISTINCT old_pid
1380 FROM _pidmap_
1381 WHERE old_pid != new_pid );
1382
1383 DROP INDEX _pidmap_idx;
1384 DROP TABLE _pidmap_; """
1385 if debug:
1386 print >> sys.stderr, sqlscript
1387 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1388
1389 cursor.executescript( sqlscript )
1390
1391 connection.commit()
1392 if debug:
1393 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1394
1395
1396 for idx, sql in relevant_indices:
1397 cursor.execute(sql)
1398 connection.commit()
1399
1400 if debug:
1401 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
1402
1403 else:
1404 sqlscript = """
1405 DROP INDEX _pidmap_idx;
1406 DROP TABLE _pidmap_; """
1407 if debug:
1408 print >> sys.stderr, sqlscript
1409 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1410
1411 cursor.executescript( sqlscript )
1412
1413 connection.commit()
1414
1415 if verbose:
1416 print >> sys.stdout, "The process & process_params tables lack duplicates."
1417 cursor.close()
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1429 """
1430 Writes JOIN string to join the experiment, experiment_summary,
1431 and experiment_map tables to the specified table. This allows
1432 querying across any of these tables.
1433
1434 @table: any lsctable that has a coinc_event_id column
1435
1436 NOTE: Should only use when querying the specified table; i.e.,
1437 when the specified table is the only one listed in the FROM statement.
1438 """
1439
1440 return """
1441 JOIN
1442 experiment, experiment_summary, experiment_map
1443 ON (
1444 experiment.experiment_id == experiment_summary.experiment_id
1445 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
1446 AND experiment_map.coinc_event_id == %s.coinc_event_id )""" % table
1447
1449 """
1450 Writes JOIN string to join the experiment, experiment_summary,
1451 and experiment_map tables to the specified table. This allows
1452 querying across any of these tables.
1453
1454 @table: any lsctable that has an event_id column
1455
1456 NOTE: Should only use when querying the specified table; i.e.,
1457 when the specified table is the only one listed in the FROM statement.
1458 """
1459
1460 return ''.join(['''
1461 JOIN
1462 experiment, experiment_summary, experiment_map, coinc_event_map
1463 ON (
1464 experiment.experiment_id == experiment_summary.experiment_id
1465 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
1466 AND experiment_map.coinc_event_id == coinc_event_map.coinc_event_id
1467 AND coinc_event_map.event_id == ''', table, '''.event_id
1468 AND coinc_event_map.table_name == "''', table, '" )' ])
1469
1471 """
1472 Removes entries from the experiment, experiment_summary, and time_slide tables
1473 that have no events in them, i.e., that have no mapping to any coinc_event_ids
1474 via the experiment_map table. Entries are only removed if none of the
1475 experiment_summ_ids associated with an experiment_id have coinc_events. In other words,
1476 Even if only one of the experiment_summ_ids associated with an experiment_id has an event,
1477 all of the experiment_summ_ids and experiment_ids associated with that event are
1478 saved. This perserves the background time and slide set associated with an experiment.
1479
1480 WARNING: This should only be used for purposes of scaling down a temporary database in prep.
1481 for xml extraction. In general, all experiment and time_slide entries should be left in
1482 the experiment tables even if they don't have events in them.
1483
1484 @connection: connection to a sqlite database
1485 """
1486 if verbose:
1487 print >> sys.stderr, "Removing experiments that no longer have events in them..."
1488
1489 sqlscript = """
1490 DELETE FROM
1491 experiment
1492 WHERE
1493 experiment_id NOT IN (
1494 SELECT DISTINCT
1495 experiment_summary.experiment_id
1496 FROM
1497 experiment_summary, experiment_map
1498 WHERE
1499 experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id
1500 );
1501 DELETE FROM
1502 experiment_summary
1503 WHERE
1504 experiment_id NOT IN (
1505 SELECT
1506 experiment_id
1507 FROM
1508 experiment
1509 );
1510 DELETE FROM
1511 time_slide
1512 WHERE
1513 time_slide_id NOT IN (
1514 SELECT DISTINCT
1515 time_slide_id
1516 FROM
1517 experiment_summary
1518 );
1519 """
1520 connection.cursor().executescript(sqlscript)
1521
1522
1524 """
1525 Cleaning up the experiment table by removing duplicate rows and remapping
1526 events to the appropriate experiment.
1527 """
1528
1529 all_tables = zip(*get_tables_in_database(connection))[0]
1530 table_names = ['experiment','experiment_summary']
1531
1532 if not set(table_names) - set(all_tables):
1533 if verbose:
1534 print >> sys.stdout, "\nCleaning experiment table..."
1535
1536
1537 connection.create_function("concat_7cols", 7, concatenate)
1538
1539
1540 cursor = connection.cursor()
1541
1542
1543 relevant_indices = get_user_created_indices(connection, table_names)
1544
1545 for idx, sql in relevant_indices:
1546 validate_option(idx)
1547 cursor.execute('DROP INDEX %s' % idx)
1548
1549 sqlscript = """
1550 -- create map table to map experiment_ids that are to be kept
1551 -- to experiment ids that are to be discarded, in the same manner
1552 -- as done above
1553
1554 CREATE TEMP TABLE expr_info AS
1555 SELECT
1556 expr.experiment_id AS eid,
1557 concat_7cols(expr.search, expr.search_group, expr.instruments,
1558 expr.gps_start_time, expr.gps_end_time,
1559 expr.lars_id, expr.comments) AS info
1560 FROM experiment AS expr;
1561
1562 CREATE TEMP TABLE _eidmap_ AS
1563 SELECT
1564 old_exp.eid AS old_eid,
1565 MIN(new_exp.eid) AS new_eid
1566 FROM
1567 expr_info AS old_exp
1568 JOIN expr_info AS new_exp ON (
1569 old_exp.info == new_exp.info)
1570 GROUP BY old_eid;
1571
1572 DROP TABLE expr_info;
1573 CREATE INDEX _eidmap_idx ON _eidmap_ (old_eid);
1574
1575 -- delete the old ids from the experiment table
1576 DELETE FROM experiment
1577 WHERE experiment_id NOT IN (
1578 SELECT new_eid
1579 FROM _eidmap_ );
1580
1581 -- update the experiment_ids in the experiment summary table
1582 UPDATE experiment_summary
1583 SET experiment_id = (
1584 SELECT new_eid
1585 FROM _eidmap_
1586 WHERE experiment_summary.experiment_id == old_eid);
1587
1588 DROP INDEX _eidmap_idx;
1589 DROP TABLE _eidmap_; """
1590 if debug:
1591 print >> sys.stderr, sqlscript
1592 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1593
1594 cursor.executescript( sqlscript )
1595
1596 connection.commit()
1597 if debug:
1598 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1599
1600
1601 for idx, sql in relevant_indices:
1602 cursor.execute(sql)
1603 connection.commit()
1604
1605 if debug:
1606 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
1607
1608 else:
1609 if verbose:
1610 print >> sys.stdout, "The database is lacks the experiment &/or experiment_summary table(s)."
1611
1612
1614 """
1615 Cleaning up the experiment_summary and the experiment_map tables
1616 by removing duplicate rows and remapping events to the appropriate
1617 experiment.
1618 """
1619
1620 all_tables = zip(*get_tables_in_database(connection))[0]
1621 table_names = ['experiment_map','experiment_summary']
1622
1623 if not set(table_names) - set(all_tables):
1624 if verbose:
1625 print >> sys.stdout, "\nCleaning experiment_summary & experiment_map tables..."
1626
1627
1628 connection.create_function("concat_5cols", 5, concatenate)
1629
1630
1631 cursor = connection.cursor()
1632
1633
1634 relevant_indices = get_user_created_indices(connection, table_names)
1635
1636 for idx, sql in relevant_indices:
1637 validate_option(idx)
1638 cursor.execute('DROP INDEX %s' % idx)
1639
1640 sqlscript = """
1641 -- experiment summary clean up
1642
1643 -- create a table to map esids to be deleted to esids to be saved
1644 CREATE TEMP TABLE expr_summ_info AS
1645 SELECT
1646 expr_summ.experiment_summ_id AS esid,
1647 concat_5cols(expr_summ.experiment_id, expr_summ.time_slide_id,
1648 expr_summ.veto_def_name, expr_summ.datatype, expr_summ.sim_proc_id) AS info
1649 FROM experiment_summary AS expr_summ;
1650
1651 CREATE INDEX expr_summ_info_idx ON expr_summ_info (info);
1652
1653 CREATE TEMP TABLE _esidmap_ AS
1654 SELECT
1655 old_expsumm.esid AS old_esid,
1656 MIN(new_expsumm.esid) AS new_esid
1657 FROM
1658 expr_summ_info AS old_expsumm
1659 JOIN expr_summ_info AS new_expsumm ON (
1660 old_expsumm.info == new_expsumm.info)
1661 GROUP BY old_esid;
1662
1663 DROP INDEX expr_summ_info_idx;
1664 DROP TABLE expr_summ_info;
1665
1666 CREATE INDEX _esidmap_idx on _esidmap_ (old_esid);
1667 CREATE INDEX es_dne_idx ON experiment_summary (duration,nevents);
1668
1669 -- sum durations and nevents
1670 CREATE TEMP TABLE sum_dur_nevents AS
1671 SELECT
1672 _esidmap_.new_esid AS esid,
1673 SUM(experiment_summary.duration) AS sum_dur,
1674 SUM(experiment_summary.nevents) AS sum_nevents
1675 FROM _esidmap_
1676 JOIN experiment_summary ON (
1677 _esidmap_.old_esid == experiment_summary.experiment_summ_id)
1678 GROUP BY esid;
1679
1680 DROP INDEX es_dne_idx;
1681 CREATE INDEX sdn_esid_index ON sum_dur_nevents (esid);
1682
1683 -- delete the old ids from the experiment_summary table
1684 DELETE FROM experiment_summary
1685 WHERE experiment_summ_id NOT IN (
1686 SELECT DISTINCT new_esid
1687 FROM _esidmap_ );
1688
1689 -- update the durations and the nevents
1690 UPDATE experiment_summary
1691 SET duration = (
1692 SELECT sum_dur
1693 FROM sum_dur_nevents
1694 WHERE sum_dur_nevents.esid == experiment_summary.experiment_summ_id),
1695 nevents = (
1696 SELECT sum_nevents
1697 FROM sum_dur_nevents
1698 WHERE sum_dur_nevents.esid == experiment_summary.experiment_summ_id);
1699
1700 DROP INDEX sdn_esid_index;
1701 DROP TABLE sum_dur_nevents;
1702
1703 -- update the experiment_map table
1704 UPDATE experiment_map
1705 SET experiment_summ_id = (
1706 SELECT new_esid
1707 FROM _esidmap_
1708 WHERE experiment_map.experiment_summ_id == old_esid);
1709
1710 DROP INDEX _esidmap_idx;
1711 DROP TABLE _esidmap_; """
1712 if debug:
1713 print >> sys.stderr, sqlscript
1714 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
1715
1716 cursor.executescript( sqlscript )
1717
1718 connection.commit()
1719 if debug:
1720 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1721
1722
1723 for idx, sql in relevant_indices:
1724 cursor.execute(sql)
1725 connection.commit()
1726
1727 if debug:
1728 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
1729
1730 else:
1731 if verbose:
1732 print >> sys.stdout, "The database is lacks the experiment_map &/or experiment_summary table(s)."
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1744 """
1745 Updates the number of events in the num_events column of the
1746 experiment_summary table. This should be used whenever coincs
1747 are deleted from the experiment_map table or when new files
1748 are added to a database.
1749 """
1750 if verbose:
1751 print >> sys.stderr, "Updating nevents column in experiment_summary table..."
1752
1753
1754 connection.cursor().execute("CREATE INDEX IF NOT EXISTS em_esi_index ON experiment_map(experiment_summ_id)")
1755
1756 sqlquery = """
1757 UPDATE experiment_summary
1758 SET nevents = (
1759 SELECT COUNT(*)
1760 FROM experiment_map
1761 WHERE experiment_map.experiment_summ_id == experiment_summary.experiment_summ_id )
1762 """
1763 connection.cursor().execute(sqlquery)
1764 if verbose:
1765 print >> sys.stderr, "done."
1766
1767
1769 """
1770 Class to map sim_proc_ids in the experiment summary table to simulation names
1771 and vice-versa.
1772 """
1774 self.id_tag_map = {}
1775 self.tag_id_map = {}
1776 sqlquery = """
1777 SELECT
1778 process_id,
1779 value
1780 FROM
1781 process_params
1782 WHERE
1783 process_id IN (
1784 SELECT DISTINCT
1785 sim_proc_id
1786 FROM
1787 experiment_summary )
1788 AND (param == "--userTag" OR param=="-userTag")
1789 """
1790 for proc_id, sim_tag in connection.cursor().execute(sqlquery):
1791 self.id_tag_map[proc_id] = sim_tag
1792 self.tag_id_map[sim_tag] = proc_id
1793
1795 return proc_id in self.id_tag_map and self.id_tag_map[proc_id] or None
1796
1798 return sim_tag in self.tag_id_map and self.tag_id_map[sim_tag] or None
1799
1800
1801
1802
1803
1804
1805
1806
1807
def clean_using_coinc_table( connection, table_name, verbose = False,
    clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True,
    clean_coinc_event_map = True, clean_mapped_tables = True, selected_tables = []):
    """
    Clears experiment_map, coinc_event, coinc_event_map, and all tables pointing to the
    coinc_event_map of triggers that are no longer in the specified table.
    Note that the experiment_summary, experiment, and time_slide_tables are left alone.
    This is because even if no events are left in an experiment, we still want info. about
    the experiment that was performed.

    @connection to a sqlite database
    @table_name: name of table on which basing the cleaning. Can be any table having a coinc_event_id
     column.
    @clean_experiment_map: if set to True will clean the experiment_map table
    @clean_coinc_event_table: if set to True will clean the coinc_event table
    @clean_coinc_definer: if set to True will clean the coinc_definer table if clean_coinc_event_table
     is set to True (the coinc_event_table is used to determine what coinc_definers to delete)
    @clean_coinc_event_map: if set to True, will clean the coinc_event_map
    @clean_mapped_tables: clean tables listed in the coinc_event_map who's event_ids are not not in
     the coinc_event_map
    @selected_tables: if clean_mapped_tables is on, will clean the listed tables if they appear in the
     coinc_event_map and have an event_id column. Default, [], is to clean all tables found.
     The requirement that the table has an event_id avoids cleaning simulation tables.
     NOTE(review): the default is a mutable list, but it is only rebound
     (never mutated) below, so the shared-default pitfall is not triggered.
    """
    # remove from experiment_map any mapping whose coinc_event_id no longer
    # appears in table_name
    if clean_experiment_map:
        if verbose:
            print >> sys.stderr, "Cleaning the experiment_map table..."
        sqlquery = ''.join(["""
            DELETE
            FROM experiment_map
            WHERE coinc_event_id NOT IN (
                SELECT coinc_event_id
                FROM """, table_name, ')' ])
        connection.cursor().execute( sqlquery )
        connection.commit()

    # remove stale rows from coinc_event_map, but leave rows whose
    # table_name column points at coinc_event or sim_* tables (those are
    # excluded via the skip_tables conditions appended to the WHERE clause)
    if clean_coinc_event_map:
        if verbose:
            print >> sys.stderr, "Cleaning the coinc_event_map table..."
        skip_tables = [ ''.join(['table_name != "', tname, '"'])
            for tname in get_cem_table_names(connection) if tname == 'coinc_event' or tname.startswith('sim_')
            ]

        sqlquery = ''.join([ """
            DELETE
            FROM coinc_event_map
            WHERE
                coinc_event_id NOT IN (
                    SELECT coinc_event_id
                    FROM """, table_name, ')',
            skip_tables != [] and ' AND\n\t\t'+' AND\n\t\t'.join(skip_tables) or '' ])
        connection.cursor().execute( sqlquery )
        connection.commit()

    # an empty selected_tables means "clean every table that appears in
    # the coinc_event_map"
    if clean_mapped_tables and selected_tables == []:
        selected_tables = get_cem_table_names(connection)

    # delete events in the mapped tables whose event_ids no longer appear
    # in the coinc_event_map; missing event_id columns are tolerated
    # (raise_err_on_missing_evid = False)
    if clean_mapped_tables:
        clean_mapped_event_tables( connection, selected_tables,
            raise_err_on_missing_evid = False, verbose = verbose )

    # remove coinc_event rows that no longer have any mapping; must run
    # after the coinc_event_map cleaning above
    if clean_coinc_event_table:
        if verbose:
            print >> sys.stderr, "Cleaning the coinc_event table..."
        sqlquery = """
            DELETE
            FROM coinc_event
            WHERE coinc_event_id NOT IN (
                SELECT DISTINCT coinc_event_id
                FROM coinc_event_map)"""
        connection.cursor().execute( sqlquery )
        connection.commit()

    # coinc_definer cleaning depends on coinc_event having been cleaned
    # first, since surviving coinc_def_ids are read from coinc_event
    if clean_coinc_definer and clean_coinc_event_table:
        if verbose:
            print >> sys.stderr, "Cleaning the coinc_definer table..."
        sqlquery = """
            DELETE
            FROM coinc_definer
            WHERE coinc_def_id NOT IN (
                SELECT coinc_def_id
                FROM coinc_event )"""
        connection.cursor().execute( sqlquery )
        connection.commit()
1901
1904 """
1905 Clears the given table of coinc triggers falling outside of the
1906 desired ranges, as specified by parse_param_ranges and parse_coinc_opts.
1907
1908 @connection: connection to a SQLite database with lsctables
1909 @coinc_table: name of the coinc_table to delete the triggers from.
1910 Can be any table with a coinc_event_id.
1911 @param_filters: output of parse_param_ranges(...).get_param_filters()
1912 @include_coincs: output of parse_coinc_opts(...).get_coinc_filters().
1913 The coincs that are specified in this list will be SAVED.
1914 @exclude_coincs: output of parse_coinc_opts(...).get_coinc_filters().
1915 The coincs that are specified in this list will be DELETED.
1916 Note: exclude_coincs is applied first, so anything falling in it will
1917 be deleted, regardless of wether or not the same falls in include_coincs.
1918 To avoid confusion, it is best to only specify one or the other, not both.
1919 """
1920 coinc_table = validate_option( coinc_table )
1921 if verbose:
1922 print >> sys.stderr, "Removing coincs from %s table that " % coinc_table + \
1923 "fall outside of desired ranges and coinc-types..."
1924
1925 join_conditions = join_experiment_tables_to_coinc_table( coinc_table )
1926
1927 if exclude_coincs:
1928 del_rows_from_table( connection, coinc_table, 'coinc_event_id',
1929 join_conditions,
1930 del_filters = exclude_coincs, verbose = verbose )
1931 if include_coincs:
1932 del_rows_from_table( connection, coinc_table, 'coinc_event_id',
1933 join_conditions,
1934 save_filters = include_coincs, verbose = verbose )
1935 if param_filters:
1936 del_rows_from_table( connection, coinc_table, 'coinc_event_id',
1937 join_conditions,
1938 save_filters = param_filters, verbose = verbose )
1939
1940
1941 clean_using_coinc_table( connection, coinc_table, verbose = verbose,
1942 clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True,
1943 clean_coinc_event_map = True, clean_mapped_tables = True )
1944
1945
1946
1947
1948
1949
1950
1951
1952
def write_newstyle_coinc_def_entry( connection, description, search = None, search_coinc_type = None ):
    """
    Adds a new entry to the coinc_definer_table. The only thing used to discriminate
    different coinc_definer entries is the description column. Search and search_coinc_type
    can also be optionally specified.

    @connection: connection to a sqlite database
    @description: value for the description column; used to decide whether a
     matching entry already exists
    @search: optional value for the search column
    @search_coinc_type: optional value for the search_coinc_type column; if
     None, one larger than the current maximum search_coinc_type is assigned
    Returns the coinc_def_id of the new (or already existing) entry.
    """
    sqlquery = "SELECT coinc_def_id FROM coinc_definer WHERE description == ?"
    results = connection.cursor().execute( sqlquery, (description,) ).fetchall()
    if not results:
        # no matching entry: insert a new row with the next available id
        sqlquery = 'INSERT INTO coinc_definer (coinc_def_id, description, search, search_coinc_type) VALUES (?, ?, ?, ?)'
        this_id = get_next_id( connection, 'coinc_definer', 'coinc_def_id' )
        # BUG FIX: test "is None" rather than truthiness so that a valid
        # caller-supplied search_coinc_type of 0 is not silently replaced
        if search_coinc_type is None:
            # auto-assign the next search_coinc_type after the current max
            last_coinc_type = connection.execute('SELECT MAX(search_coinc_type) FROM coinc_definer').fetchone()[0]
            if last_coinc_type is None:
                # table is empty; start counting from 1
                last_coinc_type = 0
            search_coinc_type = 1 + last_coinc_type
        connection.cursor().execute( sqlquery, (str(this_id), description, search, search_coinc_type) )
        connection.commit()
    else:
        # entry with this description already exists; return its id
        this_id = ilwd.ilwdchar(results.pop()[0])

    return this_id
1977
1979 """
1980 Retrieves values in the description column of the coinc_definer table.
1981 """
1982 sqlquery = """
1983 SELECT DISTINCT
1984 description
1985 FROM
1986 coinc_definer
1987 """
1988 return [lbl for (lbl,) in connection.cursor().execute(sqlquery).fetchall()]
1989
1991 """
1992 Retrieves all of the distinct map-label and coinc-type pairs in the database.
1993 A dictionary is returned in which they keys are the map-labels and the values
1994 are lists of tuples. Each tuple gives all of the tables mapped to a
1995 coinc_event_id.
1996 """
1997
1998 create_mapped_tables_func( connection )
1999 sqlquery = """
2000 SELECT DISTINCT
2001 coinc_definer.description,
2002 get_mapped_tables(a.table_name),
2003 get_mapped_tables(b.table_name)
2004 FROM
2005 coinc_event_map AS a
2006 JOIN
2007 coinc_definer, coinc_event
2008 ON (
2009 coinc_definer.coinc_def_id == coinc_event.coinc_def_id AND
2010 coinc_event.coinc_event_id == a.coinc_event_id )
2011 LEFT OUTER JOIN
2012 coinc_event_map AS b
2013 ON (
2014 a.event_id == b.coinc_event_id )
2015 GROUP BY
2016 coinc_event.coinc_event_id
2017 """
2018 coinc_types = {}
2019 for (map_type, tblsA, tblsB) in connection.cursor().execute(sqlquery).fetchall():
2020 coinc_types.setdefault( map_type, [] )
2021 if tblsB == '':
2022 coinc_types[ map_type ].append(set(tblsA.split(',')))
2023 else:
2024 coinc_types[ map_type ].append(set(tblsA.split(',') + tblsB.split(',')))
2025
2026 return coinc_types
2027
2029 """
2030 Deletes all mappings that have the given map_label in the description column of the coinc_definer table.
2031 """
2032
2033
2034
2035 sqlquery = """
2036 DELETE FROM
2037 coinc_definer
2038 WHERE
2039 description == ?"""
2040 connection.cursor().execute( sqlquery, (map_label,) )
2041 connection.commit()
2042 sqlquery = """
2043 DELETE FROM
2044 coinc_event
2045 WHERE
2046 coinc_def_id NOT IN (
2047 SELECT
2048 coinc_def_id
2049 FROM
2050 coinc_definer );
2051 DELETE FROM
2052 coinc_event_map
2053 WHERE
2054 coinc_event_id NOT IN (
2055 SELECT
2056 coinc_event_id
2057 FROM
2058 coinc_event );
2059 """
2060 connection.cursor().executescript(sqlquery)
2061
2063 """
2064 Remove duplicate entries in the coinc_definer table and update the coinc_event table
2065 with new coinc_def_ids.
2066 """
2067
2068 all_tables = zip(*get_tables_in_database(connection))[0]
2069 table_names = ['coinc_definer','coinc_event']
2070
2071 if not set(table_names) - set(all_tables):
2072 if verbose:
2073 print >> sys.stdout, "\nCleaning up the coinc_definer table..."
2074
2075
2076 cursor = connection.cursor()
2077
2078
2079 relevant_indices = get_user_created_indices(connection, table_names)
2080
2081 for idx, sql in relevant_indices:
2082 validate_option(idx)
2083 cursor.execute('DROP INDEX %s' % idx)
2084
2085 sqlscript = """
2086 -- Create a table that maps the coinc_definer_ids of redundant entries
2087 -- to those entries one is going to keep.
2088 CREATE TEMP TABLE _cdidmap_ AS
2089 SELECT
2090 old_cd_table.coinc_def_id AS old_cdid,
2091 MIN(new_cd_table.coinc_def_id) AS new_cdid
2092 FROM
2093 coinc_definer AS old_cd_table
2094 JOIN coinc_definer AS new_cd_table ON (
2095 new_cd_table.search == old_cd_table.search
2096 AND new_cd_table.search_coinc_type == old_cd_table.search_coinc_type
2097 AND new_cd_table.description == old_cd_table.description
2098 )
2099 GROUP BY old_cdid;
2100
2101 CREATE INDEX cdidmap_index ON _cdidmap_ (old_cdid);
2102
2103 -- Update the coinc_event table with new coinc_def_ids
2104 UPDATE coinc_event
2105 SET coinc_def_id = (
2106 SELECT new_cdid
2107 FROM _cdidmap_
2108 WHERE old_cdid == coinc_def_id);
2109
2110 -- Remove redundant entries in the coinc_definer table
2111 DELETE FROM coinc_definer
2112 WHERE coinc_def_id NOT IN (
2113 SELECT DISTINCT new_cdid
2114 FROM _cdidmap_ );
2115
2116 DROP INDEX cdidmap_index;
2117 DROP TABLE _cdidmap_; """
2118 if debug:
2119 print >> sys.stderr, sqlscript
2120 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
2121
2122 cursor.executescript( sqlscript )
2123
2124 connection.commit()
2125 if debug:
2126 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
2127
2128
2129 for idx, sql in relevant_indices:
2130 cursor.execute(sql)
2131 connection.commit()
2132
2133 if debug:
2134 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
2135
2136 else:
2137 if verbose:
2138 print >> sys.stdout, "This database lacks the coinc_event and coinc_definer tables."
2139
2140
    """
    Deletes all mappings that have the given coinc type.

    @connection: connection to a sqlite database
    @map_label: the type of mapping between the tables to delete
     (this is stored in the description column of the coinc_definer
     table)
    @coincTables: list of the table names involved in the
     coincidences to delete. These define the coinc type.
    """
    # Register the get_mapped_tables aggregate with 2 arguments so the
    # HAVING clause below can compute the set of tables each coinc maps to.
    create_mapped_tables_func(connection, 2)
    # Collect the coinc_event_ids of coincidences whose coinc_definer
    # description matches map_label and whose mapped tables are exactly
    # coincTables. The LEFT OUTER self-join on coinc_event_map follows
    # event_id -> coinc_event_id one level so nested coincs' table names
    # are included in the aggregate.
    sqlquery = """
        CREATE TEMP TABLE delete_coinc_types AS
            SELECT
                a.coinc_event_id AS ceid
            FROM
                coinc_event_map AS a
            LEFT OUTER JOIN
                coinc_event_map AS b
            ON (
                a.event_id == b.coinc_event_id )
            JOIN
                coinc_event, coinc_definer
            ON (
                a.coinc_event_id == coinc_event.coinc_event_id AND
                coinc_event.coinc_def_id == coinc_definer.coinc_def_id )
            WHERE
                coinc_definer.description == ?
            GROUP BY
                a.coinc_event_id
            HAVING
                get_mapped_tables(a.table_name, b.table_name) == ?
        """
    # coincTables is sorted and comma-joined to match the string produced
    # by the get_mapped_tables aggregate.
    connection.cursor().execute(sqlquery, (map_label, ','.join(sorted(coincTables)),))
    # Remove the selected coincs from coinc_event and coinc_event_map,
    # then drop coinc_definer rows that no longer have any coinc_events.
    sqlquery = """
        DELETE FROM
            coinc_event
        WHERE
            coinc_event_id IN (
                SELECT
                    ceid
                FROM
                    delete_coinc_types );
        DELETE FROM
            coinc_event_map
        WHERE
            coinc_event_id IN (
                SELECT
                    ceid
                FROM
                    delete_coinc_types );
        DELETE FROM
            coinc_definer
        WHERE
            coinc_def_id NOT IN (
                SELECT DISTINCT
                    coinc_def_id
                FROM
                    coinc_event );
        DROP TABLE delete_coinc_types;
        """
    connection.cursor().executescript(sqlquery)
    connection.commit()
2208
2209
2210
2211
2212
2213
2214
2215
2216
    """
    Writes N new entries in the coinc_event table, where N is given by num_new_entries.

    Returns the newly assigned coinc_event_ids as a list of ilwd.ilwdchar
    objects. Note that the insert is not committed here; the caller is
    responsible for calling connection.commit().
    """
    # get the next available coinc_event_id in the coinc_event table
    start_id = get_next_id( connection, 'coinc_event', 'coinc_event_id' )

    # build the rows to add: all share the given process, coinc_def and
    # time_slide ids and get consecutive coinc_event_ids
    new_entries = [(str(process_id), str(coinc_def_id), str(time_slide_id), str(start_id+ii)) for ii in range(num_new_entries)]

    sqlquery = 'INSERT INTO coinc_event (process_id, coinc_def_id, time_slide_id, coinc_event_id) VALUES (?, ?, ?, ?)'
    connection.cursor().executemany( sqlquery, new_entries )

    # the coinc_event_id is the last element of each inserted row
    return [ilwd.ilwdchar(new_id[-1]) for new_id in new_entries]
2230
2231
def update_coinctab_nevents( connection ):
    """
    Updates the nevents column based on what's in the coinc_event_map table.

    Each row in coinc_event gets nevents set to the number of
    coinc_event_map rows carrying the same coinc_event_id.
    (NOTE(review): def line reconstructed — missing from this chunk.)
    """
    sqlquery = """
        UPDATE
            coinc_event
        SET
            nevents = (
                SELECT
                    COUNT(*)
                FROM
                    coinc_event_map
                WHERE
                    coinc_event_map.coinc_event_id == coinc_event.coinc_event_id
            )"""
    # run the correlated-subquery update, then persist it
    cursor = connection.cursor()
    cursor.execute(sqlquery)
    connection.commit()
2250
2251
2252
2253
2254
2255
2256
2257
2258
def get_cem_table_names( connection ):
    """
    Retrieves the all of the table names present in the coinc_event_map table.
    (NOTE(review): def line reconstructed — missing from this chunk.)

    @connection: connection to a sqlite database
    """
    cursor = connection.cursor()
    rows = cursor.execute('SELECT DISTINCT table_name FROM coinc_event_map')
    return [row[0] for row in rows]
2267
2268
def get_matching_tables( connection, coinc_event_ids ):
    """
    Gets all the tables that are directly mapped to a list of coinc_event_ids.
    Returns a dictionary mapping the tables their matching coinc_event_ids.
    (NOTE(review): def line reconstructed — missing from this chunk.)

    @coinc_event_ids: list of coinc_event_ids to get table matchings for
    """
    matching_tables = {}
    sqlquery = """
        SELECT
            coinc_event_id,
            table_name
        FROM
            coinc_event_map"""
    # scan the whole map and keep only rows whose id is in the given list
    for ceid, table_name in connection.cursor().execute(sqlquery):
        if ceid in coinc_event_ids:
            matching_tables.setdefault(table_name, []).append(ceid)

    return matching_tables
2289
class get_mapped_tables:
    """
    Convenience class to retrieve all the tables mapped to a coinc_event_id.
    If added as an aggregate function to a connection, this can be called
    in a query. Example:

    from pylal import ligolw_sqlutils as sqlutils
    connection.create_aggregate('get_mapped_tables', 1, sqlutils.get_mapped_tables)
    sqlquery = '''
        SELECT
            coinc_event_id,
            get_mapped_tables(table_name)
        FROM
            coinc_event_map
        GROUP BY
            coinc_event_id
        '''
    connection.cursor().execute(sqlquery)

    This would return all the tables mapped to each coinc_event_id
    in the coinc_event_map table.
    """
    def __init__(self):
        # NOTE(review): __init__ reconstructed — its lines were missing from
        # this chunk; sqlite aggregates need per-instance state, so each
        # group starts with a fresh result list.
        self.result = []
    def step(self, *table_names):
        # accumulate the table_name value(s) of one row; NULLs become 'None'
        self.result.extend(map(str, table_names))
    def finalize(self):
        # comma-separated sorted unique names, with NULL placeholders dropped
        return ','.join(sorted(set(self.result)-set(['None'])))
2318
    """
    Creates a function in the database called get_mapped_tables that allows one
    to quickly get all the mapped tables to a coinc_event_id.

    nargs is the number of columns that will be passed to get_mapped_tables
    in queries (it must match the call sites' argument count).
    """
    connection.create_aggregate( 'get_mapped_tables', nargs, get_mapped_tables)
2325
    """
    Cleans tables given in tableList of events whose event_ids aren't in
    the coinc_event_map table.

    @connection: connection to a sqlite database
    @tableList: Any table with an event_id column.
    @raise_err_on_missing_evid: if set to True, will raise an error
     if an event_id column can't be found in any table in tableList.
     If False, will just skip the table.
    """
    # only operate on tables that actually have an event_id column
    selected_tables = [ table for table in tableList
        if 'event_id' in get_column_names_from_table( connection, table ) ]
    if selected_tables != tableList and raise_err_on_missing_evid:
        raise ValueError, "tables %s don't have event_id columns" % ', '.join([
            table for table in tableList if table not in selected_tables ])

    # delete any row whose event_id no longer appears in coinc_event_map
    for table in selected_tables:
        if verbose:
            print >> sys.stderr, "Cleaning the %s table..." % table
        # NOTE(review): table names cannot be bound as SQL parameters, hence
        # the string concatenation; tableList is assumed to be trusted input.
        sqlquery = ''.join([ """
            DELETE
            FROM """, table, """
            WHERE event_id NOT IN (
                SELECT event_id
                FROM coinc_event_map )""" ])
        connection.cursor().execute( sqlquery )
    connection.commit()
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2367 """
2368 Writes JOIN string to join the experiment, experiment_summary,
2369 and experiment_map table to the coinc_inspiral table.
2370 NOTE: Should only use when querying the coinc_inspiral table (i.e.,
2371 the only table listed in the FROM statement is the coinc_inspiral).
2372 """
2373
2374 return join_experiment_tables_to_coinc_table('coinc_inspiral')
2375
2376
2379 """
2380 Clears coinc_inspiral table of coinc triggers falling outside of the
2381 desired ranges, as specified by parse_param_ranges and parse_coinc_opts.
2382
2383 See apply_inclusion_rules_to_coinc_table for more info.
2384 """
2385 apply_inclusion_rules_to_coinc_table(connection, 'coinc_inspiral', exclude_coincs = exclude_coincs, include_coincs = include_coincs,
2386 param_filters = param_filters, verbose = verbose )
2387
2388
2390 """
2391 Clears experiment_map, coinc_event, coinc_event_map, and all tables pointing to the
2392 coinc_event_map of triggers that are no longer in the coinc_inspiral table.
2393 Note that the experiment_summary, experiment, and time_slide_tables are left alone.
2394 This is because even if no events are left in an experiment, we still want info. about
2395 the experiment that was performed.
2396 """
2397 clean_using_coinc_table( connection, 'coinc_inspiral', verbose = verbose,
2398 clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True,
2399 clean_coinc_event_map = True, clean_mapped_tables = True, selected_tables = [])
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2411 """
2412 Creates a temporary table in the sqlite database called sim_rec_map.
2413 This table creates a direct mapping between simulation_ids in the simulation table
2414 and coinc_event_ids from the recovery_table, along with a ranking stat from the
2415 recovery_table.
2416 The columns in the sim_rec_map table are:
2417 * rec_id: coinc_event_ids of matching events from the recovery table
2418 * sim_id: the simulation_id from the sim_inspiral table
2419 * ranking_stat: any stat from the recovery table by which to rank
2420 In addition, indices on the sim and rec ids are put on the table.
2421
2422 Note that because this is a temporary table, as soon as the connection is
2423 closed, it will be deleted.
2424
2425 @connection: connection to a sqlite database
2426 @simulation_table: any lsctable with a simulation_id column; e.g., sim_inspiral
2427 @recovery_table: any lsctable with a coinc_event_id column; e.g., coinc_inspiral
2428 @map_label: the label applied to the mapping between the injections and recovered
2429 @ranking_stat: the name of the ranking stat in the recovery table to use.
2430 If set to None, ranking_stat column won't be populated.
2431 """
2432
2433 if 'sim_rec_map' in get_tables_in_database(connection):
2434 sqlquery = 'DROP TABLE sim_rec_map'
2435 connection.cursor().execute(sqlquery)
2436
2437 sqlquery = ''.join(['''
2438 CREATE TEMP TABLE
2439 sim_rec_map
2440 AS
2441 SELECT
2442 sim.simulation_id AS sim_id,
2443 rec_coinc.coinc_event_id AS rec_id,
2444 NULL AS ranking_stat
2445 FROM
2446 ''', recovery_table, ''' AS rec_coinc
2447 JOIN
2448 coinc_event_map AS a
2449 ON (
2450 a.event_id == rec_coinc.coinc_event_id AND
2451 a.table_name == "coinc_event" )
2452 JOIN
2453 coinc_event_map AS b
2454 ON (
2455 b.coinc_event_id == a.coinc_event_id)
2456 JOIN
2457 ''', simulation_table, ''' AS sim
2458 ON (
2459 b.event_id == sim.simulation_id)
2460 JOIN
2461 coinc_event, coinc_definer
2462 ON (
2463 coinc_event.coinc_event_id == b.coinc_event_id AND
2464 coinc_definer.coinc_def_id == coinc_event.coinc_def_id
2465 )
2466 WHERE
2467 coinc_definer.description == ?
2468 '''])
2469 connection.cursor().execute(sqlquery, (map_label,))
2470
2471
2472 sqlquery = '''
2473 CREATE INDEX srm_sid_index ON sim_rec_map (sim_id);
2474 CREATE INDEX srm_rid_index ON sim_rec_map (rec_id);
2475 '''
2476 connection.cursor().executescript(sqlquery)
2477
2478 if ranking_stat is not None:
2479
2480 if not ranking_stat.strip().startswith(recovery_table):
2481 ranking_stat = '.'.join([recovery_table.strip(), ranking_stat.strip()])
2482
2483 sqlquery = ''.join([ '''
2484 UPDATE
2485 sim_rec_map
2486 SET ranking_stat = (
2487 SELECT
2488 ''', ranking_stat, '''
2489 FROM
2490 ''', recovery_table, '''
2491 WHERE
2492 ''', recovery_table, '''.coinc_event_id == sim_rec_map.rec_id );
2493 ''' ])
2494
2495 connection.cursor().execute(sqlquery)
2496
2497
2499 """
2500 Remove duplicates from simulation tables (sim_inspiral & sim_ringdown)
2501 if those tables exist in the database. Also update the sim_proc_id column
2502 in the experiment_summary table.
2503 """
2504
2505
2506 cursor = connection.cursor()
2507
2508
2509 old_pids = get_pids_to_update(cursor, ['inspinj','rinj','gstlal_injections_by_local_rate'])
2510
2511
2512 all_tables = zip(*get_tables_in_database(connection))[0]
2513
2514 if old_pids and [tbl for tbl in all_tables if 'sim_' in tbl]:
2515 if verbose:
2516 print >> sys.stdout, "\nCleaning simulation tables..."
2517
2518
2519 table_names = ['sim_inspiral','sim_ringdown','experiment_summary']
2520 relevant_indices = get_user_created_indices(connection, table_names)
2521
2522 for idx, sql in relevant_indices:
2523 validate_option(idx)
2524 cursor.execute('DROP INDEX %s' % idx)
2525
2526
2527 sqlscript = ""
2528 if 'sim_inspiral' in all_tables:
2529 sqlscript += """
2530 DELETE FROM sim_inspiral
2531 WHERE process_id NOT IN (
2532 SELECT DISTINCT new_pid
2533 FROM _pidmap_
2534 WHERE program = 'inspinj'); """
2535
2536 if ('sim_ringdown',) in all_tables:
2537 sqlscript += """
2538 DELETE FROM sim_ringdown
2539 WHERE process_id NOT IN (
2540 SELECT DISTINCT new_pid
2541 FROM _pidmap_
2542 WHERE program = 'rinj'); """
2543
2544 if 'experiment_summary' in all_tables:
2545 sqlscript += """
2546 -- Update sim_proc_ids in the experiment_summary table
2547 UPDATE experiment_summary
2548 SET sim_proc_id = (
2549 SELECT DISTINCT new_pid
2550 FROM _pidmap_
2551 WHERE old_pid == sim_proc_id ); """
2552 if debug:
2553 print >> sys.stderr, sqlscript
2554 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
2555
2556 cursor.executescript( sqlscript )
2557
2558 connection.commit()
2559 if debug:
2560 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
2561
2562
2563 for idx, sql in relevant_indices:
2564 cursor.execute(sql)
2565 connection.commit()
2566
2567 if debug:
2568 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
2569
2570 else:
2571 if verbose:
2572 if not [tbl for tbl in all_tables if 'sim_' in tbl]:
2573 print >> sys.stdout, "This database lacks a simulation table."
2574 else:
2575 print >> sys.stdout, "The simulation tables lack duplicates."
2576 cursor.close()
2577
2578
2579
2580
2581
2582
2583
2584
2585
2626
2627
2629 """
2630 Removing duplicate row in the segment, segment_definer and segment_summary
2631 tables as well as the associated entries in the process & process_params tables
2632 """
2633
2634 all_tables = zip(*get_tables_in_database(connection))[0]
2635 if 'segment' in all_tables:
2636 if verbose:
2637 print >> sys.stdout, "\nCleaning up the segments tables..."
2638
2639
2640 connection.create_function("concat_2cols", 2, concatenate)
2641
2642
2643 cursor = connection.cursor()
2644
2645
2646 table_names = ['process','process_params',
2647 'segment','segment_definer','segment_summary']
2648 relevant_indices = get_user_created_indices(connection, table_names)
2649
2650 for idx, sql in relevant_indices:
2651 validate_option(idx)
2652 cursor.execute('DROP INDEX %s' % idx)
2653
2654 sqlscript = """
2655 CREATE INDEX sd_pisdi_index ON segment_definer (process_id, segment_def_id);
2656 CREATE INDEX ss_pisdi_index ON segment_summary (process_id, segment_def_id);
2657
2658 CREATE TEMP TABLE segdef_summary AS
2659 SELECT
2660 segment_definer.process_id AS proc_id,
2661 segment_definer.ifos AS ifo,
2662 concat_2cols(segment_definer.name, segment_definer.version) AS cat_vers,
2663 concat_2cols(segment_summary.start_time, segment_summary.end_time) AS times
2664 FROM
2665 segment_definer
2666 JOIN segment_summary ON (
2667 segment_definer.process_id == segment_summary.process_id
2668 AND segment_definer.segment_def_id == segment_summary.segment_def_id)
2669 GROUP BY proc_id;
2670
2671 DROP INDEX sd_pisdi_index;
2672 DROP INDEX ss_pisdi_index;
2673
2674 CREATE TEMP TABLE _sdpid_map_ AS
2675 SELECT
2676 old_segs_tbl.proc_id AS old_pid,
2677 MIN(new_segs_tbl.proc_id) AS new_pid
2678 FROM
2679 segdef_summary AS old_segs_tbl
2680 JOIN segdef_summary AS new_segs_tbl ON (
2681 new_segs_tbl.ifo == old_segs_tbl.ifo
2682 AND new_segs_tbl.times == old_segs_tbl.times
2683 AND new_segs_tbl.cat_vers == old_segs_tbl.cat_vers )
2684 GROUP BY old_pid;
2685
2686 DROP TABLE segdef_summary;
2687 CREATE INDEX sdpid_idx ON _sdpid_map_ (old_pid, new_pid);
2688
2689 DELETE FROM segment
2690 WHERE process_id NOT IN (
2691 SELECT DISTINCT new_pid
2692 FROM _sdpid_map_ );
2693 DELETE FROM segment_definer
2694 WHERE process_id NOT IN (
2695 SELECT DISTINCT new_pid
2696 FROM _sdpid_map_ );
2697 DELETE FROM segment_summary
2698 WHERE process_id NOT IN (
2699 SELECT DISTINCT new_pid
2700 FROM _sdpid_map_ );
2701
2702 DELETE FROM process
2703 WHERE process_id IN (
2704 SELECT old_pid
2705 FROM _sdpid_map_
2706 WHERE old_pid != new_pid );
2707 DELETE FROM process_params
2708 WHERE process_id IN (
2709 SELECT old_pid
2710 FROM _sdpid_map_
2711 WHERE old_pid != new_pid );
2712
2713 DROP INDEX sdpid_idx;
2714 DROP TABLE _sdpid_map_; """
2715
2716 if debug:
2717 print >> sys.stderr, sqlscript
2718 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
2719
2720 cursor.executescript( sqlscript )
2721
2722 connection.commit()
2723 if debug:
2724 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
2725
2726
2727 for idx, sql in relevant_indices:
2728 cursor.execute(sql)
2729 connection.commit()
2730
2731 if debug:
2732 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
2733
2734 cursor.close()
2735
2736 else:
2737 if verbose:
2738 print >> sys.stdout, "There are no segments tables in this database"
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2751 """
2752 Gets zero-lag time_slide_id's from the time_slide_table.
2753 """
2754 sqlquery = 'SELECT time_slide_id, offset FROM time_slide GROUP BY time_slide_id'
2755 slide_ids = connection.cursor().execute( sqlquery )
2756 zero_lag_ids = [slide_id[0] for slide_id in slide_ids if slide_id[1] == 0.]
2757
2758 return zero_lag_ids
2759
2760
2762 """
2763 Gets instrument sets from time slide table by using the ids of the zero-lag
2764 time-slides (Assumption is there is a zero-lag row in the time-slide table).
2765 """
2766 zero_lag_ids = get_zero_lag_time_slide_ids( connection )
2767
2768
2769 if not zero_lag_ids:
2770 raise ValueError, "No zero-lag ids in time slide table, cannot get instrument set."
2771
2772 zero_lag_instrument_sets = {}
2773 for id in zero_lag_ids:
2774 id = ''.join([ '"', id, '"' ])
2775 sqlquery = ' '.join(['SELECT instrument',
2776 'FROM time_slide',
2777 'WHERE time_slide_id ==', id ])
2778 instruments = sorted(instrument[0] for instrument in connection.cursor().execute( sqlquery ).fetchall())
2779 if instruments not in zero_lag_instrument_sets:
2780 zero_lag_instrument_sets[ instruments ] = [ id ]
2781 else:
2782 zero_lag_instrument_sets[ instruments ].append( id )
2783
2784 return zero_lag_instrument_sets
2785
2786
2788 """
2789 Gets all instrument sets available in the time slide table and gets all
2790 time-slide ids associated with that instrument set. Since this only uses the
2791 time-slide table, will get everything even if there were no coincident
2792 events during a time-slide.
2793 """
2794
2795 zero_lag_instrument_sets = get_zero_lag_instrument_sets( connection )
2796
2797 instrument_set_time_slide_ids = {}
2798
2799 for instrument_set in zero_lag_instrument_sets:
2800 instrument_clause = ' AND '.join([ ''.join([
2801 'instrument == ', '"', instrument, '"' ]) for instrument in instrument_set ])
2802 sqlquery = ' '.join([
2803 'SELECT time_slide_id',
2804 'FROM time_slide',
2805 'WHERE', instrument_set ])
2806 instrument_set_time_slide_ids[ instrument_set ] = [ id[0] for id in
2807 connection.cursor().execute(sqlquery) ]
2808
2809 return instrument_set_time_slide_ids
2810
2811
2813 """
2814 Remove duplicate entries in the time_slide table and update entries in the
2815 the time_slide_id column of both the experiment_summary and coinc_event
2816 tables.
2817 """
2818
2819
2820 cursor = connection.cursor()
2821
2822 all_tables = zip(*get_tables_in_database(connection))[0]
2823 if 'time_slide' in all_tables:
2824 if verbose:
2825 print >> sys.stdout, "\nClean up the time_slide table ..."
2826
2827
2828 table_names = ['time_slide','experiment_summary','coinc_event']
2829 relevant_indices = get_user_created_indices(connection, table_names)
2830
2831 for idx, sql in relevant_indices:
2832 validate_option(idx)
2833 cursor.execute('DROP INDEX %s' % idx)
2834
2835 sqlscript = """
2836 -- Create a table that combines the information about a single time_slide into
2837 -- a single row. This makes comparison between time_slides easier.
2838
2839 CREATE TEMP TABLE compact_time_slide AS
2840 SELECT
2841 time_slide_id AS tsid,
2842 group_concat(instrument) AS ifos,
2843 group_concat(offset) AS offset
2844 FROM time_slide
2845 GROUP BY time_slide_id;
2846
2847 CREATE INDEX cts_io_idx ON compact_time_slide (ifos, offset);
2848
2849 -- Create a table that maps the time_slide_ids of redundant time_slide entries
2850 -- to those entries one is going to keep.
2851 CREATE TEMP TABLE _tsidmap_ AS
2852 SELECT
2853 old_ts_table.tsid AS old_tsid,
2854 MIN(new_ts_table.tsid) AS new_tsid
2855 FROM
2856 compact_time_slide AS old_ts_table
2857 JOIN compact_time_slide AS new_ts_table ON (
2858 new_ts_table.ifos == old_ts_table.ifos
2859 AND new_ts_table.offset == old_ts_table.offset)
2860 GROUP BY old_tsid;
2861
2862 DROP INDEX cts_io_idx;
2863 DROP TABLE compact_time_slide;
2864
2865 CREATE INDEX _tsidmap_idx ON _tsidmap_ (old_tsid);
2866
2867 -- Delete the redundant entries in the time_slide table
2868 DELETE FROM time_slide
2869 WHERE time_slide_id NOT IN (
2870 SELECT DISTINCT new_tsid
2871 FROM _tsidmap_);
2872
2873 UPDATE time_slide
2874 SET process_id = (
2875 SELECT new_pid
2876 FROM _pidmap_
2877 WHERE old_pid = process_id);
2878 """
2879
2880
2881 if 'coinc_event' in all_tables:
2882 sqlscript += """
2883 UPDATE coinc_event
2884 SET time_slide_id = (
2885 SELECT new_tsid
2886 FROM _tsidmap_
2887 WHERE old_tsid == time_slide_id);
2888 """
2889
2890 if 'experiment_summary' in all_tables:
2891 sqlscript += """
2892 UPDATE experiment_summary
2893 SET time_slide_id = (
2894 SELECT new_tsid
2895 FROM _tsidmap_
2896 WHERE old_tsid == time_slide_id);
2897 """
2898 sqlscript += """
2899 DROP INDEX _tsidmap_idx;
2900 DROP TABLE _tsidmap_; """
2901
2902 if debug:
2903 print >> sys.stderr, sqlscript
2904 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
2905
2906 cursor.executescript( sqlscript )
2907
2908 connection.commit()
2909 if debug:
2910 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
2911
2912
2913 for idx, sql in relevant_indices:
2914 cursor.execute(sql)
2915 connection.commit()
2916
2917 if debug:
2918 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
2919
2920 else:
2921 if verbose:
2922 if 'time_slide' not in all_tables:
2923 print >> sys.stdout, "There is no time_slide table in this database."
2924 else:
2925 print >> sys.stdout, "The time_slide table lacks any duplicates."
2926 cursor.close()
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2938 """
2939 Cleaning up the veto_definer table as well as the associated
2940 entries in the process & process_params tables
2941 """
2942
2943 all_tables = zip(*get_tables_in_database(connection))[0]
2944 if 'veto_definer' in all_tables:
2945 if verbose:
2946 print >> sys.stdout, "\nCleaning up the veto_definer table..."
2947
2948
2949 connection.create_function("concat_7cols", 7, concatenate)
2950
2951
2952 cursor = connection.cursor()
2953
2954
2955 table_names = ['process','veto_definer']
2956 relevant_indices = get_user_created_indices(connection, table_names)
2957
2958 for idx, sql in relevant_indices:
2959 validate_option(idx)
2960 cursor.execute('DROP INDEX %s' % idx)
2961
2962 sqlscript = """
2963 CREATE TEMP TABLE veto_procinfo AS
2964 SELECT
2965 process_id,
2966 concat_7cols(process.program, process.version, process.username,
2967 process.ifos, process.cvs_entry_time, process.cvs_repository,
2968 process.comment) AS process_info
2969 FROM process
2970 WHERE
2971 process.process_id IN (
2972 SELECT DISTINCT veto_definer.process_id
2973 FROM veto_definer )
2974 GROUP BY process.process_id;
2975
2976 CREATE TEMP TABLE _veto_pidmap_ AS
2977 SELECT
2978 old_procinfo.process_id AS old_pid,
2979 MIN(new_procinfo.process_id) AS new_pid
2980 FROM
2981 veto_procinfo AS old_procinfo
2982 JOIN veto_procinfo AS new_procinfo ON (
2983 new_procinfo.process_info == old_procinfo.process_info)
2984 GROUP BY old_pid;
2985
2986 DROP TABLE veto_procinfo;
2987
2988 DELETE FROM process
2989 WHERE process_id IN (
2990 SELECT old_pid
2991 FROM _veto_pidmap_
2992 WHERE old_pid != new_pid );
2993 DELETE FROM veto_definer
2994 WHERE process_id NOT IN (
2995 SELECT DISTINCT new_pid
2996 FROM _veto_pidmap_ );
2997
2998 DROP TABLE _veto_pidmap_; """
2999 if debug:
3000 print >> sys.stderr, sqlscript
3001 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6])
3002
3003 cursor.executescript( sqlscript )
3004
3005 connection.commit()
3006 if debug:
3007 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
3008
3009
3010 for idx, sql in relevant_indices:
3011 cursor.execute(sql)
3012 connection.commit()
3013
3014 if debug:
3015 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6])
3016
3017 cursor.close()
3018
3019 else:
3020 if verbose:
3021 print >> sys.stdout, "This database lacks a veto_definer table."
3022