Package pylal :: Module ligolw_sqlutils

Source Code for Module pylal.ligolw_sqlutils

   1  # 
   2  # ============================================================================= 
   3  # 
   4  #                                   Preamble 
   5  # 
   6  # ============================================================================= 
   7  # 
   8   
   9  import sqlite3 
  10   
  11  import sys 
  12  import re 
  13  import os 
  14  import bisect 
  15  import copy 
  16  import time 
  17  import pdb 
  18   
  19  from glue.ligolw import dbtables 
  20  from glue.ligolw import lsctables 
  21  from glue.ligolw import ilwd 
  22  from glue import git_version 
  23   
  24  __author__ = "Collin Capano <cdcapano@physics.syr.edu>" 
  25  __version__ = git_version.verbose_msg 
  26   
  27  """ 
  28  A collection of utilities to assist in carrying out operations on a SQLite 
  29  database containing lsctables. 
  30  """ 
  31   
  32  # ============================================================================= 
  33  # 
  34  #                           Generic Utilities 
  35  # 
  36  # ============================================================================= 
  37   
  38  # Following utilities are generic sqlite utilities and can be used with any table  
  39   
def concatenate( *args ):
    """
    SQLite doesn't have a tuple type built-in. This can be frustrating if one
    needs to compare values from multiple columns when doing queries. For example,
    if one wanted to do something like:

        connection.cursor().execute('''
            SELECT *
            FROM a
            WHERE (a.val1, a.val2) IN (
                SELECT (b.val1, b.val2)
                FROM b)
            ''')

    an error would be raised.

    This function tries to alleviate the problem by giving the ability to concatenate
    results from multiple columns into a single colon-separated string. These strings can then be
    compared directly. So, in the above example, one would do:

        from pylal import ligolw_sqlutils as sqlutils
        connection.create_function("concatenate", 2, sqlutils.concatenate)
        connection.cursor().execute('''
            SELECT *
            FROM a
            WHERE concatenate(a.val1, a.val2) IN (
                SELECT concatenate(b.val1, b.val2)
                FROM b)
            ''')

    Note that the create_function method must be called first with the number of
    values that will be passed to concatenate before using it in any query.
    """
    return ':'.join([str(val) for val in args])

class aggregate_concatenate:
    """
    This class builds on the concatenate method to allow string concatenation
    across multiple columns and rows. These strings can then be compared in
    SQLite. For example, if one wanted to match ids from two different tables that share
    the same values, one would do:

        from pylal import ligolw_sqlutils as sqlutils
        connection.create_aggregate("agg_concatenate", 2, sqlutils.aggregate_concatenate)
        connection.cursor().execute('''
            SELECT a.id, b.id
            FROM a, b
            WHERE
                (
                    SELECT agg_concatenate(a.val1, a.val2)
                    FROM a
                    GROUP BY id
                    ORDER BY a.val1, a.val2 ASC
                ) == (
                    SELECT agg_concatenate(b.val1, b.val2)
                    FROM b
                    GROUP BY id
                    ORDER BY b.val1, b.val2 ASC
                )
            ''')

    In the strings that are created, rows are separated by ",", columns by ":".

    Note that the create_aggregate method must be called first with the number of
    values that will be passed to aggregate_concatenate before using it in any query.
    """
    def __init__(self):
        self.result = ''
    def step(self, *args):
        self.result = ','.join([ self.result, concatenate(*args) ])
    def finalize(self):
        return self.result.lstrip(',')

def validate_option(option, lower = True):
    """
    Strips and checks that there are no newlines, tabs, spaces, or semi-colons in the given option.
    This should be used for options that will be plugged into sqlite statements to
    protect against injection attacks. If lower is set to True, will also make all letters
    lower-case in the option.

    @option: option from config parser to validate
    @lower: if set to True, will make all letters lower-case in the option
    """
    option = option.strip()
    if lower:
        option = option.lower()

    if re.search(r'\n|\t| |;', option) is not None:
        raise ValueError, "option %s contains illegal characters" % option

    return option

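A minimal usage sketch (illustrative, not part of the original module): validate_option is meant to sanitize table and column names before they are string-formatted into SQL, so the typical pattern is to pass any user-supplied option through it and let the ValueError propagate:

    from pylal import ligolw_sqlutils as sqlutils

    table_name = sqlutils.validate_option(" Coinc_Inspiral ")   # returns 'coinc_inspiral'
    # anything containing whitespace or a semi-colon is rejected:
    # sqlutils.validate_option("process; DROP TABLE process")   # raises ValueError
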
133 -class parse_param_ranges:
134
135 - def __init__( self, table_name, table_param, param_ranges_opt, verbose = False ):
136 """ 137 Parse --param-ranges option. Creates self.param which is the table_name and 138 the table_param appended together (with a '.') and self.param_ranges, which is 139 a list of tuples that give the lower parameter value, whether it is an open or 140 closed boundary, and the same for the upper parameter. For example, if 141 table_name is coinc_inspiral, table_param is mchirp and param_ranges_opt 142 is '[2,8);[8,17]' will get: 143 self.param = 'coinc_inspiral.mchirp' 144 self.param_ranges = 145 [ ( ('>=',2.0), ('<',8.0) ), 146 ( ('>=',8.0), ('<=', 17.0) ) ] 147 148 @table_name: Name of coinc_table in which the desired parameter is a column. 149 @table_param: Parameter in the table on which to separate rows. 150 @param_ranges_opt: string from the --param-ranges option. Param-ranges must 151 follow these format rules: 152 * A '(' or ')' implies an open boundary, a '[' or ']' a closed boundary. 153 * To specify multiple ranges, separate each range by a ';'. 154 * To specify equal to a single value, just specify the value, e.g., '2.3' 155 * To specify not-equal to a single value, put a ! infront of the value, e.g., '!2.3'. 156 @verbose: be verbose 157 """ 158 if verbose: 159 print >> sys.stderr, "Parsing param-ranges..." 160 161 self.param = None 162 self.param_ranges = [] 163 164 # check that table_name and table_param have no illegal characters in them 165 table_name = validate_option( table_name ) 166 if re.search(r'\n|\t|DROP|DELETE', table_param) is not None: 167 raise ValueError, r'param-name cannot have "\n","\t", "DROP", or "DELETE" in it' 168 table_param = table_param.strip() 169 170 # append table_name if it isn't already in the table_param name 171 if table_param.find( table_name+'.' ) == -1: 172 table_param = '.'.join([ table_name, table_param ]) 173 174 self.param = table_param 175 176 ranges = param_ranges_opt.split(';') 177 178 for this_range in ranges: 179 180 # check if it's a range or number 181 if re.search('\]|\[|\)|\(', this_range) is None: 182 this_range = this_range.strip() 183 # check if it's a not equal 184 if this_range.startswith('!'): 185 btest = '!=' 186 param = this_range.lstrip('!') 187 else: 188 btest = '==' 189 param = this_range 190 # try to convert to a float; if can't just leave as string 191 try: 192 param = float(param) 193 except ValueError: 194 pass 195 self.param_ranges.append( ((btest, param),) ) 196 197 else: 198 # get lower-bound 199 lowerparam = this_range.split(',')[0].strip() 200 # check if lower boundary open or closed 201 if lowerparam.find('[') != -1: 202 lowerbndry = '>=' 203 lowerparam = float( lowerparam.lstrip('[') ) 204 elif lowerparam.find('(') != -1: 205 lowerbndry = '>' 206 lowerparam = float( lowerparam.lstrip('(') ) 207 else: 208 raise ValueError, "Parameter range %s not formatted correctly" % this_range 209 210 # get upper-bound (similar to lower bound method) 211 upperparam = this_range.split(',')[1].strip() 212 if upperparam.find(']') != -1: 213 upperbndry = '<=' 214 upperparam = float( upperparam.rstrip(']') ) 215 elif upperparam.find(')') != -1: 216 upperbndry = '<' 217 upperparam = float( upperparam.rstrip(')') ) 218 else: 219 raise ValueError, "Parameter range %s not formatted correctly" % this_range 220 221 # add param to filters 222 self.param_ranges.append( ((lowerbndry, lowerparam), (upperbndry, upperparam)) )
223 224
225 - def get_param_name( self ):
226 return self.param
227 228
229 - def get_param_ranges( self ):
230 return self.param_ranges
231 232
233 - def get_param_filters( self ):
234 """ 235 Converts param_ranges into a list of strings that can be used in 236 a SQLite WHERE clause. For example, if table_name is coinc_inspiral, 237 table_param is mchirp and param_ranges_opt is '[2,8);[8,17]' the 238 elements in the returned list will be: 239 ['coinc_inspiral.mchirp >= 2.0 AND coinc_inspiral.mchirp < 8.0', 240 'coinc_inspiral.mchirp >= 8.0 AND coinc_inspiral.mchirp <= 17.0'] 241 """ 242 self.param_filters = [] 243 # construct paramfilter for SQL statement 244 for range in self.param_ranges: 245 if len(range) == 1: 246 btest = range[0][0] 247 param = range[0][1] 248 if isinstance(param, str): 249 param = param.join(['"','"']) 250 else: 251 param = str(param) 252 self.param_filters.append( ' '.join([ self.param, btest, param ]) ) 253 else: 254 lowerbndry = range[0][0] 255 lowerparam = str( range[0][1] ) 256 upperbndry = range[1][0] 257 upperparam = str( range[1][1] ) 258 self.param_filters.append( ' '.join([ '(', self.param, lowerbndry, lowerparam, 259 'AND', self.param, upperbndry, upperparam, ')' ]) ) 260 261 return self.param_filters
262 263
264 - def group_by_param_range( self, param_value ):
265 """ 266 Takes in a value and returns a number corresponding to 267 which value param_range it falls in. 268 """ 269 for n, range in enumerate(self.param_ranges): 270 # see if it's a range or boolean test 271 if len(range) == 1: 272 btest = range[0][0] 273 param = range[0][1] 274 if btest == '==' and param == param_value: 275 return n 276 if btest == '!=' and param != param_value: 277 return n 278 else: 279 # set boundry conditions and parameters 280 lowerbndry = range[0][0] 281 lowerparam = range[0][1] 282 upperbndry = range[1][0] 283 upperparam = range[1][1] 284 # the following works by checking what the boundaries are 285 # and then checking if the param value is within those boundaries: 286 # if [a,b] 287 if ((lowerbndry, upperbndry) == ('>=', '<=')) and \ 288 (param_value >= lowerparam and param_value <= upperparam): 289 return n 290 # if (a,b] 291 if ((lowerbndry, upperbndry) == ('>', '<=')) and \ 292 (param_value > lowerparam and param_value <= upperparam): 293 return n 294 # if [a,b) 295 if ((lowerbndry, upperbndry) == ('>=', '<')) and \ 296 (param_value >= lowerparam and param_value < upperparam): 297 return n 298 # if (a,b) 299 if ((lowerbndry, upperbndry) == ('>', '<')) and \ 300 (param_value > lowerparam and param_value < upperparam): 301 return n 302 303 # if get to here, param_value falls outside all the ranges; 304 # just return None 305 return None
306
307 - def param_range_by_group( self, group_num ):
308 """ 309 Takes in a group number as returned by group_by_param_range 310 and returns a string representing that group. 311 """ 312 this_range = self.param_ranges[group_num] 313 if len(this_range) > 1: 314 range_str = '%s%.2f,%.2f%s' % ( 315 this_range[0][0] == '>=' and '[' or this_range[0][0] == '>' and '(', 316 float(this_range[0][1]), 317 float(this_range[1][1]), 318 this_range[1][0] == '<=' and ']' or this_range[1][0] == '<' and ')' 319 ) 320 else: 321 range_str = '%s %.2f' % ( this_range[0][0], this_range[0][1] ) 322 323 return range_str
324 325
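An illustrative sketch of how parse_param_ranges might be used (not from the original source; the values follow the examples in the docstrings above):

    from pylal import ligolw_sqlutils as sqlutils

    ranges = sqlutils.parse_param_ranges( 'coinc_inspiral', 'mchirp', '[2,8);[8,17]' )
    ranges.get_param_name()             # -> 'coinc_inspiral.mchirp'
    ranges.get_param_filters()          # e.g. '( coinc_inspiral.mchirp >= 2.0 AND coinc_inspiral.mchirp < 8.0 )', ...
    ranges.group_by_param_range( 3.1 )  # -> 0, since 3.1 falls in the first bin [2,8)
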
326 -class parse_coinc_options:
327
328 - def __init__( self, coincs_opt, verbose = False):
329 """ 330 Parses --exclude-coincs and --include-coincs options. The class doesn't 331 care whether it's --include or exclude; it just takes in the input and 332 creates self.coinc_types, which is a dictionary of coinc types in which the 333 keys are the type of time and the values are the coincidence type. For example, 334 if either --include-coincs or --exclude coincs is set to "[h2,l1 in h1,h2,l1]" 335 self.coinc_types will be: 336 coinc_types[ frozenset(H1,H2,L1) ] = set(H2,L1) 337 338 339 @coincs_opt: the input from either --exclude-coincs or --include-coincs. 340 This input must follow these format rules (the following can be copied 341 into the help message for --input/exclude-coincs opts): 342 * Coinc-types and detector time must be separated by 343 an ' in '. When specifying a coinc_type or detector 344 time, detectors and/or ifos must be separated by 345 commas, e.g. 'H1,L1' not 'H1L1'. 346 * To specify multiple coinc-types in one type of time, 347 separate each coinc-type by a '+', e.g., 348 '[H1,H2 + H2,L1 in H1,H2,L1]'. 349 * To specify all the coincs in a detector time 350 or a specific coinc-type in all times, use 'ALL'. E.g., 351 to exclude/include all H1,H2 triggers, use '[H1,H2 in ALL]' 352 or to exclude/include all H2,L1 time use '[ALL in H2,L1]'. 353 * To specify multiple exclusions, separate each 354 bracket by a ';'. 355 * Order of the instruments nor case of the letters 356 matter. So if your pinky is broken and you're 357 dyslexic you can type '[h2,h1 in all]' without a 358 problem. 359 @verbose: be verbose. 360 """ 361 362 if verbose: 363 print >> sys.stderr, "Parsing coinc options..." 364 365 self.coinc_types = {} 366 367 for rule in coincs_opt.split(';'): 368 rule = rule.strip().lstrip('[').rstrip(']').upper() 369 370 # get coinc_instruments, instruments_on 371 if len(rule.split('IN')) != 2: 372 raise ValueError, "Must seperate coinc. types and on-instruments by 'in'" 373 [ coinc_instruments, instruments_on ] = rule.split('IN') 374 instruments_on = instruments_on.strip() 375 coinc_instruments = coinc_instruments.strip() 376 377 # Parse instruments_on 378 if instruments_on != 'ALL': 379 instruments_on = frozenset(sorted([instrument.strip() for instrument in instruments_on.split(',')])) 380 if instruments_on not in self.coinc_types: 381 # sanity check 382 if len(instruments_on) <= 1: 383 raise ValueError, "Must delimit instruments by commas." 384 # following is to try to protect against injection attacks 385 for instrument in instruments_on: 386 if len(instrument.split(' ')) > 1: 387 raise ValueError, "Instrument names cannot have spaces in them." 388 self.coinc_types[ instruments_on ] = [] 389 elif 'ALL' not in self.coinc_types: 390 self.coinc_types[ 'ALL' ] = [] 391 392 # Parse coinc_instruments 393 if coinc_instruments != 'ALL': 394 for coinc_instset in coinc_instruments.split('+'): 395 coinc_instset = set(sorted( instrument.strip() for instrument in coinc_instset.split(',') )) 396 if coinc_instset not in self.coinc_types[ instruments_on ]: 397 # sainity check 398 if len(coinc_instset) <= 1: 399 raise ValueError, "Must delimit instruments by commas." 400 for instrument in coinc_instset: 401 if len(instrument.split(' ')) > 1: 402 raise ValueError, "Instrument names cannot have spaces in them." 403 # add instset to coinc_types 404 self.coinc_types[ instruments_on ].append( coinc_instset ) 405 else: 406 self.coinc_types[ instruments_on ] = ['ALL']
407 408
409 - def get_coinc_types( self ):
410 return self.coinc_types
411 412
413 - def get_coinc_filters( self, coinc_instruments_table = 'coinc_inspiral' ):
414 """ 415 Converts self.coinc_types to a list of strings that can be used 416 in a SQLite WHERE clause to filter coincs by coinc_type, 417 by coinc_instruments (which is stored in the given coinc_instruments_table) 418 in instruments_on (which is stored in the experiment table). 419 """ 420 self.coinc_filters = [] 421 # import ifos_from_instrument_set in lsctables for converting 422 # instrument sets in self.coinc_types to strings 423 from glue.ligolw.lsctables import ifos_from_instrument_set 424 425 coinc_instruments_table = validate_option( coinc_instruments_table ) 426 427 # cycle through instruments_on in coinc_types 428 for instruments_on in self.coinc_types: 429 this_coincfilter = '' 430 if instruments_on != 'ALL': 431 this_coincfilter = ''.join([ 432 'experiment.instruments == "', ifos_from_instrument_set(instruments_on), '"' ]) 433 # now cycle through coinc_instruments in self.coinc_types[ instruments_on ], 434 # concatenate each coinc_instruments set with an OR; 435 # append the concatenated string this_coincfilter with an AND 436 if 'ALL' not in self.coinc_types[ instruments_on ]: 437 this_coincfilter = ' '.join([ this_coincfilter, 'AND (' ]) 438 for coinc_instruments in self.coinc_types[ instruments_on ]: 439 this_coincfilter = ''.join([ this_coincfilter, 440 ' %s.ifos == "' % coinc_instruments_table, ifos_from_instrument_set(coinc_instruments), '"', ' OR' ]) 441 # strip the last 'OR' and replace with a ')' to close out the coinc_instruments 442 this_coincfilter = this_coincfilter.rstrip('OR') + ')' 443 # if instruments_on is 'ALL', just add what coincs to filter 444 elif instruments_on == 'ALL' and 'ALL' not in self.coinc_types[ instruments_on ]: 445 for coinc_instruments in self.coinc_types[ instruments_on ]: 446 this_coincfilter = ''.join([ this_coincfilter, 447 ' %s.ifos == "' % coinc_instruments_table, ifos_from_instrument_set(coinc_instruments), '"', ' OR' ]) 448 # strip the last 'OR' 449 this_coincfilter = this_coincfilter.rstrip('OR') 450 451 self.coinc_filters.append( ''.join([ '(', this_coincfilter, ')' ]) ) 452 453 return self.coinc_filters
454 455 456
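A short sketch of the intended use of parse_coinc_options (illustrative, not part of the original module; based on the docstring example above):

    from pylal import ligolw_sqlutils as sqlutils

    coincs = sqlutils.parse_coinc_options( '[h2,l1 in h1,h2,l1]' )
    coincs.get_coinc_types()
    # -> { frozenset(['H1','H2','L1']): [set(['H2','L1'])] }
    filters = coincs.get_coinc_filters()   # list of SQL fragments for a WHERE clause
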
def del_rows_from_table( connection, del_table, del_table_id, join_conditions, del_filters = None, save_filters = None, verbose = False ):
    """
    Deletes triggers from any specified table in the del_table option.

    @connection: DBTables connection to a database
    @del_table: Any coinc_table (coinc_inspiral, sngl_inspiral, coinc_event,
     etc.) from which to delete triggers.
    @del_table_id: name of ID column in the del_table on which will be deleting
     triggers.
    @join_conditions: SQLite string that draws connections between different
     coinc_tables. Must be of format 'JOIN table1 ON table1-link-to-other-table
     JOIN table2 ON table2-link', etc.
    @del_filters: List of filters. Triggers that fall within will be deleted.
    @save_filters: List of filters. Triggers that fall within will NOT be deleted.

    NOTE: Save filters will override del_filters if they overlap. For example,
    say the del filters specify H1,H2 triggers in H1,H2,L1 time and the save filters
    are for triggers with mchirp between 2 and 8. Then all triggers with chirp mass
    between 2 and 8 will be saved, even if they are H1,H2. All other H1,H2
    triggers will be deleted. What this means is if you want to do a global
    delete -- say you want to delete all H1,H2 triggers, do not specify a
    save_filter that overlaps with it.
    """
    # append table name to table_id to ensure uniqueness
    del_table_id = '.'.join([ del_table, del_table_id])

    # set where clause to be used in delete statement based on del and save
    # filters
    where_clause = ''
    if del_filters:
        del_filters = [ ''.join([ '(', filter, ')' ]) for filter in del_filters ]
        del_filters = ' OR '.join( del_filters )
        where_clause = ' '.join([ 'WHERE (', del_filters, ')' ])
    if save_filters:
        save_filters = [ ''.join([ '(', filter, ')' ]) for filter in save_filters ]
        save_filters = ' OR '.join( save_filters )
        if not del_filters:
            where_clause = ' '.join([ 'WHERE NOT (', save_filters, ')' ])
        else:
            where_clause = ' '.join([ where_clause, 'AND NOT (', save_filters, ')' ])
    # if no filters, warn user
    if not where_clause:
        print >> sys.stderr, '''WARNING: No filters specified in delete statement.
        Deleting all rows from %s''' % del_table
    elif verbose:
        print >> sys.stderr, "Deleting rows from %s table %s..." % (del_table, where_clause)

    sqlquery = ' '.join([
        'DELETE FROM', del_table,
        'WHERE', del_table_id, 'IN (',
            'SELECT', del_table_id,
            'FROM', del_table, join_conditions,
            where_clause, ')' ])
    connection.cursor().execute( sqlquery )
    connection.commit()

513 -def get_tables_in_database( connection ):
514 """ 515 Gets the names of tables that are in the database. 516 """ 517 sqlquery = 'SELECT name FROM sqlite_master WHERE type == "table"' 518 return connection.cursor().execute(sqlquery).fetchall()
519
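Note that fetchall() returns a list of one-element tuples rather than bare names, which is why callers elsewhere in this module unpack the result with zip(*...)[0]; a small sketch (illustrative only, assuming an open connection):

    # get_tables_in_database returns rows like [(u'process',), (u'sngl_inspiral',), ...]
    all_tables = zip(*get_tables_in_database(connection))[0]
    if 'summ_value' in all_tables:
        print >> sys.stdout, "summ_value table found"
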
520 -def vacuum_database(connection, vacuum=None, verbose=None):
521 """ 522 Remove empty space and defragment the database. 523 """ 524 if vacuum: 525 if verbose: 526 print >> sys.stderr, "Start vacuuming database" 527 connection.execute("VACUUM") 528 if verbose: 529 print >> sys.stderr, "\tVacuuming database finished\n"
530
531 -def get_column_names_from_table( connection, table_name ):
532 """ 533 Gets the column names from a table and returns them as a list. 534 """ 535 sqlquery = ''.join(['PRAGMA table_info(', table_name, ')' ]) 536 column_names = [ name[1] for name in connection.cursor().execute( sqlquery).fetchall() ] 537 538 return column_names
539
540 -def get_user_created_indices( connection, table_names ):
541 """ 542 Get all index names and associated SQL CREATE statements associated with 543 tables in table_names. Return a list of (idx_name, sql) tuples. 544 """ 545 546 sqlquery = """ 547 SELECT name, tbl_name, sql 548 FROM sqlite_master 549 WHERE type = 'index' AND sql != 'None' 550 """ 551 indices = [] 552 for idx_name, tbl_name, sql in connection.execute(sqlquery): 553 if tbl_name in table_names: 554 indices.append( (idx_name, sql) ) 555 556 return indices
557 558
559 -def convert_duration( duration, convert_to ):
560 """ 561 Converts durations stored in the experiment_summary_table from seconds 562 to other units of time. 563 564 @duration: duration to convert; assumed to be a float or long in seconds 565 @convert_to: the unit to convert to. Options are: 566 's': to seconds - will just divide by 1. 567 This can be useful if need to convert 568 the duration from a long int to a float. 569 'min': to minutes - will divide by 60. 570 'hr': to hours - will divide by 3600. 571 'days': to days - will divide by 86400. 572 'yr': to years - will divide by 31557600. 573 This is the Julian year, which is the 574 accepted astronomical year 575 """ 576 if not duration: 577 return 0. 578 if convert_to == 's': 579 return duration / 1. 580 elif convert_to == 'min': 581 return duration / 60. 582 elif convert_to == 'hr': 583 return duration / 3600. 584 elif convert_to == 'days': 585 return duration / 86400. 586 elif convert_to == 'yr': 587 return duration / 31557600. 588 else: 589 raise ValueError, "Unrecognized unit."
590
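For example (illustrative values), converting a livetime stored in seconds:

    convert_duration( 86400, 'days' )    # -> 1.0
    convert_duration( 31557600, 'yr' )   # -> 1.0 (one Julian year)
    convert_duration( 90, 'min' )        # -> 1.5
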
591 -def get_next_id(connection, table, id_column):
592 """ 593 Gets the next available id in the specified id_column in the specified table. 594 """ 595 sqlquery = ' '.join(['SELECT', id_column, 'FROM', table ]) 596 ids = dict([ [int(ilwd.ilwdchar(this_id)), ilwd.ilwdchar(this_id)] for (this_id,) in connection.cursor().execute(sqlquery)]) 597 if ids == {}: 598 new_id = ilwd.ilwdchar(':'.join([table, id_column, '0'])) 599 else: 600 new_id = ids[ max(ids.keys()) ] + 1 601 602 return new_id
603
604 -def increment_id( last_id, nsteps = 1 ):
605 """ 606 Increments the given id by nsteps. 607 """ 608 next_id = last_id.split(':') 609 next_id[-1] = str( int(next_id[-1]) + nsteps ) 610 return ':'.join(next_id)
611
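A small sketch of the two id helpers above (illustrative; the string form follows the ilwd id convention table:column:index):

    increment_id( 'process:process_id:5' )              # -> 'process:process_id:6'
    increment_id( 'process:process_id:5', nsteps = 3 )  # -> 'process:process_id:8'
    # get_next_id instead queries the database for the largest existing id and adds one:
    # new_pid = get_next_id( connection, 'process', 'process_id' )
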
612 -def end_time_in_ns( end_time, end_time_ns ):
613 return end_time*1e9 + end_time_ns
614
class Summaries:
    """
    This class stores information about the foreground and background in a
    database for making calculation of uncombined fars and combined fars quick
    and efficient.

    bkg_stats groups triggers by experiment_id, ifos, and param_group
    (param_group is an arbitrary integer representing the param bin, e.g.,
    mchirp [3.48,7.4), to which a trigger belongs; if no binning is done, then
    it is 0 for all triggers). It stores ALL the triggers in all the time
    slides (except zero-lag) within that group.

    sngl_slide_stats groups triggers by experiment_id, experiment_summ_id, ifos, and
    param_group. It therefore groups all triggers within each time slide
    separately. It is used to subtract triggers within the same slide when calculating
    uncombined fars for the background. Therefore, it only stores slide triggers;
    for any zero-lag datatype sngl_slide_stats is just an empty list.

    datatypes maps the list of datatypes for an experiment to the corresponding
    experiment_summ_ids:
        datatypes[experiment_id][datatype] = [esid1, esid2, etc.]

    frg_durs stores the duration for each experiment_summ_id. Its keys are
    [experiment_id][experiment_summ_id].

    bkg_durs stores the background duration for each time-slide and zero-lag,
    i.e., for each experiment_summ_id. This is the sum of all other slide
    datatypes sharing the same experiment_id except for the given slide.

    max_bkg_fars stores the maximum background fars of all the categories
    within each time slide. Its keys are (experiment_summ_id, ifo_group).
    The maximum background far is just the total number of triggers within a
    category divided by the background duration for that time slide.
    If opts.combine_fars is set to across_all a category is defined by the
    param bin in which a trigger exists and the ifos that took part in the
    trigger. So, if there are three param bins and we've excluded H2,L1 triggers
    in H1,H2,L1 time, then there are 6 categories for H1,H2,L1 time: three param
    bins each for H1,L1 and H1,H2,L1 coincident triggers. Thus, ifo_group will
    be set to "ALL_IFOS" and there will be 6 max_bkg_fars stored for each
    experiment_summ_id in triple time.
    If opts.combine_fars is set to across_param_only, then a category is
    defined only by the param bins; ifo coincidences are treated as
    separate experiments. Thus, ifo_group will be set to whatever
    coinc. trigger we are considering and there will only be 3 max_bkg_fars
    stored for that entry.

    zero_lag_ids stores the esid of all zero-lag "slides" of an experiment.
        zero_lag_ids[ experiment_id ] = [experiment_summ_id1, experiment_summ_id2, etc.]
    """
664 - def __init__(self):
665 self.bkg_stats = {} 666 self.sngl_slide_stats = {} 667 self.datatypes = {} 668 self.frg_durs = {} 669 self.bkg_durs = {} 670 self.max_bkg_fars = {} 671 self.zero_lag_ids = {}
672
673 - def add_to_bkg_stats(self, experiment_id, experiment_summ_id, ifos, param_group, stat):
674 """ 675 Adds a stat to bkg_stats and sngl_slide_stats. What stat is added is determined on the command 676 line by the ranking-stat option. 677 """ 678 # add the categories to the bkg_stats if they don't exist yet 679 if (experiment_id, ifos, param_group) not in self.bkg_stats: 680 self.bkg_stats[(experiment_id, ifos, param_group)] = [] 681 if (experiment_id, experiment_summ_id, ifos, param_group) not in self.sngl_slide_stats: 682 self.sngl_slide_stats[(experiment_id, experiment_summ_id, ifos, param_group)] = [] 683 # only add the stats if they are slide 684 if not ( experiment_id in self.zero_lag_ids and experiment_summ_id in self.zero_lag_ids[experiment_id] ): 685 self.bkg_stats[(experiment_id, ifos, param_group)].append( stat ) 686 self.sngl_slide_stats[(experiment_id, experiment_summ_id, ifos, param_group)].append(stat)
687
688 - def sort_bkg_stats(self):
689 """ 690 Sorts each list in bkg_stats and sngl_slide_stats from smallest to largest value. 691 """ 692 for thislist in self.bkg_stats.values(): 693 thislist.sort() 694 for thislist in self.sngl_slide_stats.values(): 695 thislist.sort()
696
697 - def store_datatypes(self, experiment_id, experiment_summ_id, datatype):
698 """ 699 Stores the experiment_summ_id associated with each datatype. 700 """ 701 if experiment_id not in self.datatypes: 702 self.datatypes[experiment_id] = {} 703 if datatype not in self.datatypes[experiment_id]: 704 self.datatypes[experiment_id][datatype] = [] 705 self.datatypes[experiment_id][datatype].append(experiment_summ_id)
706
707 - def get_datatype(self, experiment_summ_id):
708 """ 709 Retrieve the datatype for a given experiment_summ_id. 710 """ 711 for eid in self.datatypes: 712 for datatype, esid_list in self.datatypes[eid].items(): 713 if experiment_summ_id in esid_list: 714 return datatype
715
716 - def append_zero_lag_id(self, experiment_id, zero_lag_esid):
717 """ 718 Adds a zero_lag_id to the zero_lag_ids dictionary. 719 """ 720 if experiment_id not in self.zero_lag_ids: 721 self.zero_lag_ids[experiment_id] = [] 722 self.zero_lag_ids[experiment_id].append(zero_lag_esid)
723
724 - def append_duration(self, experiment_id, experiment_summ_id, duration):
725 """ 726 Adds a duration to frg_durs. 727 """ 728 if experiment_id not in self.frg_durs: 729 self.frg_durs[experiment_id] = {} 730 self.frg_durs[experiment_id][experiment_summ_id] = duration
731
732 - def calc_bkg_durs(self):
733 """ 734 Sums the background durs for each time-slide (experiment_summ_id). 735 """ 736 for eid, durs_dict in self.frg_durs.items(): 737 culled_durs_dict = copy.deepcopy(durs_dict) 738 if eid in self.zero_lag_ids: 739 for frgd_esid in self.zero_lag_ids[eid]: 740 del culled_durs_dict[frgd_esid] 741 tot_dur = sum( culled_durs_dict.values() ) 742 for esid, duration in durs_dict.items(): 743 if esid in culled_durs_dict.keys(): 744 self.bkg_durs[esid] = tot_dur - duration 745 else: 746 self.bkg_durs[esid] = tot_dur
747
    def append_max_bkg_far(self, experiment_summ_id, ifo_group, max_bkg_far):
        """
        Adds a max_bkg_far to the appropriate list; lists are grouped by
        experiment_summ_id and ifo_group. If one wants to combine fars across
        param_bins and coincident_ifos (as was done in the low-mass S51yr and
        12-18 month analyses), ifo_group should be set to "ALL_IFOS".
        """
        if (experiment_summ_id, ifo_group) not in self.max_bkg_fars:
            self.max_bkg_fars[(experiment_summ_id, ifo_group)] = []
        self.max_bkg_fars[(experiment_summ_id, ifo_group)].append(max_bkg_far)

759 - def sort_max_bkg_fars(self):
760 """ 761 Sorts the max_bkg_fars lists from smallest to highest values. 762 """ 763 for thislist in self.max_bkg_fars.values(): 764 thislist.sort()
765
766 - def calc_ufar_by_max(self, eid, esid, ifos, param_group, stat):
767 """ 768 Calculates the uncombined false alarm rate for a trigger by counting 769 the number of background triggers in the same category as it that have 770 a stat value greater than or equal to the trigger's stat value and 771 dividing by the background duration for that slide. 772 To do this quickly, bisect.bisect_left is used (see python 773 documentation for more info) on the bkg_stats list. Since bkg_stats 774 contains all the triggers in all the slides for some experiment_id, 775 this will result in counting the triggers that are in the same slide 776 (given by the esid) as the trigger we are considering (except for zero-lag). 777 To correct for this, the trigger's place in it's sngl_slide_stats list is 778 subtracted from this value. The "background" considered for some trigger is 779 therefore all the triggers sharing the same experiment_id, excluding 780 zero-lag triggers and triggers in the same time-slide as the trigger. This 781 means that uncombined far for non-zero-lag triggers will use one less time 782 slide than zero-lag triggers. 783 """ 784 return (\ 785 ( len(self.bkg_stats[(eid, ifos, param_group)]) - bisect.bisect_left(self.bkg_stats[(eid, ifos, param_group)], stat) ) \ 786 - \ 787 ( len(self.sngl_slide_stats[(eid, esid, ifos, param_group)]) - bisect.bisect_left(self.sngl_slide_stats[(eid, esid, ifos, param_group)], stat) ) \ 788 ) / self.bkg_durs[esid]
789
    def calc_ufar_by_min(self, eid, esid, ifos, param_group, stat):
        """
        Same as calc_ufar_by_max, except that the uncombined far is calculated
        by counting background triggers that have a stat value less than or
        equal to the given stat. (Done by using bisect.bisect_right as opposed to
        len(list) - bisect.bisect_left).
        Note: if stat is 0, will just return 0. This is because a 0 when calculating
        FARs by minimum value is equivalent to inf. when calculating FARs by maximum
        value.
        """
        if stat == 0.:
            return stat

        return ( \
            bisect.bisect_right(self.bkg_stats[(eid, ifos, param_group)], stat) \
            - \
            bisect.bisect_right(self.sngl_slide_stats[(eid, esid, ifos, param_group)], stat) \
            ) / self.bkg_durs[esid]

    def calc_cfar( self, esid, ifo_group, ufar ):
        """
        Calculates the combined far for the given uncombined far (ufar). This
        is defined as the ufar times the number of categories that are active
        at that point plus the sum of the max_bkg_fars of all the categories
        that are inactive. Whether or not a category is "active" is determined
        by its max_bkg_far. If the given ufar is greater than some max_bkg_far,
        then the category which that max_bkg_far represents is considered
        inactive. If the given ufar is less than some max_bkg_far, then
        the category is considered active.
        """
        return \
            (len( self.max_bkg_fars[(esid, ifo_group)] ) - bisect.bisect_left( self.max_bkg_fars[(esid,ifo_group)], ufar ))*ufar \
            + sum([self.max_bkg_fars[(esid,ifo_group)][ii] for ii in range(bisect.bisect_left( self.max_bkg_fars[(esid,ifo_group)], ufar))])

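A worked example of the combined-FAR formula above (illustrative numbers, not from the original source): if the sorted max_bkg_fars list for a given (esid, ifo_group) is [1.0, 2.0, 3.0] and the uncombined FAR is 1.5, then one category (the one whose maximum background FAR is 1.0) is inactive and two are active, so:

    # cfar = (number of active categories) * ufar + sum(max_bkg_fars of inactive categories)
    #      = 2 * 1.5 + 1.0
    #      = 4.0
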
824 -class rank_stats:
825 """ 826 Class to return a rank for stats. 827 """
828 - def __init__(self, table, ranking_stat, rank_by):
829 """ 830 @table: table containing the stats to rank 831 @ranking_stat: stat in table that wish to rank 832 @rank_by: should be either "ASC" or "DESC" 833 """ 834 self.stats = [] 835 self.table = table 836 self.ranking_stat = ranking_stat 837 self.rank_by = rank_by
838
    def populate_stats_list(self, connection, limit = None, filter = ''):
        """
        Gets top stats from database for later ranking

        @connection: connection to a sqlite database
        @limit: put a limit on the number of stats to rank
        @filter: apply a filter (i.e., a SQLite WHERE clause).
         Note: If the filter uses columns from tables other than
         self.table, must include the join conditions as well
        """
        if limit is not None:
            limit = "LIMIT " + str(limit)
        else:
            limit = ''
        sqlquery = ''.join(["""
            SELECT
                """, self.ranking_stat, """
            FROM
                """, self.table, """
            """, filter, """
            ORDER BY """, self.ranking_stat, ' ', self.rank_by, """
            """, limit ])
        self.stats = [stat[0] for stat in connection.cursor().execute(sqlquery).fetchall()]
        self.stats.sort()

863 - def get_rank( self, this_stat ):
864 if self.rank_by == "ASC": 865 return bisect.bisect_left(self.stats, this_stat) + 1 866 else: 867 return len(self.stats) - bisect.bisect_right(self.stats, this_stat) + 1
868 869
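An illustrative sketch of the ranking logic (not from the original source): the stats list is kept sorted ascending, so for rank_by == "DESC" the rank counts down from the largest value:

    ranker = rank_stats( 'coinc_inspiral', 'snr', 'DESC' )
    ranker.stats = [1.0, 3.0, 5.0, 7.0]   # normally filled by populate_stats_list
    ranker.get_rank( 7.0 )   # -> 1, the largest stat
    ranker.get_rank( 5.0 )   # -> 2
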
870 -def get_col_type(table_name, col_name, default = 'lstring'):
871 """ 872 Attempts to get column type from lsctables.py for the given table name and 873 column name. If the table doesn't exist in lsctables or the column doesn't 874 exist in the lsctables definition of the table, returns the default type. 875 """ 876 if table_name in lsctables.TableByName.keys() and col_name in lsctables.TableByName[table_name].validcolumns.keys(): 877 return lsctables.TableByName[table_name].validcolumns[col_name] 878 else: 879 return default
880
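For example (illustrative; assumes the standard glue.ligolw.lsctables column definitions):

    get_col_type( 'process', 'program' )         # -> 'lstring'
    get_col_type( 'process', 'not_a_column' )    # -> 'lstring', the default
    get_col_type( 'made_up_table', 'x', default = 'real_4' )   # -> 'real_4'
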
881 -def create_column( connection, table_name, column_name ):
882 """ 883 Creates a column in the given table if it doesn't exist. Note that table_name and 884 column_name must be all lower-case. 885 """ 886 if table_name != table_name.lower() or column_name != column_name.lower(): 887 raise ValueError, "table_name (%s) and column_name (%s) must be all lower-case" % (table_name, column_name) 888 table_name = validate_option( table_name ) 889 column_name = validate_option( column_name ) 890 if column_name not in get_column_names_from_table( connection, table_name ): 891 sqlquery = ''.join([ """ 892 ALTER TABLE 893 """, table_name, """ 894 ADD COLUMN 895 """, column_name ]) 896 connection.cursor().execute( sqlquery ) 897 898 return table_name, column_name
899 900 901 # ============================================================================= 902 # 903 # Meta-data Tables Utilities 904 # 905 # ============================================================================= 906 907 # Following utilities apply to the meta-data tables: these include the 908 # process, process_params, search_summary,search_summvars, and summ_value 909 # tables 910
911 -def clean_metadata(connection, key_tables, verbose = False):
912 """ 913 Cleans metadata from tables that don't have process_ids in any of the tables 914 listed in the key_tables list. 915 916 @connection: connection to a sqlite database 917 @key_tables: list of tuples that must have the following order: 918 (table, column, filter) 919 where: 920 table is the name of a table to get a save process id from, 921 column is the name of the process_id column in that table 922 (this doesn't have to be 'process_id', but it should be a 923 process_id type), 924 filter is a filter to apply to the table when selecting process_ids 925 """ 926 if verbose: 927 print >> sys.stderr, "Removing unneeded metadata..." 928 929 # 930 # create a temp. table of process_ids to keep 931 # 932 sqlscript = 'CREATE TEMP TABLE save_proc_ids (process_id);' 933 934 # cycle over the key_tables, adding execution blocks for each 935 for table, column, filter in key_tables: 936 if filter != '' and not filter.strip().startswith('WHERE'): 937 filter = 'WHERE\n' + filter 938 sqlscript = '\n'.join([ sqlscript, 939 'INSERT INTO save_proc_ids (process_id)', 940 'SELECT DISTINCT', 941 column, 942 'FROM', table, filter, ';' ]) 943 944 sqlscript = sqlscript + '\nCREATE INDEX proc_index ON save_proc_ids (process_id);' 945 946 # now step through all tables with process_ids and remove rows who's ids 947 # aren't in save_proc_ids 948 tableList = [table for table in ['process','process_params','search_summary','search_summvars','summ_value'] 949 if table in get_tables_in_database(connection) ] 950 for table in tableList: 951 sqlscript = '\n'.join([ sqlscript, 952 'DELETE FROM', 953 table, 954 'WHERE', 955 'process_id NOT IN (', 956 'SELECT', 957 'process_id', 958 'FROM', 959 'save_proc_ids );' ]) 960 961 # drop the save_proc_ids table 962 sqlscript = sqlscript + '\nDROP TABLE save_proc_ids;' 963 964 # execute the script 965 connection.cursor().executescript(sqlscript)
966
def clean_metadata_using_end_time(connection, key_table, key_column, verbose = False):
    """
    An alternate to clean_metadata, this cleans metadata from tables whose
    start/end_times don't encompass the end_times in the given table.

    @connection: connection to a sqlite database
    @key_table: name of table to use end_times from
    @key_column: name of the end_time column in the key_table
    """
    if verbose:
        print >> sys.stderr, "Removing unneeded metadata..."

    key_column = '.'.join([key_table, key_column])
    connection.create_function('end_time_in_ns', 2, end_time_in_ns )

    sqlscript = ''.join([ """
        DELETE FROM
            search_summary
        WHERE NOT EXISTS (
            SELECT
                *
            FROM
                """, key_table, """
            WHERE
                end_time_in_ns(""", key_column, ', ', key_column, """_ns) >= end_time_in_ns(search_summary.in_start_time, search_summary.in_start_time_ns)
                AND end_time_in_ns(""", key_column, ', ', key_column, """_ns) < end_time_in_ns(search_summary.in_end_time, search_summary.in_end_time_ns)
            );
        DELETE FROM
            search_summvars
        WHERE
            process_id NOT IN (
                SELECT
                    process_id
                FROM
                    search_summary ); """])

    if 'summ_value' in get_tables_in_database(connection):
        sqlscript = ''.join([ sqlscript, """
        DELETE FROM
            summ_value
        WHERE NOT EXISTS (
            SELECT
                *
            FROM
                """, key_table, """
            WHERE
                end_time_in_ns(""", key_column, ', ', key_column, """_ns) >= end_time_in_ns(summ_value.in_start_time, summ_value.in_start_time_ns)
                AND end_time_in_ns(""", key_column, ', ', key_column, """_ns) < end_time_in_ns(summ_value.in_end_time, summ_value.in_end_time_ns)
            );"""])
        summ_val_str = """
            OR process.process_id NOT IN (
                SELECT
                    summ_value.process_id
                FROM
                    summ_value )"""
    else:
        summ_val_str = ''

    sqlscript = ''.join([ sqlscript, """
        DELETE FROM
            process
        WHERE
            process.process_id NOT IN (
                SELECT
                    search_summary.process_id
                FROM
                    search_summary )""", summ_val_str, """;
        DELETE FROM
            process_params
        WHERE
            process_params.process_id NOT IN (
                SELECT
                    process.process_id
                FROM
                    process );"""])

    # execute the script
    connection.cursor().executescript(sqlscript)

1047 -def get_process_info(connection, verbose=False, debug=False):
1048 """ 1049 Create a map between process_ids so duplicate entries in numerous ligolw_xml 1050 tables can be removed without a loss of information. The tables the _pidmap_ 1051 is used to simplify are: 1052 1053 -- process, process_params 1054 -- filter, summ_value, search_summary, search_summvars 1055 -- sim_inspiral, sim_ringdown 1056 -- time_slide 1057 -- update the sim_proc_ids in the experiment_summary table 1058 """ 1059 if verbose: 1060 print >> sys.stdout, "\nCreate a map of process_ids for removal of duplicates" 1061 1062 # create function to concatenate 5 columns together per row 1063 connection.create_function("concat_5cols", 5, concatenate) 1064 1065 # create the cursor object used to execute queries and commands 1066 cursor = connection.cursor() 1067 1068 sqlscript = """ 1069 CREATE INDEX pp_pivp_idx ON process_params (process_id, value, param); 1070 1071 CREATE TEMP TABLE proc_params AS 1072 SELECT 1073 process.process_id AS proc_id, 1074 process.program AS program, 1075 group_concat(pp_table.value) AS value, 1076 group_concat(pp_table.param) AS params, 1077 concat_5cols(process.start_time, process.end_time, process.username, 1078 process.node, process.version) AS process_info 1079 FROM 1080 process 1081 LEFT JOIN process_params AS pp_table ON ( 1082 pp_table.process_id == process.process_id) 1083 GROUP BY proc_id; 1084 1085 UPDATE 1086 proc_params 1087 SET 1088 value = 'NULL' 1089 WHERE 1090 value is null OR value = ''; 1091 1092 UPDATE 1093 proc_params 1094 SET 1095 params = 'NULL' 1096 WHERE 1097 params is null OR params = ''; 1098 UPDATE 1099 proc_params 1100 SET 1101 process_info = 'NULL' 1102 WHERE 1103 process_info is null OR process_info = ''; 1104 UPDATE 1105 proc_params 1106 SET 1107 program = 'NULL' 1108 WHERE 1109 program is null OR program = ''; 1110 1111 CREATE TEMP TABLE proc_params_map AS 1112 SELECT 1113 MIN(proc_id) AS proc_id, 1114 program, 1115 value, 1116 params, 1117 process_info 1118 FROM 1119 proc_params 1120 GROUP BY program, value, params, process_info; 1121 1122 DROP INDEX pp_pivp_idx; 1123 CREATE INDEX proc_params_idx ON proc_params (program, value, params, process_info); 1124 CREATE INDEX proc_params_map_idx ON proc_params_map (program, value, params, process_info); 1125 1126 CREATE TEMP TABLE _pidmap_ AS 1127 SELECT 1128 old_pp_table.proc_id AS old_pid, 1129 MIN(new_pp_table.proc_id) AS new_pid, 1130 old_pp_table.program AS program 1131 FROM 1132 proc_params AS old_pp_table 1133 JOIN proc_params_map AS new_pp_table ON ( 1134 old_pp_table.value == new_pp_table.value 1135 AND old_pp_table.process_info == new_pp_table.process_info 1136 AND old_pp_table.params == new_pp_table.params 1137 AND old_pp_table.program == new_pp_table.program) 1138 GROUP BY old_pid; 1139 1140 CREATE INDEX _pidmap_idx ON _pidmap_ (old_pid); 1141 1142 DROP INDEX proc_params_idx; 1143 DROP INDEX proc_params_map_idx; 1144 DROP TABLE proc_params; 1145 DROP TABLE proc_params_map; 1146 """ 1147 if debug: 1148 print >> sys.stderr, sqlscript 1149 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1150 # execute SQL script 1151 cursor.executescript( sqlscript ) 1152 # commit transactions to database and close the cursor 1153 connection.commit() 1154 cursor.close() 1155 if debug: 1156 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6])
1157 1158
1159 -def get_pids_to_update(cursor, programs):
1160 """ 1161 Make a tuple of the process_ids from the temp table '_pidmap_' that need to 1162 be updated for a given list of programs. 1163 """ 1164 1165 sqlquery = """ 1166 SELECT DISTINCT old_pid 1167 FROM _pidmap_ 1168 WHERE new_pid != old_pid """ 1169 if not programs: 1170 # get all process_ids regardless of program 1171 process_ids = zip( *cursor.execute(sqlquery).fetchall() ) 1172 else: 1173 process_ids = [] 1174 sqlquery += """ 1175 AND program = :1 1176 """ 1177 # loop over the list of programs 1178 for program in programs: 1179 validate_option(program) 1180 if (program,) in cursor.execute('SELECT DISTINCT program FROM _pidmap_'): 1181 process_ids += zip( *cursor.execute(sqlquery, (program,)).fetchall() ) 1182 1183 return process_ids
1184 1185
1186 -def simplify_summ_tbls(connection, verbose=False, debug=False):
1187 """ 1188 Delete duplicate information in summary tables according to the process_ids. 1189 tbl_name: filter, summ_value, search_summary, search_summvars 1190 """ 1191 1192 # create the cursor object used to execute queries and commands 1193 cursor = connection.cursor() 1194 1195 # check for duplicate entries from the process tbl 1196 old_pids = get_pids_to_update(cursor, ['inspiral','ringdown', 'gstlal_inspiral']) 1197 1198 # check that at least one table in table_names is in the database 1199 all_tables = zip(*get_tables_in_database(connection))[0] 1200 table_names = ['filter','summ_value','search_summary','search_summvars'] 1201 1202 if old_pids and set(table_names) & set(all_tables): 1203 if verbose: 1204 print >> sys.stdout, "\nDelete redundant rows in available summary tables" 1205 1206 # get the non-auto-generated indices for the tables in table_names 1207 relevant_indices = get_user_created_indices(connection, table_names) 1208 # drop indices that will interfere with update & delete statements 1209 for idx, sql in relevant_indices: 1210 validate_option(idx) 1211 cursor.execute('DROP INDEX %s' % idx) 1212 1213 sqlscript = "" 1214 # if a filter table exists, remove duplicate rows 1215 if 'filter' in all_tables: 1216 sqlscript += """ 1217 DELETE FROM filter 1218 WHERE process_id NOT IN ( 1219 SELECT DISTINCT new_pid 1220 FROM _pidmap_ 1221 WHERE program = 'inspiral' OR program = 'ringdown' ); """ 1222 # if a summ_value table exists, remove duplicate rows 1223 if 'summ_value' in all_tables: 1224 sqlscript += """ 1225 DELETE FROM summ_value 1226 WHERE process_id NOT IN ( 1227 SELECT DISTINCT new_pid 1228 FROM _pidmap_ 1229 WHERE program = 'inspiral' OR program = 'ringdown' ); """ 1230 # if a search_summary table exists, remove duplicate rows 1231 if 'search_summary' in all_tables: 1232 sqlscript += """ 1233 DELETE FROM search_summary 1234 WHERE process_id NOT IN ( 1235 SELECT DISTINCT new_pid 1236 FROM _pidmap_ 1237 WHERE program = 'inspiral' OR program = 'ringdown' ); """ 1238 # if a search_summvars table exists, remove duplicate rows 1239 if 'search_summvars' in all_tables: 1240 sqlscript += """ 1241 DELETE FROM search_summvars 1242 WHERE process_id NOT IN ( 1243 SELECT DISTINCT new_pid 1244 FROM _pidmap_ 1245 WHERE program = 'inspiral' OR program = 'ringdown' ); """ 1246 if debug: 1247 print >> sys.stderr, sqlscript 1248 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1249 # execute SQL script 1250 cursor.executescript( sqlscript ) 1251 # commit transactions to database 1252 connection.commit() 1253 1254 if debug: 1255 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 1256 1257 # Now put the indexes back in 1258 for idx, sql in relevant_indices: 1259 cursor.execute(sql) 1260 connection.commit() 1261 1262 if debug: 1263 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 1264 1265 else: 1266 if verbose: 1267 if not set(table_names) & set(all_tables): 1268 print >> sys.stdout, "This database lacks all filtering related summary tables." 1269 else: 1270 print >> sys.stdout, "The search & filter tables lack duplicates." 1271 cursor.close()
1272 1273
1274 -def update_pid_in_snglstbls(connection, verbose=False, debug=False):
1275 """ 1276 Update process_ids for events in the sngl_inspiral &/or sngl_ringdown table. 1277 This function does *not* remove duplicate events that have different 1278 event_ids. 1279 """ 1280 1281 # create the cursor object used to execute queries and commands 1282 cursor = connection.cursor() 1283 1284 # check for duplicate entries from the process tbl 1285 old_pids = get_pids_to_update(cursor, ['inspiral','ringdown']) 1286 1287 # check that at least one table in table_names is in the database 1288 all_tables = zip(*get_tables_in_database(connection))[0] 1289 table_names = ['sngl_inspiral','sngl_ringdown'] 1290 1291 if old_pids and set(table_names) & set(all_tables): 1292 if verbose: 1293 print >> sys.stdout, "\nUpdate process_ids in the sngl-ifo trigger tables" 1294 1295 # get the non-auto-generated indices for the tables in table_names 1296 relevant_indices = get_user_created_indices(connection, table_names) 1297 # drop indices that will interfere with update & delete statements 1298 for idx, sql in relevant_indices: 1299 validate_option(idx) 1300 cursor.execute('DROP INDEX %s' % idx) 1301 1302 sqlscript = "" 1303 # if a sngl_inspiral table exists, update process_id column 1304 if 'sngl_inspiral' in all_tables: 1305 sqlscript += """ 1306 UPDATE sngl_inspiral 1307 SET process_id = ( 1308 SELECT new_pid 1309 FROM _pidmap_ 1310 WHERE process_id == old_pid 1311 AND program = 'inspiral'); """ 1312 # if a sngl_ringdown table exists, update process_id column 1313 if 'sngl_ringdown' in all_tables: 1314 sqlscript += """ 1315 UPDATE sngl_ringdown 1316 SET process_id = ( 1317 SELECT new_pid 1318 FROM _pidmap_ 1319 WHERE process_id == old_pid 1320 AND program = 'ringdown'); """ 1321 1322 if debug: 1323 print >> sys.stderr, sqlscript 1324 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1325 # execute SQL script 1326 cursor.executescript( sqlscript ) 1327 # commit transactions to database 1328 connection.commit() 1329 if debug: 1330 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 1331 # Now put the indexes back in 1332 for idx, sql in relevant_indices: 1333 cursor.execute(sql) 1334 connection.commit() 1335 1336 if debug: 1337 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 1338 else: 1339 if verbose: 1340 if not set(table_names) & set(all_tables): 1341 print >> sys.stdout, "This database lacks a sngl_inspiral &/or sngl_ringdown table." 1342 else: 1343 print >> sys.stdout, "The sngl_inspiral & sngl_ringdown tables lack duplicates." 1344 cursor.close()
1345 1346
1347 -def simplify_proc_tbls(connection, verbose=False, debug=False):
1348 """ 1349 Delete duplicate rows in the process & process params table. 1350 The temp table _pidmap_ created by the get_process_info function is dropped. 1351 """ 1352 if verbose: 1353 print >> sys.stdout, "\nDeleting redundant rows in the process & process_params tables" 1354 1355 # create the cursor object used to execute queries and commands 1356 cursor = connection.cursor() 1357 1358 # check for duplicate entries from the process tbl 1359 old_pids = get_pids_to_update(cursor, []) 1360 1361 if old_pids: 1362 # get the non-auto-generated indices for the tables in table_names 1363 table_names = ['process','process_params'] 1364 relevant_indices = get_user_created_indices(connection, table_names) 1365 # drop indices that will interfere with update & delete statements 1366 for idx, sql in relevant_indices: 1367 validate_option(idx) 1368 cursor.execute('DROP INDEX %s' % idx) 1369 1370 sqlscript = """ 1371 -- Remove redundant process rows 1372 DELETE FROM process 1373 WHERE process_id IN ( 1374 SELECT DISTINCT old_pid 1375 FROM _pidmap_ 1376 WHERE old_pid != new_pid ); 1377 DELETE FROM process_params 1378 WHERE process_id IN ( 1379 SELECT DISTINCT old_pid 1380 FROM _pidmap_ 1381 WHERE old_pid != new_pid ); 1382 1383 DROP INDEX _pidmap_idx; 1384 DROP TABLE _pidmap_; """ 1385 if debug: 1386 print >> sys.stderr, sqlscript 1387 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1388 # execute SQL script 1389 cursor.executescript( sqlscript ) 1390 # commit transactions to database and close the cursor 1391 connection.commit() 1392 if debug: 1393 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 1394 1395 # Now put the indexes back in 1396 for idx, sql in relevant_indices: 1397 cursor.execute(sql) 1398 connection.commit() 1399 1400 if debug: 1401 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 1402 1403 else: 1404 sqlscript = """ 1405 DROP INDEX _pidmap_idx; 1406 DROP TABLE _pidmap_; """ 1407 if debug: 1408 print >> sys.stderr, sqlscript 1409 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1410 # execute SQL script 1411 cursor.executescript( sqlscript ) 1412 # commit transactions to database and close the cursor 1413 connection.commit() 1414 1415 if verbose: 1416 print >> sys.stdout, "The process & process_params tables lack duplicates." 1417 cursor.close()
1418 1419 1420 # ============================================================================= 1421 # 1422 # Experiment Utilities 1423 # 1424 # ============================================================================= 1425 1426 # Following utilities apply to the experiment tables: these include the 1427 # experiment, experiment_summary, experiment_map, and time_slide tables
1428 -def join_experiment_tables_to_coinc_table(table):
1429 """ 1430 Writes JOIN string to join the experiment, experiment_summary, 1431 and experiment_map tables to the specified table. This allows 1432 querying across any of these tables. 1433 1434 @table: any lsctable that has a coinc_event_id column 1435 1436 NOTE: Should only use when querying the specified table; i.e., 1437 when the specified table is the only one listed in the FROM statement. 1438 """ 1439 1440 return """ 1441 JOIN 1442 experiment, experiment_summary, experiment_map 1443 ON ( 1444 experiment.experiment_id == experiment_summary.experiment_id 1445 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id 1446 AND experiment_map.coinc_event_id == %s.coinc_event_id )""" % table
1447
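A sketch of how the returned JOIN string is typically used (illustrative, not from the original source; assumes an open connection to a database containing the experiment and coinc_inspiral tables):

    sqlquery = ''.join([ """
        SELECT
            coinc_inspiral.coinc_event_id, experiment_summary.datatype
        FROM
            coinc_inspiral
        """, join_experiment_tables_to_coinc_table( 'coinc_inspiral' ) ])
    for ceid, datatype in connection.cursor().execute( sqlquery ):
        pass  # do something with each mapped coinc
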
1448 -def join_experiment_tables_to_sngl_table(table):
1449 """ 1450 Writes JOIN string to join the experiment, experiment_summary, 1451 and experiment_map tables to the specified table. This allows 1452 querying across any of these tables. 1453 1454 @table: any lsctable that has an event_id column 1455 1456 NOTE: Should only use when querying the specified table; i.e., 1457 when the specified table is the only one listed in the FROM statement. 1458 """ 1459 1460 return ''.join([''' 1461 JOIN 1462 experiment, experiment_summary, experiment_map, coinc_event_map 1463 ON ( 1464 experiment.experiment_id == experiment_summary.experiment_id 1465 AND experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id 1466 AND experiment_map.coinc_event_id == coinc_event_map.coinc_event_id 1467 AND coinc_event_map.event_id == ''', table, '''.event_id 1468 AND coinc_event_map.table_name == "''', table, '" )' ])
1469
1470 -def clean_experiment_tables(connection, verbose = False):
1471 """ 1472 Removes entries from the experiment, experiment_summary, and time_slide tables 1473 that have no events in them, i.e., that have no mapping to any coinc_event_ids 1474 via the experiment_map table. Entries are only removed if none of the 1475 experiment_summ_ids associated with an experiment_id have coinc_events. In other words, 1476 Even if only one of the experiment_summ_ids associated with an experiment_id has an event, 1477 all of the experiment_summ_ids and experiment_ids associated with that event are 1478 saved. This perserves the background time and slide set associated with an experiment. 1479 1480 WARNING: This should only be used for purposes of scaling down a temporary database in prep. 1481 for xml extraction. In general, all experiment and time_slide entries should be left in 1482 the experiment tables even if they don't have events in them. 1483 1484 @connection: connection to a sqlite database 1485 """ 1486 if verbose: 1487 print >> sys.stderr, "Removing experiments that no longer have events in them..." 1488 1489 sqlscript = """ 1490 DELETE FROM 1491 experiment 1492 WHERE 1493 experiment_id NOT IN ( 1494 SELECT DISTINCT 1495 experiment_summary.experiment_id 1496 FROM 1497 experiment_summary, experiment_map 1498 WHERE 1499 experiment_summary.experiment_summ_id == experiment_map.experiment_summ_id 1500 ); 1501 DELETE FROM 1502 experiment_summary 1503 WHERE 1504 experiment_id NOT IN ( 1505 SELECT 1506 experiment_id 1507 FROM 1508 experiment 1509 ); 1510 DELETE FROM 1511 time_slide 1512 WHERE 1513 time_slide_id NOT IN ( 1514 SELECT DISTINCT 1515 time_slide_id 1516 FROM 1517 experiment_summary 1518 ); 1519 """ 1520 connection.cursor().executescript(sqlscript)
1521 1522
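As the warning above notes, this is intended for a temporary working copy of a database rather than the original. A sketch of that workflow, with hypothetical file names:

    import shutil
    import sqlite3

    from pylal import ligolw_sqlutils as sqlutils

    # work on a scratch copy so the full experiment tables in the original are preserved
    shutil.copy('original.sqlite', 'scratch.sqlite')
    connection = sqlite3.connect('scratch.sqlite')
    sqlutils.clean_experiment_tables(connection, verbose=True)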
1523 -def simplify_expr_tbl(connection, verbose=False, debug=False):
1524 """ 1525 Cleaning up the experiment table by removing duplicate rows and remapping 1526 events to the appropriate experiment. 1527 """ 1528 1529 all_tables = zip(*get_tables_in_database(connection))[0] 1530 table_names = ['experiment','experiment_summary'] 1531 # check that both tables in table_names exist in the database 1532 if not set(table_names) - set(all_tables): 1533 if verbose: 1534 print >> sys.stdout, "\nCleaning experiment table..." 1535 1536 # create function to concatenate columns together per row 1537 connection.create_function("concat_7cols", 7, concatenate) 1538 1539 # create the cursor object used to execute queries and commands 1540 cursor = connection.cursor() 1541 1542 # get the non-auto-generated indices for the tables in table_names 1543 relevant_indices = get_user_created_indices(connection, table_names) 1544 # drop indices that will interfere with update & delete statements 1545 for idx, sql in relevant_indices: 1546 validate_option(idx) 1547 cursor.execute('DROP INDEX %s' % idx) 1548 1549 sqlscript = """ 1550 -- create map table to map experiment_ids that are to be kept 1551 -- to experiment ids that are to be discarded, in the same manner 1552 -- as done above 1553 1554 CREATE TEMP TABLE expr_info AS 1555 SELECT 1556 expr.experiment_id AS eid, 1557 concat_7cols(expr.search, expr.search_group, expr.instruments, 1558 expr.gps_start_time, expr.gps_end_time, 1559 expr.lars_id, expr.comments) AS info 1560 FROM experiment AS expr; 1561 1562 CREATE TEMP TABLE _eidmap_ AS 1563 SELECT 1564 old_exp.eid AS old_eid, 1565 MIN(new_exp.eid) AS new_eid 1566 FROM 1567 expr_info AS old_exp 1568 JOIN expr_info AS new_exp ON ( 1569 old_exp.info == new_exp.info) 1570 GROUP BY old_eid; 1571 1572 DROP TABLE expr_info; 1573 CREATE INDEX _eidmap_idx ON _eidmap_ (old_eid); 1574 1575 -- delete the old ids from the experiment table 1576 DELETE FROM experiment 1577 WHERE experiment_id NOT IN ( 1578 SELECT new_eid 1579 FROM _eidmap_ ); 1580 1581 -- update the experiment_ids in the experiment summary table 1582 UPDATE experiment_summary 1583 SET experiment_id = ( 1584 SELECT new_eid 1585 FROM _eidmap_ 1586 WHERE experiment_summary.experiment_id == old_eid); 1587 1588 DROP INDEX _eidmap_idx; 1589 DROP TABLE _eidmap_; """ 1590 if debug: 1591 print >> sys.stderr, sqlscript 1592 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 1593 # execute SQL script 1594 cursor.executescript( sqlscript ) 1595 # commit transactions to database and close the cursor 1596 connection.commit() 1597 if debug: 1598 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 1599 1600 # Now put the indexes back in 1601 for idx, sql in relevant_indices: 1602 cursor.execute(sql) 1603 connection.commit() 1604 1605 if debug: 1606 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 1607 1608 else: 1609 if verbose: 1610 print >> sys.stdout, "The database is lacks the experiment &/or experiment_summary table(s)."
1611 1612
1613 -def simplify_exprsumm_tbl(connection, verbose=False, debug=False):
1614 """ 1615 Cleaning up the experiment_summary and the experiment_map tables 1616 by removing duplicate rows and remapping events to the appropriate 1617 experiment. 1618 """ 1619 1620 all_tables = zip(*get_tables_in_database(connection))[0] 1621 table_names = ['experiment_map','experiment_summary'] 1622 # check that both tables in table_names exist in the database 1623 if not set(table_names) - set(all_tables): 1624 if verbose: 1625 print >> sys.stdout, "\nCleaning experiment_summary & experiment_map tables..." 1626 1627 # create function to concatenate columns together per row 1628 connection.create_function("concat_5cols", 5, concatenate) 1629 1630 # create the cursor object used to execute queries and commands 1631 cursor = connection.cursor() 1632 1633 # get the non-auto-generated indices for the tables in table_names 1634 relevant_indices = get_user_created_indices(connection, table_names) 1635 # drop indices that will interfere with update & delete statements 1636 for idx, sql in relevant_indices: 1637 validate_option(idx) 1638 cursor.execute('DROP INDEX %s' % idx) 1639 1640 sqlscript = """ 1641 -- experiment summary clean up 1642 1643 -- create a table to map esids to be deleted to esids to be saved 1644 CREATE TEMP TABLE expr_summ_info AS 1645 SELECT 1646 expr_summ.experiment_summ_id AS esid, 1647 concat_5cols(expr_summ.experiment_id, expr_summ.time_slide_id, 1648 expr_summ.veto_def_name, expr_summ.datatype, expr_summ.sim_proc_id) AS info 1649 FROM experiment_summary AS expr_summ; 1650 1651 CREATE INDEX expr_summ_info_idx ON expr_summ_info (info); 1652 1653 CREATE TEMP TABLE _esidmap_ AS 1654 SELECT 1655 old_expsumm.esid AS old_esid, 1656 MIN(new_expsumm.esid) AS new_esid 1657 FROM 1658 expr_summ_info AS old_expsumm 1659 JOIN expr_summ_info AS new_expsumm ON ( 1660 old_expsumm.info == new_expsumm.info) 1661 GROUP BY old_esid; 1662 1663 DROP INDEX expr_summ_info_idx; 1664 DROP TABLE expr_summ_info; 1665 1666 CREATE INDEX _esidmap_idx on _esidmap_ (old_esid); 1667 CREATE INDEX es_dne_idx ON experiment_summary (duration,nevents); 1668 1669 -- sum durations and nevents 1670 CREATE TEMP TABLE sum_dur_nevents AS 1671 SELECT 1672 _esidmap_.new_esid AS esid, 1673 SUM(experiment_summary.duration) AS sum_dur, 1674 SUM(experiment_summary.nevents) AS sum_nevents 1675 FROM _esidmap_ 1676 JOIN experiment_summary ON ( 1677 _esidmap_.old_esid == experiment_summary.experiment_summ_id) 1678 GROUP BY esid; 1679 1680 DROP INDEX es_dne_idx; 1681 CREATE INDEX sdn_esid_index ON sum_dur_nevents (esid); 1682 1683 -- delete the old ids from the experiment_summary table 1684 DELETE FROM experiment_summary 1685 WHERE experiment_summ_id NOT IN ( 1686 SELECT DISTINCT new_esid 1687 FROM _esidmap_ ); 1688 1689 -- update the durations and the nevents 1690 UPDATE experiment_summary 1691 SET duration = ( 1692 SELECT sum_dur 1693 FROM sum_dur_nevents 1694 WHERE sum_dur_nevents.esid == experiment_summary.experiment_summ_id), 1695 nevents = ( 1696 SELECT sum_nevents 1697 FROM sum_dur_nevents 1698 WHERE sum_dur_nevents.esid == experiment_summary.experiment_summ_id); 1699 1700 DROP INDEX sdn_esid_index; 1701 DROP TABLE sum_dur_nevents; 1702 1703 -- update the experiment_map table 1704 UPDATE experiment_map 1705 SET experiment_summ_id = ( 1706 SELECT new_esid 1707 FROM _esidmap_ 1708 WHERE experiment_map.experiment_summ_id == old_esid); 1709 1710 DROP INDEX _esidmap_idx; 1711 DROP TABLE _esidmap_; """ 1712 if debug: 1713 print >> sys.stderr, sqlscript 1714 print >> sys.stderr, "SQL script start time: %s" % 
str(time.localtime()[3:6]) 1715 # execute SQL script 1716 cursor.executescript( sqlscript ) 1717 # commit transactions to database and close the cursor 1718 connection.commit() 1719 if debug: 1720 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 1721 1722 # Now put the indexes back in 1723 for idx, sql in relevant_indices: 1724 cursor.execute(sql) 1725 connection.commit() 1726 1727 if debug: 1728 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 1729 1730 else: 1731 if verbose: 1732 print >> sys.stdout, "This database lacks the experiment_map and/or experiment_summary table(s)."
1733 1734 1735 # ============================================================================= 1736 # 1737 # ExperimentSummary Utilities 1738 # 1739 # ============================================================================= 1740 1741 # Following utilities are specific to the experiment_summary table 1742
1743 -def update_experiment_summ_nevents( connection, verbose = False ):
1744 """ 1745 Updates the number of events in the num_events column of the 1746 experiment_summary table. This should be used whenever coincs 1747 are deleted from the experiment_map table or when new files 1748 are added to a database. 1749 """ 1750 if verbose: 1751 print >> sys.stderr, "Updating nevents column in experiment_summary table..." 1752 1753 # Speedup measure: index may have been deleted during dbsimplify 1754 connection.cursor().execute("CREATE INDEX IF NOT EXISTS em_esi_index ON experiment_map(experiment_summ_id)") 1755 1756 sqlquery = """ 1757 UPDATE experiment_summary 1758 SET nevents = ( 1759 SELECT COUNT(*) 1760 FROM experiment_map 1761 WHERE experiment_map.experiment_summ_id == experiment_summary.experiment_summ_id ) 1762 """ 1763 connection.cursor().execute(sqlquery) 1764 if verbose: 1765 print >> sys.stderr, "done."
1766 1767
1768 -class sim_tag_proc_id_mapper:
1769 """ 1770 Class to map sim_proc_ids in the experiment summary table to simulation names 1771 and vice-versa. 1772 """
1773 - def __init__( self, connection ):
1774 self.id_tag_map = {} 1775 self.tag_id_map = {} 1776 sqlquery = """ 1777 SELECT 1778 process_id, 1779 value 1780 FROM 1781 process_params 1782 WHERE 1783 process_id IN ( 1784 SELECT DISTINCT 1785 sim_proc_id 1786 FROM 1787 experiment_summary ) 1788 AND (param == "--userTag" OR param=="-userTag") 1789 """ 1790 for proc_id, sim_tag in connection.cursor().execute(sqlquery): 1791 self.id_tag_map[proc_id] = sim_tag 1792 self.tag_id_map[sim_tag] = proc_id
1793
1794 - def get_sim_tag( self, proc_id ):
1795 return proc_id in self.id_tag_map and self.id_tag_map[proc_id] or None
1796
1797 - def get_proc_id( self, sim_tag ):
1798 return sim_tag in self.tag_id_map and self.tag_id_map[sim_tag] or None
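An illustrative sketch of the mapper (not part of the module); `connection` is assumed to be an open sqlite3.Connection to a database containing experiment_summary and process_params tables, and the process_id below is a hypothetical example:

    from pylal import ligolw_sqlutils as sqlutils

    mapper = sqlutils.sim_tag_proc_id_mapper(connection)
    sim_tag = mapper.get_sim_tag('process:process_id:0')  # hypothetical id; returns None if unknown
    proc_id = mapper.get_proc_id(sim_tag)                  # inverse lookup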
1799 1800 1801 # ============================================================================= 1802 # 1803 # Generic Coincident Event Table Utilities 1804 # 1805 # ============================================================================= 1806 1807 # Following utilities apply to any table with a coinc_event_id column
1808 -def clean_using_coinc_table( connection, table_name, verbose = False, 1809 clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True, 1810 clean_coinc_event_map = True, clean_mapped_tables = True, selected_tables = []):
1811 """ 1812 Clears experiment_map, coinc_event, coinc_event_map, and all tables pointing to the 1813 coinc_event_map of triggers that are no longer in the specified table. 1814 Note that the experiment_summary, experiment, and time_slide_tables are left alone. 1815 This is because even if no events are left in an experiment, we still want info. about 1816 the experiment that was performed. 1817 1818 @connection to a sqlite database 1819 @table_name: name of table on which basing the cleaning. Can be any table having a coinc_event_id 1820 column. 1821 @clean_experiment_map: if set to True will clean the experiment_map table 1822 @clean_coinc_event_table: if set to True will clean the coinc_event table 1823 @clean_coinc_definer: if set to True will clean the coinc_definer table if clean_coinc_event_table 1824 is set to True (the coinc_event_table is used to determine what coinc_definers to delete) 1825 @clean_coinc_event_map: if set to True, will clean the coinc_event_map 1826 @clean_mapped_tables: clean tables listed in the coinc_event_map who's event_ids are not not in 1827 the coinc_event_map 1828 @selected_tables: if clean_mapped_tables is on, will clean the listed tables if they appear in the 1829 coinc_event_map and have an event_id column. Default, [], is to clean all tables found. 1830 The requirement that the table has an event_id avoids cleaning simulation tables. 1831 """ 1832 1833 # Delete from experiment_map 1834 if clean_experiment_map: 1835 if verbose: 1836 print >> sys.stderr, "Cleaning the experiment_map table..." 1837 sqlquery = ''.join([""" 1838 DELETE 1839 FROM experiment_map 1840 WHERE coinc_event_id NOT IN ( 1841 SELECT coinc_event_id 1842 FROM """, table_name, ')' ]) 1843 connection.cursor().execute( sqlquery ) 1844 connection.commit() 1845 1846 # Delete from coinc_event_map 1847 if clean_coinc_event_map: 1848 if verbose: 1849 print >> sys.stderr, "Cleaning the coinc_event_map table..." 1850 skip_tables = [ ''.join(['table_name != "', tname, '"']) 1851 for tname in get_cem_table_names(connection) if tname == 'coinc_event' or tname.startswith('sim_') 1852 ] 1853 1854 sqlquery = ''.join([ """ 1855 DELETE 1856 FROM coinc_event_map 1857 WHERE 1858 coinc_event_id NOT IN ( 1859 SELECT coinc_event_id 1860 FROM """, table_name, ')', 1861 skip_tables != [] and ' AND\n\t\t'+' AND\n\t\t'.join(skip_tables) or '' ]) 1862 connection.cursor().execute( sqlquery ) 1863 connection.commit() 1864 1865 # Find tables listed in coinc_event_map 1866 if clean_mapped_tables and selected_tables == []: 1867 selected_tables = get_cem_table_names(connection) 1868 1869 # Delete events from tables that were listed in the coinc_event_map 1870 # we only want to delete event_ids, not simulations, so if a table 1871 # does not have an event_id, we just pass 1872 if clean_mapped_tables: 1873 clean_mapped_event_tables( connection, selected_tables, 1874 raise_err_on_missing_evid = False, verbose = verbose ) 1875 1876 # Delete from coinc_event 1877 if clean_coinc_event_table: 1878 if verbose: 1879 print >> sys.stderr, "Cleaning the coinc_event table..." 1880 sqlquery = """ 1881 DELETE 1882 FROM coinc_event 1883 WHERE coinc_event_id NOT IN ( 1884 SELECT DISTINCT coinc_event_id 1885 FROM coinc_event_map)""" 1886 connection.cursor().execute( sqlquery ) 1887 connection.commit() 1888 1889 # Delete from coinc_definer 1890 if clean_coinc_definer and clean_coinc_event_table: 1891 if verbose: 1892 print >> sys.stderr, "Cleaning the coinc_definer table..." 
1893 sqlquery = """ 1894 DELETE 1895 FROM coinc_definer 1896 WHERE coinc_def_id NOT IN ( 1897 SELECT coinc_def_id 1898 FROM coinc_event )""" 1899 connection.cursor().execute( sqlquery ) 1900 connection.commit()
1901
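A sketch of a typical call (assuming an open `connection`): after triggers have been deleted from, say, coinc_inspiral, the map and coinc tables can be brought back in sync with it.

    from pylal import ligolw_sqlutils as sqlutils

    sqlutils.clean_using_coinc_table(connection, 'coinc_inspiral', verbose=True,
        clean_experiment_map=True, clean_coinc_event_table=True, clean_coinc_definer=True,
        clean_coinc_event_map=True, clean_mapped_tables=True, selected_tables=[])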
1902 -def apply_inclusion_rules_to_coinc_table( connection, coinc_table, exclude_coincs = None, include_coincs = None, 1903 param_filters = None, verbose = False ):
1904 """ 1905 Clears the given table of coinc triggers falling outside of the 1906 desired ranges, as specified by parse_param_ranges and parse_coinc_opts. 1907 1908 @connection: connection to a SQLite database with lsctables 1909 @coinc_table: name of the coinc_table to delete the triggers from. 1910 Can be any table with a coinc_event_id. 1911 @param_filters: output of parse_param_ranges(...).get_param_filters() 1912 @include_coincs: output of parse_coinc_opts(...).get_coinc_filters(). 1913 The coincs that are specified in this list will be SAVED. 1914 @exclude_coincs: output of parse_coinc_opts(...).get_coinc_filters(). 1915 The coincs that are specified in this list will be DELETED. 1916 Note: exclude_coincs is applied first, so anything falling in it will 1917 be deleted, regardless of wether or not the same falls in include_coincs. 1918 To avoid confusion, it is best to only specify one or the other, not both. 1919 """ 1920 coinc_table = validate_option( coinc_table ) 1921 if verbose: 1922 print >> sys.stderr, "Removing coincs from %s table that " % coinc_table + \ 1923 "fall outside of desired ranges and coinc-types..." 1924 1925 join_conditions = join_experiment_tables_to_coinc_table( coinc_table ) 1926 1927 if exclude_coincs: 1928 del_rows_from_table( connection, coinc_table, 'coinc_event_id', 1929 join_conditions, 1930 del_filters = exclude_coincs, verbose = verbose ) 1931 if include_coincs: 1932 del_rows_from_table( connection, coinc_table, 'coinc_event_id', 1933 join_conditions, 1934 save_filters = include_coincs, verbose = verbose ) 1935 if param_filters: 1936 del_rows_from_table( connection, coinc_table, 'coinc_event_id', 1937 join_conditions, 1938 save_filters = param_filters, verbose = verbose ) 1939 1940 # remove deleted coincs from other tables 1941 clean_using_coinc_table( connection, coinc_table, verbose = verbose, 1942 clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True, 1943 clean_coinc_event_map = True, clean_mapped_tables = True )
1944 1945 1946 # ============================================================================= 1947 # 1948 # CoincDefiner Utilities 1949 # 1950 # ============================================================================= 1951 1952 # Following utilities are specific to the coinc_definer table
1953 -def write_newstyle_coinc_def_entry( connection, description, search = None, search_coinc_type = None ):
1954 """ 1955 Adds a new entry to the coinc_definer_table. The only thing used to discriminate 1956 different coinc_definer entries is the description column. Search and search_coinc_type 1957 can also be optionally specified. 1958 """ 1959 sqlquery = "SELECT coinc_def_id FROM coinc_definer WHERE description == ?" 1960 results = connection.cursor().execute( sqlquery, (description,) ).fetchall() 1961 if results == []: 1962 # none found, write new entry 1963 sqlquery = 'INSERT INTO coinc_definer (coinc_def_id, description, search, search_coinc_type) VALUES (?, ?, ?, ?)' 1964 this_id = get_next_id( connection, 'coinc_definer', 'coinc_def_id' ) 1965 if not search_coinc_type: 1966 # assign to the new coinc_definer row the largest+1 search_coinc_type INT 1967 last_coinc_type = connection.execute('SELECT MAX(search_coinc_type) FROM coinc_definer').fetchone()[0] 1968 if last_coinc_type is None: 1969 last_coinc_type = 0 1970 search_coinc_type = 1 + last_coinc_type 1971 connection.cursor().execute( sqlquery, (str(this_id), description, search, search_coinc_type) ) 1972 connection.commit() 1973 else: 1974 this_id = ilwd.ilwdchar(results.pop()[0]) 1975 1976 return this_id
1977
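A usage sketch (the description string is an arbitrary example; `connection` is assumed to be an open sqlite3.Connection to a database with a coinc_definer table):

    from pylal import ligolw_sqlutils as sqlutils

    cdid = sqlutils.write_newstyle_coinc_def_entry(connection, 'sim_inspiral<-->coinc_event coincidences')
    # a second call with the same description returns the existing id instead of adding a duplicate row
    same_cdid = sqlutils.write_newstyle_coinc_def_entry(connection, 'sim_inspiral<-->coinc_event coincidences')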
1978 -def get_map_labels( connection ):
1979 """ 1980 Retrieves values in the description column of the coinc_definer table. 1981 """ 1982 sqlquery = """ 1983 SELECT DISTINCT 1984 description 1985 FROM 1986 coinc_definer 1987 """ 1988 return [lbl for (lbl,) in connection.cursor().execute(sqlquery).fetchall()]
1989
1990 -def get_coinc_types( connection ):
1991 """ 1992 Retrieves all of the distinct map-label and coinc-type pairs in the database. 1993 A dictionary is returned in which they keys are the map-labels and the values 1994 are lists of tuples. Each tuple gives all of the tables mapped to a 1995 coinc_event_id. 1996 """ 1997 # create a function to get the mapped tables 1998 create_mapped_tables_func( connection ) 1999 sqlquery = """ 2000 SELECT DISTINCT 2001 coinc_definer.description, 2002 get_mapped_tables(a.table_name), 2003 get_mapped_tables(b.table_name) 2004 FROM 2005 coinc_event_map AS a 2006 JOIN 2007 coinc_definer, coinc_event 2008 ON ( 2009 coinc_definer.coinc_def_id == coinc_event.coinc_def_id AND 2010 coinc_event.coinc_event_id == a.coinc_event_id ) 2011 LEFT OUTER JOIN 2012 coinc_event_map AS b 2013 ON ( 2014 a.event_id == b.coinc_event_id ) 2015 GROUP BY 2016 coinc_event.coinc_event_id 2017 """ 2018 coinc_types = {} 2019 for (map_type, tblsA, tblsB) in connection.cursor().execute(sqlquery).fetchall(): 2020 coinc_types.setdefault( map_type, [] ) 2021 if tblsB == '': 2022 coinc_types[ map_type ].append(set(tblsA.split(','))) 2023 else: 2024 coinc_types[ map_type ].append(set(tblsA.split(',') + tblsB.split(','))) 2025 2026 return coinc_types
2027
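A sketch of inspecting the result (assuming an open `connection`): the keys are map labels and each value is a list of sets of table names defining a coinc type.

    from pylal import ligolw_sqlutils as sqlutils

    coinc_types = sqlutils.get_coinc_types(connection)
    for map_label, table_sets in coinc_types.items():
        for tables in table_sets:
            print map_label, '+'.join(sorted(tables))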
2028 -def delete_map( connection, map_label ):
2029 """ 2030 Deletes all mappings that have the given map_label in the description column of the coinc_definer table. 2031 """ 2032 # FIXME: Currently, this only will delete coinc_event_ids from the coinc_event 2033 # coinc_event_map table; consider extending to data tables (e.g., coinc_inspiral) 2034 # in the future 2035 sqlquery = """ 2036 DELETE FROM 2037 coinc_definer 2038 WHERE 2039 description == ?""" 2040 connection.cursor().execute( sqlquery, (map_label,) ) 2041 connection.commit() 2042 sqlquery = """ 2043 DELETE FROM 2044 coinc_event 2045 WHERE 2046 coinc_def_id NOT IN ( 2047 SELECT 2048 coinc_def_id 2049 FROM 2050 coinc_definer ); 2051 DELETE FROM 2052 coinc_event_map 2053 WHERE 2054 coinc_event_id NOT IN ( 2055 SELECT 2056 coinc_event_id 2057 FROM 2058 coinc_event ); 2059 """ 2060 connection.cursor().executescript(sqlquery)
2061
2062 -def simplify_coincdef_tbl(connection, verbose=False, debug=False):
2063 """ 2064 Remove duplicate entries in the coinc_definer table and update the coinc_event table 2065 with new coinc_def_ids. 2066 """ 2067 2068 all_tables = zip(*get_tables_in_database(connection))[0] 2069 table_names = ['coinc_definer','coinc_event'] 2070 # check that both tables in table_names exist in the database 2071 if not set(table_names) - set(all_tables): 2072 if verbose: 2073 print >> sys.stdout, "\nCleaning up the coinc_definer table..." 2074 2075 # create the cursor object used to execute queries and commands 2076 cursor = connection.cursor() 2077 2078 # get the non-auto-generated indices for the tables in table_names 2079 relevant_indices = get_user_created_indices(connection, table_names) 2080 # drop indices that will interfere with update & delete statements 2081 for idx, sql in relevant_indices: 2082 validate_option(idx) 2083 cursor.execute('DROP INDEX %s' % idx) 2084 2085 sqlscript = """ 2086 -- Create a table that maps the coinc_definer_ids of redundant entries 2087 -- to those entries one is going to keep. 2088 CREATE TEMP TABLE _cdidmap_ AS 2089 SELECT 2090 old_cd_table.coinc_def_id AS old_cdid, 2091 MIN(new_cd_table.coinc_def_id) AS new_cdid 2092 FROM 2093 coinc_definer AS old_cd_table 2094 JOIN coinc_definer AS new_cd_table ON ( 2095 new_cd_table.search == old_cd_table.search 2096 AND new_cd_table.search_coinc_type == old_cd_table.search_coinc_type 2097 AND new_cd_table.description == old_cd_table.description 2098 ) 2099 GROUP BY old_cdid; 2100 2101 CREATE INDEX cdidmap_index ON _cdidmap_ (old_cdid); 2102 2103 -- Update the coinc_event table with new coinc_def_ids 2104 UPDATE coinc_event 2105 SET coinc_def_id = ( 2106 SELECT new_cdid 2107 FROM _cdidmap_ 2108 WHERE old_cdid == coinc_def_id); 2109 2110 -- Remove redundant entries in the coinc_definer table 2111 DELETE FROM coinc_definer 2112 WHERE coinc_def_id NOT IN ( 2113 SELECT DISTINCT new_cdid 2114 FROM _cdidmap_ ); 2115 2116 DROP INDEX cdidmap_index; 2117 DROP TABLE _cdidmap_; """ 2118 if debug: 2119 print >> sys.stderr, sqlscript 2120 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 2121 # execute SQL script 2122 cursor.executescript( sqlscript ) 2123 # commit transactions to database and close the cursor 2124 connection.commit() 2125 if debug: 2126 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 2127 2128 # Now put the indexes back in 2129 for idx, sql in relevant_indices: 2130 cursor.execute(sql) 2131 connection.commit() 2132 2133 if debug: 2134 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 2135 2136 else: 2137 if verbose: 2138 print >> sys.stdout, "This database lacks the coinc_event and coinc_definer tables."
2139 2140
2141 -def delete_coinc_type( connection, map_label, coincTables ):
2142 """ 2143 Deletes all mappings that have the given coinc type. 2144 2145 @map_label: the type of mapping between the tables to delete 2146 (this is stored in the description column of the coinc_definer 2147 table) 2148 @coinctables: list of the table names involved in the 2149 coincidences to delete. These define the coinc type. 2150 """ 2151 # FIXME: Currently, this only will delete coinc_event_ids from the coinc_event 2152 # coinc_event_map table; consider extending to data tables (e.g., coinc_inspiral) 2153 # in the future 2154 create_mapped_tables_func(connection, 2) 2155 # create a table of coinc_event_ids to delete 2156 sqlquery = """ 2157 CREATE TEMP TABLE delete_coinc_types AS 2158 SELECT 2159 a.coinc_event_id AS ceid 2160 FROM 2161 coinc_event_map AS a 2162 LEFT OUTER JOIN 2163 coinc_event_map AS b 2164 ON ( 2165 a.event_id == b.coinc_event_id ) 2166 JOIN 2167 coinc_event, coinc_definer 2168 ON ( 2169 a.coinc_event_id == coinc_event.coinc_event_id AND 2170 coinc_event.coinc_def_id == coinc_definer.coinc_def_id ) 2171 WHERE 2172 coinc_definer.description == ? 2173 GROUP BY 2174 a.coinc_event_id 2175 HAVING 2176 get_mapped_tables(a.table_name, b.table_name) == ? 2177 """ 2178 connection.cursor().execute(sqlquery, (map_label, ','.join(sorted(coincTables)),)) 2179 sqlquery = """ 2180 DELETE FROM 2181 coinc_event 2182 WHERE 2183 coinc_event_id IN ( 2184 SELECT 2185 ceid 2186 FROM 2187 delete_coinc_types ); 2188 DELETE FROM 2189 coinc_event_map 2190 WHERE 2191 coinc_event_id IN ( 2192 SELECT 2193 ceid 2194 FROM 2195 delete_coinc_types ); 2196 DELETE FROM 2197 coinc_definer 2198 WHERE 2199 coinc_def_id NOT IN ( 2200 SELECT DISTINCT 2201 coinc_def_id 2202 FROM 2203 coinc_event ); 2204 DROP TABLE delete_coinc_types; 2205 """ 2206 connection.cursor().executescript(sqlquery) 2207 connection.commit()
2208 2209 2210 # ============================================================================= 2211 # 2212 # CoincEvent Utilities 2213 # 2214 # ============================================================================= 2215 2216 # Following utilities are specific to the coinc_event table
2217 -def add_coinc_event_entries( connection, process_id, coinc_def_id, time_slide_id, num_new_entries = 1 ):
2218 """ 2219 Writes N new entries in the coinc_event table, where N is given by num_new_entries. 2220 """ 2221 # get the next id 2222 start_id = get_next_id( connection, 'coinc_event', 'coinc_event_id' ) 2223 # create list of new entries to add 2224 new_entries = [(str(process_id), str(coinc_def_id), str(time_slide_id), str(start_id+ii)) for ii in range(num_new_entries)] 2225 # add the entries to the coinc_event tabe 2226 sqlquery = 'INSERT INTO coinc_event (process_id, coinc_def_id, time_slide_id, coinc_event_id) VALUES (?, ?, ?, ?)' 2227 connection.cursor().executemany( sqlquery, new_entries ) 2228 # return the coinc_event_ids of the new entries 2229 return [ilwd.ilwdchar(new_id[-1]) for new_id in new_entries]
2230 2231
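An illustrative sketch (assuming an open `connection` whose process, coinc_definer, and time_slide tables are populated; the description label is an example): reserve three new coinc_event rows sharing the same process, definer, and time-slide ids.

    from pylal import ligolw_sqlutils as sqlutils

    process_id, = connection.cursor().execute('SELECT process_id FROM process LIMIT 1').fetchone()
    time_slide_id, = connection.cursor().execute('SELECT time_slide_id FROM time_slide LIMIT 1').fetchone()
    coinc_def_id = sqlutils.write_newstyle_coinc_def_entry(connection, 'example map label')
    new_ceids = sqlutils.add_coinc_event_entries(connection, process_id, coinc_def_id, time_slide_id, num_new_entries=3)
    # new_ceids holds the three freshly assigned coinc_event_ids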
2232 -def update_coinctab_nevents( connection ):
2233 """ 2234 Updates the nevents column based on what's in the coinc_event_map table. 2235 """ 2236 sqlquery = """ 2237 UPDATE 2238 coinc_event 2239 SET 2240 nevents = ( 2241 SELECT 2242 COUNT(*) 2243 FROM 2244 coinc_event_map 2245 WHERE 2246 coinc_event_map.coinc_event_id == coinc_event.coinc_event_id 2247 )""" 2248 connection.cursor().execute(sqlquery) 2249 connection.commit()
2250 2251 2252 # ============================================================================= 2253 # 2254 # CoincEventMap Utilities 2255 # 2256 # ============================================================================= 2257 2258 # Following utilities are specific to the coinc_event_map table
2259 -def get_cem_table_names( connection ):
2260 """ 2261 Retrieves the all of the table names present in the coinc_event_map table. 2262 2263 @connection: connection to a sqlite database 2264 """ 2265 sqlquery = 'SELECT DISTINCT table_name FROM coinc_event_map' 2266 return [table_name[0] for table_name in connection.cursor().execute( sqlquery )]
2267 2268
2269 -def get_matching_tables( connection, coinc_event_ids ):
2270 """ 2271 Gets all the tables that are directly mapped to a list of coinc_event_ids. 2272 Returns a dictionary mapping the tables their matching coinc_event_ids. 2273 2274 @coinc_event_ids: list of coinc_event_ids to get table matchings for 2275 """ 2276 matching_tables = {} 2277 sqlquery = """ 2278 SELECT 2279 coinc_event_id, 2280 table_name 2281 FROM 2282 coinc_event_map""" 2283 for ceid, table_name in [(qryid, qryname) for qryid, qryname in connection.cursor().execute(sqlquery) if qryid in coinc_event_ids]: 2284 if table_name not in matching_tables: 2285 matching_tables[table_name] = [] 2286 matching_tables[table_name].append(ceid) 2287 2288 return matching_tables
2289
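A sketch (assuming an open `connection`; the id below is a hypothetical example):

    from pylal import ligolw_sqlutils as sqlutils

    ceids = ['coinc_event:coinc_event_id:0']  # hypothetical coinc_event_ids of interest
    for table_name, matching_ids in sqlutils.get_matching_tables(connection, ceids).items():
        print table_name, len(matching_ids)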
2290 -class get_mapped_tables:
2291 """ 2292 Convenience class to retrieve all the tables mapped to a coinc_event_id. 2293 If added as an aggregate function to a connection, this can be called 2294 in a query. Example: 2295 2296 from pylal import ligolw_sqlutils as sqlutils 2297 connection.create_aggregate('get_mapped_tables', 1, sqlutils.get_mapped_tables) 2298 sqlquery = ''' 2299 SELECT 2300 coinc_event_id, 2301 get_mapped_tables(table_name) 2302 FROM 2303 coinc_event_map 2304 GROUP BY 2305 coinc_event_id 2306 ''' 2307 connection.cursor().execute(sqlquery) 2308 2309 This would return all the tables mapped to each coinc_event_id 2310 in the coinc_event_map table. 2311 """
2312 - def __init__(self):
2313 self.result = []
2314 - def step(self, *table_names):
2315 self.result.extend(map(str, table_names))
2316 - def finalize(self):
2317 return ','.join(sorted(set(self.result)-set(['None'])))
2318
2319 -def create_mapped_tables_func(connection, nargs = 1):
2320 """ 2321 Creates a function in the database called get_mapped_tables that allows one 2322 to quickly get all the mapped tables to a coinc_event_id. 2323 """ 2324 connection.create_aggregate( 'get_mapped_tables', nargs, get_mapped_tables)
2325
2326 -def clean_mapped_event_tables( connection, tableList, raise_err_on_missing_evid = False, verbose = False ):
2327 """ 2328 Cleans tables given in tableList of events whose event_ids aren't in 2329 the coinc_event_map table. 2330 2331 @connection: connection to a sqlite database 2332 @tableList: Any table with an event_id column. 2333 @raise_err_on_missing_evid: if set to True, will raise an error 2334 if an event_id column can't be found in any table in tableList. 2335 If False, will just skip the table. 2336 """ 2337 # get tables from tableList that have event_id columns 2338 selected_tables = [ table for table in tableList 2339 if 'event_id' in get_column_names_from_table( connection, table ) ] 2340 if selected_tables != tableList and raise_err_on_missing_evid: 2341 raise ValueError, "tables %s don't have event_id columns" % ', '.join([ 2342 table for table in tableList if table not in selected_tables ]) 2343 2344 # clean the tables 2345 for table in selected_tables: 2346 if verbose: 2347 print >> sys.stderr, "Cleaning the %s table..." % table 2348 sqlquery = ''.join([ """ 2349 DELETE 2350 FROM """, table, """ 2351 WHERE event_id NOT IN ( 2352 SELECT event_id 2353 FROM coinc_event_map )""" ]) 2354 connection.cursor().execute( sqlquery ) 2355 connection.commit()
2356 2357 2358 # ============================================================================= 2359 # 2360 # CoincInspiral Utilities 2361 # 2362 # ============================================================================= 2363 2364 # Following utilities are specific to the coinc_inspiral table 2365
2366 -def join_experiment_tables_to_coinc_inspiral():
2367 """ 2368 Writes JOIN string to join the experiment, experiment_summary, 2369 and experiment_map table to the coinc_inspiral table. 2370 NOTE: Should only use when querying the coinc_inspiral table (i.e., 2371 the only table listed in the FROM statement is the coinc_inspiral). 2372 """ 2373 2374 return join_experiment_tables_to_coinc_table('coinc_inspiral')
2375 2376
2377 -def apply_inclusion_rules_to_coinc_inspiral( connection, exclude_coincs = None, include_coincs = None, 2378 param_filters = None, verbose = False ):
2379 """ 2380 Clears coinc_inspiral table of coinc triggers falling outside of the 2381 desired ranges, as specified by parse_param_ranges and parse_coinc_opts. 2382 2383 See apply_inclusion_rules_to_coinc_table for more info. 2384 """ 2385 apply_inclusion_rules_to_coinc_table(connection, 'coinc_inspiral', exclude_coincs = exclude_coincs, include_coincs = include_coincs, 2386 param_filters = param_filters, verbose = verbose )
2387 2388
2389 -def clean_inspiral_tables( connection, verbose = False ):
2390 """ 2391 Clears experiment_map, coinc_event, coinc_event_map, and all tables pointing to the 2392 coinc_event_map of triggers that are no longer in the coinc_inspiral table. 2393 Note that the experiment_summary, experiment, and time_slide_tables are left alone. 2394 This is because even if no events are left in an experiment, we still want info. about 2395 the experiment that was performed. 2396 """ 2397 clean_using_coinc_table( connection, 'coinc_inspiral', verbose = verbose, 2398 clean_experiment_map = True, clean_coinc_event_table = True, clean_coinc_definer = True, 2399 clean_coinc_event_map = True, clean_mapped_tables = True, selected_tables = [])
2400 2401 2402 # ============================================================================= 2403 # 2404 # Simulation Utilities 2405 # 2406 # ============================================================================= 2407 2408 # Following utilities are specific to any simulation table 2409
2410 -def create_sim_rec_map_table(connection, simulation_table, recovery_table, map_label, ranking_stat = None):
2411 """ 2412 Creates a temporary table in the sqlite database called sim_rec_map. 2413 This table creates a direct mapping between simulation_ids in the simulation table 2414 and coinc_event_ids from the recovery_table, along with a ranking stat from the 2415 recovery_table. 2416 The columns in the sim_rec_map table are: 2417 * rec_id: coinc_event_ids of matching events from the recovery table 2418 * sim_id: the simulation_id from the sim_inspiral table 2419 * ranking_stat: any stat from the recovery table by which to rank 2420 In addition, indices on the sim and rec ids are put on the table. 2421 2422 Note that because this is a temporary table, as soon as the connection is 2423 closed, it will be deleted. 2424 2425 @connection: connection to a sqlite database 2426 @simulation_table: any lsctable with a simulation_id column; e.g., sim_inspiral 2427 @recovery_table: any lsctable with a coinc_event_id column; e.g., coinc_inspiral 2428 @map_label: the label applied to the mapping between the injections and recovered 2429 @ranking_stat: the name of the ranking stat in the recovery table to use. 2430 If set to None, ranking_stat column won't be populated. 2431 """ 2432 # remove the table if it is already in the database 2433 if 'sim_rec_map' in get_tables_in_database(connection): 2434 sqlquery = 'DROP TABLE sim_rec_map' 2435 connection.cursor().execute(sqlquery) 2436 # create the sim_rec_map table; initially, this contains all mapped triggers in the database 2437 sqlquery = ''.join([''' 2438 CREATE TEMP TABLE 2439 sim_rec_map 2440 AS 2441 SELECT 2442 sim.simulation_id AS sim_id, 2443 rec_coinc.coinc_event_id AS rec_id, 2444 NULL AS ranking_stat 2445 FROM 2446 ''', recovery_table, ''' AS rec_coinc 2447 JOIN 2448 coinc_event_map AS a 2449 ON ( 2450 a.event_id == rec_coinc.coinc_event_id AND 2451 a.table_name == "coinc_event" ) 2452 JOIN 2453 coinc_event_map AS b 2454 ON ( 2455 b.coinc_event_id == a.coinc_event_id) 2456 JOIN 2457 ''', simulation_table, ''' AS sim 2458 ON ( 2459 b.event_id == sim.simulation_id) 2460 JOIN 2461 coinc_event, coinc_definer 2462 ON ( 2463 coinc_event.coinc_event_id == b.coinc_event_id AND 2464 coinc_definer.coinc_def_id == coinc_event.coinc_def_id 2465 ) 2466 WHERE 2467 coinc_definer.description == ? 2468 ''']) 2469 connection.cursor().execute(sqlquery, (map_label,)) 2470 2471 # create indices 2472 sqlquery = ''' 2473 CREATE INDEX srm_sid_index ON sim_rec_map (sim_id); 2474 CREATE INDEX srm_rid_index ON sim_rec_map (rec_id); 2475 ''' 2476 connection.cursor().executescript(sqlquery) 2477 2478 if ranking_stat is not None: 2479 # if it isn't already, append the recovery_table name to the ranking_stat to ensure uniqueness 2480 if not ranking_stat.strip().startswith(recovery_table): 2481 ranking_stat = '.'.join([recovery_table.strip(), ranking_stat.strip()]) 2482 2483 sqlquery = ''.join([ ''' 2484 UPDATE 2485 sim_rec_map 2486 SET ranking_stat = ( 2487 SELECT 2488 ''', ranking_stat, ''' 2489 FROM 2490 ''', recovery_table, ''' 2491 WHERE 2492 ''', recovery_table, '''.coinc_event_id == sim_rec_map.rec_id ); 2493 ''' ]) 2494 2495 connection.cursor().execute(sqlquery)
2496 2497
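A sketch of building and then reading the temporary map (assuming an open `connection` whose database contains sim_inspiral, coinc_inspiral, and a mapping whose coinc_definer description is the label below; the label and ranking column are examples):

    from pylal import ligolw_sqlutils as sqlutils

    sqlutils.create_sim_rec_map_table(connection, 'sim_inspiral', 'coinc_inspiral',
        'sim_inspiral<-->coinc_event coincidences', ranking_stat='combined_far')
    sqlquery = 'SELECT sim_id, rec_id, ranking_stat FROM sim_rec_map ORDER BY ranking_stat ASC'
    for sim_id, rec_id, stat in connection.cursor().execute(sqlquery):
        print sim_id, rec_id, stat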
2498 -def simplify_sim_tbls(connection, verbose=False, debug=False):
2499 """ 2500 Remove duplicates from simulation tables (sim_inspiral & sim_ringdown) 2501 if those tables exist in the database. Also update the sim_proc_id column 2502 in the experiment_summary table. 2503 """ 2504 2505 # create the cursor object used to execute queries and commands 2506 cursor = connection.cursor() 2507 2508 # check for duplicate entries from the process tbl 2509 old_pids = get_pids_to_update(cursor, ['inspinj','rinj','gstlal_injections_by_local_rate']) 2510 2511 # check whether there is a simulation table in the database 2512 all_tables = zip(*get_tables_in_database(connection))[0] 2513 2514 if old_pids and [tbl for tbl in all_tables if 'sim_' in tbl]: 2515 if verbose: 2516 print >> sys.stdout, "\nCleaning simulation tables..." 2517 2518 # get the non-auto-generated indices for the tables in table_names 2519 table_names = ['sim_inspiral','sim_ringdown','experiment_summary'] 2520 relevant_indices = get_user_created_indices(connection, table_names) 2521 # drop indices that will interfere with update & delete statements 2522 for idx, sql in relevant_indices: 2523 validate_option(idx) 2524 cursor.execute('DROP INDEX %s' % idx) 2525 2526 # if a sim_inspiral table exists, remove duplicate rows 2527 sqlscript = "" 2528 if 'sim_inspiral' in all_tables: 2529 sqlscript += """ 2530 DELETE FROM sim_inspiral 2531 WHERE process_id NOT IN ( 2532 SELECT DISTINCT new_pid 2533 FROM _pidmap_ 2534 WHERE program = 'inspinj'); """ 2535 # if a sim_ringdown table exists, remove duplicate rows 2536 if ('sim_ringdown',) in all_tables: 2537 sqlscript += """ 2538 DELETE FROM sim_ringdown 2539 WHERE process_id NOT IN ( 2540 SELECT DISTINCT new_pid 2541 FROM _pidmap_ 2542 WHERE program = 'rinj'); """ 2543 # if an experiment_summary table exists, update its sim_proc_id column 2544 if 'experiment_summary' in all_tables: 2545 sqlscript += """ 2546 -- Update sim_proc_ids in the experiment_summary table 2547 UPDATE experiment_summary 2548 SET sim_proc_id = ( 2549 SELECT DISTINCT new_pid 2550 FROM _pidmap_ 2551 WHERE old_pid == sim_proc_id ); """ 2552 if debug: 2553 print >> sys.stderr, sqlscript 2554 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 2555 # execute SQL script 2556 cursor.executescript( sqlscript ) 2557 # commit transactions to database and close the cursor 2558 connection.commit() 2559 if debug: 2560 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 2561 2562 # Now put the indexes back in 2563 for idx, sql in relevant_indices: 2564 cursor.execute(sql) 2565 connection.commit() 2566 2567 if debug: 2568 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 2569 2570 else: 2571 if verbose: 2572 if not [tbl for tbl in all_tables if 'sim_' in tbl]: 2573 print >> sys.stdout, "This database lacks a simulation table." 2574 else: 2575 print >> sys.stdout, "The simulation tables lack duplicates." 2576 cursor.close()
2577 2578 2579 # ============================================================================= 2580 # 2581 # Segment Utilities 2582 # 2583 # ============================================================================= 2584 2585 # Following utilities apply to the segment and segment definer table
2586 -class segdict_from_segment:
2587 """ 2588 Class to a build a segmentlist dict out of the entries in the segment 2589 and segment_definer table in the sqlite database. 2590 """ 2591 from glue import segments 2592 try: 2593 from pylal.xlal.datatypes.ligotimegps import LIGOTimeGPS 2594 except ImportError: 2595 # s6 code 2596 from pylal.xlal.date import LIGOTimeGPS 2597 2598 snglinst_segdict = segments.segmentlistdict() 2599
2600 - def __init__(self, connection, filter = ''):
2601 if filter != '' and not filter.strip().startswith('WHERE'): 2602 filter = 'WHERE\n' + filter 2603 2604 sqlquery = '\n'.join([""" 2605 SELECT 2606 segment_definer.ifos, 2607 segment.start_time, 2608 segment.end_time 2609 FROM 2610 segment 2611 JOIN 2612 segment_definer ON 2613 segment_definer.segment_def_id == segment.segment_def_id""", 2614 filter ]) 2615 for ifos, start_time, end_time in connection.cursor().execute(sqlquery): 2616 for ifo in lsctables.instrument_set_from_ifos(ifos): 2617 if ifo not in self.snglinst_segdict: 2618 self.snglinst_segdict[ifo] = segments.segmentlist() 2619 self.snglinst_segdict[ifo].append( segments.segment(LIGOTimeGPS(start_time, 0),LIGOTimeGPS(end_time,0)) )
2620
2621 - def is_in_sngl_segdict( self, instrument, gpstime, gpstime_ns ):
2622 """ 2623 Checks if a gpstime is in the given instrument time. 2624 """ 2625 return LIGOTimeGPS(gpstime, gpstime_ns) in self.snglinst_segdict[instrument]
2626 2627
2628 -def simplify_segments_tbls(connection, verbose=False, debug=False):
2629 """ 2630 Removing duplicate row in the segment, segment_definer and segment_summary 2631 tables as well as the associated entries in the process & process_params tables 2632 """ 2633 2634 all_tables = zip(*get_tables_in_database(connection))[0] 2635 if 'segment' in all_tables: 2636 if verbose: 2637 print >> sys.stdout, "\nCleaning up the segments tables..." 2638 2639 # create function to concatenate 2 columns together per row 2640 connection.create_function("concat_2cols", 2, concatenate) 2641 2642 # create the cursor object used to execute queries and commands 2643 cursor = connection.cursor() 2644 2645 # get the non-auto-generated indices for the tables in table_names 2646 table_names = ['process','process_params', 2647 'segment','segment_definer','segment_summary'] 2648 relevant_indices = get_user_created_indices(connection, table_names) 2649 # drop indices that will interfere with update & delete statements 2650 for idx, sql in relevant_indices: 2651 validate_option(idx) 2652 cursor.execute('DROP INDEX %s' % idx) 2653 2654 sqlscript = """ 2655 CREATE INDEX sd_pisdi_index ON segment_definer (process_id, segment_def_id); 2656 CREATE INDEX ss_pisdi_index ON segment_summary (process_id, segment_def_id); 2657 2658 CREATE TEMP TABLE segdef_summary AS 2659 SELECT 2660 segment_definer.process_id AS proc_id, 2661 segment_definer.ifos AS ifo, 2662 concat_2cols(segment_definer.name, segment_definer.version) AS cat_vers, 2663 concat_2cols(segment_summary.start_time, segment_summary.end_time) AS times 2664 FROM 2665 segment_definer 2666 JOIN segment_summary ON ( 2667 segment_definer.process_id == segment_summary.process_id 2668 AND segment_definer.segment_def_id == segment_summary.segment_def_id) 2669 GROUP BY proc_id; 2670 2671 DROP INDEX sd_pisdi_index; 2672 DROP INDEX ss_pisdi_index; 2673 2674 CREATE TEMP TABLE _sdpid_map_ AS 2675 SELECT 2676 old_segs_tbl.proc_id AS old_pid, 2677 MIN(new_segs_tbl.proc_id) AS new_pid 2678 FROM 2679 segdef_summary AS old_segs_tbl 2680 JOIN segdef_summary AS new_segs_tbl ON ( 2681 new_segs_tbl.ifo == old_segs_tbl.ifo 2682 AND new_segs_tbl.times == old_segs_tbl.times 2683 AND new_segs_tbl.cat_vers == old_segs_tbl.cat_vers ) 2684 GROUP BY old_pid; 2685 2686 DROP TABLE segdef_summary; 2687 CREATE INDEX sdpid_idx ON _sdpid_map_ (old_pid, new_pid); 2688 2689 DELETE FROM segment 2690 WHERE process_id NOT IN ( 2691 SELECT DISTINCT new_pid 2692 FROM _sdpid_map_ ); 2693 DELETE FROM segment_definer 2694 WHERE process_id NOT IN ( 2695 SELECT DISTINCT new_pid 2696 FROM _sdpid_map_ ); 2697 DELETE FROM segment_summary 2698 WHERE process_id NOT IN ( 2699 SELECT DISTINCT new_pid 2700 FROM _sdpid_map_ ); 2701 2702 DELETE FROM process 2703 WHERE process_id IN ( 2704 SELECT old_pid 2705 FROM _sdpid_map_ 2706 WHERE old_pid != new_pid ); 2707 DELETE FROM process_params 2708 WHERE process_id IN ( 2709 SELECT old_pid 2710 FROM _sdpid_map_ 2711 WHERE old_pid != new_pid ); 2712 2713 DROP INDEX sdpid_idx; 2714 DROP TABLE _sdpid_map_; """ 2715 2716 if debug: 2717 print >> sys.stderr, sqlscript 2718 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 2719 # execute SQL script 2720 cursor.executescript( sqlscript ) 2721 # commit transactions to database 2722 connection.commit() 2723 if debug: 2724 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 2725 2726 # Now put the indexes back in 2727 for idx, sql in relevant_indices: 2728 cursor.execute(sql) 2729 connection.commit() 2730 2731 if debug: 2732 print >> sys.stderr, "Indexes readded 
at: %s" % str(time.localtime()[3:6]) 2733 2734 cursor.close() 2735 2736 else: 2737 if verbose: 2738 print >> sys.stdout, "There are no segments tables in this database"
2739 2740 2741 # ============================================================================= 2742 # 2743 # TimeSlide Utilities 2744 # 2745 # ============================================================================= 2746 2747 # Following utilities are specific to the time_slide table 2748 2749
2750 -def get_zero_lag_time_slide_ids( connection ):
2751 """ 2752 Gets zero-lag time_slide_id's from the time_slide_table. 2753 """ 2754 sqlquery = 'SELECT time_slide_id, offset FROM time_slide GROUP BY time_slide_id' 2755 slide_ids = connection.cursor().execute( sqlquery ) 2756 zero_lag_ids = [slide_id[0] for slide_id in slide_ids if slide_id[1] == 0.] 2757 2758 return zero_lag_ids
2759 2760
2761 -def get_zero_lag_instrument_sets( connection ):
2762 """ 2763 Gets instrument sets from time slide table by using the ids of the zero-lag 2764 time-slides (Assumption is there is a zero-lag row in the time-slide table). 2765 """ 2766 zero_lag_ids = get_zero_lag_time_slide_ids( connection ) 2767 2768 # sanity check 2769 if not zero_lag_ids: 2770 raise ValueError, "No zero-lag ids in time slide table, cannot get instrument set." 2771 2772 zero_lag_instrument_sets = {} 2773 for id in zero_lag_ids: 2774 id = ''.join([ '"', id, '"' ]) 2775 sqlquery = ' '.join(['SELECT instrument', 2776 'FROM time_slide', 2777 'WHERE time_slide_id ==', id ]) 2778 instruments = sorted(instrument[0] for instrument in connection.cursor().execute( sqlquery ).fetchall()) 2779 if instruments not in zero_lag_instrument_sets: 2780 zero_lag_instrument_sets[ instruments ] = [ id ] 2781 else: 2782 zero_lag_instrument_sets[ instruments ].append( id ) 2783 2784 return zero_lag_instrument_sets
2785 2786
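A sketch of reading the result (assuming an open `connection` whose time_slide table contains a zero-lag entry): with the tuple-keyed dictionary above, each key is an instrument set and each value is the list of zero-lag time_slide_ids for it.

    from pylal import ligolw_sqlutils as sqlutils

    for instruments, ts_ids in sqlutils.get_zero_lag_instrument_sets(connection).items():
        print ','.join(instruments), len(ts_ids)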
2787 -def get_instrument_sets_and_time_slide_ids( connection ):
2788 """ 2789 Gets all instrument sets available in the time slide table and gets all 2790 time-slide ids associated with that instrument set. Since this only uses the 2791 time-slide table, will get everything even if there were no coincident 2792 events during a time-slide. 2793 """ 2794 # get zero_lag ids and instrument set 2795 zero_lag_instrument_sets = get_zero_lag_instrument_sets( connection ) 2796 # will save all ids to this new dictionary 2797 instrument_set_time_slide_ids = {} 2798 2799 for instrument_set in zero_lag_instrument_sets: 2800 instrument_clause = ' AND '.join([ ''.join([ 2801 'instrument == ', '"', instrument, '"' ]) for instrument in instrument_set ]) 2802 sqlquery = ' '.join([ 2803 'SELECT time_slide_id', 2804 'FROM time_slide', 2805 'WHERE', instrument_set ]) 2806 instrument_set_time_slide_ids[ instrument_set ] = [ id[0] for id in 2807 connection.cursor().execute(sqlquery) ] 2808 2809 return instrument_set_time_slide_ids
2810 2811
2812 -def simplify_timeslide_tbl(connection, verbose=False, debug=False):
2813 """ 2814 Remove duplicate entries in the time_slide table and update entries in the 2815 the time_slide_id column of both the experiment_summary and coinc_event 2816 tables. 2817 """ 2818 2819 # create the cursor object used to execute queries and commands 2820 cursor = connection.cursor() 2821 2822 all_tables = zip(*get_tables_in_database(connection))[0] 2823 if 'time_slide' in all_tables: 2824 if verbose: 2825 print >> sys.stdout, "\nClean up the time_slide table ..." 2826 2827 # get the non-auto-generated indices for the tables in table_names 2828 table_names = ['time_slide','experiment_summary','coinc_event'] 2829 relevant_indices = get_user_created_indices(connection, table_names) 2830 # drop indices that will interfere with update & delete statements 2831 for idx, sql in relevant_indices: 2832 validate_option(idx) 2833 cursor.execute('DROP INDEX %s' % idx) 2834 2835 sqlscript = """ 2836 -- Create a table that combines the information about a single time_slide into 2837 -- a single row. This makes comparison between time_slides easier. 2838 2839 CREATE TEMP TABLE compact_time_slide AS 2840 SELECT 2841 time_slide_id AS tsid, 2842 group_concat(instrument) AS ifos, 2843 group_concat(offset) AS offset 2844 FROM time_slide 2845 GROUP BY time_slide_id; 2846 2847 CREATE INDEX cts_io_idx ON compact_time_slide (ifos, offset); 2848 2849 -- Create a table that maps the time_slide_ids of redundant time_slide entries 2850 -- to those entries one is going to keep. 2851 CREATE TEMP TABLE _tsidmap_ AS 2852 SELECT 2853 old_ts_table.tsid AS old_tsid, 2854 MIN(new_ts_table.tsid) AS new_tsid 2855 FROM 2856 compact_time_slide AS old_ts_table 2857 JOIN compact_time_slide AS new_ts_table ON ( 2858 new_ts_table.ifos == old_ts_table.ifos 2859 AND new_ts_table.offset == old_ts_table.offset) 2860 GROUP BY old_tsid; 2861 2862 DROP INDEX cts_io_idx; 2863 DROP TABLE compact_time_slide; 2864 2865 CREATE INDEX _tsidmap_idx ON _tsidmap_ (old_tsid); 2866 2867 -- Delete the redundant entries in the time_slide table 2868 DELETE FROM time_slide 2869 WHERE time_slide_id NOT IN ( 2870 SELECT DISTINCT new_tsid 2871 FROM _tsidmap_); 2872 2873 UPDATE time_slide 2874 SET process_id = ( 2875 SELECT new_pid 2876 FROM _pidmap_ 2877 WHERE old_pid = process_id); 2878 """ 2879 2880 # if a coinc_event table exists, update its time_slide_id column 2881 if 'coinc_event' in all_tables: 2882 sqlscript += """ 2883 UPDATE coinc_event 2884 SET time_slide_id = ( 2885 SELECT new_tsid 2886 FROM _tsidmap_ 2887 WHERE old_tsid == time_slide_id); 2888 """ 2889 # if an experiment_summary table exists, update its time_slide_id column 2890 if 'experiment_summary' in all_tables: 2891 sqlscript += """ 2892 UPDATE experiment_summary 2893 SET time_slide_id = ( 2894 SELECT new_tsid 2895 FROM _tsidmap_ 2896 WHERE old_tsid == time_slide_id); 2897 """ 2898 sqlscript += """ 2899 DROP INDEX _tsidmap_idx; 2900 DROP TABLE _tsidmap_; """ 2901 2902 if debug: 2903 print >> sys.stderr, sqlscript 2904 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 2905 # execute SQL script 2906 cursor.executescript( sqlscript ) 2907 # commit transactions to database 2908 connection.commit() 2909 if debug: 2910 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 2911 2912 # Now put the indexes back in 2913 for idx, sql in relevant_indices: 2914 cursor.execute(sql) 2915 connection.commit() 2916 2917 if debug: 2918 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 2919 2920 else: 2921 if verbose: 2922 
if 'time_slide' not in all_tables: 2923 print >> sys.stdout, "There is no time_slide table in this database." 2924 else: 2925 print >> sys.stdout, "The time_slide table lacks any duplicates." 2926 cursor.close()
2927 2928 2929 # ============================================================================= 2930 # 2931 # VetoDefiner Utilities 2932 # 2933 # ============================================================================= 2934 2935 # Following utilities are specific to the veto_definer table 2936
2937 -def simplify_vetodef_tbl(connection, verbose=False, debug=False):
2938 """ 2939 Cleaning up the veto_definer table as well as the associated 2940 entries in the process & process_params tables 2941 """ 2942 2943 all_tables = zip(*get_tables_in_database(connection))[0] 2944 if 'veto_definer' in all_tables: 2945 if verbose: 2946 print >> sys.stdout, "\nCleaning up the veto_definer table..." 2947 2948 # create function to concatenate 7 columns together per row 2949 connection.create_function("concat_7cols", 7, concatenate) 2950 2951 # create the cursor object used to execute queries and commands 2952 cursor = connection.cursor() 2953 2954 # get the non-auto-generated indices for the tables in table_names 2955 table_names = ['process','veto_definer'] 2956 relevant_indices = get_user_created_indices(connection, table_names) 2957 # drop indices that will interfere with update & delete statements 2958 for idx, sql in relevant_indices: 2959 validate_option(idx) 2960 cursor.execute('DROP INDEX %s' % idx) 2961 2962 sqlscript = """ 2963 CREATE TEMP TABLE veto_procinfo AS 2964 SELECT 2965 process_id, 2966 concat_7cols(process.program, process.version, process.username, 2967 process.ifos, process.cvs_entry_time, process.cvs_repository, 2968 process.comment) AS process_info 2969 FROM process 2970 WHERE 2971 process.process_id IN ( 2972 SELECT DISTINCT veto_definer.process_id 2973 FROM veto_definer ) 2974 GROUP BY process.process_id; 2975 2976 CREATE TEMP TABLE _veto_pidmap_ AS 2977 SELECT 2978 old_procinfo.process_id AS old_pid, 2979 MIN(new_procinfo.process_id) AS new_pid 2980 FROM 2981 veto_procinfo AS old_procinfo 2982 JOIN veto_procinfo AS new_procinfo ON ( 2983 new_procinfo.process_info == old_procinfo.process_info) 2984 GROUP BY old_pid; 2985 2986 DROP TABLE veto_procinfo; 2987 2988 DELETE FROM process 2989 WHERE process_id IN ( 2990 SELECT old_pid 2991 FROM _veto_pidmap_ 2992 WHERE old_pid != new_pid ); 2993 DELETE FROM veto_definer 2994 WHERE process_id NOT IN ( 2995 SELECT DISTINCT new_pid 2996 FROM _veto_pidmap_ ); 2997 2998 DROP TABLE _veto_pidmap_; """ 2999 if debug: 3000 print >> sys.stderr, sqlscript 3001 print >> sys.stderr, "SQL script start time: %s" % str(time.localtime()[3:6]) 3002 # execute SQL script 3003 cursor.executescript( sqlscript ) 3004 # commit transactions to database 3005 connection.commit() 3006 if debug: 3007 print >> sys.stderr, "SQL script end time: %s" % str(time.localtime()[3:6]) 3008 3009 # Now put the indexes back in 3010 for idx, sql in relevant_indices: 3011 cursor.execute(sql) 3012 connection.commit() 3013 3014 if debug: 3015 print >> sys.stderr, "Indexes readded at: %s" % str(time.localtime()[3:6]) 3016 3017 cursor.close() 3018 3019 else: 3020 if verbose: 3021 print >> sys.stdout, "This database lacks a veto_definer table."
3022