1
2
3
4
5
6
7
8
9 '''
10 A collection of utilities to assist in storing and sorting data queried from a database
11 or xml document.
12 '''
13
import sys, re, math
import time, datetime
import bisect

import numpy

from glue.ligolw.utils import print_tables
from glue.ligolw import ligolw
from glue.ligolw import table
from glue.ligolw import lsctables
from glue import git_version

from pylal.xlal.date import XLALGPSToUTC
try:
    from pylal.xlal.datatypes.ligotimegps import LIGOTimeGPS
except ImportError:
    from pylal.xlal.date import LIGOTimeGPS
from pylal import tools
from pylal import ligolw_sqlutils as sqlutils
31
32
33 __author__ = "Collin Capano <collin.capano@ligo.org>"
34 __version__ = git_version.id
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def get_row_stat(row, arg):
    """
    Evaluate the desired operation on columns from a row in a table.

    The desired operation can be either a pre-defined function (if it exists
    in the row's namespace) or a python expression in terms of the elements
    in the row's namespace. The namespace available to eval is limited to
    the columns in the row and the functions in the math module.

    @row: the table row (any object with the referenced attributes)
    @arg: name of an attribute/method of row, or a python expression over
        the row's columns and math functions
    """
    try:
        val = getattr(row, arg)
        try:
            # arg named a method: call it and return the result
            return val()
        except TypeError:
            # arg named a plain column value
            return val
    except AttributeError:
        # arg is not a simple attribute name: treat it as an expression
        try:
            row_dict = row.__dict__
        except AttributeError:
            # slotted classes have no __dict__; build one via dir()
            row_dict = dict([[name, getattr(row, name)] for name in dir(row)])
        safe_dict = {}
        safe_dict.update(row_dict)
        safe_dict.update(math.__dict__)
        # SECURITY NOTE: evaluates a caller-supplied expression; builtins
        # are disabled, but this is still unsafe on untrusted input.
        return eval(arg, {"__builtins__": None}, safe_dict)
76
78 """
79 Returns a list of columns from the given column list that are needed by
80 the given function string. This can be used to reduce the number of columns
81 that are passed to get_row_stat.
82
83 Parameters
84 ----------
85 column_list: list
86 A list of strings given the possible columns to pull out.
87 function: str
88 A string specifying the match criteria. Can either be:
89 * "endTime" or "startTime": in this case, all columns in
90 column_list with "end_time", "start_time", or "ifo" in their name
91 will be retrieved.
92 * "eThinca": in this case, all columns with mass1, mass2, mchirp,
93 eta, tau[0-9], time, or [Gg]amma[0-9] in their name will be
94 retrieved.
95 * a python string that is a function of one or more of the columns
96 in column_list.
97
98 Returns
99 -------
100 needed_columns: str
101 The subset of columns needed by the function.
102 """
103 if function == 'eThinca':
104 regex = re.compile(
105 'ifo|mass1|mass2|mchirp|eta|time|tau[0-9]|[Gg]amma[0-9]')
106 needed_cols = [col for col in column_list \
107 if regex.search(col) is not None]
108 elif function == 'endTime' or function == 'startTime':
109
110
111
112 regex = re.compile('ifo|end_time|start_time')
113 needed_cols = [col for col in column_list \
114 if regex.search(col) is not None]
115 else:
116 needed_cols = [col for col in column_list \
117 if re.search('(%s)' % col, function) is not None]
118 return needed_cols
119
122 """
123 Creates a DataRow class. If classTableName is the same as a table in
124 lsctables, and baseClass is not specified, the DataRow class will inherit
125 from that table's RowType. If baseClass is specified, the DataRow class
126 that is created will inherit from that base class. If baseClass is not
127 specified and classTableName is not in lsctables, then DataRow will just
128 inherit from object. Regardless of inheritance, the DataRow will have a
129 __slots__ attribute. Any columns that are specified in columns will be
130 added to the __slots__ class. The DataRow will also have a tableName
131 attribute. This will be whatever classTableName is, regardless of what
132 baseClass is set to. Think of the DataRow class as a more arbitrary version
133 of an lsctable row.
134
135 @classTableName: a string specifying the DataRow's tableName. If baseClass
136 is not specified, and tableName is the same as a table in lsctables, the
137 DataRow will inherit from that table's RowType. Example: 'sngl_inspiral'
138
139 @baseClass: specify what class the DataRow should inherit from. Example:
140 lsctables.SnglInspiral
141
142 @columns: a list of strings specifying columns to add to the DataRow's
143 __slots__ attribute. All columns in __slots__ will also be an attribute of
144 the DataRow class. Only columns not in the base class's __slots__ attribute
145 will be added.
146
147 Note that this function returns a class, not an instance of a class.
148 """
149
150 if baseClass is not None:
151 base = baseClass
152 elif classTableName in lsctables.TableByName:
153 base = lsctables.TableByName[ classTableName ].RowType
154 else:
155 base = object
156
157
158 class DataRow( base ):
159
160 tableName = classTableName
161 if '__slots__' not in dir( base ):
162 __slots__ = columns
163 else:
164 __slots__ = base.__slots__ + [c for c in set(columns)-set(base.__slots__)]
165
166 def __init__(self):
167 """
168 If all slots are not populated, we will get an AttributeError when
169 using get_value. To avoid this, initialize all slots as None.
170 """
171 for column in self.__slots__:
172 setattr(self, column, None)
173
174 def store(self, dataTuple):
175 """
176 Takes a list of tuples of (column_name, data) and assigns the
177 values to the object's variables. The column_name must be in
178 self.__slots__.
179 @dataTuple: a list of tuples in which the first element is the
180 column name and the second is the value to assign.
181 """
182 for col, val in dataTuple:
183 setattr( self, col, val )
184
185 def get_value(self, arg):
186 """
187 Returns the result of some operation on the elements in self.
188 @arg: can be the name of any defined function in self's base class,
189 a slot in self, or a function of either or both. See get_row_stat
190 for more info.
191
192 Example:
193 >>> from glue.ligolw import lsctables
194 >>> SnglInspRow = lsctables.createDataRowClass('sngl_inspiral')
195 >>> test = SnglInspRow()
196 >>> test.store([('snr', 6.), ('chisq', 32.), ('chisq_dof', 16.)])
197 >>> test.get_value('snr**2.')
198 36.0
199 >>> test.get_value('get_new_snr')
200 5.8993671171391338
201 >>> test.get_value('log(get_new_snr())')
202 1.7748450768765174
203 """
204 return get_row_stat( self, arg )
205
206 return DataRow
207
210 """
211 Performs the desired function on the list of single statistics. Note: this
212 can only combine one statistic from each row.
213
214 @function: can be either a known pre-set (see below) or an arbitrary
215 function. If an arbitrary function, it must be in terms of the ifo names.
216
217 @rows: a dictionary of statistics keyed by the ifos
218 """
219
220 if function == 'sum':
221 return sum(rows.values())
222 if function == 'quad_sum':
223 return math.sqrt(sum([x**2. for x in rows.values()]))
224 if function == 'min':
225 return min(rows.values())
226 if function == 'max':
227 return max(rows.values())
228 if function == 'mean':
229 return numpy.mean(numpy.array(rows.values()))
230 if function == 'median':
231 return numpy.median(numpy.array(rows.values()))
232 if function == 'alpha_min':
233 return rows[min(rows.keys())]
234 if function == 'sorted_keys':
235 return ','.join(sorted(rows.keys()))
236 if function == 'sorted_values':
237 return ';'.join(sorted(map( str, rows.values() )))
238 if function == 'echo':
239 return rows
240
241
242 safe_dict = dict([ [name,val] for name,val in rows.items() + math.__dict__.items() if not name.startswith('__') ])
243
244 try:
245 return eval( function, {"__builtins__":None}, safe_dict )
246 except NameError:
247
248 return None
249
252 """
253 Creates a CombineRows class that can be used in a sqlite database to
254 combine rows on the fly. Takes in a sngl_function, which is the function
255 used to combine columns within a single row, and a combining_function,
256 which is the function used to combine the results of the sngl_functions
257 across rows.
258
259 @tableName: the name of the table that will be reading from. If it is a table in lsctables.py, all methods and columns from that table will be inherited.
260 @columns: the list of columns that will be storing data to. This list must be in the same order that will be reading data in from the database with.
261 @functionList: a list of tuples. The first item should be the combining function to use, in terms of the ifos to combine, and the second item should be the sngl function to use, in terms of columns or methods of the sngl_row.
262 """
263
264 sngl_row = createDataRowClass(tableName, columns)
265
266 class CombineRows:
267 def __init__(self):
268 """
269 Initializes variables needed for the step process.
270 """
271 self.this_coinc = dict([ [x, {}] for x in functionList ])
272
273 def step(self, *args):
274 """
275 Populates self.this_coinc
276 """
277 this_row = sngl_row(columns)
278 this_row.store(zip(columns,args))
279 for combine_func, sngl_function in functionList:
280 self.this_coinc[(combine_func, sngl_function)][this_row.ifo] = this_row.get_value(sngl_function)
281
282 def finalize(self):
283 """
284 Once all the singles for the coinc have been gathered, applies the desired combining function(s) to them and returns the result. Results are returned as a comma seperated string.
285 """
286 return ','.join([str(combineRowStats( cfunc, self.this_coinc[(cfunc, sfunc)] )) for cfunc, sfunc in functionList])
287
288 return CombineRows
289
297 """
298 A class to assist in loading data from and performing functions on tables
299 in a SQLite database.
300 """
301 - def __init__(self, connection, tableName, baseClass = None ):
306
308 self._function = function
309
310 - def store( self, *rowData):
311 thisRow = self.rowClass()
312 thisRow.store( zip( self.columns, rowData ) )
313 return thisRow
314
316 thisRow = self.store( rowData )
317 return thisRow.get_value( self._function )
318
322
325 """
326 A class that can perform various types of comparison tests between
327 arbitrary DataRow classes. The class has the following attributes:
328 *classA: A DataRow class. Note: this is a class, not an instance
329 of a class.
330 *classB: A DataRow class. This can be the same type of class as
331 classA, or different. Like classA, this is a class, not an instance
332 of that class.
333 *matchCriteriaA: What column, or function of columns, to get from
334 classA when doing a comparison between an instance of classA and
335 an instance of classB.
336 *matchCriteriaB: What column, or function of columns, to get from
337 classB when doing a comparison between an instance of classA and
338 an instance of classB.
339 *_diffFunc: What function to perform to differentiate classA from classB.
340 This function should be one of the functions below; it takes data to
341 populate an instance of classA and an instance of classB, and returns a
342 numerical value >= 0 representing the difference between these instances of
343 classA and classB. This value can then be compared to the window size to
344 determine if A and B are the same or not.
345 *window: How large of a window to use to consider an instance of
346 classA equal to an instance of classB.
347
348 Example:
349 >>> classA = createDataRowClass( 'sngl_inspiral' )
350 >>> classB = createDataRowClass( 'sngl_inspiral' )
351 >>> compF = CompareDataRows( classA, classB )
352 >>> compF.set_diffFunc( compF.diffRowARowB )
353 >>> compF.set_window( 0.1 )
354 >>> compF.set_matchCriteriaA('mass1/mass2')
355 >>> compF.set_matchCriteriaB = ('mass1/mass2')
356 >>> dataA = [('mass1', '10.0'), ('mass2', '5.0')]
357 >>> dataB = [('mass1', '10.1'), ('mass2', '5.0')]
358 >>> compF.compare( dataA, dataB )
359 True
360 >>> compF.set_window(0)
361 >>> compF.compare( dataA, dataB )
362 False
363 """
364 - def __init__(self, RowClassA = None, RowClassB = None):
365 self.classA = RowClassA
366 self.classB = RowClassB
367 self._matchCriteriaA = None
368 self._matchCriteriaB = None
369 self._neededColumnsA = None
370 self._neededColumnsB = None
371 self.diffFunc = None
372 self.window = None
373
375 self.classA = DataRowClass
376
378 self.classB = DataRowClass
379
381 """
382 Sets the match criteria for classA. Also sets the columns needed for
383 the given match criteria.
384 """
385 self._matchCriteriaA = match_criteria
386
387 self.set_neededColumnsA()
388
389 @property
391 return self._matchCriteriaA
392
394 """
395 Sets the match criteria for classB. Also sets the columns needed for
396 the given match criteria.
397 """
398 self._matchCriteriaB = match_criteria
399
400 self.set_neededColumnsB()
401
402 @property
404 return self._matchCriteriaB
405
407 """
408 Retrieves which columns in the desired class is needed for the match
409 criteria.
410
411 Parameters
412 ----------
413 AorB: str
414 Either 'A' or 'B'; which class to get the columns for.
415
416 Returns
417 -------
418 needed_cols: list
419 The list of needed columns; see get_needed_columns for
420 details.
421 """
422 return get_needed_columns(
423 getattr(self, 'class%s' % AorB).__slots__,
424 getattr(self, 'matchCriteria%s' %AorB))
425
428
429 @property
431 return self._neededColumnsA
432
435
436 @property
438 return self._neededColumnsB
439
441 self.diffFunc = function
442
445
446
447
448 - def _diff( self, a, b ):
449 """
450 Returns the absolute value of the difference between a and b.
451
452 Parameters
453 ----------
454 a: float or integer
455 b: float or integer
456
457 Returns
458 -------
459 difference: float or integer
460 The abs difference between a and b.
461 """
462 return abs(a - b)
463
465 """
466 Runs self.diffFunc on a and b and checks that that is <= self.window.
467
468 Parameters
469 ----------
470 a: instance of classA row
471 The data passed to the first argument of self.diffFunc.
472 b: instance of classB row
473 The data passed to the second argument of self.diffFunc.
474
475 Returns
476 -------
477 comparison: bool
478 True if self.diffFunc(a, b) is <= self.window; False otherwise.
479 """
480 return self.diffFunc(a, b) <= self.window
481
483 """
484 A database wrapper for the compare functions.
485
486 Parameters
487 ----------
488 args: list
489 A list of values. The first len(self.neededColumnsA) is assumed to
490 be the data for classA, in the order that neededColumnsA is in.
491 The rest of the values are assumed to be the data for classB, in
492 the order that neededColumnsB is in.
493
494 Returns
495 -------
496 comparison: bool
497 The result of self.compare, where the first argument passed is
498 the data from classA and the second is data from classB.
499 """
500 dataA = [args[i] for i in range(len(self.neededColumnsA))]
501 dataB = [args[i] for i in range(len(self.neededColumnsA), len(args))]
502 dataA = zip(self.neededColumnsA, dataA)
503 dataB = zip(self.neededColumnsB, dataB)
504 return self.compare(dataA, dataB)
505
506 - def create_dbCompF(self, connection, diffFunc, compFuncName, window):
507 """
508 Creates a function in the given connection to a database that allows
509 the given diffFunc to be performed on classA and classB on the fly.
510 The matchCriteria and the neededColumns for each class must be already
511 set (this should happen simultaneously by using set_matchCriteria(A|B).
512
513 Parameters
514 ----------
515 connection: sqlite3.connection
516 A connection to SQLite database.
517 diffFunc: function
518 The function to use to do comparisons; must be one of the
519 functions defined in this class.
520 compFuncName: str
521 What to call the call function in the database; must be unique.
522 window: float
523 The size of the window to use when determining whether or not
524 classA and classB are the same.
525 """
526 if self._matchCriteriaA is None:
527 raise ValueError("matchCriteriaA not set! " +\
528 "Run self.set_matchCriteriaA with appropriate arguments.")
529 if self._neededColumnsA is None:
530 raise ValueError("neededColumnsA not set! " +\
531 "Run self.set_matchCriteriaA to set the needed columns and " +\
532 "the match criteria.")
533 if self._matchCriteriaB is None:
534 raise ValueError("matchCriteriaB not set! " +\
535 "Run self.set_matchCriteriaB with appropriate arguments.")
536 if self._neededColumnsB is None:
537 raise ValueError("neededColumnsB not set! " +\
538 "Run self.set_matchCriteriaB to set the needed columns and " +\
539 "the match criteria.")
540 self.set_diffFunc(diffFunc)
541 self.set_window(window)
542 connection.create_function(compFuncName,
543 len(self.neededColumnsA)+len(self.neededColumnsB), self.dbWrapper)
544
546 """
547
548 Runs self.diff on self.classA and self.classB using self.matchCriteriA
549 and self.matchCriteriaB. A or B can be any DataRow class; the only
550 requirement is that their match criteria (set by
551 self.matchCriteria(A|B)) be a function of their slots. Special match
552 criteria are 'startTime' and 'endTime'. In this case,
553 (start|end)_time+1e-9*(start|end)_time_ns will calculated.
554
555 Parameters
556 ----------
557 dataA: list
558 A list of tuples with data to populate this instance of classA.
559 The first value of each tuple is the column name, the second the
560 value, e.g., ('ifo', 'H1').
561 dataB: list
562 A list of data tuples to populate this instance of classB.
563
564 Returns
565 -------
566 diff: float
567 The return of self._diff(a,b), where a(b) is the matchCritieraA(B)
568 function run on dataA(B).
569 """
570
571 rowA = self.classA()
572 rowA.store(dataA)
573 rowB = self.classB()
574 rowB.store(dataB)
575
576 if self.matchCriteriaA == 'startTime':
577 a = rowA.start_time + 1e-9*rowA.start_time_ns
578 elif self.matchCriteriaA == 'endTime':
579 a = rowA.end_time + 1e-9*rowA.end_time_ns
580 else:
581 a = rowA.get_value( self.matchCriteriaA )
582 if self.matchCriteriaB == 'startTime':
583 b = rowB.start_time + 1e-9*rowB.start_time_ns
584 elif self.matchCriteriaB == 'endTime':
585 b = rowB.end_time + 1e-9*rowB.end_time_ns
586 else:
587 b = rowB.get_value( self.matchCriteriaB )
588 return self._diff(a, b)
589
591 """
592 Same as diffRowARowB, except that classA is assumed to be some sort of
593 simulation table (e.g., sim_inspiral) and classB is assumed to be some
594 sort of single-IFO table (e.g., sngl_inspiral). This assumption only
595 matters if 'startTime' or 'endTime' are the match criteria for classA.
596 In that case, the observatory that recorded the event in classB is
597 retrieved from classB.ifo. This is then used to pick out the
598 appropriate end|start time to use from classA. For example, if H1 is
599 the ifo in the snglData, then
600 h_(end|start)_time+1e-9*h_(end|start)_time_ns will be retrieved from
601 the simData.
602 @simData: a list of tuples with data to populate this instance of
603 classA. If self.matchCriteriaA is 'endTime' or 'startTime', classA is
604 assumed to be a row in a simulation table, and must have
605 {site}_(start|end)_time(_ns) columns.
606 @snglData: a list of tuples with data to populate this instance of
607 classB. If self.matchCriteriaB is 'endTime' or 'startTime', classB is
608 assumed to be a rown in a single-IFO table, and must have an ifo
609 column.
610 """
611
612 simRow = self.classA()
613 simRow.store(simData)
614 snglRow = self.classB()
615 snglRow.store(snglData)
616
617 if self.matchCriteriaA == 'startTime':
618 site = snglRow.ifo.lower()[0]
619 a = getattr( simRow, '%s_start_time' % site ) + 1e-9*getattr( simRow, '%s_start_time_ns' % site )
620 elif self.matchCriteriaA == 'endTime':
621 site = snglRow.ifo.lower()[0]
622 a = getattr( simRow, '%s_end_time' % site ) + 1e-9*getattr( simRow, '%s_end_time_ns' % site )
623 else:
624 a = simRow.get_value( self.matchCriteriaA )
625
626 if self.matchCriteriaB == 'startTime':
627 b = snglRow.start_time + 1e-9*snglRow.start_time_ns
628 elif self.matchCriteriaB == 'endTime':
629 b = snglRow.end_time + 1e-9*snglRow.end_time_ns
630 else:
631 b = snglRow.get_value( self.matchCriteriaB )
632 return self._diff(a, b)
633
635 """
636 Computes the eThinca distance between an instance of self.classA and an
637 instance of self.classB. This assumes that classA inherited from the
638 SimInspiral class and classB inherited from the SnglInspiral class.
639 @simData: List of data tuples (column_name, value) with which to
640 populate this instance of self.classA.
641 @snglData: List of data tuples (column_name, value) with which to
642 populate this instance of self.classB.
643 """
644 simRow = self.classA()
645 simRow.store(simData)
646 snglRow = self.classB()
647 snglRow.store(snglData)
648
649 simRow.simulation_id = 0
650 snglRow.event_id = 0
651 return tools.XLALEThincaParameterForInjection( simRow, snglRow )
652
654 """
655 Computes the eThinca distance between an instance of self.classA and an
656 instance of self.classB. This assumes that both classA and classB
657 inherited from the SnglInspiral class.
658 @snglDataA: List of data tuples (column_name, value) with which to
659 populate this instance of self.classA.
660 @snglDataB: List of data tuples (column_name, value) with which to
661 populate this instance of self.classB.
662 """
663 snglRowA = self.classA()
664 snglRowA.store(snglDataA)
665 snglRowB = self.classB()
666 snglRowB.store(snglDataB)
667
668 snglRowA.event_id = 0
669 snglRowB.event_id = 0
670 try:
671 ethincaVal = tools.XLALCalculateEThincaParameter( snglRowA, snglRowB )
672 except ValueError:
673
674 ethincaVal = float('inf')
675 return ethincaVal
676
679 weak_equality = False
681 for ifo in offset_dict:
682 self[ifo] = offset_dict[ifo]
683
685 """
686 The default equality test is to consider two vectors to be equal only if all ifos are the same and all offsets are the same. If one vector is a subset of the other vector, they will not be considered equal. However, if the class attribute weak_equality is set to True, only offsets of the ifos that are both in self and other will be checked. For example:
687 >>> a = OffsetVector({'H1': 0, 'L1': 5})
688 >>> b = OffsetVector({'H1': 0, 'L1': 5, 'V1': 10})
689 >>> a == b
690 False
691 >>> OffsetVector.weak_equality = True
692 >>> a == b
693 True
694 """
695 if type(other) != type(self):
696 return False
697 if OffsetVector.weak_equality:
698 return all( self[ifo] == other[ifo] for ifo in set(self.keys()) & set(other.keys()) )
699 else:
700 return self.__hash__() == other.__hash__()
701
703 return not self == other
704
710
713 """
714 Class to store category information.
715 """
716 default_match_criteria = ['offset_vector', 'datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group']
717
718 - def __init__(self, offset_vector = {}, datatype = None, veto_cat = None, on_instruments = frozenset(['ALL']), ifos = frozenset(['ALL']), param_group = None):
719 self.offset_vector = OffsetVector(offset_vector)
720 self.datatype = datatype
721 self.veto_cat = veto_cat
722 self.on_instruments = frozenset(on_instruments)
723 self.ifos = frozenset(ifos)
724 self.param_group = param_group
725 self.livetime = 0
726
728 self.livetime += time
729
732
734 """
735 Only checks the values listed in check_me to figure out whether or not self is equal to other.
736 """
737 if type(other) != type(self):
738 return False
739 return all(getattr(self,x) == getattr(other,x) for x in check_me)
740
749
751 return not self == other
752
755
756
757 -class Data( dict ):
758 """
759 Class to store statistics and livetime for plotting.
760 """
762 """
763 Sub-class to store individual data elements.
764
765 @categories: a list of instances of the Category class defining which categories this data element belongs to
766 @data: an instance of the DataRow class listing statistics and methods associated with this element
767 """
769 self._id = thisid
770 self.data = data
771 self.cumrates = {}
772
773 - def update(self, _id = None, data = None):
774
775 if _id is not None:
776 self._id = _id
777
778 if data is not None:
779 self.data = data
780
782 """
783 A list of all the data elements is kept as an index.
784 """
785 self.data_index = {}
786
787 - def add_data(self, _id, categories, data):
788 """
789 Adds a new DataElement to self.
790
791 @_id: some unique value to identify the data element
792 @categories: a list of categories that this data falls in. If one or more of these categories are equal (equality determined by the default Category match_criteria) to a category already in all_categories, the category is set to that category. This results in distinct categories only being saved once in memory, with all DataElements that share that category pointing to the same memory address.
793 """
794 d = self.DataElement( _id, data )
795 self.data_index[d._id] = d
796 for c in categories:
797 self.setdefault(c, [])
798 self[c].append( d )
799
800 - def update(self, _id, categories = [], data = None, addToExistingCat = True, errOnMissing = True):
801 """
802 Updates all DataElements in self that have the given id. If no DataElement is found with the given id and errOnMissing is False, adds a new entry.
803 """
804 if _id not in self.data_index:
805 if errOnMissing:
806 raise ValueError, "An element with id %s could not be found." % str(_id)
807 else:
808 self.add_data( _id, categories, data )
809 else:
810 self.data_index[_id].update( data = data)
811 self.refresh_categories( [self.data_index[_id]] )
812
813 - def add_livetime(self, livetime, category, match_criteria = []):
814 """
815 Adds livetime to all categories in self that match the given criteria.
816 """
817 if match_criteria == []:
818 match_criteria = Category.default_match_criteria
819 for cat in [cat for cat in self if cat.selective_eq(category, match_criteria)]:
820 cat.livetime += livetime
821
822 - def get_livetime(self, category, match_criteria = [], time_units = 'yr'):
823 """
824 Returns the sum of all the livetimes of categories that match the given category via the given match_criteria.
825 """
826 if match_criteria == []:
827 match_criteria = Category.default_match_criteria
828 return sqlutils.convert_duration(sum([cat.livetime for cat in self if cat.selective_eq(category, match_criteria)]), time_units)
829
831 """
832 Creates background categories out of the slide categories and adds this to all slide elements' categories lists. Default action is to create a background for each veto-category, on_instruments, ifos, and param_group. However, this can be overridden with the match_criteria argument.
833 """
834 if match_criteria == []:
835 match_criteria = ['veto_cat', 'on_instruments', 'ifos', 'param_group']
836 for vals in set([ tuple(getattr(c, x) for x in match_criteria) for c in self if c.datatype == 'slide' ]):
837
838 bkg_cat = Category( offset_vector = {}, datatype = 'background' )
839 [setattr(bkg_cat, x, y) for x, y in zip(match_criteria, vals)]
840 bkg_cat.livetime = sum([c.livetime for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) ])
841
842 self[bkg_cat] = list(set([x for c in self if c.datatype == 'slide' and bkg_cat.selective_eq(c, match_criteria) for x in self[c]]))
843
844 - def compute_cumrates(self, stat, foreground_datatype, rank_by = 'max', group_by = [], num_slides = 100.):
845 """
846 Computes the cumulative rates for all the distinct groups that exist in self. Distinct groups are determined by group_by.
847 """
848 if group_by == []:
849 group_by = ['datatype', 'veto_cat', 'on_instruments', 'ifos', 'param_group']
850 distinct_groups = set([ tuple(getattr(c,x) for x in group_by) for c in self])
851 for group in distinct_groups:
852 this_group = Category()
853 [setattr(this_group, x, y) for (x,y) in zip( group_by, group )]
854 this_group.livetime = self.get_livetime( this_group, group_by, time_units = 's' )
855
856 this_data = []
857 for c in self:
858 if c.selective_eq(this_group, group_by):
859 this_data.extend( self[c] )
860 this_data = sorted(set(this_data), key = lambda x: getattr(x.data, stat), reverse = rank_by == 'min')
861 d = [getattr(x.data, stat) for x in this_data]
862
863
864
865 orig_dt = this_group.datatype
866 this_group.datatype = foreground_datatype
867 fg_livetime = self.get_livetime( this_group, match_criteria = 'datatype' not in group_by and group_by+['datatype'] or group_by, time_units = 's' )
868 nTrials = float(this_group.livetime) / fg_livetime
869
870 this_group.datatype = orig_dt
871
872 these_cumrates = [ (len(d) - bisect.bisect_left(d, x))/nTrials for x in d ]
873
874 for data_elem, rate in zip( this_data, these_cumrates ):
875 data_elem.cumrates[this_group] = rate
876
878 """
879 Returns a sorted list (by stat) of stats, cumrates, and ids for the given group.
880 """
881 return sorted([(getattr(d.data, stat), d.cumrates[group], d._id) for d in self.data_index.values() if group in d.cumrates], reverse = rank_by == 'min')
882
883 - def get_data(self, _id = None, category = None, category_match_criteria = []):
884 """
885 Returns a list of DataElements that matches a given id, a given category, or both. If category_match_criteria is specified, will get data that matches the specified elements in category. Otherwise, will use Category.default_match_criteria for comparing category to the stored categories.
886 """
887 if category_match_criteria == []:
888 category_match_criteria = Category.default_match_criteria
889 return set([x for c in self for x in self[c] if (category is None or c.selective_eq(category, category_match_criteria)) and (_id is None or _id == x._id)])
890
892 """
893 Returns a list of categories in self that match the given category via the match_criteria.
894 """
895 if match_criteria == []:
896 match_criteria = Category.default_match_criteria
897 return [x for x in self if x.selective_eq(category, match_criteria)]
898
900 """
901 Cycles over the DataElements in self, keeping only the given args.
902
903 @args: A list of tuples. In each tuple, the first element is the name to give the new collapsed value and the second element is the argument to carry out (either a name or a function) on the uncollapsed row to get the collapsed value.
904 """
905 cols = [arg[0] for arg in args]
906 fns = [arg[1] for arg in args]
907 collapsedRow = createDataRowClass( 'collapsedRow' )
908 for n,origElement in enumerate(self.data_index.values()):
909 d = collapsedRow( cols )
910 d.store([(col, origElement.data.get_value(fn)) for col, fn in zip(cols, fns)])
911 origElement.data = d
912
def combineData(dataObj, match_column, args, param_grouping_function, verbose=False):
    """
    Cycles over the DataElements in dataObj, combining any DataElements
    with the same match_column value via the given args and returns a new
    Data object in which the element's ids are the values of the
    match_column. Note: the categories of the DataElements in the new Data
    object are just the concatenation of the older objects' individual
    categories. These might need to be updated depending on the parameters
    of the newer category.

    @dataObj: the instance of Data to carry the combination out on
    @match_column: name of column in the DataElements to use to match rows
        to combine; e.g., 'coinc_event_id'
    @args: a list of tuples. In each tuple the first element is the name to
        give the new combined value, the second element is the column in
        each row to identify that row by, the third is the column or
        function of columns in each row to combine, and the final element
        is the way to combine them, which can be either a predefined method
        or a function in terms of values of the first element. For example,
        if you wanted the average chirp mass and the sum of the squares of
        new_snr over H1 and L1, the args should look like:
        args = [(combined_newsnr_sq, ifo, get_new_snr, H1**2.+L1**2.),
                (combined_mchirp, ifo, mchirp, mean)]
    """
    cols = [arg[0] for arg in args]
    colkeys = [arg[1] for arg in args]
    sngl_stats = [arg[2] for arg in args]
    cmb_fncs = [arg[3] for arg in args]
    newData = Data()
    combinedRow = createDataRowClass('combinedRow')

    # group the data elements by their match_column value
    match_vals = {}
    for d in dataObj.data_index.values():
        this_id = d.data.get_value(match_column)
        match_vals.setdefault(this_id, [])
        match_vals[this_id].append(d)

    ii = 0
    for idcol, combine_data in match_vals.items():
        ii += 1
        if verbose:
            # progress report; sys.stdout.write instead of the print
            # statement keeps this line valid on both python 2 and 3
            if ii != len(match_vals):
                sys.stdout.write("%i/%i (%.2f%%)\r" % (ii, len(match_vals),
                    100*float(ii)/len(match_vals)))
                sys.stdout.flush()
            else:
                sys.stdout.write("\n")
        newRow = combinedRow(cols)
        stats = [dict([[x.data.get_value(colkey), x.data.get_value(snglstat)]
            for x in combine_data])
            for colkey, snglstat in zip(colkeys, sngl_stats)]
        newRow.store([(col, combineRowStats(fnc, stat_dict))
            for col, fnc, stat_dict in zip(cols, cmb_fncs, stats)])
        # NOTE(review): assumes each data element exposes a `categories`
        # attribute; the visible DataElement.__init__ does not set one --
        # TODO confirm against the full Data class definition
        orig_cats = [y for x in combine_data for y in x.categories]
        ifos_param = 'ifos' in dir(newRow) and 'ifos' or 'ifo'
        new_cats = [Category(c.offset_vector, c.datatype, c.veto_cat,
            c.on_instruments, getattr(newRow, ifos_param),
            param_grouping_function(newRow.param)) for c in orig_cats]
        newData.add_data(id(newRow), new_cats, newRow)

    return newData
951