glue.segmentsUtils

1 # Copyright (C) 2006 Kipp Cannon 2 # 3 # This program is free software; you can redistribute it and/or modify it 4 # under the terms of the GNU General Public License as published by the 5 # Free Software Foundation; either version 3 of the License, or (at your 6 # option) any later version. 7 # 8 # This program is distributed in the hope that it will be useful, but 9 # WITHOUT ANY WARRANTY; without even the implied warranty of 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 # Public License for more details. 12 # 13 # You should have received a copy of the GNU General Public License along 14 # with this program; if not, write to the Free Software Foundation, Inc., 15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 17 18 # 19 # ============================================================================= 20 # 21 # Preamble 22 # 23 # ============================================================================= 24 # 25 26 27 """ 28 This module provides additional utilities for use with segments.segmentlist 29 objects. 30 """ 31 32 33 import re 34 35 36 from glue import git_version 37 from glue.lal import CacheEntry 38 from .lal import LIGOTimeGPS 39 from glue import segments 40 from six.moves import range 41 42 43 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 44 __version__ = "git id %s" % git_version.id 45 __date__ = git_version.date 46 47 48 # 49 # ============================================================================= 50 # 51 # I/O 52 # 53 # ============================================================================= 54 # 55 56 57 # 58 # A list of file names 59 # 60 61

def fromfilenames(filenames, coltype=int):
    """
    Return a segmentlist describing the intervals spanned by the files
    whose names are given in the iterable filenames.  The segmentlist
    is constructed by parsing the file names, and the boundaries of
    each segment are coerced to type coltype.

    The file names are parsed using a generalization of the format
    described in Technical Note LIGO-T010150-00-E, which allows the
    start time and duration appearing in the file name to be
    non-integers.  Leading and trailing whitespace is ignored, and a
    single trailing ".gz" suffix is removed before parsing.

    Raises ValueError if a file name does not end in
    "-<start>-<duration>.<extension>".

    NOTE:  the output is a segmentlist as described by the file names;
    if the file names are not in time order, or describe overlapping
    segments, then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.
    """
    pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
    gz_suffix = ".gz"
    seglist = segments.segmentlist()
    for name in filenames:
        stem = name.strip()
        # Remove the compression suffix explicitly.  str.rstrip(".gz")
        # must not be used for this:  it strips any trailing run of the
        # characters ".", "g" and "z", and so can eat part (or all) of
        # the real extension.
        if stem.endswith(gz_suffix):
            stem = stem[:-len(gz_suffix)]
        match = pattern.search(stem)
        if match is None:
            raise ValueError("cannot parse start time and duration from file name '%s'" % name)
        start = coltype(match.group(1))
        duration = coltype(match.group(2))
        seglist.append(segments.segment(start, start + duration))
    return seglist

87 88 89 # 90 # LAL cache files 91 # 92 93

def fromlalcache(cachefile, coltype=int):
    """
    Build a segmentlist from a LAL cache.  cachefile is iterated over
    line by line;  each line is handed to glue.lal.CacheEntry together
    with coltype, and the resulting entry's segment attribute is
    collected, in input order, into the returned segmentlist.

    coltype is the type used for the segment boundaries, and should
    raise ValueError if it cannot convert its string argument.

    Example:

    >>> from lal import LIGOTimeGPS
    >>> cache_seglists = fromlalcache(open(filename), coltype = LIGOTimeGPS).coalesce()

    See also:

    glue.lal.CacheEntry
    """
    spans = segments.segmentlist()
    for line in cachefile:
        entry = CacheEntry(line, coltype=coltype)
        spans.append(entry.segment)
    return spans

#
# Segwizard-formatted segment list text files
#

def fromsegwizard(file, coltype=int, strict=True):
    """
    Read a segmentlist from the file object file containing a segwizard
    compatible segment list.  Parsing stops on the first line that
    cannot be parsed (which is consumed).  The segmentlist will be
    created with segments whose boundaries are of type coltype, which
    should raise ValueError if it cannot convert its string argument.
    Two-column, three-column, and four-column segwizard files are
    recognized.  Blank lines and comments introduced by "#" or ";" are
    skipped.

    If strict is True then each segment's duration is checked against
    the duration column of the input file (when there is one), and the
    entire file must be in the same format, which is decided by the
    first parsed line.  A violation of either rule raises ValueError.

    NOTE:  the output is a segmentlist as described by the file;  if
    the segments in the input file are not coalesced or out of order,
    then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.
    """
    commentpat = re.compile(r"\s*([#;].*)?\Z", re.DOTALL)
    # The "-" inside the character classes is escaped on purpose:
    # written as "+-e" it denotes the range of characters from "+" to
    # "e", which admits punctuation and upper-case letters into a
    # "number".
    twocolsegpat = re.compile(r"\A\s*([\d.+\-eE]+)\s+([\d.+\-eE]+)\s*\Z")
    threecolsegpat = re.compile(r"\A\s*([\d.+\-eE]+)\s+([\d.+\-eE]+)\s+([\d.+\-eE]+)\s*\Z")
    fourcolsegpat = re.compile(r"\A\s*([\d]+)\s+([\d.+\-eE]+)\s+([\d.+\-eE]+)\s+([\d.+\-eE]+)\s*\Z")

    # (number of columns, pattern, index of the start-time token), in
    # the order in which the formats are attempted
    layouts = (
        (4, fourcolsegpat, 1),
        (3, threecolsegpat, 0),
        (2, twocolsegpat, 0),
    )

    def parse(text):
        # Return (segment, duration, number of columns) for the first
        # layout that both matches text and converts cleanly, or None
        # if no layout does.
        for ncols, pattern, first in layouts:
            match = pattern.match(text)
            if match is None:
                continue
            tokens = match.groups()[first:]
            try:
                seg = segments.segment(coltype(tokens[0]), coltype(tokens[1]))
                if ncols > 2:
                    duration = coltype(tokens[2])
                else:
                    # no duration column to check against
                    duration = abs(seg)
            except ValueError:
                # coltype rejected a token;  try the next layout
                continue
            return seg, duration, ncols
        return None

    file_format = None
    seglist = segments.segmentlist()
    for line in file:
        # discard trailing whitespace and any comment
        line = commentpat.split(line)[0]
        if not line:
            continue
        parsed = parse(line)
        if parsed is None:
            # first unparseable line ends the segment list
            break
        seg, duration, ncols = parsed
        # NOTE(review): the listing this was recovered from has lost
        # its indentation;  the format-consistency check is taken to be
        # part of the strict branch -- confirm against the repository.
        if strict:
            if abs(seg) != duration:
                raise ValueError("segment '%s' has incorrect duration" % line)
            if file_format is None:
                file_format = ncols
            elif file_format != ncols:
                raise ValueError("segment '%s' format mismatch" % line)
        seglist.append(seg)
    return seglist

176 177

def tosegwizard(file, seglist, header=True, coltype=int):
    """
    Write seglist to the file object file, one segment per line, in a
    segwizard compatible four-column layout:  a running index counted
    from 0, the start, the stop, and the duration, separated by tabs.
    Start, stop and duration (taken as abs() of the segment) are each
    coerced to type coltype and then passed to str() before output.

    If header is True, a comment line naming the columns is written
    first.
    """
    if header:
        file.write("# seg\tstart \tstop \tduration\n")
    index = 0
    for seg in seglist:
        begin = str(coltype(seg[0]))
        end = str(coltype(seg[1]))
        length = str(coltype(abs(seg)))
        file.write("%d\t" % index + "\t".join((begin, end, length)) + "\n")
        index += 1

#
# TAMA-formatted segment list text files
#

def fromtama(file, coltype=LIGOTimeGPS):
    """
    Read a segmentlist from the file object file containing TAMA
    locked-segments data.  The fourth and fifth whitespace-delimited
    columns of each line are taken as the segment's two boundaries;
    the first three columns are ignored.  Parsing stops on the first
    line that cannot be parsed (which is consumed).  The segmentlist
    will be created with segments whose boundaries are of type coltype,
    which should raise ValueError if it cannot convert its string
    argument.

    NOTE:  TAMA locked-segments files contain non-integer start and end
    times, so the default column type is set to LIGOTimeGPS.

    NOTE:  the output is a segmentlist as described by the file;  if
    the segments in the input file are not coalesced or out of order,
    then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.
    """
    # The "-" inside the character classes is escaped on purpose:
    # written as "+-e" it denotes the range of characters from "+" to
    # "e" rather than the three characters "+", "-" and "e".
    segmentpat = re.compile(r"\A\s*\S+\s+\S+\s+\S+\s+([\d.+\-eE]+)\s+([\d.+\-eE]+)")
    seglist = segments.segmentlist()
    for line in file:
        match = segmentpat.match(line)
        if match is None:
            break
        try:
            seg = segments.segment(coltype(match.group(1)), coltype(match.group(2)))
        except ValueError:
            # coltype rejected a token;  treat the line as unparseable
            break
        seglist.append(seg)
    return seglist

222 223 224 # 225 # Command line or config file strings 226 # 227 228

def from_range_strings(ranges, boundtype=int):
    """
    Parse an iterable of ranges expressed as strings in the form
    "value" or "first:last" into an equivalent
    glue.segments.segmentlist.  In the latter case, an empty string for
    "first" and(or) "last" indicates a (semi)infinite range.  Non-empty
    bounds are converted with boundtype.  A typical use for this
    function is in parsing command line options or entries in
    configuration files.

    Raises ValueError, with the offending string as its argument, if a
    string contains more than one ":".

    NOTE:  the output is a segmentlist as described by the strings;  if
    the segments in the input are not coalesced or out of order, then
    thusly shall be the output of this function.  It is recommended
    that this function's output be coalesced before use.

    Example:

    >>> text = "0:10,35,100:"
    >>> from_range_strings(text.split(","))
    [segment(0, 10), segment(35, 35), segment(100, infinity)]
    """
    segs = segments.segmentlist()
    for text in ranges:
        parts = text.split(":")
        if len(parts) == 1:
            # a lone value describes a zero-length segment
            bound = boundtype(parts[0])
            segs.append(segments.segment(bound, bound))
        elif len(parts) == 2:
            first, last = parts
            lo = segments.NegInfinity if first == "" else boundtype(first)
            hi = segments.PosInfinity if last == "" else boundtype(last)
            segs.append(segments.segment(lo, hi))
        else:
            raise ValueError(text)
    return segs

272 273

def to_range_strings(seglist):
    """
    Turn an iterable of segments into a list of range strings as could
    be parsed by from_range_strings().  A segment that tests false is
    written as its lower bound alone;  any other segment is written as
    "first:last", with a bound that is segments.NegInfinity (lower) or
    segments.PosInfinity (upper) written as an empty string.  A typical
    use for this function is in machine-generating configuration files
    or command lines for other programs.

    Example:

    >>> from glue.segments import *
    >>> segs = segmentlist([segment(0, 10), segment(35, 35), segment(100, infinity())])
    >>> ",".join(to_range_strings(segs))
    '0:10,35,100:'
    """
    ranges = []
    for seg in seglist:
        if not seg:
            ranges.append(str(seg[0]))
            continue
        # identity tests:  only the infinity objects themselves are
        # encoded as an open end
        lo = "" if seg[0] is segments.NegInfinity else str(seg[0])
        hi = "" if seg[1] is segments.PosInfinity else str(seg[1])
        ranges.append("%s:%s" % (lo, hi))
    return ranges

308 309

def segmentlistdict_to_short_string(seglists):
    """
    Encode a segmentlistdict as a single string.  Every value is turned
    into range strings with to_range_strings() and those are joined
    with ",";  the key, converted with str(), is placed in front of the
    result with "=" in between;  and the key=value pieces are finally
    joined with "/", in the order in which seglists.items() yields
    them.

    Example:

    >>> from glue.segments import *
    >>> segs = segmentlistdict({"H1": segmentlist([segment(0, 10), segment(35, 35), segment(100, infinity())]), "L1": segmentlist([segment(5, 15), segment(45, 60)])})
    >>> segmentlistdict_to_short_string(segs)
    'H1=0:10,35,100:/L1=5:15,45:60'

    This function, and its inverse segmentlistdict_from_short_string(),
    are intended to be used to allow small segmentlistdict objects to
    be encoded in command line options and config files.  For large
    segmentlistdict objects or when multiple sets of segmentlists are
    required, the LIGO Light Weight XML encoding available through the
    glue.ligolw library should be used.
    """
    pieces = []
    for key, seglist in seglists.items():
        encoded = ",".join(to_range_strings(seglist))
        pieces.append("%s=%s" % (str(key), encoded))
    return "/".join(pieces)

334 335

def segmentlistdict_from_short_string(s, boundtype=int):
    """
    Parse a string representation of a set of named segmentlists into a
    segmentlistdict object.  The string encoding is that generated by
    segmentlistdict_to_short_string().  The optional boundtype argument
    will be passed to from_range_strings() when parsing the segmentlist
    objects from the string.

    An empty (or all-whitespace) string decodes to an empty
    segmentlistdict, and a key with nothing after its "=" decodes to an
    empty segmentlist;  these are the strings that
    segmentlistdict_to_short_string() produces for those objects.

    Raises ValueError if a "/"-delimited token does not contain exactly
    one "=".

    Example:

    >>> segmentlistdict_from_short_string("H1=0:10,35,100:/L1=5:15,45:60")
    {'H1': [segment(0, 10), segment(35, 35), segment(100, infinity)], 'L1': [segment(5, 15), segment(45, 60)]}

    This function, and its inverse segmentlistdict_to_short_string(),
    are intended to be used to allow small segmentlistdict objects to
    be encoded in command line options and config files.  For large
    segmentlistdict objects or when multiple sets of segmentlists are
    required, the LIGO Light Weight XML encoding available through the
    glue.ligolw library should be used.
    """
    d = segments.segmentlistdict()
    s = s.strip()
    if not s:
        # the encoding of an empty segmentlistdict
        return d
    for token in s.split("/"):
        key, ranges = token.strip().split("=")
        ranges = ranges.strip()
        if ranges:
            d[key.strip()] = from_range_strings(ranges.split(","), boundtype=boundtype)
        else:
            # the encoding of an empty segmentlist;  "".split(",")
            # would yield one empty range string instead of none
            d[key.strip()] = segments.segmentlist()
    return d

361 362

def from_bitstream(bitstream, start, dt, minlen=1):
    """
    Convert consecutive True values in a bit stream (boolean-castable
    iterable) to a stream of segments.  Sample number i (counting from
    0) is taken to span start + i * dt to start + (i + 1) * dt.
    Require minlen consecutive True samples to comprise a segment.  A
    run of True samples that is still open when the bit stream ends is
    closed at the end of the stream.

    Example:

    >>> list(from_bitstream((True, True, False, True, False), 0, 1))
    [segment(0, 2), segment(3, 4)]
    >>> list(from_bitstream([[], [[]], [[]], [], []], 1013968613, 0.125))
    [segment(1013968613.125, 1013968613.375)]
    """
    # Iterate with a for loop rather than bare next() calls:  a
    # StopIteration that escapes a generator body is turned into
    # RuntimeError by Python 3.7 and later (PEP 479).
    run_start = None  # index of first sample of the current True run
    count = 0  # number of samples consumed so far
    for bit in bitstream:
        if bit:
            if run_start is None:
                run_start = count
        elif run_start is not None:
            # a False sample closes the run at its own index
            if count - run_start >= minlen:
                yield segments.segment(start + run_start * dt, start + count * dt)
            run_start = None
        count += 1
    # flush a run that extends to the end of the stream
    if run_start is not None and count - run_start >= minlen:
        yield segments.segment(start + run_start * dt, start + count * dt)

390 391 392 # 393 # ============================================================================= 394 # 395 # Pre-defined Segments and Segment Lists 396 # 397 # ============================================================================= 398 # 399 400

def S2playground(extent):
    """
    Return a segmentlist identifying the S2 playground times within the
    interval defined by the segment extent.  Playground intervals are
    generated 600 long, one every 6370, on a grid aligned to 729273613,
    and the result is their intersection with extent.

    Example:

    >>> from glue import segments
    >>> S2playground(segments.segment(874000000, 874010000))
    [segment(874000013, 874000613), segment(874006383, 874006983)]
    """
    epoch = 729273613
    stride = 6370
    length = 600

    # step back to the last grid point at or before the start of extent
    first = int(extent[0])
    first -= (first - epoch) % stride
    last = int(extent[1]) + 1

    playground = segments.segmentlist()
    for t in range(first, last, stride):
        playground.append(segments.segment(t, t + length))
    return playground & segments.segmentlist([extent])

416 417

def segmentlist_range(start, stop, period):
    """
    Generator in the spirit of Python's range() builtin:  yields
    back-to-back segments, the first beginning at "start", the n-th
    ending at start + n * period, stopping before the first segment
    whose end would be greater than "stop".  Note that the segments
    generated do not form a coalesced list (they are not disjoint).
    start, stop, and period can be any objects which support the
    arithmetic and comparison used here.

    Example:

    >>> from glue.segments import *
    >>> segmentlist(segmentlist_range(0, 15, 5))
    [segment(0, 5), segment(5, 10), segment(10, 15)]
    >>> segmentlist(segmentlist_range('', 'xxx', 'x'))
    [segment('', 'x'), segment('x', 'xx'), segment('xx', 'xxx')]
    """
    count = 1
    lower = start
    # each upper bound is recomputed from start, not accumulated
    upper = start + count * period
    while not upper > stop:
        yield segments.segment(lower, upper)
        count += 1
        lower, upper = upper, start + count * period

443 444 445 # 446 # ============================================================================= 447 # 448 # Extra Manipulation Routines 449 # 450 # ============================================================================= 451 # 452 453

def Fold(seglist1, seglist2):
    """
    Generator yielding, for each segment in seglist2 in turn, the
    intersection of seglist1 with that segment, shifted so that its
    boundaries are measured from the start of that segment.  See also
    the segmentlist_range() function.

    This has use in applications that wish to convert ranges of values
    to ranges relative to epoch boundaries.  Below, a list of time
    intervals in hours is converted to a sequence of daily interval
    lists with times relative to midnight.

    Example:

    >>> from glue.segments import *
    >>> x = segmentlist([segment(0, 13), segment(14, 20), segment(22, 36)])
    >>> for y in Fold(x, segmentlist_range(0, 48, 24)): print(y)
    ...
    [segment(0, 13), segment(14, 20), segment(22, 24)]
    [segment(0, 12)]
    """
    for epoch in seglist2:
        window = segments.segmentlist([epoch])
        clipped = seglist1 & window
        yield clipped.shift(-epoch[0])

478 479

def vote(seglists, n):
    """
    Given a sequence of segmentlists, returns the intervals during
    which at least n of them intersect.  The input segmentlists must be
    coalesced, the output is coalesced.  If n is less than 1 an empty
    segmentlist is returned.

    Example:

    >>> from glue.segments import *
    >>> w = segmentlist([segment(0, 15)])
    >>> x = segmentlist([segment(5, 20)])
    >>> y = segmentlist([segment(10, 25)])
    >>> z = segmentlist([segment(15, 30)])
    >>> vote((w, x, y, z), 3)
    [segment(10, 20)]

    The sequence of segmentlists is only iterated over once, and the
    segmentlists within it are only iterated over once;  they can all
    be generators.  If there are a total of N segments in M segment
    lists and the final result has L segments the algorithm is O(N M) +
    O(L).
    """
    # check for no-op

    if n < 1:
        return segments.segmentlist()

    # digest the segmentlists into an ordered sequence of off-on and
    # on-off transitions with the vote count for each transition
    # FIXME:  this generator is declared locally for now, is it useful
    # as a stand-alone generator?

    def transition_key(item):
        # Queue entries are (bound, delta, segiter).  Order them by
        # (bound, delta) only:  comparing whole tuples falls through to
        # comparing the iterators when two lists share an end boundary,
        # and iterators are not orderable in Python 3 (TypeError).
        return item[:2]

    def pop_min(l):
        # remove and return the smallest value from a list
        val = min(l, key=transition_key)
        # search from the end, where the initial sort put the smallest
        for i in range(len(l) - 1, -1, -1):
            if l[i] is val:
                return l.pop(i)
        assert False  # cannot get here

    def vote_generator(seglists):
        queue = []
        for seglist in seglists:
            segiter = iter(seglist)
            try:
                seg = next(segiter)
            except StopIteration:
                continue
            # put them in so that the smallest boundary is
            # closest to the end of the list
            queue.append((seg[1], -1, segiter))
            queue.append((seg[0], +1, None))
        if not queue:
            return
        queue.sort(key=transition_key, reverse=True)
        bound = queue[-1][0]
        votes = 0
        while queue:
            this_bound, delta, segiter = pop_min(queue)
            if this_bound == bound:
                # another transition at the same boundary:  accumulate
                votes += delta
            else:
                yield bound, votes
                bound = this_bound
                votes = delta
            # an on-off transition carries its list's iterator;  use it
            # to queue that list's next segment
            if segiter is not None:
                try:
                    seg = next(segiter)
                except StopIteration:
                    continue
                queue.append((seg[1], -1, segiter))
                queue.append((seg[0], +1, None))
        yield bound, votes

    # compute the cumulative sum of votes, and assemble a segmentlist
    # from the intervals when the vote count is equal to or greater
    # than n

    result = segments.segmentlist()
    votes = 0
    for bound, delta in vote_generator(seglists):
        if delta > 0 and n - delta <= votes < n:
            # count rises from below n to n or more
            start = bound
        elif delta < 0 and n <= votes < n - delta:
            # count falls from n or more to below n
            result.append(segments.segment(start, bound))
            del start  # detect stops that aren't preceded by starts
        votes += delta
    assert votes == 0  # detect failed cumulative sum

    return result

Source Code for Module glue.segmentsUtils