Package pylal :: Package dq :: Module dqFrameUtils
[hide private]
[frames] | [no frames]

Source Code for Module pylal.dq.dqFrameUtils

   1  #!/usr/bin/env python 
   2   
   3  # Copyright (C) 2011 Duncan Macleod 
   4  # 
   5  # This program is free software; you can redistribute it and/or modify it 
   6  # under the terms of the GNU General Public License as published by the 
   7  # Free Software Foundation; either version 3 of the License, or (at your 
   8  # option) any later version. 
   9  # 
  10  # This program is distributed in the hope that it will be useful, but 
  11  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  12  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
  13  # Public License for more details. 
  14  # 
  15  # You should have received a copy of the GNU General Public License along 
  16  # with this program; if not, write to the Free Software Foundation, Inc., 
  17  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
  18   
  19  # ============================================================================= 
  20  # Preamble 
  21  # ============================================================================= 
  22   
  23  from __future__ import division 
  24  import re,os,sys,numpy,subprocess,datetime,shlex,urlparse,glob,fnmatch,\ 
  25         httplib,cjson,copy 
  26  from socket import getfqdn 
  27   
  28  from glue import segments,git_version 
  29  from glue.lal import Cache as LALCache 
  30  from glue.lal import CacheEntry as LALCacheEntry 
  31   
  32  from pylal import Fr,date 
  33  from pylal.xlal.datatypes.ligotimegps import LIGOTimeGPS 
  34  from pylal.dq.dqDataUtils import make_external_call 
  35   
  36  __author__ = "Duncan Macleod <duncan.macleod@ligo.org>" 
  37  __version__ = "git id %s" % git_version.id 
  38  __date__ = git_version.date 
  39   
  40  """ 
  41  This module provides frame manipulation routines for use in data quality investigations, and cache manipulation routines. 
  42  """ 
  43   
  44  # ============================================================================== 
  45  # Grab data from frames 
  46  # ============================================================================== 
  47   
def fromframefile(filename, channel, start=None, end=None):

  """
  Extract 1D data for given channel from the GWF format frame filename.
  Returns (x, data) pair of numpy arrays (x is array of times or
  frequencies).

  Arguments:

      filename : string
          path to GWF frame file
      channel : string
          channel name to extract

  Keyword arguments:

      start : float
          GPS start time (s) or minimum frequency (Hz) to return
      end : float
          GPS end time (s) or maximum frequency (Hz) to return
  """

  # try to extract data from frame
  y, fstart, offset, dt = Fr.frgetvect1d(filename, str(channel))[:4]
  x = fstart+dt*numpy.arange(len(y))+offset

  # apply constraint on x-axis; compare against None explicitly so that a
  # legitimate zero start time/frequency is honoured (a bare truth test
  # treated 0 as "not given")
  if start is not None or end is not None:
    if start is None: start = -numpy.infty
    if end is None:   end = numpy.infty
    condition = (x>=start) & (x<end)
    y = y[condition]
    x = x[condition]

  return x,y
82
def toframefile(filename, channel, data, start, dx, **frargs):

  """
  Write numpy array data to GWF frame file using the given arguments.

  Arguments:

      filename : string
          name of file to write
      channel : string
          name of channel to write
      data : numpy.array
          array of data to write
      start : float
          GPS start time (s) or minimum frequency (Hz)
      dx : float
          GPS time step (s) or frequency step (Hz)

  Unnamed arguments are held in frargs. For usage, see documentation for
  pylal.Fr.frputvect.
  """

  # assemble the single-channel dictionary expected by Fr.frputvect,
  # folding the extra keyword arguments in alongside the required keys
  datadict = dict(frargs)
  datadict.update({'name': channel,
                   'data': data,
                   'start': start,
                   'dx': dx})

  Fr.frputvect(filename, [datadict], verbose=False)
112
def fromLALCache(cache, channel, start=None, end=None, verbose=False):

  """
  Extract data for given channel from glue.lal.Cache object cache. Returns
  (time, data) pair of numpy arrays.

  Arguments:

      cache : glue.lal.Cache
          Cache list of frame files to read
      channel : string
          channel name to extract

  Keyword arguments:

      start : float
          GPS start time (s) or minimum frequency (Hz) to return
      end : float
          GPS end time (s) or maximum frequency (Hz) to return
  """

  # initialise 1xN storage arrays that are grown frame by frame
  time = numpy.ndarray((1,0))
  data = numpy.ndarray((1,0))

  # set up progress counter
  if verbose:
    sys.stdout.write("Extracting data from %d frames... " % len(cache))
    sys.stdout.flush()
    delete = '\b\b\b'
    num = len(cache)/100

  # loop over frames in cache
  for i,frame in enumerate(cache):
    # guard clause: abort on the first unreadable frame
    if not os.access(frame.path, os.R_OK):
      raise RuntimeError("Cannot read frame\n%s" % frame.path)

    # extract this frame's data
    frtime, frdata = fromframefile(frame.path, channel, start=start,\
                                   end=end)

    # grow the storage arrays and append the new samples
    oldlen = len(time[0])
    newlen = len(frtime)
    time.resize((1, oldlen+newlen))
    time[0][oldlen:] = frtime
    data.resize((1, oldlen+newlen))
    data[0][oldlen:] = frdata

    # print verbose message
    if verbose and len(cache)>1:
      progress = int((i+1)/num)
      sys.stdout.write('%s%.2d%%' % (delete, progress))
      sys.stdout.flush()

  if verbose: sys.stdout.write("\n")

  return time[0], data[0]
172
def grab_data(start, end, channel, type, nds=False, dmt=False, verbose=False):

  """
  This function will return the frame data for the given channel of the given
  type in the given [start,end] time range and will construct a gps time
  vector to go with it. The nds option is not yet supported, and the dmt
  option will return data for dmt channels in frames not available on the
  datafind server.

  >>> grab_data(997315215, 997315225, 'G1:DER_DATA_QUALITY', 'G1_RDS_C01_L3')
  (array([  9.97315215e+08,   9.97315216e+08,   9.97315217e+08,
            9.97315218e+08,   9.97315219e+08,   9.97315220e+08,
            9.97315221e+08,   9.97315222e+08,   9.97315223e+08,
            9.97315224e+08]),
   array([ 256.,  256.,  256.,  256.,  256.,  256.,  256.,  256.,  256.,
           256.]))

  Arguments:

      start : float
          GPS start time (s).
      end : float
          GPS end time (s).
      channel : string
          channel name to extract, e.g. 'G1:DER_DATA_H'.
      type : string
          frame data type to use, e.g. 'G1_RDS_C01_L3'.

  Keyword arguments:

      nds : [ True | False ]
          use NDS connection to data server (UNSUPPORTED).
      dmt : [ True | False ]
          frame type is DMT product (DMT data not found by datafind server).
  """

  # the observatory prefix is the first character of the channel name
  ifo = channel[0]

  # DMT products live outside the datafind server, so use the dedicated
  # filesystem-based cache generator for them
  if dmt:
    cache = dmt_cache(start, end, ifo, type)
  else:
    cache = get_cache(start, end, ifo, type, verbose=verbose)

  # read the (time, data) arrays straight out of the cache
  return fromLALCache(cache, channel, start=start, end=end,\
                      verbose=verbose)
223 224 # ============================================================================== 225 # Generate data cache 226 # ============================================================================== 227
def get_cache(start, end, ifo, ftype, framecache=False, server=None,\
              verbose=False):

  """
  Queries the LSC datafind server and returns a glue.lal.Cache object
  containing the frame file paths in the given GPS (start, end) interval
  for the given ifo and type (can be lists).

  framecache=True returns a pylal.dq.dqFrameUtils.FrameCache object instead.

  Arguments:

      start : float
          GPS start time (s).
      end : float
          GPS end time (s).
      ifo : [ string | list ]
          ifo (or list of) to find, e.g. 'G1'.
      ftype : [ string | list ]
          frame data type (or list of) to find, e.g. 'G1_RDS_C01_L3'.

  Keyword arguments:

      framecache : [ True | False ]
          convert the result to a FrameCache before returning.
      server : string
          datafind server host[:port]; discovered automatically if not given.
      verbose : [ True | False ]
          print progress messages to stdout.
  """

  # normalise scalar-or-list arguments
  if isinstance(ftype, str):
    types = [ftype]
  else:
    types = ftype
  if isinstance(ifo, str):
    ifos = [ifo]
  else:
    ifos = ifo

  # set options
  cache = LALCache()
  entry_class = LALCacheEntry

  # try querying the ligo_data_find server
  if not server:
    server = _find_datafind_server()

  if verbose: sys.stdout.write("Opening connection to %s...\n" % server)

  # split off an explicit port if the server string carries one
  if re.search(':', server):
    port = int(server.split(':')[-1])
  else:
    port = None

  cert, key = _get_grid_proxy()

  # if we have a credential then use it when setting up the connection
  if cert and key and port!=80:
    h = httplib.HTTPSConnection(server, key_file=key, cert_file=cert)
  else:
    h = httplib.HTTPConnection(server)

  if verbose: sys.stdout.write("Querying server for frames...\n")

  # loop over ifos and types
  for ifo in ifos:
    for t in types:
      # construct the URL for a simple data find query
      url = "/LDR/services/data/v1/gwf/%s/%s/%s,%s/file.json" % (ifo[0], t,\
                                                                 str(start),\
                                                                 str(end))
      # query the server
      h.request("GET", url)
      response = h.getresponse()
      _verify_response(response)
      # unravel the JSON list of frame URLs returned by the server
      urlList = cjson.decode(response.read())
      for frameurl in urlList:
        cache.append(entry_class.from_T050017(frameurl))

  # close the server connection
  h.close()
  if verbose: sys.stdout.write("Connection to %s closed.\n" % server)

  # convert to FrameCache if needed
  if framecache:
    cache = LALCachetoFrameCache(cache)

  return cache
314 315 # ============================================================================= 316 # Return latest frame for given type 317 # ============================================================================= 318
def get_latest_frame(ifo, ftype):

  """
  Returns the latest frame available in the LSC datafind server for the given
  ifo and frame type.

  Arguments:

      ifo : string
          observatory to find
      ftype : string
          frame data type to find
  """

  url = '/LDR/services/data/v1/gwf/%s/%s/latest/file.json' % (ifo[0], ftype)
  frames = query_datafind_server(url)

  # the server replies with a JSON list; a unique hit is a one-element list
  if isinstance(frames, list) and len(frames) == 1:
    return frames[0]
  return None
340 341 # ============================================================================= 342 # Find ifos 343 # ============================================================================= 344
def find_ifos():

  """
  Query the LSC datafind server and return a list of sites for which data
  is available. Does not differentiate between H1 and H2.

  Example:

  >>> find_ifos()
  ['G', 'H', 'L', 'V']
  """

  reply = query_datafind_server("/LDR/services/data/v1/gwf.json")
  if not reply:
    return []
  # single-character entries in the reply are the observatory prefixes
  return [entry for entry in reply if len(entry) == 1]
362 363 # ============================================================================= 364 # Find types 365 # ============================================================================= 366
def find_types(ifo=[], ftype=[], search='standard'):

  """
  This function will return a valid list of LIGO frame types given the list of
  type strings. The search option defines the breadth of the search, to speed
  up the search, the following search options are supported:
  'standard','short','full'.

  The 'R', 'T', and 'M' (raw, raw second trends, and raw minute trends) are
  treated as special cases, so as not to return all types containing those
  letters.

  Example:

  >>> find_types(ftype='H1_RDS')
  ['H1_RDS_C01_LX',
   'H1_RDS_C02_LX',
   'H1_RDS_C03_L1',
   'H1_RDS_C03_L2',
   'H1_RDS_C03_L2_ET',
   'H1_RDS_C03_L2_ET2',
   'H1_RDS_C03_L2_ET30',
   'H1_RDS_C04_LX',
   'H1_RDS_R_L1',
   'H1_RDS_R_L3',
   'H1_RDS_R_L4']

  >>> find_types(ftype=['H1_RDS','R'],search='short')
  ['H1_RDS_R_L1', 'H1_RDS_R_L3', 'H1_RDS_R_L4', 'R']

  Keyword arguments:

      ifo : [ string | list ]
          ifo (or list of) to find, e.g. 'G1'.
      ftype : [ string | list ]
          frame data type (or list of) to find, e.g. 'G1_RDS_C01_L3'.
      search : string
          descriptor of how deep to search for frame type.
  """

  # normalise scalar-or-list arguments, copying lists so the caller's
  # input is never mutated (previously types were popped from ftype itself)
  if isinstance(ftype, str):
    types = [ftype]
  else:
    types = list(ftype)
  if isinstance(ifo, str):
    ifos = [ifo]
  else:
    ifos = ifo

  if not types:
    types = None

  # treat 'R','M' and 'T' as special cases,
  special_types = ['M','R','T']
  foundtypes = []

  # there are thousands of GRBXXXXXX frame types, so ignore them
  ignore = []
  if search!='full':
    ignore.extend(['GRB'])
  if search=='short':
    # all of these strings are part of frame types that can be ignored for a
    # short search
    short_ignore = ['CAL','BRST','Mon','SG','IMR','DuoTone','Concat',\
                    'BH','WNB','Lock','_M','_S5','Multi','Noise','_C0']
    ignore.extend(short_ignore)
  # only build a pattern when there is something to ignore: an empty
  # alternation '()' matches every string and would discard all types
  # (this broke search='full' entirely)
  if ignore:
    ignore = re.compile('(%s)' % '|'.join(ignore))
  else:
    ignore = None

  # set up server connection
  server = _find_datafind_server()
  cert, key = _get_grid_proxy()
  if re.search(':', server):
    port = int(server.split(':')[-1])
  else:
    port = None

  if cert and key and port!=80:
    h = httplib.HTTPSConnection(server, key_file=key, cert_file=cert)
  else:
    h = httplib.HTTPConnection(server)

  # query for individual sites in datafind server
  if not ifos:
    query_url = "/LDR/services/data/v1/gwf.json"
    h.request("GET", query_url)
    response = h.getresponse()
    _verify_response(response)
    ifos = [i for i in cjson.decode(response.read()) if len(i)==1]

  # query for types for each ifo in turn
  datafind_types = []
  for ifo in ifos:
    # find types from datafind server
    query_url = "/LDR/services/data/v1/gwf/%s.json" % ifo[0]
    h.request("GET", query_url)
    response = h.getresponse()
    _verify_response(response)
    datafind_types.extend(cjson.decode(response.read()))

  # close connection
  h.close()

  # find special types first, otherwise they'll corrupt the general output.
  # Build a new list rather than popping from the list being iterated, which
  # previously skipped the element following each removed special type.
  remaining = []
  for t in datafind_types:
    if t in special_types and (types is None or t in types):
      foundtypes.append(t)
      if types is not None:
        types.remove(t)
    else:
      remaining.append(t)

  # find everything else
  for t in remaining:
    if ignore is not None and ignore.search(t):
      continue
    # if no types have been specified, return all
    if types is None:
      foundtypes.append(t)
    # else check for a match
    elif len(types)>=1 and re.search('(%s)' % '|'.join(types), t):
      foundtypes.append(t)

  # sort and deduplicate while preserving order
  foundtypes.sort()
  out = []
  for t in foundtypes:
    if t not in out: out.append(t)

  return out
500 501 # ============================================================================= 502 # Functions to find channels 503 # ============================================================================= 504
def get_channels(framefile):

  """
  Extract the channels held in a frame file.
  """

  # the frame type is the second '-'-delimited field of the T050017 name
  ftype = os.path.basename(framefile).split('-')[1]

  # list the channels in the frame via the FrChannels command-line tool
  frchannels, err = make_external_call('FrChannels %s' % framefile)

  # each output line is "<name> <sampling rate>"
  found = ChannelList()
  for line in frchannels.splitlines():
    chname, rate = line.split()
    found.append(Channel(chname, sampling=rate, type=ftype))

  return found
523
def find_channels(name=[], ftype=[], ifo=[], not_name=[], not_ftype=[],\
                  exact_match=False, time=None, unique=False):

  """
  Returns a ChannelList containing Channels for all data channels matching
  the given attributes from frames matching the given GPS time (defaults to
  now).

  Keyword arguments:

      name : [ string | list ]
          channel name (or list of) to match in search. Can be part of name,
          e.g. ['STRAIN', 'DARM_ERR']
      ftype : [ string | list ]
          frame data type (or list of) to find, e.g. 'G1_RDS_C01_L3'.
      ifo : [ string | list ]
          ifo (or list of) to find, e.g. 'G1'.
      not_name : [ string | list ]
          channel name (or list of) to negatively match in search. Can be part
          of name, e.g. 'ETMY_EXC_DAQ'
      not_ftype : [ string | list ]
          frame data type (or list of) to remove from search.
      exact_match : [ True | False ]
          require complete match with given name list, not just partial match.
      time : float
          GPS time to which to restrict search. Data transfer latency means
          that the very latest data is not always available, best to give a
          'recent' time.
      unique : [ True | False ]
          return unique list of channels from different types (since the same
          channel can exist in multiple types).
  """

  # NOTE(review): the mutable default arguments are shared between calls;
  # harmless here only because none of them is mutated below.

  # normalise each scalar-or-list argument to a list
  if isinstance(name, str): names = [name]
  else: names = name
  if isinstance(ftype, str): types = [ftype]
  else: types = ftype
  if isinstance(ifo, str): ifos = [ifo]
  else: ifos = ifo
  if isinstance(not_ftype, str): not_types = [not_ftype]
  else: not_types = not_ftype
  if isinstance(not_name, str): not_names = [not_name]
  else: not_names = not_name

  # find ifos from the datafind server if none were given
  if not ifos:
    ifos = find_ifos()

  # find types from the datafind server if none were given
  if not types:
    types = find_types(ifo=ifos, ftype=types)

  # remove types we don't want, matching whole strings when exact_match
  if not_types:
    if exact_match:
      notmatch = re.compile("\A(%s)\Z" % '|'.join(not_types))
    else:
      notmatch = re.compile("(%s)" % '|'.join(not_types))
    types = [t for t in types if not re.search(notmatch, t)]

  channels = ChannelList()

  # loop over each ifo/type pair, reading the channel table of one
  # representative frame per pair
  for ifo in ifos:
    for type in types:
      frchannels = ChannelList()

      # find a frame for this type: the latest one before `time` if given,
      # otherwise the latest available on the server
      if time:
        frame = get_cache(time, time, ifo, type)
        if len(frame):
          frame = frame[-1].path
      else:
        frame = get_latest_frame(ifo, type)

      # if frames not found, move on
      if frame:
        # strip the URL scheme/host, keeping only the local path
        frame = urlparse.urlparse(frame)[2]
      else:
        continue

      # access frame
      if os.access(frame, os.R_OK):
        # get channels contained in frame, sieveing when necessary
        allchannels = get_channels(frame)
        # test ifo
        allchannels = allchannels.sieve(ifo=ifo)

        # test names, dropping any 'X1:'-style prefix from the search string
        if names:
          for ch in names:
            if re.match('%s\d:' % ifo[0], ch):
              ch = ch.split(':',1)[1]
            frchannels.extend(allchannels.sieve(name=ch,\
                                                exact_match=exact_match))
        else:
          frchannels = allchannels

        # test excluded names
        for ch in not_names:
          frchannels = frchannels.sieve(not_name=ch)

        channels.extend(frchannels)

  # optionally deduplicate by channel name across types
  if unique:
    channels = channels.unique()

  channels.sort(key=lambda c: str(c))

  return channels
635 636 # ============================================================================== 637 # Class to generate channel structure 638 # ============================================================================== 639
class Channel:
  """
  The Channel class defines objects to represent LIGO data channels. Each
  Channel has a 'name' attribute and can be assigned 'type' and 'sampling'
  attributes if relevant.

  Example:

  >>> GWChannel = Channel('H1:LSC-DARM_ERR', 'H1_RDS_R_L3', 4096)
  >>> GWChannel.name, GWChannel.type, GWChannel.sampling
  ('H1:LSC-DARM_ERR', 'H1_RDS_R_L3', 4096.0)
  """

  def __init__(self, name, type=None, sampling=None):
    """
    Initialise the Channel object, parsing ifo, site, system, subsystem
    and signal components out of the given name.

    Arguments:

        name : string
            channel name, e.g. 'H1:LSC-DARM_ERR'; the 'IFO:' prefix is
            optional

    Keyword arguments:

        type : string
            frame data type for this channel
        sampling : float
            sampling frequency (Hz) of this channel
    """

    # split off the interferometer prefix, if given
    if ':' in name:
      self.ifo, self.name = re.split(':', name, maxsplit=1)
      self.site = self.ifo[0]
    else:
      self.name = name
      self.ifo = ""
      self.site = ""

    # split the bare name into (system, subsystem, signal) on '-' or '_'
    tags = re.split('[-_]', self.name, maxsplit=3)

    self.system = tags[0]
    self.subsystem = tags[1] if len(tags) > 1 else ""
    self.signal = tags[2] if len(tags) > 2 else ""

    self.type = str(type) if type else ""
    self.sampling = float(sampling) if sampling else 0

  def __getattribute__(self, name):
    # delegate to the default machinery; the previous implementation read
    # self.__dict__[name], which recurses infinitely under new-style
    # classes (accessing self.__dict__ re-enters __getattribute__)
    return object.__getattribute__(self, name)

  def __str__(self):
    return '%s:%s' % (self.ifo, self.name)
704
class ChannelList(list):

  """
  Wrapper for a list of Channel objects, with helper functions.
  """

  def find(self, item):
    """
    Return the smallest i such that i is the index of an element that wholly
    contains item. Raises ValueError if no such element exists.
    """
    for i,chan in enumerate(self):
      if item.name == chan.name:
        return i
    raise ValueError(item)

  def sieve(self, ifo=None, name=None, type=None, sampling=None,\
            sampling_range=None, not_name=None, exact_match=False):

    """
    Return a ChannelList object with those Channels that match the given
    attributes. If exact_match is True, then non-None ifo, name, and type
    patterns must match exactly.

    If sampling is given, it will always test an exact match.

    If sampling_range is given, it will test for Channels with sampling in
    [min,max) of range.

    Bash-style wildcards (*?) are allowed for ifo, name and type.
    """

    # wildcard-wrap the string patterns unless an exact match was requested
    if not exact_match:
      if ifo is not None: ifo = "*%s*" % ifo
      if name is not None: name = "*%s*" % name
      if not_name is not None: not_name = "*%s*" % not_name
      if type is not None: type = "*%s*" % type

    c = self
    if ifo is not None:
      ifo_regexp = re.compile(fnmatch.translate(ifo))
      c = [entry for entry in c if ifo_regexp.match(entry.ifo) is not None]

    if name is not None:
      name_regexp = re.compile(fnmatch.translate(name))
      c = [entry for entry in c if name_regexp.match(entry.name) is not None]

    if not_name is not None:
      name_regexp = re.compile(fnmatch.translate(not_name))
      c = [entry for entry in c if name_regexp.match(entry.name) is None]

    # the type argument was previously accepted but silently ignored;
    # filter on Channel.type in the same way as name
    if type is not None:
      type_regexp = re.compile(fnmatch.translate(type))
      c = [entry for entry in c if type_regexp.match(entry.type) is not None]

    if sampling is not None:
      c = [entry for entry in c if entry.sampling==sampling]

    if sampling_range is not None:
      c = [entry for entry in c if\
           sampling_range[0] <= entry.sampling < sampling_range[1]]

    return self.__class__(c)

  def __isub__(self, other):
    """
    Remove elements from self that are in other.
    """
    # iterate a reversed copy so deletions do not shift pending indices
    end = len(self) - 1
    for i, elem in enumerate(self[::-1]):
      if elem in other:
        del self[end - i]
    return self

  def __sub__(self, other):
    """
    Return a ChannelList containing the entries of self that are not in other.
    """
    return self.__class__([elem for elem in self if elem not in other])

  def __ior__(self, other):
    """
    Append entries from other onto self without introducing (new) duplicates.
    """
    self.extend(other - self)
    return self

  def __or__(self, other):
    """
    Return a ChannelList containing all entries of self and other.
    """
    return self.__class__(self[:]).__ior__(other)

  def __iand__(self, other):
    """
    Remove elements in self that are not in other.
    """
    end = len(self) - 1
    for i, elem in enumerate(self[::-1]):
      if elem not in other:
        del self[end - i]
    return self

  def __and__(self, other):
    """
    Return a ChannelList containing the entries of self that are also in
    other.
    """
    return self.__class__([elem for elem in self if elem in other])

  def unique(self):
    """
    Return a ChannelList which has every element of self, but without
    duplication of channel name. Preserve order. Does not hash, so a bit slow.
    """
    new = self.__class__([])
    for elem in self:
      if str(elem) not in [str(e) for e in new]:
        new.append(elem)
    return new
820 821 # ============================================================================== 822 # Function to generate a framecache of /dmt types 823 # ==============================================================================
def dmt_cache(start,end,ifo,type,framecache=False):
  """
  This function will return a list of frame files in the given start and stop
  time interval for the give IFO using the given DMT frame type. This is
  required if ligo_data_find will not return the dmt frames.

  Example:

  >>> dmt_cache(960000000,960010000,'H1','LockLoss_H1')
  ['/archive/frames/dmt/LHO/LockLoss_H1/H-M-960/H-LockLoss_H1_M-960001200-3600.gwf',
   '/archive/frames/dmt/LHO/LockLoss_H1/H-M-960/H-LockLoss_H1_M-960004800-3600.gwf']

  Arguments:

      start : float
          GPS start time (s).
      end : float
          GPS end time (s).
      ifo : string
          ifo to find, e.g. 'H1'.
      type : string
          DMT frame data type to find, e.g. 'LockLoss_H1'.

  Keyword arguments:

      framecache : [ True | False ]
          return a FrameCache of FrameCacheEntries instead of a LALCache.
  """

  # find dmt frames path from the local hostname
  host = getfqdn()
  if re.search('ligo-', host):
    dmt_dir = '/dmt'
  elif re.search(r'ligo\.', host):
    # the dot is escaped: previously 'ligo.' matched 'ligo' plus any
    # character, catching unintended hostnames
    site = {'H':'LHO','L':'LLO','V':'V1'}
    dmt_dir = os.path.join('/archive','frames','dmt',site[ifo[0]])
  else:
    # previously dmt_dir was left unset here, producing a bare NameError
    # below; fail with an informative message instead
    raise RuntimeError("Cannot determine DMT frame directory for host %s"\
                       % host)

  span = segments.segment(start,end)

  # set cache and entry classes for the requested output flavour
  if framecache:
    cache = FrameCache()
    entry_class = FrameCacheEntry
  else:
    cache = LALCache()
    entry_class = LALCacheEntry

  basedir = os.path.join(dmt_dir,type)

  # frames are 3600 seconds long, so round times down to the hour boundary
  tmp = int(str(start)[0:3]+'000000')
  cache_start = tmp+3600*int((start-tmp)/3600)
  tmp = int(str(end)[0:3]+'000000')
  cache_end = tmp+3600*int((end-tmp)/3600)

  # directories are listed with three leading GPS time digits
  start_three = int(str(cache_start)[0:3])
  end_three = int(str(cache_end)[0:3])
  first_three = numpy.arange(start_three,end_three+1,1)

  # loop over directories, keeping entries that overlap the requested span
  for t in first_three:
    querydir = os.path.join(basedir,'%s-%s-%s' % (ifo[0:1],'M',str(t)),'*')
    for filename in glob.glob(querydir):
      try:
        e = entry_class.from_T050017(filename)
        if span.intersects(e.segment): cache.append(e)
      except ValueError:
        sys.stderr.write("Could not convert %s to %s\n"\
                         % (filename,'.'.join([entry_class.__module__,\
                                               entry_class.__name__])))

  return cache
885 886 # ============================================================================== 887 # Class for wCacheEntry 888 # ============================================================================== 889
class FrameCacheEntry(LALCacheEntry):
  """
  An object representing one line in a frame cache file.

  Each line in a frame cache identifies multiple files, and the line consists
  of six columns of white-space delimited text.

  The first column, "observatory", generally stores the name of an
  observatory site or one or more instruments (preferably delimited by ",",
  but often there is no delimiter between instrument names).

  The second column, "description", stores a short string tag that is
  usually all capitals with "_" separating components, in the style of the
  description part of the LIGO-Virgo frame filename format.

  The third and fourth columns store the start time and stop time in GPS
  seconds of the interval spanned by the file identified by the cache line.

  The fifth column stores the duration of each frame identified in the cache
  line.

  The sixth (last) column stores the file's URL.

  The values for these columns are stored in the .observatory,
  .description, .segment and .url attributes, respectively. The
  .segment attribute stores a glue.segments.segment object describing
  the interval spanned by the file. Any of these attributes except
  the URL is allowed to be None.
  """

  # How to parse a line in a frame cache file. Six white-space
  # delimited columns.
  _regex = re.compile(r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<end>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
  # How to parse a T050017-style URL: observatory-description-start-dur.ext
  _url_regex = re.compile(r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")

  def __init__(self, *args, **kwargs):
    """
    Intialize a FrameCacheEntry object. The arguments can take two
    forms: a single string argument, which is interpreted and
    parsed as a line from a frame cache file, or five arguments
    used to explicitly initialize the observatory, description,
    segment, duration and URL in that order. When parsing a single
    line of text from a frame cache, an optional key-word argument
    "coltype" can be provided to set the type the start, end and
    durations are parsed as. The default is
    pylal.xlal.datatypes.ligotimegps.LIGOTimeGPS.
    """
    if len(args) == 1:
      # parse line of text as an entry in a cache file
      match = self._regex.search(args[0])
      coltype = kwargs.pop("coltype", LIGOTimeGPS)

      try:
        # a failed search returns None, which has no .groupdict
        match = match.groupdict()
      except AttributeError:
        raise ValueError("could not convert %s to FrameCacheEntry"\
                         % repr(args[0]))
      self.observatory = match["obs"]
      self.description = match["dsc"]
      start = match["strt"]
      end = match["end"]
      self.duration = coltype(match["dur"])

      if start == "-" and end == "-":
        # no segment information
        self.segment = None
      else:
        self.segment = segments.segment(coltype(start),coltype(end))
      self.url = match["url"]

      if kwargs:
        raise TypeError("unrecognized keyword arguments: %s"\
                        % ", ".join(kwargs))
    elif len(args) == 5:
      # parse arguments as observatory, description,
      # segment, duration, url
      if kwargs:
        raise TypeError("invalid arguments: %s" % ", ".join(kwargs))
      self.observatory, self.description, self.segment, self.duration, self.url\
          = args
    else:
      raise TypeError("invalid arguments: %s" % args)

    # "-" indicates an empty column
    if self.observatory == "-":
      self.observatory = None
    if self.description == "-":
      self.description = None

  def __str__(self):
    """
    Convert the FrameCacheEntry to a string in the format of a line
    in a frame cache. Used to write the FrameCacheEntry to a file.
    """
    if self.segment is not None:
      start,end = [str(t) for t in self.segment]
    else:
      start = "-"
      end = "-"
      # NOTE(review): this local is never used; the format below prints
      # self.duration regardless, so a segment-less entry still shows its
      # numeric duration
      duration = "-"

    return "%s %s %s %s %s %s" % (self.observatory or "-", self.description or "-", start, end, self.duration, self.url)

  def __cmp__(self, other):
    """
    Compare two FrameCacheEntry objects by observatory, then
    description, then segment, then duration, then URL.
    """
    # NOTE(review): __cmp__ and the builtin cmp() exist only in Python 2
    if type(other) != FrameCacheEntry:
      raise TypeError("can only compare FrameCacheEntry to FrameCacheEntry)")
    return cmp((self.observatory, self.description, self.segment,\
                self.duration, self.url), \
               (other.observatory, other.description, other.segment,\
                other.duration, other.url))

  def get_files(self):
    """
    Find and return all files described by this FrameCacheEntry: files under
    self.path whose T050017 name matches this entry's observatory,
    description and duration, and whose segment intersects this entry's.
    """
    # self.path is presumably provided by LALCacheEntry from self.url —
    # TODO confirm against glue.lal.CacheEntry
    filenames = glob.glob(os.path.join(self.path,\
                                       '%s-%s*-%s.*' % (self.observatory,\
                                                        self.description,\
                                                        self.duration)))
    cache = [e.path for e in\
             LALCache([LALCacheEntry.from_T050017(f) for f in filenames])\
             if e.observatory==self.observatory and\
                e.description==self.description and\
                self.segment.intersects(e.segment) and\
                abs(e.segment)==self.duration]

    return cache

  def from_T050017(cls, url, coltype = LIGOTimeGPS):
    """
    Parse a URL in the style of T050017-00 into a FrameCacheEntry.
    The T050017-00 file name format is, essentially,

        observatory-description-start-dur.ext

    Note that the stored URL is the containing directory of the given
    url (os.path.split(url)[0]), since one FrameCacheEntry describes
    multiple files.
    """
    match = cls._url_regex.search(url)
    if not match:
      raise ValueError("could not convert %s to CacheEntry" % repr(url))
    observatory = match.group("obs")
    description = match.group("dsc")
    start = match.group("strt")
    duration = match.group("dur")
    if start == "-" and duration == "-":
      # no segment information
      segment = None
    else:
      segment = segments.segment(coltype(start), coltype(start) + coltype(duration))
    return cls(observatory, description, segment, duration,\
               os.path.split(url)[0])

  # bind as an alternate constructor (pre-decorator idiom)
  from_T050017 = classmethod(from_T050017)


# ==============================================================================
# Class for FrameCache
# ==============================================================================
class FrameCache(LALCache):
  """
  An object representing a frame cache file: a list of FrameCacheEntry
  objects. Note that it is currently possible to append arbitrary objects;
  appends are not validated against entry_class.
  """
  entry_class = FrameCacheEntry

  def sieve(self, ifos=None, description=None, segment=None, duration=None,
            exact_match=False):
    """
    Return a new FrameCache containing only those FrameCacheEntries
    matching the given patterns (or, for segment, overlapping it). With
    exact_match=True, non-None ifos, description and segment must match
    exactly; otherwise ifos and description are substring patterns and
    bash-style wildcards (*?) are allowed.
    """
    if exact_match:
      segment_test = lambda entry: entry.segment == segment
    else:
      # substring semantics: wrap the patterns in wildcards
      ifos = None if ifos is None else "*" + ifos + "*"
      description = None if description is None else "*" + description + "*"
      segment_test = lambda entry: segment.intersects(entry.segment)

    selected = self

    if ifos is not None:
      obs_pattern = re.compile(fnmatch.translate(ifos))
      selected = [entry for entry in selected
                  if obs_pattern.match(entry.observatory) is not None]

    if description is not None:
      dsc_pattern = re.compile(fnmatch.translate(description))
      selected = [entry for entry in selected
                  if dsc_pattern.match(entry.description) is not None]

    if segment is not None:
      selected = [entry for entry in selected if segment_test(entry)]

    if duration is not None:
      selected = [entry for entry in selected if entry.duration == duration]

    return self.__class__(selected)

  def get_files(self):
    """
    Return the list of file paths described by all entries in this cache.
    """
    paths = []
    for entry in self:
      paths.extend(entry.get_files())
    return paths
1104
def FrameCachetoLALCache(fcache):
  """
  Convert a FrameCache into a LALCache by expanding every FrameCacheEntry
  into the individual frame files it describes, each parsed back into a
  LALCacheEntry via its T050017 name.
  """
  lcache = LALCache()
  for framefile in fcache.get_files():
    lcache.append(LALCacheEntry.from_T050017(framefile))
  return lcache
1115
def LALCachetoFrameCache(lcache):
  """
  Convert a LALCache into a FrameCache, coalescing entries that live in
  the same directory, have the same duration and are contiguous in time
  into single FrameCacheEntry objects spanning the combined segment.

  NOTE: sorts the input LALCache in place (by path, then segment start)
  so that coalescible entries are seen consecutively.
  """
  lcache.sort(key=lambda e: (e.path,e.segment[0]))
  fcache = FrameCache()

  for e in lcache:
    matched = False
    dir = os.path.split(e.path)[0]

    # if path found in FrameCache try to coalesce with other entries
    dirs = [d.path for d in fcache]
    if dir in dirs:
      pathentries = [fe for fe in fcache if fe.path==dir]
      # test current entry against other entries for the same path in the
      # new cache
      for i,pe in enumerate(pathentries):
        # contiguous (or overlapping) when this entry starts no later
        # than the cached entry ends
        notdisjoint = e.segment[0] <= pe.segment[1]
        # if entries match in directory, duration, and are contiguous, append
        if pe.path==os.path.split(e.path)[0]\
        and e.segment.__abs__() == pe.duration\
        and notdisjoint:
          # extend the cached entry's segment to cover both files
          seg = segments.segment(min(pe.segment[0], e.segment[0]),\
                                 max(pe.segment[1], e.segment[1]))
          fcache[fcache.index(pe)].segment = seg
          matched = True
          break

      # if we haven't matched the entry to anything already in the cache add now
      if not matched:
        fe = FrameCacheEntry.from_T050017(e.path)
        fcache.append(fe)

    # if from a new directory add
    else:
      fe = FrameCacheEntry.from_T050017(e.path)
      fcache.append(fe)

  return fcache
1154 1155 # ============================================================================= 1156 # Query the LSC Datafind Server 1157 # ============================================================================= 1158
def query_datafind_server(url, server=None):
  """
  Query the LSC datafind server for the given url path, discovering the
  server from the environment when not given. Returns None for an empty
  response body, the decoded object when url ends in 'json', or a list
  of response lines otherwise. Raises RuntimeError (via _verify_response)
  on a non-200 response.
  """
  # locate the ligo_data_find server if one was not supplied
  if not server:
    server = _find_datafind_server()

  # pick out an explicit port, if the server address carries one
  port = int(server.split(':')[-1]) if re.search(':', server) else None

  cert, key = _get_grid_proxy()
  # use the credential over HTTPS, unless talking to a plain-HTTP port
  if cert and key and port != 80:
    connection = httplib.HTTPSConnection(server, key_file=key, cert_file=cert)
  else:
    connection = httplib.HTTPConnection(server)

  # query the server and check the response status
  connection.request("GET", url)
  response = connection.getresponse()
  _verify_response(response)

  # status was 200 OK, so read the body
  body = response.read()
  connection.close()

  if body == "":
    return None
  if url.endswith('json'):
    return cjson.decode(body)
  return body.splitlines()
1190
def _get_grid_proxy():
  """
  Returns paths for X509 certificate and proxy if available, as a
  (cert, key) tuple; either element may be None if no credential was
  found. Checks, in order: the X509_USER_PROXY environment variable,
  the X509_USER_CERT/X509_USER_KEY pair, then the default per-user
  grid proxy location /tmp/x509up_u<uid>.
  """
  # FIXME this doesn't check that the credential is valid
  cert = None
  key = None
  try:
    proxy = os.environ['X509_USER_PROXY']
    cert = proxy
    key = proxy
  except KeyError:
    try:
      cert = os.environ['X509_USER_CERT']
      key = os.environ['X509_USER_KEY']
    except KeyError:
      # fall back to the default grid-proxy location for this user
      uid = os.getuid()
      proxy_path = "/tmp/x509up_u%d" % uid
      # BUG FIX: original tested the undefined name 'path' here, raising
      # NameError whenever no X509 environment variables were set
      if os.access(proxy_path, os.R_OK):
        cert = proxy_path
        key = proxy_path

  return cert, key
1217
def _verify_response(HTTPresponse):
  """
  Test the response of the server to a query, raising RuntimeError
  (carrying the status code, reason and response body) for anything
  other than a 200 OK.
  """
  if HTTPresponse.status == 200:
    return
  message = "Server returned code %d: %s" % (HTTPresponse.status,
                                             HTTPresponse.reason)
  message += HTTPresponse.read()
  raise RuntimeError(message)
1231
def _find_datafind_server():
  """
  Find the LSC datafind server from the LIGO_DATAFIND_SERVER environment
  variable, raising RuntimeError if it is not set.
  """
  var = 'LIGO_DATAFIND_SERVER'
  if var not in os.environ:
    raise RuntimeError("Environment variable %s is not set" % var)
  return os.environ[var]
1246 1247 # ============================================================================= 1248 # Read GEO control channels from frames 1249 # ============================================================================= 1250
def parse_composite_channels(superchannel, ifo='G1', frame_type='R'):
  """
  Separate a GEO superchannel name into its component channel names,
  returned as a ChannelList. The superchannel is '#'-delimited:
  system, then repeated (subsystem, count, signal...) groups; each
  component becomes '<ifo>:<system>-<subsystem>_<signal>'.
  """
  # first token is the system name
  tokens = re.split('#', superchannel)
  system = tokens.pop(0)

  channels = ChannelList()

  # remaining tokens come in (subsystem, count, signals...) groups
  while len(tokens) > 1:
    subsystem = tokens.pop(0)
    count = int(tokens.pop(0))
    for _ in range(count):
      signal = tokens.pop(0)
      name = '%s:%s-%s_%s' % (ifo, system, subsystem, signal)
      channels.append(Channel(name, frame_type, 1))

  return channels
1277
def separate_composite_data(data, N=1):
  """
  Separate GEO superchannel data into a list of per-channel sequences,
  one for each of the N interleaved composite channels (every N-th
  sample, offset by the channel index).
  """
  return [data[channel::N] for channel in range(N)]
1291
def get_control_channel(cache, superchannel, channel, start=None, end=None,\
                        ifo='G1', frame_type='R'):
  """
  Get GEO control channel data for a single channel from a given
  superchannel; control channels are sampled at 1Hz. Returns a
  (time, data) pair of numpy arrays accumulated over all frames in
  the cache. Raises RuntimeError if any frame file is unreadable.

  NOTE(review): ifo and frame_type are accepted but not passed on to
  parse_composite_channels, which uses its own defaults.
  """
  # accumulators for the full time series
  time = numpy.array([])
  data = numpy.array([])

  for frame in cache:
    # refuse to continue on an unreadable frame file
    if not os.access(frame.path, os.R_OK):
      raise RuntimeError("Cannot read frame\n%s" % frame.path)
    frtime, frdata = fromframefile(frame.path, str(superchannel),\
                                   start=start, end=end)
    # locate this channel within the interleaved superchannel data
    channels_list = parse_composite_channels(superchannel)
    index = channels_list.find(channel)
    channel_data = separate_composite_data(frdata,\
                                           len(channels_list))[index]
    time = numpy.append(time, frtime)
    data = numpy.append(data, channel_data)

  return time, data
1319