Package glue :: Package ligolw :: Module ligolw
[hide private]
[frames | no frames]

Source Code for Module glue.ligolw.ligolw

   1  # Copyright (C) 2006--2016  Kipp Cannon 
   2  # 
   3  # This program is free software; you can redistribute it and/or modify it 
   4  # under the terms of the GNU General Public License as published by the 
   5  # Free Software Foundation; either version 3 of the License, or (at your 
   6  # option) any later version. 
   7  # 
   8  # This program is distributed in the hope that it will be useful, but 
   9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
  10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
  11  # Public License for more details. 
  12  # 
  13  # You should have received a copy of the GNU General Public License along 
  14  # with this program; if not, write to the Free Software Foundation, Inc., 
  15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
  16   
  17   
  18  # 
  19  # ============================================================================= 
  20  # 
  21  #                                   Preamble 
  22  # 
  23  # ============================================================================= 
  24  # 
  25   
  26   
  27  """ 
  28  This module provides class definitions corresponding to the elements that 
  29  can be found in a LIGO Light Weight XML file.  It also provides a class 
  30  representing an entire LIGO Light Weight XML document, a ContentHandler 
  31  class for use with SAX2 parsers, and a convenience function for 
  32  constructing a parser. 
  33  """ 
  34   
  35   
  36  import sys 
  37  from xml import sax 
  38  from xml.sax.xmlreader import AttributesImpl 
  39  from xml.sax.saxutils import escape as xmlescape 
  40  from xml.sax.saxutils import unescape as xmlunescape 
  41   
  42   
  43  from glue import git_version 
  44  from . import types as ligolwtypes 
  45  import six 
  46  from functools import reduce 
  47   
  48   
  49  __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 
  50  __version__ = "git id %s" % git_version.id 
  51  __date__ = git_version.date 
  52   
  53   
  54  # 
  55  # ============================================================================= 
  56  # 
  57  #                         Document Header, and Indent 
  58  # 
  59  # ============================================================================= 
  60  # 
  61   
  62   
  63  NameSpace = u"http://ldas-sw.ligo.caltech.edu/doc/ligolwAPI/html/ligolw_dtd.txt" 
  64   
  65   
  66  Header = u"""<?xml version='1.0' encoding='utf-8'?> 
  67  <!DOCTYPE LIGO_LW SYSTEM "%s">""" % NameSpace 
  68   
  69   
  70  Indent = u"\t" 
#
# =============================================================================
#
#                               Element Class
#
# =============================================================================
#


class ElementError(Exception):
    """
    Common ancestor of the exceptions raised by element classes, e.g.
    for invalid attributes or invalid child elements.
    """
87
class attributeproxy(property):
    """
    Property that exposes one XML attribute of an Element subclass as
    a Python instance attribute, optionally with a default value.

    The unicode attribute value is read and written through the
    .getAttribute() and .setAttribute() methods of the instance the
    property is attached to, and removed with .removeAttribute().

    On retrieval the dec callable converts the stored unicode into a
    native Python object;  on assignment the enc callable converts the
    native object into unicode.  Both default to the unicode type,
    i.e. the value is left as unicode.  If enc is None the property is
    created without a setter or deleter.

    If .getAttribute() raises KeyError on retrieval, the default value
    is returned if one was given (it is returned as-is, dec is not
    applied), otherwise AttributeError is raised.

    If doc is not given a documentation string is constructed from the
    attribute's name and, if set, its default value.

    NOTE:  when a parsed document contains an element that does not
    set an attribute whose attributeproxy has a default, Python code
    sees the default.  The default given here must therefore match the
    default in the XML DTD, and attributes without a DTD default must
    not be given one here, otherwise this library and other interfaces
    to the same document will disagree.

    Example:

    >>> class Test(Element):
    ...     Scale = attributeproxy(u"Scale", enc = u"%.17g".__mod__, dec = float, default = 1.0, doc = "This is the scale (default = 1).")
    ...
    >>> x = Test()
    >>> # have not set value, default will be returned
    >>> x.Scale
    1.0
    >>> x.Scale = 16
    >>> x.Scale
    16.0
    >>> # default can be retrieved via the .default attribute of the
    >>> # class attribute
    >>> Test.Scale.default
    1.0
    >>> # default is read-only
    >>> Test.Scale.default = 2.
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    AttributeError: can't set attribute
    >>> # internally, value is stored as unicode (for XML)
    >>> x.getAttribute("Scale")
    u'16'
    >>> # deleting an attribute restores the default value if defined
    >>> del x.Scale
    >>> x.Scale
    1.0
    """
    def __init__(self, name, enc = six.text_type, dec = six.text_type, default = None, doc = None):
        # None doubles as the "no default" marker
        has_default = default is not None
        # without an encoder the property is read-only
        writable = enc is not None

        # the accessors below are closures over name, enc, dec and
        # default;  their "self" is the Element instance, not this
        # property object
        def _get(self):
            try:
                raw = self.getAttribute(name)
            except KeyError:
                if has_default:
                    return default
                raise AttributeError("attribute '%s' is not set" % name)
            return dec(raw)

        def _set(self, value):
            self.setAttribute(name, enc(value))

        def _del(self):
            self.removeAttribute(name)

        # fall back to a generated documentation string
        if doc is None:
            doc = "The \"%s\" attribute." % name
            if has_default:
                doc += " Default is \"%s\" if not set." % str(default)

        super(attributeproxy, self).__init__(_get, _set if writable else None, _del if writable else None, doc)
        # property subclasses do not pick the documentation up
        # automatically, so set it explicitly
        self.__doc__ = doc
        # only store a default when one was given, so that reading
        # .default raises AttributeError otherwise
        if has_default:
            self._default = default

    @property
    def default(self):
        """
        The default value (read-only).  AttributeError is raised if
        no default value was set.
        """
        return self._default
191
class Element(object):
    """
    Base class for all element types.  This class is inspired by the
    class of the same name in the Python standard library's xml.dom
    package.  One important distinction is that the standard DOM
    element is used to represent the structure of a document at a much
    finer level of detail than here.  For example, in the case of the
    standard DOM element, each XML attribute is its own element being a
    child node of its tag, while here they are simply stored as
    attributes of the tag element itself.

    Despite the differences, the documentation for the xml.dom package,
    particularly that of the Element class and its parent, the Node
    class, is useful as supplementary material in understanding how to
    use this class.

    Instance attributes:  parentNode (the containing element or None),
    attributes (an AttributesImpl), childNodes (list of child
    elements) and pcdata (the element's text, or None).
    """
    # XML tag names are case sensitive:  compare with ==, !=, etc.
    tagName = None
    validchildren = frozenset()

    @classmethod
    def validattributes(cls):
        """
        Return the frozenset of the names of the class attributes
        that are attributeproxy instances.
        """
        return frozenset(name for name in dir(cls) if isinstance(getattr(cls, name), attributeproxy))

    def __init__(self, attrs = None):
        """
        Construct an element.  The argument is a
        sax.xmlreader.AttributesImpl object (see the xml.sax
        documentation, but it's basically a dictionary-like thing)
        used to set the element attributes.  If omitted the element
        starts with no attributes.  ElementError is raised if attrs
        contains a name not in .validattributes().
        """
        self.parentNode = None
        if attrs is None:
            self.attributes = AttributesImpl({})
        else:
            invalid = set(attrs.keys()) - self.validattributes()
            if invalid:
                raise ElementError("%s element: invalid attribute(s) %s" % (self.tagName, ", ".join("'%s'" % key for key in invalid)))
            self.attributes = attrs
        self.childNodes = []
        self.pcdata = None

    def start_tag(self, indent):
        """
        Generate the string for the element's start tag.  Attribute
        values are XML-escaped (including double quotes) so that
        values containing markup characters yield well-formed XML.
        """
        # values are stored unescaped (the parser decodes entities on
        # input), so they must be re-escaped on output
        attrs = u"".join(u" %s=\"%s\"" % (key, xmlescape(six.text_type(value), {u"\"": u"&quot;"})) for key, value in self.attributes.items())
        return u"%s<%s%s>" % (indent, self.tagName, attrs)

    def end_tag(self, indent):
        """
        Generate the string for the element's end tag.
        """
        return u"%s</%s>" % (indent, self.tagName)

    def appendChild(self, child):
        """
        Add a child to this element.  The child's parentNode
        attribute is updated, too.  The child is returned.
        """
        self.childNodes.append(child)
        child.parentNode = self
        self._verifyChildren(len(self.childNodes) - 1)
        return child

    def insertBefore(self, newchild, refchild):
        """
        Insert a new child node before an existing child.  It must
        be the case that refchild is a child of this node;  if not,
        ValueError is raised.  newchild is returned.
        """
        # compare by identity, not by value
        for i, childNode in enumerate(self.childNodes):
            if childNode is refchild:
                self.childNodes.insert(i, newchild)
                newchild.parentNode = self
                self._verifyChildren(i)
                return newchild
        raise ValueError(refchild)

    def removeChild(self, child):
        """
        Remove a child from this element.  The child element is
        returned, and its parentNode attribute is reset.  If the
        child will not be used any more, you should call its
        unlink() method to promote garbage collection.  ValueError is
        raised if child is not a child of this node.
        """
        for i, childNode in enumerate(self.childNodes):
            if childNode is child:
                del self.childNodes[i]
                child.parentNode = None
                return child
        raise ValueError(child)

    def unlink(self):
        """
        Break internal references within the document tree rooted
        on this element to promote garbage collection.
        """
        self.parentNode = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def replaceChild(self, newchild, oldchild):
        """
        Replace an existing node with a new node.  It must be the
        case that oldchild is a child of this node;  if not,
        ValueError is raised.  newchild is returned.
        """
        # .index() would use compare-by-value, we want
        # compare-by-id because we want to find the exact object,
        # not something equivalent to it.
        for i, childNode in enumerate(self.childNodes):
            if childNode is oldchild:
                self.childNodes[i].parentNode = None
                self.childNodes[i] = newchild
                newchild.parentNode = self
                self._verifyChildren(i)
                return newchild
        raise ValueError(oldchild)

    def getElements(self, filter):
        """
        Return a list of elements below and including this element
        for which filter(element) returns True.  Matches found in the
        children come first, in child order;  this element, if it
        matches, is last.
        """
        matches = []
        for child in self.childNodes:
            matches.extend(child.getElements(filter))
        if filter(self):
            matches.append(self)
        return matches

    def getElementsByTagName(self, tagName):
        """
        Return a list of the elements below and including this
        element whose tagName equals the given one.
        """
        return self.getElements(lambda e: e.tagName == tagName)

    def getChildrenByAttributes(self, attrs):
        """
        Return a list of the direct children whose attributes match
        every name/value pair in the dictionary attrs.  A child that
        lacks one of the attributes is treated as a non-match.
        """
        matches = []
        for child in self.childNodes:
            try:
                if all(child.getAttribute(name) == value for name, value in attrs.items()):
                    matches.append(child)
            except KeyError:
                # attribute not set on this child
                pass
        return matches

    def hasAttribute(self, attrname):
        """
        Return True if the attribute is set on this element.
        """
        return attrname in self.attributes

    def getAttribute(self, attrname):
        """
        Return the stored value of the attribute.  KeyError is
        raised if it is not set.
        """
        return self.attributes[attrname]

    def setAttribute(self, attrname, value):
        """
        Set the attribute to the unicode representation of value.
        """
        # careful:  this digs inside an AttributesImpl object and
        # modifies its internal data.  probably not a good idea,
        # but I don't know how else to edit an attribute because
        # the stupid things don't export a method to do it.
        self.attributes._attrs[attrname] = six.text_type(value)

    def removeAttribute(self, attrname):
        """
        Remove the attribute.  It is not an error if it is not set.
        """
        # careful:  this digs inside an AttributesImpl object and
        # modifies its internal data.  probably not a good idea,
        # but I don't know how else to edit an attribute because
        # the stupid things don't export a method to do it.
        try:
            del self.attributes._attrs[attrname]
        except KeyError:
            pass

    def appendData(self, content):
        """
        Add characters to the element's pcdata.
        """
        if self.pcdata is not None:
            self.pcdata += content
        else:
            self.pcdata = content

    def _verifyChildren(self, i):
        """
        Method used internally by some elements to verify that
        their children are from the allowed set and in the correct
        order following modifications to their child list.  i is
        the index of the child that has just changed.  The base
        class implementation accepts anything.
        """
        pass

    def endElement(self):
        """
        Method invoked by document parser when it encounters the
        end-of-element event.  The base class implementation does
        nothing.
        """
        pass

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Recursively write an element and its children to a file.
        Children are indented one level deeper than this element.
        ElementError is raised if a child's tagName is not in
        .validchildren.
        """
        fileobj.write(self.start_tag(indent))
        fileobj.write(u"\n")
        for c in self.childNodes:
            if c.tagName not in self.validchildren:
                raise ElementError("invalid child %s for %s" % (c.tagName, self.tagName))
            c.write(fileobj, indent + Indent)
        if self.pcdata is not None:
            fileobj.write(xmlescape(self.pcdata))
            fileobj.write(u"\n")
        fileobj.write(self.end_tag(indent))
        fileobj.write(u"\n")
398
class EmptyElement(Element):
    """
    Base class for elements that are not allowed to hold text.
    """
    def appendData(self, content):
        """
        Discard whitespace-only content;  raise TypeError for
        anything else.
        """
        if content.isspace():
            return
        raise TypeError("%s does not hold text" % type(self))
407
def WalkChildren(elem):
    """
    Generator yielding every element below elem (elem itself is not
    included), depth first:  each child is yielded before its own
    descendants, and siblings are visited in order.
    """
    # stack of iterators, one per level of the tree currently being
    # descended;  the top of the stack is the deepest level
    pending = [iter(elem.childNodes)]
    while pending:
        try:
            node = next(pending[-1])
        except StopIteration:
            # this level is exhausted, resume the one above
            pending.pop()
            continue
        yield node
        pending.append(iter(node.childNodes))
417
#
# =============================================================================
#
#                         Name Attribute Manipulation
#
# =============================================================================
#


class LLWNameAttr(six.text_type):
    r"""
    Base class hiding the pattern matching applied to various element
    names.  Subclasses must provide .dec_pattern, a compiled regular
    expression with a group "Name" identifying the meaningful portion
    of the string, and .enc_pattern, a format string used with "%" to
    reconstruct the full string.

    This is intended to be used to provide the enc and dec functions
    for an attributeproxy instance.

    Example:

    >>> import re
    >>> class Test(Element):
    ...     class TestName(LLWNameAttr):
    ...         dec_pattern = re.compile(r"(?P<Name>[a-z0-9_]+):test\Z")
    ...         enc_pattern = u"%s:test"
    ...
    ...     Name = attributeproxy(u"Name", enc = TestName.enc, dec = TestName)
    ...
    >>> x = Test()
    >>> x.Name = u"blah"
    >>> # internally, suffix has been appended
    >>> x.getAttribute("Name")
    u'blah:test'
    >>> # but attributeproxy reports original value
    >>> x.Name
    u'blah'
    """
    def __new__(cls, name):
        """
        Construct from name, reduced to the "Name" group of
        .dec_pattern when the pattern matches;  otherwise name is
        used unchanged.
        """
        try:
            match = cls.dec_pattern.search(name)
            stripped = match.group(u"Name")
        except AttributeError:
            # no match (search() returned None), or no usable
            # .dec_pattern:  keep the name as given
            stripped = name
        return six.text_type.__new__(cls, stripped)

    @classmethod
    def enc(cls, name):
        """
        Return the full string for name by applying .enc_pattern.
        """
        return cls.enc_pattern % name
468
#
# =============================================================================
#
#                        LIGO Light Weight XML Elements
#
# =============================================================================
#


class LIGO_LW(EmptyElement):
    """
    The LIGO_LW element.
    """
    tagName = u"LIGO_LW"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((
        u"LIGO_LW",
        u"Comment",
        u"Param",
        u"Table",
        u"Array",
        u"Stream",
        u"IGWDFrame",
        u"AdcData",
        u"AdcInterval",
        u"Time",
        u"Detector",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")
488
class Comment(Element):
    """
    The Comment element.
    """
    tagName = u"Comment"

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element on a single line:  indented start tag,
        escaped text if there is any, end tag, newline.
        """
        emit = fileobj.write
        emit(self.start_tag(indent))
        text = self.pcdata
        if text is not None:
            emit(xmlescape(text))
        # the end tag follows the text directly, so it is not indented
        emit(self.end_tag(u""))
        emit(u"\n")
502
class Param(Element):
    """
    The Param element.
    """
    tagName = u"Param"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"Comment",))

    # XML attributes
    DataUnit = attributeproxy(u"DataUnit")
    Name = attributeproxy(u"Name")
    Scale = attributeproxy(u"Scale")
    Start = attributeproxy(u"Start")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")
517
class Table(EmptyElement):
    """
    The Table element.
    """
    tagName = u"Table"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"Comment", u"Column", u"Stream"))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")

    def _verifyChildren(self, i):
        """
        Check the order of the children:  at most one Comment, which
        must come first, then any number of Columns, then at most one
        other child.  Any child that is neither a Comment nor a
        Column is counted as the Stream.  ElementError is raised on
        violation.  The index i is not used;  the whole child list is
        re-checked.
        """
        seen_comment = seen_column = seen_stream = False
        for node in self.childNodes:
            tag = node.tagName
            if tag == Comment.tagName:
                if seen_comment:
                    raise ElementError("only one Comment allowed in Table")
                if seen_column or seen_stream:
                    raise ElementError("Comment must come before Column(s) and Stream in Table")
                seen_comment = True
            elif tag == Column.tagName:
                if seen_stream:
                    raise ElementError("Column(s) must come before Stream in Table")
                seen_column = True
            else:
                if seen_stream:
                    raise ElementError("only one Stream allowed in Table")
                seen_stream = True
548
class Column(EmptyElement):
    """
    Column element.  Written as a single self-closing tag.
    """
    tagName = u"Column"

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")

    def start_tag(self, indent):
        """
        Generate the string for the element's start tag, which for
        this element is self-closing.  Attribute values are
        XML-escaped (including double quotes) so that values
        containing markup characters yield well-formed XML.
        """
        # values are stored unescaped (the parser decodes entities on
        # input), so they must be re-escaped on output
        attrs = u"".join(u" %s=\"%s\"" % (key, xmlescape(six.text_type(value), {u"\"": u"&quot;"})) for key, value in self.attributes.items())
        return u"%s<%s%s/>" % (indent, self.tagName, attrs)

    def end_tag(self, indent):
        """
        Generate the string for the element's end tag.  The start
        tag is self-closing so this is always the empty string.
        """
        return u""

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element to a file as one self-closing tag followed
        by a newline.  Children and text are not written.
        """
        fileobj.write(self.start_tag(indent))
        fileobj.write(u"\n")
578
class Array(EmptyElement):
    """
    The Array element.
    """
    tagName = u"Array"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"Dim", u"Stream"))

    # XML attributes
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type")
    Unit = attributeproxy(u"Unit")

    def _verifyChildren(self, i):
        """
        Check the order of the children:  any number of Dims followed
        by at most one other child.  Any child that is not a Dim is
        counted as the Stream.  ElementError is raised on violation.
        The index i is not used;  the whole child list is re-checked.
        """
        seen_stream = False
        for node in self.childNodes:
            is_dim = node.tagName == Dim.tagName
            if seen_stream:
                # nothing may follow the Stream
                if is_dim:
                    raise ElementError("Dim(s) must come before Stream in Array")
                raise ElementError("only one Stream allowed in Array")
            if not is_dim:
                seen_stream = True
601
class Dim(Element):
    """
    The Dim element.  The element's text is exposed as the integer
    property .n.
    """
    tagName = u"Dim"

    # XML attributes;  Scale and Start are converted with the
    # "real_8" formatter and parser
    Name = attributeproxy(u"Name")
    Scale = attributeproxy(u"Scale", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Start = attributeproxy(u"Start", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Unit = attributeproxy(u"Unit")

    @property
    def n(self):
        """
        The element's text converted with the "int_8s" parser, or
        None if the element has no text.  Assigning None or deleting
        clears the text.
        """
        text = self.pcdata
        if text is None:
            return None
        return ligolwtypes.ToPyType[u"int_8s"](text)

    @n.setter
    def n(self, val):
        if val is None:
            self.pcdata = None
        else:
            self.pcdata = ligolwtypes.FormatFunc[u"int_8s"](val)

    @n.deleter
    def n(self):
        self.pcdata = None

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element on a single line:  indented start tag,
        escaped text if there is any, end tag, newline.
        """
        emit = fileobj.write
        emit(self.start_tag(indent))
        text = self.pcdata
        if text is not None:
            emit(xmlescape(text))
        # the end tag follows the text directly, so it is not indented
        emit(self.end_tag(u""))
        emit(u"\n")
632
class Stream(Element):
    """
    The Stream element.
    """
    tagName = u"Stream"

    # XML attributes;  Delimiter and Type have defaults
    Content = attributeproxy(u"Content")
    Delimiter = attributeproxy(u"Delimiter", default = u",")
    Encoding = attributeproxy(u"Encoding")
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type", default = u"Local")

    def __init__(self, *args):
        """
        Construct the element as the parent class does, then raise
        ElementError unless Type is "Remote" or "Local".
        """
        super(Stream, self).__init__(*args)
        stream_type = self.Type
        if stream_type not in (u"Remote", u"Local"):
            raise ElementError("invalid Type for Stream: '%s'" % stream_type)
650
class IGWDFrame(EmptyElement):
    """
    The IGWDFrame element.
    """
    tagName = u"IGWDFrame"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((
        u"Comment",
        u"Param",
        u"Time",
        u"Detector",
        u"AdcData",
        u"LIGO_LW",
        u"Stream",
        u"Array",
        u"IGWDFrame",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")
660
class Detector(EmptyElement):
    """
    The Detector element.
    """
    tagName = u"Detector"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"Comment", u"Param", u"LIGO_LW"))

    # XML attributes
    Name = attributeproxy(u"Name")
670
class AdcData(EmptyElement):
    """
    The AdcData element.
    """
    tagName = u"AdcData"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((
        u"AdcData",
        u"Comment",
        u"Param",
        u"Time",
        u"LIGO_LW",
        u"Array",
    ))

    # XML attributes
    Name = attributeproxy(u"Name")
680
class AdcInterval(EmptyElement):
    """
    The AdcInterval element.
    """
    tagName = u"AdcInterval"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"AdcData", u"Comment", u"Time"))

    # XML attributes;  DeltaT is converted with the "real_8"
    # formatter and parser
    DeltaT = attributeproxy(u"DeltaT", enc = ligolwtypes.FormatFunc[u"real_8"], dec = ligolwtypes.ToPyType[u"real_8"])
    Name = attributeproxy(u"Name")
    StartTime = attributeproxy(u"StartTime")
692
class Time(Element):
    """
    The Time element.  After parsing, the element's text is replaced
    by a native object whose type depends on the Type attribute.
    """
    tagName = u"Time"

    # XML attributes;  Type has a default
    Name = attributeproxy(u"Name")
    Type = attributeproxy(u"Type", default = u"ISO-8601")

    def __init__(self, *args):
        """
        Construct the element as the parent class does, then raise
        ElementError if Type is not in ligolwtypes.TimeTypes.
        """
        super(Time, self).__init__(*args)
        time_type = self.Type
        if time_type not in ligolwtypes.TimeTypes:
            raise ElementError("invalid Type for Time: '%s'" % time_type)

    def endElement(self):
        """
        Convert the accumulated text to a native object:  the result
        of dateutil.parser.parse() for "ISO-8601", a lal LIGOTimeGPS
        for "GPS", a float for "Unix".  For any other Type the text
        is left as it is.
        """
        time_type = self.Type
        if time_type == u"ISO-8601":
            import dateutil.parser
            self.pcdata = dateutil.parser.parse(self.pcdata)
        elif time_type == u"GPS":
            from lal import LIGOTimeGPS
            # FIXME:  remove cast to string when lal swig
            # can cast from unicode
            self.pcdata = LIGOTimeGPS(str(self.pcdata))
        elif time_type == u"Unix":
            self.pcdata = float(self.pcdata)
        # otherwise:  unsupported time type.  not impossible that
        # calling code has overridden TimeTypes set in
        # glue.ligolw.types;  just accept it as a string

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write the element on a single line:  indented start tag,
        the escaped text form of the value if there is one, end tag,
        newline.
        """
        fileobj.write(self.start_tag(indent))
        value = self.pcdata
        if value is not None:
            time_type = self.Type
            if time_type == u"ISO-8601":
                text = six.text_type(value.isoformat())
            elif time_type == u"Unix":
                text = u"%.16g" % value
            else:
                # "GPS", or an unsupported time type.  for the
                # latter, assume the correct thing to do is cast
                # to unicode and let calling code figure out how
                # to ensure that does the correct thing.
                text = six.text_type(value)
            fileobj.write(xmlescape(text))
        # the end tag follows the text directly, so it is not indented
        fileobj.write(self.end_tag(u""))
        fileobj.write(u"\n")

    @classmethod
    def now(cls, Name = None):
        """
        Return a new Time element of the default Type (ISO-8601)
        holding the current UTC time as returned by
        datetime.datetime.utcnow().  The Name attribute is set to
        the Name parameter if it is given.
        """
        import datetime
        elem = cls()
        if Name is not None:
            elem.Name = Name
        elem.pcdata = datetime.datetime.utcnow()
        return elem

    @classmethod
    def from_gps(cls, gps, Name = None):
        """
        Return a new Time element of Type "GPS" holding the given
        GPS time.  The Name attribute is set to the Name parameter
        if it is given.

        Note:  the new Time element holds a reference to the GPS
        time, not a copy of it.  Subsequent modification of the GPS
        time object will be reflected in what gets written to disk.
        """
        elem = cls(AttributesImpl({u"Type": u"GPS"}))
        if Name is not None:
            elem.Name = Name
        elem.pcdata = gps
        return elem
774
class Document(EmptyElement):
    """
    Root of the tree describing a LIGO LW file.
    """
    tagName = u"Document"
    # tag names of the elements allowed as direct children
    validchildren = frozenset((u"LIGO_LW",))

    def write(self, fileobj = sys.stdout, xsl_file = None):
        """
        Write the document:  the XML header, an xml-stylesheet
        processing instruction referring to xsl_file if one is
        given, then each child in turn.  ElementError is raised if a
        child's tagName is not in .validchildren.
        """
        fileobj.write(Header)
        fileobj.write(u"\n")
        if xsl_file is not None:
            fileobj.write(u'<?xml-stylesheet type="text/xsl" href="%s" ?>\n' % xsl_file)
        for node in self.childNodes:
            if node.tagName in self.validchildren:
                node.write(fileobj)
                continue
            raise ElementError("invalid child %s for %s" % (node.tagName, self.tagName))
795
#
# =============================================================================
#
#                             SAX Content Handler
#
# =============================================================================
#


class LIGOLWContentHandler(sax.handler.ContentHandler, object):
    """
    ContentHandler class for parsing LIGO Light Weight documents with a
    SAX2-compliant parser.

    Example:

    >>> # initialize empty Document tree into which parsed XML tree
    >>> # will be inserted
    >>> xmldoc = Document()
    >>> # create handler instance attached to Document object
    >>> handler = LIGOLWContentHandler(xmldoc)
    >>> # open file and parse
    >>> make_parser(handler).parse(open("demo.xml"))
    >>> # write XML (default to stdout)
    >>> xmldoc.write()

    NOTE:  this example is for illustration only.  Most users will wish
    to use the .load_*() functions in the glue.ligolw.utils subpackage
    to load documents, and the .write_*() functions to write documents.
    Those functions provide additional features such as support for
    gzip'ed documents, MD5 hash computation, and Condor eviction
    trapping to avoid writing broken documents to disk.

    See also:  PartialLIGOLWContentHandler,
    FilteringLIGOLWContentHandler.
    """

    def __init__(self, document, start_handlers = None):
        """
        Initialize the handler by pointing it to the Document object
        into which the parsed file will be loaded.  start_handlers,
        if given, is a dictionary mapping (uri, localname) tuples to
        callables taking (parent, attrs) and returning the new
        element;  its entries are added to, and override, the
        built-in ones.
        """
        self.current = self.document = document

        self._startElementHandlers = {
            (None, AdcData.tagName): self.startAdcData,
            (None, AdcInterval.tagName): self.startAdcInterval,
            (None, Array.tagName): self.startArray,
            (None, Column.tagName): self.startColumn,
            (None, Comment.tagName): self.startComment,
            (None, Detector.tagName): self.startDetector,
            (None, Dim.tagName): self.startDim,
            (None, IGWDFrame.tagName): self.startIGWDFrame,
            (None, LIGO_LW.tagName): self.startLIGO_LW,
            (None, Param.tagName): self.startParam,
            (None, Stream.tagName): self.startStream,
            (None, Table.tagName): self.startTable,
            (None, Time.tagName): self.startTime,
        }
        # None rather than {} as the default, to avoid sharing one
        # mutable default object between all calls
        if start_handlers is not None:
            self._startElementHandlers.update(start_handlers)

    # element factories.  each receives the element that will become
    # the parent and the attributes of the new element, and returns
    # the new element.  subclasses override these to substitute their
    # own element classes.

    def startAdcData(self, parent, attrs):
        return AdcData(attrs)

    def startAdcInterval(self, parent, attrs):
        return AdcInterval(attrs)

    def startArray(self, parent, attrs):
        return Array(attrs)

    def startColumn(self, parent, attrs):
        return Column(attrs)

    def startComment(self, parent, attrs):
        return Comment(attrs)

    def startDetector(self, parent, attrs):
        return Detector(attrs)

    def startDim(self, parent, attrs):
        return Dim(attrs)

    def startIGWDFrame(self, parent, attrs):
        return IGWDFrame(attrs)

    def startLIGO_LW(self, parent, attrs):
        return LIGO_LW(attrs)

    def startParam(self, parent, attrs):
        return Param(attrs)

    def startStream(self, parent, attrs):
        return Stream(attrs)

    def startTable(self, parent, attrs):
        return Table(attrs)

    def startTime(self, parent, attrs):
        return Time(attrs)

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Handle a start-of-element event:  build the element with the
        handler registered for (uri, localname), append it to the
        current element and make it the current element.
        ElementError is raised for an unknown element.  Any exception
        raised while building or appending is re-raised as the same
        type with the line number prepended to its message.
        """
        (uri, localname) = uri_localname
        try:
            start_handler = self._startElementHandlers[(uri, localname)]
        except KeyError:
            raise ElementError("unknown element %s for namespace %s" % (localname, uri or NameSpace))
        # re-key the attributes by qualified name
        attrs = AttributesImpl(dict((attrs.getQNameByName(name), value) for name, value in attrs.items()))
        try:
            self.current = self.current.appendChild(start_handler(self.current, attrs))
        except Exception as e:
            raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))

    def endElementNS(self, uri_localname, qname):
        """
        Handle an end-of-element event:  invoke the current
        element's .endElement() method, then make its parent the
        current element.  Exceptions are re-raised as the same type
        with the line number prepended to the message.
        """
        try:
            self.current.endElement()
        except Exception as e:
            raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))
        self.current = self.current.parentNode

    def characters(self, content):
        """
        Handle character data by passing it, through xmlunescape(),
        to the current element's .appendData() method.  Exceptions
        are re-raised as the same type with the line number prepended
        to the message.
        """
        try:
            self.current.appendData(xmlunescape(content))
        except Exception as e:
            raise type(e)("line %d: %s" % (self._locator.getLineNumber(), str(e)))
922
class PartialLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads only the parts of a document
    matching some criteria.  Useful, for example, when one wishes to
    read only a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return PartialLIGOLWContentHandler(document, lambda name, attrs: name == Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree containing only the
    Table elements and their children.
    """
    def __init__(self, document, element_filter):
        """
        Only those elements for which element_filter(name, attrs)
        evaluates to True, and the children of those elements, will
        be loaded.
        """
        super(PartialLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # nesting depth inside the element currently being loaded;
        # 0 means outside any matching element
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Load the element if it lies inside an element already being
        loaded or if the filter accepts it;  otherwise ignore it.
        """
        (uri, localname) = uri_localname
        # the filter sees the attributes keyed by qualified name
        filter_attrs = AttributesImpl({attrs.getQNameByName(name): value for name, value in attrs.items()})
        if self.depth <= 0 and not self.element_filter(localname, filter_attrs):
            return
        super(PartialLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)
        self.depth += 1

    def endElementNS(self, *args):
        """
        Close the current element if one is being loaded.
        """
        if self.depth <= 0:
            return
        self.depth -= 1
        super(PartialLIGOLWContentHandler, self).endElementNS(*args)

    def characters(self, content):
        """
        Pass on character data only while an element is being loaded.
        """
        if self.depth <= 0:
            return
        super(PartialLIGOLWContentHandler, self).characters(content)
966
class FilteringLIGOLWContentHandler(LIGOLWContentHandler):
    """
    LIGO LW content handler that loads everything except the parts of
    a document matching some criteria.  Useful, for example, when one
    wishes to read everything except a single table from a file.

    Example:

    >>> from glue.ligolw import utils as ligolw_utils
    >>> def contenthandler(document):
    ...     return FilteringLIGOLWContentHandler(document, lambda name, attrs: name != Table.tagName)
    ...
    >>> xmldoc = ligolw_utils.load_filename("demo.xml", contenthandler = contenthandler)

    This parses "demo.xml" and returns an XML tree with all the Table
    elements and their children removed.
    """
    def __init__(self, document, element_filter):
        """
        Those elements for which element_filter(name, attrs)
        evaluates to False, and the children of those elements,
        will not be loaded.
        """
        super(FilteringLIGOLWContentHandler, self).__init__(document)
        self.element_filter = element_filter
        # nesting depth inside the element currently being skipped;
        # 0 means not skipping
        self.depth = 0

    def startElementNS(self, uri_localname, qname, attrs):
        """
        Skip the element if it lies inside an element already being
        skipped or if the filter rejects it;  otherwise load it.
        """
        (uri, localname) = uri_localname
        # the filter sees the attributes keyed by qualified name
        filter_attrs = AttributesImpl({attrs.getQNameByName(name): value for name, value in attrs.items()})
        if self.depth != 0 or not self.element_filter(localname, filter_attrs):
            self.depth += 1
            return
        super(FilteringLIGOLWContentHandler, self).startElementNS((uri, localname), qname, attrs)

    def endElementNS(self, *args):
        """
        Leave one level of a skipped element, or close the current
        element when not skipping.
        """
        if self.depth != 0:
            self.depth -= 1
            return
        super(FilteringLIGOLWContentHandler, self).endElementNS(*args)

    def characters(self, content):
        """
        Pass on character data only when not skipping.
        """
        if self.depth != 0:
            return
        super(FilteringLIGOLWContentHandler, self).characters(content)
1012
#
# =============================================================================
#
#                            Convenience Functions
#
# =============================================================================
#


def make_parser(handler):
    """
    Return a SAX document parser that uses the given content handler,
    with namespaces enabled and with validation and external general
    entities disabled.  Document validation is a nice feature, but
    enabling validation can require the LIGO LW DTD to be downloaded
    from the LDAS document server if the DTD is not included inline in
    the XML.  This requires a working connection to the internet and
    the server to be up.
    """
    parser = sax.make_parser()
    parser.setContentHandler(handler)
    # applied in this order
    settings = (
        (sax.handler.feature_namespaces, True),
        (sax.handler.feature_validation, False),
        (sax.handler.feature_external_ges, False),
    )
    for feature, enabled in settings:
        parser.setFeature(feature, enabled)
    return parser
1038