Package glue :: Package ligolw :: Module array
[hide private]
[frames | no frames]

Source Code for Module glue.ligolw.array

  1  # Copyright (C) 2006--2016  Kipp Cannon 
  2  # 
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 3 of the License, or (at your 
  6  # option) any later version. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16   
 17   
 18  # 
 19  # ============================================================================= 
 20  # 
 21  #                                   Preamble 
 22  # 
 23  # ============================================================================= 
 24  # 
 25   
 26   
 27  """ 
 28  While the ligolw module provides classes and parser support for reading and 
 29  writing LIGO Light Weight XML documents, this module supplements that code 
 30  with classes and parsers that add intelligence to the in-RAM document 
 31  representation. 
 32   
 33  In particular, the document tree associated with an Array element is 
 34  enhanced.  During parsing, the Stream element in this module converts the 
 35  character data contained within it into the elements of a numpy array 
 36  object.  The array has the appropriate dimensions and type.  When the 
 37  document is written out again, the Stream element serializes the array back 
 38  into character data. 
 39   
 40  The array is stored as an attribute of the Array element. 
 41  """ 
 42   
 43   
 44  import itertools 
 45  import numpy 
 46  import re 
 47  import sys 
 48  from xml.sax.saxutils import escape as xmlescape 
 49  from xml.sax.xmlreader import AttributesImpl as Attributes 
 50   
 51   
 52  from glue import git_version 
 53  from . import ligolw 
 54  from . import tokenizer 
 55  from . import types as ligolwtypes 
 56  from six.moves import map, range 
 57   
 58   
 59  __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 
 60  __version__ = "git id %s" % git_version.id 
 61  __date__ = git_version.date 


#
# =============================================================================
#
#                                  Utilities
#
# =============================================================================
#


def get_array(xmldoc, name):
    """
    Return the one Array element called name found beneath xmldoc.

    The lookup is delegated to Array.getArraysByName().  ValueError is
    raised unless that lookup yields exactly one element.
    """
    found = Array.getArraysByName(xmldoc, name)
    if len(found) == 1:
        return found[0]
    # zero matches and multiple matches are both reported the same way
    raise ValueError("document must contain exactly one %s array" % Array.ArrayName(name))
82


#
# =============================================================================
#
#                               Element Classes
#
# =============================================================================
#


class ArrayStream(ligolw.Stream):
    """
    High-level Stream element for use inside Arrays.

    While a document is being parsed, the delimited character data
    handed to this element is tokenized and stored into the parent
    element's array attribute.  When the document is written, the
    parent's array attribute is serialized back into delimited
    character data.
    """

    Delimiter = ligolw.attributeproxy(u"Delimiter", default = u" ")

    def __init__(self, *args):
        super(ArrayStream, self).__init__(*args)
        # Reading .Encoding raises AttributeError when the attribute is
        # not set on the element;  that is the only supported case.
        try:
            encoding = self.Encoding
        except AttributeError:
            encoding_is_set = False
        else:
            encoding_is_set = True
        if encoding_is_set:
            raise ligolw.ElementError("non-default encoding '%s' not supported. if this is critical, please report." % encoding)
        self._tokenizer = tokenizer.Tokenizer(self.Delimiter)

    def config(self, parentNode):
        """
        Finish the initialization that needs parentNode:  set the
        tokenizer's type from the parent's Type, allocate the parent's
        zero-filled array, and reset the write position.  Returns
        self so the call can be chained onto the constructor.
        """
        self._tokenizer.set_types([ligolwtypes.ToPyType[parentNode.Type]])
        shape = parentNode.get_shape()
        parentNode.array = numpy.zeros(shape, ligolwtypes.ToNumPyType[parentNode.Type])
        # flat iterator over the transpose;  tokens are stored through
        # this view in the order they appear in the stream
        self._array_view = parentNode.array.T.flat
        self._index = 0
        return self

    def appendData(self, content):
        """
        Tokenize content and store the resulting tokens into the
        array, starting at the current write position.
        """
        parsed = tuple(self._tokenizer.append(content))
        start = self._index
        stop = start + len(parsed)
        self._array_view[start:stop] = parsed
        self._index = stop

    def endElement(self):
        """
        Flush the last token and verify that the number of elements
        read equals the size of the array.  Raises ValueError if it
        does not.
        """
        # the tokenizer only recognizes a token once it sees the
        # delimiter that follows it, so feed it one more delimiter to
        # force the final token out.
        self.appendData(self.Delimiter)
        expected = len(self._array_view)
        if self._index != expected:
            raise ValueError("length of Stream (%d elements) does not match array size (%d elements)" % (self._index, expected))
        # parse-time state is no longer needed
        del self._array_view
        del self._index

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Write this element, including the serialized contents of the
        parent's array, to fileobj.  Each output line carries
        array.shape[0] tokens.
        """
        array = self.parentNode.array
        row_length = array.shape[0]
        if array.size:
            row_count = array.size // row_length
        else:
            row_count = 0
        # lazily formatted tokens, consumed one row at a time below
        formatted = map(ligolwtypes.FormatFunc[self.parentNode.Type], array.T.flat)
        glue_tokens = self.Delimiter.join
        emit = fileobj.write

        emit(self.start_tag(indent))
        if row_count:
            line_start = u"\n" + indent + ligolw.Indent
            # every row after the first is preceded by a delimiter that
            # terminates the previous row
            row_break = self.Delimiter + line_start
            separator = line_start
            for _ in range(row_count):
                emit(separator)
                emit(xmlescape(glue_tokens(itertools.islice(formatted, row_length))))
                separator = row_break
        emit(u"\n" + self.end_tag(indent) + u"\n")
158


class Array(ligolw.Array):
    """
    High-level Array element.  The numpy array associated with the
    element is held in the .array attribute, which is None until it
    has been assigned.
    """
    class ArrayName(ligolw.LLWNameAttr):
        # encoded names have the form "<Name>:array"
        dec_pattern = re.compile(r"(?P<Name>[a-zA-Z0-9_:]+):array\Z")
        enc_pattern = u"%s:array"

    Name = ligolw.attributeproxy(u"Name", enc = ArrayName.enc, dec = ArrayName)

    def __init__(self, *args):
        """
        Initialize a new Array element with no array attached.
        """
        super(Array, self).__init__(*args)
        self.array = None

    def get_shape(self):
        """
        Return a tuple of this array's dimensions, obtained by
        querying the Dim children and reversing their order.  Note
        that once it has been created, it is also possible to examine
        an Array object's .array attribute directly, and doing that is
        much faster.
        """
        sizes = [dim.n for dim in self.getElementsByTagName(ligolw.Dim.tagName)]
        sizes.reverse()
        return tuple(sizes)

    @classmethod
    def build(cls, name, array, dim_names = None):
        """
        Construct a LIGO Light Weight XML Array document subtree
        from a numpy array object.  If dim_names is given it must
        have one entry per array dimension, otherwise ValueError is
        raised;  entries that are None leave the corresponding Dim
        unnamed.

        Example:

        >>> import numpy, sys
        >>> a = numpy.arange(12, dtype = "double")
        >>> a.shape = (4, 3)
        >>> Array.build(u"test", a).write(sys.stdout)	# doctest: +NORMALIZE_WHITESPACE
        <Array Type="real_8" Name="test:array">
        	<Dim>3</Dim>
        	<Dim>4</Dim>
        	<Stream Delimiter=" " Type="Local">
        		0 3 6 9
        		1 4 7 10
        		2 5 8 11
        	</Stream>
        </Array>
        """
        # Type has to be supplied to the constructor;  Name is assigned
        # afterwards so that the attribute proxy does the encoding
        type_name = ligolwtypes.FromNumPyType[str(array.dtype)]
        elem = cls(Attributes({u"Type": type_name}))
        elem.Name = name
        shape = array.shape
        if dim_names is None:
            dim_names = [None] * len(shape)
        elif len(dim_names) != len(shape):
            raise ValueError("dim_names must be same length as number of dimensions")
        # Dim children are appended in the reverse of the numpy axis
        # order
        for dim_name, dim_size in list(zip(dim_names, shape))[::-1]:
            dim = elem.appendChild(ligolw.Dim())
            if dim_name is not None:
                dim.Name = dim_name
            dim.n = dim_size
        stream_attrs = Attributes({u"Type": ArrayStream.Type.default, u"Delimiter": ArrayStream.Delimiter.default})
        elem.appendChild(ArrayStream(stream_attrs))
        elem.array = array
        return elem

    @classmethod
    def getArraysByName(cls, elem, name):
        """
        Return a list of arrays with name name under elem.
        """
        wanted = cls.ArrayName(name)

        def is_match(node):
            # same tag as this class and a matching Name
            return (node.tagName == cls.tagName) and (node.Name == wanted)

        return elem.getElements(is_match)

    #
    # Element methods
    #

    # NOTE(review): the source listing's numbering jumps from 237 to
    # 245 at this point, so any methods that belong under this heading
    # are not visible in this view -- confirm against the original
    # module.
245


#
# =============================================================================
#
#                               Content Handler
#
# =============================================================================
#


#
# Override portions of a ligolw.LIGOLWContentHandler class
#


def use_in(ContentHandler):
    """
    Modify ContentHandler, a sub-class of
    glue.ligolw.LIGOLWContentHandler, to cause it to use the Array and
    ArrayStream classes defined in this module when parsing XML
    documents.  The class is modified in place and is also returned.

    Example:

    >>> from glue.ligolw import ligolw
    >>> class MyContentHandler(ligolw.LIGOLWContentHandler):
    ...	pass
    ...
    >>> use_in(MyContentHandler)
    <class 'glue.ligolw.array.MyContentHandler'>
    """
    # the handler's current startStream is captured as a default
    # argument so that it remains reachable after being replaced below
    def startStream(self, parent, attrs, __orig_startStream = ContentHandler.startStream):
        if parent.tagName != ligolw.Array.tagName:
            # not inside an Array:  defer to the previous handler
            return __orig_startStream(self, parent, attrs)
        return ArrayStream(attrs).config(parent)

    def startArray(self, parent, attrs):
        return Array(attrs)

    ContentHandler.startArray = startArray
    ContentHandler.startStream = startStream

    return ContentHandler