glue.ligolw.array

1 # Copyright (C) 2006--2016 Kipp Cannon 2 # 3 # This program is free software; you can redistribute it and/or modify it 4 # under the terms of the GNU General Public License as published by the 5 # Free Software Foundation; either version 3 of the License, or (at your 6 # option) any later version. 7 # 8 # This program is distributed in the hope that it will be useful, but 9 # WITHOUT ANY WARRANTY; without even the implied warranty of 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 11 # Public License for more details. 12 # 13 # You should have received a copy of the GNU General Public License along 14 # with this program; if not, write to the Free Software Foundation, Inc., 15 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 17 18 # 19 # ============================================================================= 20 # 21 # Preamble 22 # 23 # ============================================================================= 24 # 25 26 27 """ 28 While the ligolw module provides classes and parser support for reading and 29 writing LIGO Light Weight XML documents, this module supplements that code 30 with classes and parsers that add intelligence to the in-RAM document 31 representation. 32 33 In particular, the document tree associated with an Array element is 34 enhanced. During parsing, the Stream element in this module converts the 35 character data contained within it into the elements of a numpy array 36 object. The array has the appropriate dimensions and type. When the 37 document is written out again, the Stream element serializes the array back 38 into character data. 39 40 The array is stored as an attribute of the Array element. 41 """ 42 43 44 import itertools 45 import numpy 46 import re 47 import sys 48 from xml.sax.saxutils import escape as xmlescape 49 from xml.sax.xmlreader import AttributesImpl as Attributes 50 51 52 from glue import git_version 53 from . import ligolw 54 from . import tokenizer 55 from . 
import types as ligolwtypes 56 from six.moves import map, range 57 58 59 __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 60 __version__ = "git id %s" % git_version.id 61 __date__ = git_version.date

#
# =============================================================================
#
#                                  Utilities
#
# =============================================================================
#


def get_array(xmldoc, name):
    """
    Return the single Array element called name found in xmldoc.

    The search is delegated to Array.getArraysByName(). A ValueError
    is raised unless exactly one matching array is found.
    """
    matches = Array.getArraysByName(xmldoc, name)
    if len(matches) == 1:
        return matches[0]
    raise ValueError("document must contain exactly one %s array" % Array.ArrayName(name))

82

#
# =============================================================================
#
#                               Element Classes
#
# =============================================================================
#


class ArrayStream(ligolw.Stream):
    """
    High-level Stream element for use inside Arrays. This element
    knows how to parse the delimited character stream into the parent's
    array attribute, and knows how to turn the parent's array attribute
    back into a character stream.
    """

    Delimiter = ligolw.attributeproxy(u"Delimiter", default = u" ")

    def __init__(self, *args):
        """
        Initialize the element and create the tokenizer used to split
        the character data on this Stream's Delimiter.

        Raises ligolw.ElementError if the element carries an Encoding
        attribute; only the default encoding is supported.
        """
        super(ArrayStream, self).__init__(*args)
        try:
            self.Encoding
        except AttributeError:
            # no Encoding attribute set: this is the supported case
            pass
        else:
            raise ligolw.ElementError("non-default encoding '%s' not supported. if this is critical, please report." % self.Encoding)
        self._tokenizer = tokenizer.Tokenizer(self.Delimiter)

    def config(self, parentNode):
        """
        Finish the initialization that can only be done once the
        parent Array element is known.

        The tokenizer is told which Python type to convert tokens to,
        a zero-filled numpy array of the shape reported by
        parentNode.get_shape() is created and stored as
        parentNode.array, and a flat view of its transpose is kept so
        that appendData() can fill it sequentially. Returns self so
        the call can be chained onto the constructor.
        """
        self._tokenizer.set_types([ligolwtypes.ToPyType[parentNode.Type]])
        parentNode.array = numpy.zeros(parentNode.get_shape(), ligolwtypes.ToNumPyType[parentNode.Type])
        self._array_view = parentNode.array.T.flat
        # position in the flat view at which the next token is stored
        self._index = 0
        return self

    def appendData(self, content):
        """
        Tokenize a chunk of character data and copy the resulting
        values into the next free positions of the parent's array.
        """
        tokens = tuple(self._tokenizer.append(content))
        next_index = self._index + len(tokens)
        self._array_view[self._index : next_index] = tokens
        self._index = next_index

    def endElement(self):
        """
        Flush the tokenizer and check that the whole array was filled.

        Raises ValueError if the number of elements parsed from the
        Stream differs from the size of the array.
        """
        # stream tokenizer uses delimiter to identify end of each
        # token, so add a final delimiter to induce the last token
        # to get parsed.
        self.appendData(self.Delimiter)
        if self._index != len(self._array_view):
            raise ValueError("length of Stream (%d elements) does not match array size (%d elements)" % (self._index, len(self._array_view)))
        # parsing state is no longer needed once the array is complete
        del self._array_view
        del self._index

    def write(self, fileobj = sys.stdout, indent = u""):
        """
        Serialize the parent's array to fileobj as delimited, XML-escaped
        text between this element's start and end tags.

        Values are emitted in the order of the flat iterator of the
        transposed array, with array.shape[0] tokens on each line. A
        zero-dimensional array is written as a single line holding one
        token. An array with no elements produces no content lines.
        The parent's array attribute must be set.
        """
        array = self.parentNode.array
        # a zero-dimensional array has an empty shape tuple, so there is
        # no shape[0] to read; its single value goes on one line
        linelen = array.shape[0] if array.shape else 1
        lines = array.size // linelen if array.size else 0
        # iter() guarantees a one-shot iterator so that successive
        # islice() calls below continue where the previous one stopped
        tokens = iter(map(ligolwtypes.FormatFunc[self.parentNode.Type], array.T.flat))
        # avoid symbol and attribute look-ups in inner loop
        islice = itertools.islice
        join = self.Delimiter.join
        w = fileobj.write

        w(self.start_tag(indent))
        if lines:
            newline = u"\n" + indent + ligolw.Indent
            w(newline)
            w(xmlescape(join(islice(tokens, linelen))))
            # every line but the last ends with a delimiter so the
            # tokenizer sees a separator between lines when parsing
            newline = self.Delimiter + newline
            for i in range(lines - 1):
                w(newline)
                w(xmlescape(join(islice(tokens, linelen))))
        w(u"\n" + self.end_tag(indent) + u"\n")

158

class Array(ligolw.Array):
    """
    High-level Array element. The numerical content is kept in the
    .array attribute, which is None until it has been populated.
    """
    class ArrayName(ligolw.LLWNameAttr):
        dec_pattern = re.compile(r"(?P<Name>[a-zA-Z0-9_:]+):array\Z")
        enc_pattern = u"%s:array"

    Name = ligolw.attributeproxy(u"Name", enc = ArrayName.enc, dec = ArrayName)

    def __init__(self, *args):
        """
        Create a new Array element with no array data attached yet.
        """
        super(Array, self).__init__(*args)
        self.array = None

    def get_shape(self):
        """
        Return this array's dimensions as a tuple, obtained from the
        Dim children taken in reverse document order. Once the
        .array attribute exists, reading its shape directly is much
        faster than calling this method.
        """
        sizes = [dim.n for dim in self.getElementsByTagName(ligolw.Dim.tagName)]
        sizes.reverse()
        return tuple(sizes)

    @classmethod
    def build(cls, name, array, dim_names = None):
        """
        Create the LIGO Light Weight XML document subtree for an
        Array element wrapping the numpy array object array.

        dim_names, if given, supplies one name (or None) for each
        dimension of array; a ValueError is raised if its length
        differs from the number of dimensions.

        Example:

        >>> import numpy, sys
        >>> a = numpy.arange(12, dtype = "double")
        >>> a.shape = (4, 3)
        >>> Array.build(u"test", a).write(sys.stdout) # doctest: +NORMALIZE_WHITESPACE
        <Array Type="real_8" Name="test:array">
            <Dim>3</Dim>
            <Dim>4</Dim>
            <Stream Delimiter=" " Type="Local">
                0 3 6 9
                1 4 7 10
                2 5 8 11
            </Stream>
        </Array>
        """
        # Type has to be supplied to the constructor, whereas Name is
        # assigned afterwards so that the attribute proxy performs the
        # encoding.
        llw_type = ligolwtypes.FromNumPyType[str(array.dtype)]
        elem = cls(Attributes({u"Type": llw_type}))
        elem.Name = name
        shape = array.shape
        if dim_names is None:
            dim_names = [None] * len(shape)
        elif len(dim_names) != len(shape):
            raise ValueError("dim_names must be same length as number of dimensions")
        # Dim children are appended last dimension first
        for dim_name, dim_len in list(zip(dim_names, shape))[::-1]:
            dim = elem.appendChild(ligolw.Dim())
            if dim_name is not None:
                dim.Name = dim_name
            dim.n = dim_len
        stream_attrs = Attributes({u"Type": ArrayStream.Type.default, u"Delimiter": ArrayStream.Delimiter.default})
        elem.appendChild(ArrayStream(stream_attrs))
        elem.array = array
        return elem

    @classmethod
    def getArraysByName(cls, elem, name):
        """
        Return a list of the arrays below elem whose name is name.
        """
        wanted = cls.ArrayName(name)

        def is_wanted_array(e):
            return (e.tagName == cls.tagName) and (e.Name == wanted)

        return elem.getElements(is_wanted_array)

    #
    # Element methods
    #

    def unlink(self):
        """
        Break the internal references held by the document tree
        rooted on this element, including the reference to the array
        data, to promote garbage collection.
        """
        super(Array, self).unlink()
        self.array = None

245

#
# =============================================================================
#
#                               Content Handler
#
# =============================================================================
#


#
# Override portions of a ligolw.LIGOLWContentHandler class
#


def use_in(ContentHandler):
    """
    Patch ContentHandler, a sub-class of
    glue.ligolw.LIGOLWContentHandler, so that parsing an XML document
    with it creates the Array and ArrayStream classes defined in this
    module. The class is modified in place and also returned.

    Example:

    >>> from glue.ligolw import ligolw
    >>> class MyContentHandler(ligolw.LIGOLWContentHandler):
    ...     pass
    ...
    >>> use_in(MyContentHandler)
    <class 'glue.ligolw.array.MyContentHandler'>
    """
    # remember the handler's existing method so that Streams that are
    # not children of an Array are still processed the way they were
    original_start_stream = ContentHandler.startStream

    def startStream(self, parent, attrs):
        if parent.tagName != ligolw.Array.tagName:
            return original_start_stream(self, parent, attrs)
        return ArrayStream(attrs).config(parent)

    def startArray(self, parent, attrs):
        return Array(attrs)

    ContentHandler.startStream = startStream
    ContentHandler.startArray = startArray

    return ContentHandler

Source Code for Module glue.ligolw.array