Package glue :: Package ligolw :: Package utils :: Module ligolw_sqlite
[hide private]
[frames | no frames]

Source Code for Module glue.ligolw.utils.ligolw_sqlite

  1  # 
  2  # Copyright (C) 2006-2014  Kipp Cannon 
  3  # 
  4  # This program is free software; you can redistribute it and/or modify it 
  5  # under the terms of the GNU General Public License as published by the 
  6  # Free Software Foundation; either version 3 of the License, or (at your 
  7  # option) any later version. 
  8  # 
  9  # This program is distributed in the hope that it will be useful, but 
 10  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 12  # Public License for more details. 
 13  # 
 14  # You should have received a copy of the GNU General Public License along 
 15  # with this program; if not, write to the Free Software Foundation, Inc., 
 16  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 17  # 
 18   
 19   
 20  # 
 21  # ============================================================================= 
 22  # 
 23  #                                   Preamble 
 24  # 
 25  # ============================================================================= 
 26  # 
 27   
 28   
 29  """ 
 30  Convert tabular data in LIGO LW XML files to and from SQL databases. 
 31   
 32  This module provides a library interface to the machinery used by the 
 33  ligolw_sqlite command-line tool, facilitating its re-use in other 
 34  applications.  The real XML<-->database translation machinery is 
 35  implemented in the glue.ligolw.dbtables module.  The code here wraps the 
 36  machinery in that module in functions that are closer to the command-line 
 37  level operations provided by the ligolw_sqlite program. 
 38  """ 
 39   
 40   
 41  import sqlite3 
 42  import sys 
 43   
 44   
 45  from glue import git_version 
 46  from .. import ligolw 
 47  from .. import dbtables 
 48  from .. import utils as ligolw_utils 
 49   
 50   
 51  __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 
 52  __version__ = "git id %s" % git_version.id 
 53  __date__ = git_version.date 
 54   
 55   
 56  # 
 57  # ============================================================================= 
 58  # 
 59  #                                 Library Code 
 60  # 
 61  # ============================================================================= 
 62  # 
 63   
 64   
 65  # 
 66  # Open database 
 67  # 
 68   
 69   
def setup(target, check_same_thread=True):
    """
    Open the SQLite database identified by target and return the
    connection object.

    The connection is obtained from sqlite3.connect(), with
    check_same_thread forwarded unchanged, and is handed to
    dbtables.idmap_sync() before being returned to the caller.
    """
    conn = sqlite3.connect(target, check_same_thread=check_same_thread)
    dbtables.idmap_sync(conn)
    return conn
74 75 76 # 77 # How to insert 78 # 79 80
def update_ids(connection, xmldoc, verbose=False):
    """
    For internal use only.

    Invoke applyKeyMapping() on every Table element found in xmldoc,
    then reset the ID mapping associated with connection by calling
    dbtables.idmap_reset().  If verbose is True, percentage progress
    is written to stderr.
    """
    # NOTE: the caller has to obtain the xmldoc object *before*
    # inserting the rows whose IDs are to be updated.  retrieving the
    # XML resets the "last max row ID" values held by the table
    # objects, so a document retrieved after the insert would leave
    # nothing to update.  that is why connection and xmldoc are
    # separate arguments, even though it looks as if the document
    # could be rebuilt here from the connection alone.
    tables = xmldoc.getElementsByTagName(ligolw.Table.tagName)

    for done, table in enumerate(tables):
        if verbose:
            percent = 100.0 * done / len(tables)
            sys.stderr.write("updating IDs: %d%%\r" % percent)
        table.applyKeyMapping()

    if verbose:
        sys.stderr.write("updating IDs: 100%\n")

    # start the next document with a clean ID mapping
    dbtables.idmap_reset(connection)
103 104
def insert_from_url(url, preserve_ids=False, verbose=False, contenthandler=None):
    """
    Parse the LIGO Light Weight document found at url and insert its
    contents into the database the content handler is connected to.

    With preserve_ids False (the default) row IDs are rewritten while
    inserting so they cannot collide with IDs already present in the
    database.  With preserve_ids True the IDs are left untouched; any
    collision with existing row IDs then violates database
    consistency, so this is generally only sensible when the target
    database is empty.  If verbose is True progress reports are
    printed to stderr.  See glue.ligolw.dbtables.use_in() for how to
    construct a suitable content handler class.
    """
    #
    # select the append implementation:  remapping or verbatim.  the
    # current one is remembered so it can be put back afterwards
    #

    saved_append = dbtables.DBTable.append

    if preserve_ids:
        dbtables.DBTable.append = dbtables.DBTable._append
    else:
        try:
            dbtables.idmap_create(contenthandler.connection)
        except sqlite3.OperationalError:
            # assume the ID map table is already there
            pass
        dbtables.idmap_sync(contenthandler.connection)
        dbtables.DBTable.append = dbtables.DBTable._remapping_append

    try:
        #
        # loading the document is what inserts its contents into the
        # database.  the tree that comes back holds one table object
        # per table in the new document, each with its last_max_rowid
        # initialized before any row went in, so this is the tree
        # update_ids() must be given in order to cover (a) every
        # newly-inserted table and (b) every newly-inserted row.
        # NOTE:  the content handler is assumed, without checking, to
        # build DBTable instances rather than plain Table instances.
        #

        document = ligolw_utils.load_url(
            url,
            verbose=verbose,
            contenthandler=contenthandler
        )

        #
        # fix up row ID references and clean up the ID remapping
        #

        if not preserve_ids:
            update_ids(contenthandler.connection, document, verbose=verbose)

    finally:
        dbtables.DBTable.append = saved_append

    #
    # finished.  commit, then unlink the document so the database
    # cursor objects it retains are deleted
    #

    contenthandler.connection.commit()
    document.unlink()
169 170
def insert_from_xmldoc(connection, source_xmldoc, preserve_ids=False, verbose=False):
    """
    Copy the tables of an in-memory XML document into the database at
    the given connection.

    With preserve_ids False (the default) row IDs are rewritten while
    inserting so they cannot collide with IDs already present in the
    database.  With preserve_ids True the IDs are left untouched; any
    collision with existing row IDs then violates database
    consistency, so this is generally only sensible when the target
    database is empty.  If verbose is True progress reports are
    printed to stderr.
    """
    #
    # select the append implementation:  remapping or verbatim.  the
    # current one is remembered so it can be put back afterwards
    #

    saved_append = dbtables.DBTable.append

    if preserve_ids:
        dbtables.DBTable.append = dbtables.DBTable._append
    else:
        try:
            dbtables.idmap_create(connection)
        except sqlite3.OperationalError:
            # assume the ID map table is already there
            pass
        dbtables.idmap_sync(connection)
        dbtables.DBTable.append = dbtables.DBTable._remapping_append

    try:
        #
        # build a place-holder XML tree standing in for the target
        # document, so that the right tree can be handed to
        # update_ids().  only tables present in the source document
        # need ID remapping, so the tree holds just those tables
        #

        target_doc = ligolw.Document()
        target_doc.appendChild(ligolw.LIGO_LW())

        #
        # walk the source tables, inserting each into the target
        # database
        #

        for src_table in source_xmldoc.getElementsByTagName(ligolw.Table.tagName):
            #
            # pick the table class for this table name, falling back
            # to the generic DBTable, then attach an instance bound
            # to the target database to the place-holder tree
            #

            table_name = src_table.Name
            try:
                table_cls = dbtables.TableByName[table_name]
            except KeyError:
                table_cls = dbtables.DBTable
            db_table = target_doc.childNodes[-1].appendChild(
                table_cls(src_table.attributes, connection=connection)
            )

            #
            # replicate the source table's child elements.  the
            # column list is closed off just before the Stream
            # element is added
            #

            for child in src_table.childNodes:
                if child.tagName == ligolw.Stream.tagName:
                    db_table._end_of_columns()
                db_table.appendChild(type(child)(child.attributes))

            #
            # replicate the source table's rows
            #

            for row in src_table:
                db_table.append(row)
            db_table._end_of_rows()

        #
        # fix up row ID references and clean up the ID remapping
        #

        if not preserve_ids:
            update_ids(connection, target_doc, verbose=verbose)

    finally:
        dbtables.DBTable.append = saved_append

    #
    # finished.  commit, then unlink the document so the database
    # cursor objects it retains are deleted
    #

    connection.commit()
    target_doc.unlink()
265 266
def insert_from_urls(urls, contenthandler, **kwargs):
    """
    Iterate over urls, calling insert_from_url() on each with the
    given content handler, then build the indexes by calling
    dbtables.build_indexes() on the content handler's connection.

    urls may be a sequence or any other iterable of URLs.  The
    remaining keyword arguments are forwarded unchanged to
    insert_from_url();  see that function for their description.  If
    the "verbose" keyword argument is true, an "n/total:" prefix is
    written to stderr before each document is loaded.
    """
    verbose = kwargs.get("verbose", False)

    if verbose:
        # the progress prefix needs the total count.  iterables
        # without a len() (generators, iterators) are materialised
        # so they can be counted;  sized inputs are left alone
        try:
            total = len(urls)
        except TypeError:
            urls = list(urls)
            total = len(urls)

    #
    # load documents
    #

    for n, url in enumerate(urls, 1):
        if verbose:
            sys.stderr.write("%d/%d:" % (n, total))
        insert_from_url(url, contenthandler=contenthandler, **kwargs)

    #
    # done.  build indexes
    #

    dbtables.build_indexes(contenthandler.connection, verbose)
290 291 292 # 293 # How to extract 294 # 295 296
def extract(connection, filename, table_names=None, verbose=False, xsl_file=None):
    """
    Convert the database at the given connection to a tabular LIGO
    Light-Weight XML document and write it to the file named
    filename.

    If table_names is not None, it should be a sequence of strings
    and only the tables in that sequence will be converted.  The
    output is gzip-compressed when filename ends with ".gz";  a
    filename of None (or an empty string) is never compressed.  If
    verbose is True then progress messages will be printed to stderr.
    xsl_file is forwarded unchanged to ligolw_utils.write_filename().

    The temporary XML document is unlinked even if the conversion or
    the write fails, so the database cursors it retains are released
    in every case.
    """
    xmldoc = ligolw.Document()
    try:
        xmldoc.appendChild(dbtables.get_xml(connection, table_names))
        ligolw_utils.write_filename(
            xmldoc,
            filename,
            gz=(filename or "stdout").endswith(".gz"),
            verbose=verbose,
            xsl_file=xsl_file
        )
    finally:
        # delete cursors
        xmldoc.unlink()
312