Package glue :: Package ligolw :: Package utils :: Module ligolw_add
[hide private]
[frames | no frames]

Source Code for Module glue.ligolw.utils.ligolw_add

  1  # Copyright (C) 2006  Kipp Cannon 
  2  # 
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 3 of the License, or (at your 
  6  # option) any later version. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16   
 17   
 18  # 
 19  # ============================================================================= 
 20  # 
 21  #                                   Preamble 
 22  # 
 23  # ============================================================================= 
 24  # 
 25   
 26   
 27  """ 
 28  Add (merge) LIGO LW XML files containing LSC tables. 
 29  """ 
 30   
 31   
 32  import os 
 33  import sys 
 34  from six.moves import urllib 
 35   
 36   
 37  from glue import git_version 
 38  from .. import ligolw 
 39  from .. import table 
 40  from .. import lsctables 
 41  from .. import utils 
 42   
 43   
 44  __author__ = "Kipp Cannon <kipp.cannon@ligo.org>" 
 45  __version__ = "git id %s" % git_version.id 
 46  __date__ = git_version.date 
 47   
 48   
 49  # 
 50  # ============================================================================= 
 51  # 
 52  #                                    Input 
 53  # 
 54  # ============================================================================= 
 55  # 
 56   
 57   
def url2path(url):
    """
    Convert a URL that names a file on the local host into a path.

    A URL counts as local when its scheme is empty or "file" and its
    network location is empty or "localhost";  both checks ignore case.
    The path component of the parsed URL is returned unchanged.  For
    any other URL a ValueError carrying the URL is raised.
    """
    parsed = urllib.parse.urlparse(url)
    # anything other than a bare path or a file:// URL is not local
    if parsed.scheme.lower() not in ("", "file"):
        raise ValueError(url)
    # a file:// URL may still name some other machine
    if parsed.netloc.lower() not in ("", "localhost"):
        raise ValueError(url)
    return parsed.path
67 68
def remove_input(urls, preserves, verbose = False):
    """
    Attempt to delete all files identified by the URLs in urls except
    any that are the same as the files in the preserves list.

    urls is an iterable of URLs or filenames;  each is converted to a
    path with url2path(), so a URL that does not name a local file
    raises ValueError.  All URLs are converted before anything is
    deleted, so that a bad URL cannot leave the inputs half removed.

    preserves is an iterable of paths that must not be deleted.  Paths
    are compared with os.path.samefile(), which raises OSError if
    either file cannot be stat()ed.

    If verbose is true, a message is written to stderr for each file
    before it is removed.  Deletion is best-effort:  an OSError from
    os.remove() is ignored.  Returns None.
    """
    # resolve everything up front:  on Python 3 map() is lazy, which
    # would let a non-local URL raise only after earlier files had
    # already been deleted
    paths = [url2path(url) for url in urls]
    for path in paths:
        if any(os.path.samefile(path, preserve) for preserve in preserves):
            continue
        if verbose:
            sys.stderr.write("removing \"%s\" ...\n" % path)
        try:
            os.remove(path)
        except OSError:
            # failure to delete an input file is deliberately not
            # fatal, but only filesystem errors are ignored;
            # KeyboardInterrupt and friends still propagate
            pass


#
# =============================================================================
#
#                                Document Merge
#
# =============================================================================
#


def reassign_ids(doc, verbose = False):
    """
    Give the rows in each top-level LIGO_LW element of doc new IDs so
    that the elements can later be merged without ID collisions.

    table.reassign_ids() is applied separately to every immediate child
    of doc whose tag name is that of a LIGO_LW element;  other children
    are skipped.  If verbose is true, progress is written to stderr.
    Returns doc.
    """
    # table.reassign_ids() is run once per LIGO_LW block, not once on
    # the whole document, because each block needs its own fresh
    # old --> new mapping.
    for index, child in enumerate(doc.childNodes):
        if verbose:
            percent = 100.0 * (index + 1) / len(doc.childNodes)
            sys.stderr.write("reassigning row IDs: %.1f%%\r" % percent)
        if child.tagName != ligolw.LIGO_LW.tagName:
            continue
        table.reassign_ids(child)
    if verbose:
        sys.stderr.write("reassigning row IDs: 100.0%\n")
    return doc
109 110
def merge_ligolws(elem):
    """
    Merge all LIGO_LW elements that are immediate children of elem by
    appending their children to the first.

    Every LIGO_LW child after the first has its children appended to
    the first one and is then removed from its parent.  If elem has no
    LIGO_LW children nothing is changed.  Returns elem.
    """
    ligolws = [child for child in elem.childNodes if child.tagName == ligolw.LIGO_LW.tagName]
    if not ligolws:
        return elem
    dest = ligolws[0]
    for src in ligolws[1:]:
        # copy children;  LIGO_LW elements have no attributes.  the
        # loop variable must not be called "elem":  that would rebind
        # the parameter and make the function return the last child
        # moved instead of the element it was given.  iterate over a
        # snapshot in case appendChild() alters src.childNodes
        # (NOTE(review): not visible from here -- confirm).
        for child in list(src.childNodes):
            dest.appendChild(child)
        # unlink from parent
        if src.parentNode is not None:
            src.parentNode.removeChild(src)
    return elem
127 128
def compare_table_cols(a, b):
    """
    Return False if the two tables a and b have the same columns
    (ignoring order) according to LIGO LW name conventions, return True
    otherwise.

    The columns of each table are taken from its Column child elements
    and compared as sorted lists of (Name, Type) pairs.
    """
    def column_signature(tbl):
        # order-independent description of a table's columns
        return sorted((col.Name, col.Type) for col in tbl.getElementsByTagName(ligolw.Column.tagName))

    # cmp() does not exist in Python 3;  only "differ or not" is part
    # of the documented contract, so a plain inequality test is enough
    return column_signature(a) != column_signature(b)
136 137
def merge_compatible_tables(elem):
    """
    Combine like tables found below elem.

    For each table name listed in lsctables.TableByName, the tables of
    that name below elem are collected.  The first one found becomes
    the destination;  every later table with the same Name has its rows
    appended to the destination and is then removed from its parent.
    E.g., all SnglBurstTables with the same columns end up as a single
    table.

    Raises ValueError if two tables with the same Name have different
    columns.  Returns elem.
    """
    for table_name in lsctables.TableByName.keys():
        candidates = table.Table.getTablesByName(elem, table_name)
        if not candidates:
            continue
        target = candidates.pop(0)
        for other in candidates:
            if other.Name != target.Name:
                # differently named tables are left alone
                continue
            # same name, so the columns must agree as well
            if compare_table_cols(target, other):
                raise ValueError("document contains %s tables with incompatible columns" % target.Name)
            # same name and same columns:  move the rows across
            for row in other:
                target.append(row)
            # the emptied table is no longer wanted in the document
            if other.parentNode is not None:
                other.parentNode.removeChild(other)
    return elem


#
# =============================================================================
#
#                                 Library API
#
# =============================================================================
#


class DefaultContentHandler(ligolw.LIGOLWContentHandler):
    """
    Content handler that ligolw_add() falls back on when the caller
    does not supply one.  It is passed to lsctables.use_in() right
    after being defined.  Not intended for use outside this module.
    """


lsctables.use_in(DefaultContentHandler)
def ligolw_add(xmldoc, urls, non_lsc_tables_ok = False, verbose = False, contenthandler = DefaultContentHandler):
    """
    The LIGO LW add algorithm:  load a collection of documents into one
    XML tree and merge their contents.

    xmldoc is the XML document tree that receives the loaded content.
    urls is a list of URLs (or filenames) to load;  each is loaded into
    xmldoc with utils.load_url() using the given contenthandler.

    After loading, row IDs are reassigned, the top-level LIGO_LW
    elements are merged, and compatible tables are merged.  Unless
    non_lsc_tables_ok is true, ValueError is raised if
    lsctables.HasNonLSCTables() reports non-LSC tables in the loaded
    document.  If verbose is true, progress is written to stderr.

    Returns xmldoc.
    """
    # Input
    for position, url in enumerate(urls, 1):
        if verbose:
            sys.stderr.write("%d/%d:" % (position, len(urls)))
        utils.load_url(url, verbose = verbose, xmldoc = xmldoc, contenthandler = contenthandler)

    # ID reassignment
    if not non_lsc_tables_ok:
        if lsctables.HasNonLSCTables(xmldoc):
            raise ValueError("non-LSC tables found. Use --non-lsc-tables-ok to force")
    reassign_ids(xmldoc, verbose = verbose)

    # Document merge
    if verbose:
        sys.stderr.write("merging elements ...\n")
    merge_ligolws(xmldoc)
    merge_compatible_tables(xmldoc)

    return xmldoc
209