glue.markup

# This code is in the public domain, it comes
# with absolutely no warranty and you can do
# absolutely whatever you want with it.

import six
from six.moves import map
from six.moves import range
from six.moves import zip
__date__ = '1 October 2012'
__version__ = '1.9'
__doc__= """
This is markup.py - a Python module that attempts to
make it easier to generate HTML/XML from a Python program
in an intuitive, lightweight, customizable and pythonic way.

The code is in the public domain.

Version: %s as of %s.

Documentation and further info is at http://markup.sourceforge.net/

Please send bug reports, feature requests, enhancement
ideas or questions to nogradi at gmail dot com.

Installation: drop markup.py somewhere into your Python path.
""" % ( __version__, __date__ )

# Fallback kept from the original code in case six lacks string_types.
if not hasattr( six, 'string_types' ):
    six.string_types = str

try: # python < 3
    import string
    string.lower
except ( ImportError, AttributeError ): # python >= 3
    # On Python 3 'import string' still succeeds but the module no longer
    # has lower( ) and friends, so the attribute probe above is what
    # detects it.  The str type offers the same callables ( str.lower, ... ).
    string = str

try: # python < 3
    long
except NameError: # python >= 3
    long = int

# tags which are reserved python keywords will be referred
# to by a leading underscore otherwise we end up with a syntax error
import keyword

class element:
    """This class handles the addition of a new element.

    An element is either bound to a parent document (an object exposing
    ``content``, ``onetags``, ``twotags``, ``mode``, ``class_`` and, in
    'strict_html' mode, ``deptags``) or stand-alone (``parent=None``), in
    which case calling it returns the markup as a string.
    """

    def __init__(self, tag, case='lower', parent=None):
        """Store the parent and the tag name converted according to *case*.

        tag -- element name
        case -- 'upper' or 'lower' convert the name, 'given' (and any
                other value) keeps it unchanged
        parent -- the owning document or None for stand-alone use
        """
        self.parent = parent

        if case == 'upper':
            self.tag = tag.upper()
        elif case == 'lower':
            self.tag = tag.lower()
        else:
            # 'given' and every unrecognised value leave the name untouched
            self.tag = tag

    def __call__(self, *args, **kwargs):
        """Render the element, once per item of the padded arguments.

        At most one positional argument (the text between the tags) is
        accepted; keyword arguments become attributes.

        Returns the newline-joined markup when there is no parent,
        otherwise appends to the parent's content and returns None.

        Raises ArgumentError for more than one positional argument,
        ClosingError when an opening-tag-only element is given text,
        DeprecationError for deprecated elements in 'strict_html' mode and
        InvalidElementError for elements unknown to the parent.
        """
        if len(args) > 1:
            raise ArgumentError(self.tag)

        parent = self.parent

        if parent is None:
            # stand-alone use: without text the element is self-closing
            single = len(args) == 0
            rendered = [self.render(self.tag, single, myarg, mydict)
                        for myarg, mydict in _argsdicts(args, kwargs)]
            return '\n'.join(rendered)

        # if class_ was defined in parent it should be added to every element
        if parent.class_ is not None:
            kwargs.setdefault('class_', parent.class_)

        if self.tag in parent.twotags:
            for myarg, mydict in _argsdicts(args, kwargs):
                self.render(self.tag, False, myarg, mydict)
        elif self.tag in parent.onetags:
            if args:
                raise ClosingError(self.tag)
            for myarg, mydict in _argsdicts(args, kwargs):
                # myarg is always None here because no text was given
                self.render(self.tag, True, myarg, mydict)
        elif parent.mode == 'strict_html' and self.tag in parent.deptags:
            raise DeprecationError(self.tag)
        else:
            raise InvalidElementError(self.tag, parent.mode)

    def render(self, tag, single, between, kwargs):
        """Append the actual tags to content.

        tag -- element name to print
        single -- True to emit a self-closing tag when *between* is None
        between -- text placed between opening and closing tag, or None
        kwargs -- attribute dictionary; a value of None produces a
                  value-less attribute such as <... checked>

        Returns the markup when there is no parent, otherwise appends it to
        the parent's content and returns None.
        """
        out = "<%s" % tag
        for key, value in kwargs.items():
            # Strip underscores so class_ will mean class, etc.  This is
            # done for value-less attributes too, so both forms print the
            # same attribute name.
            key = key.strip('_')
            if key == 'http_equiv':
                key = 'http-equiv'
            elif key == 'accept_charset':
                key = 'accept-charset'
            if value is not None:
                out = "%s %s=\"%s\"" % (out, key, escape(value))
            else:
                out = "%s %s" % (out, key)

        if between is not None:
            out = "%s>%s</%s>" % (out, between, tag)
        elif single:
            out = "%s />" % out
        else:
            out = "%s>" % out

        if self.parent is not None:
            self.parent.content.append(out)
        else:
            return out

    def close(self):
        """Append a closing tag unless element has only opening tag.

        Raises ClosingError for opening-tag-only elements and
        DeprecationError for deprecated elements in 'strict_html' mode.
        Requires a parent.
        """
        parent = self.parent
        if self.tag in parent.twotags:
            parent.content.append("</%s>" % self.tag)
        elif self.tag in parent.onetags:
            raise ClosingError(self.tag)
        elif parent.mode == 'strict_html' and self.tag in parent.deptags:
            raise DeprecationError(self.tag)

    def open(self, **kwargs):
        """Append an opening tag.

        Raises DeprecationError for deprecated elements in 'strict_html'
        mode.  Requires a parent.
        """
        parent = self.parent
        if self.tag in parent.twotags or self.tag in parent.onetags:
            self.render(self.tag, False, None, kwargs)
        # the mode lives on the parent document, the element itself has none
        elif parent.mode == 'strict_html' and self.tag in parent.deptags:
            raise DeprecationError(self.tag)

133

class page:
    """This is our main class representing a document. Elements are added
    as attributes of an instance of this class."""

    def __init__(self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None):
        """Stuff that affects the whole document.

        mode -- 'strict_html'   for HTML 4.01 (default)
                'html'          alias for 'strict_html'
                'loose_html'    to allow some deprecated elements
                'xml'           to allow arbitrary elements

        case -- 'lower'         element names will be printed in lower case (default)
                'upper'         they will be printed in upper case
                'given'         element names will be printed as they are given

        onetags --              list or tuple of valid elements with opening tags only
        twotags --              list or tuple of valid elements with both opening and closing tags
                                these two keyword arguments may be used to select
                                the set of valid elements in 'xml' mode
                                invalid elements will raise appropriate exceptions

        separator --            string to place between added elements, defaults to newline

        class_ --               a class that will be added to every element if defined

        Raises CustomizationError when only one of onetags / twotags is
        given in 'xml' mode and ModeError for an unknown mode."""

        valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ]
        valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
                "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
                "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
                "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
                "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
                "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
                "TT", "UL", "VAR" ]
        deprecated_onetags = [ "BASEFONT", "ISINDEX" ]
        deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ]

        def both_cases(tags):
            # Upper case names followed by their lower case forms.  Uses the
            # str method instead of string.lower, which Python 3 removed.
            return list(tags) + [tag.lower() for tag in tags]

        self.header = []
        self.content = []
        self.footer = []
        self.case = case
        self.separator = separator

        # init( ) sets it to True so we know that </body></html> has to be printed at the end
        self._full = False
        self.class_ = class_

        if mode == 'strict_html' or mode == 'html':
            self.onetags = both_cases(valid_onetags)
            self.twotags = both_cases(valid_twotags)
            self.deptags = both_cases(deprecated_onetags + deprecated_twotags)
            self.mode = 'strict_html'
        elif mode == 'loose_html':
            self.onetags = both_cases(valid_onetags + deprecated_onetags)
            self.twotags = both_cases(valid_twotags + deprecated_twotags)
            self.mode = mode
        elif mode == 'xml':
            if onetags and twotags:
                self.onetags = onetags
                self.twotags = twotags
            elif onetags or twotags:
                # exactly one of the two was customized
                raise CustomizationError()
            else:
                # no customization: every element name is accepted
                self.onetags = russell()
                self.twotags = russell()
            self.mode = mode
        else:
            raise ModeError(mode)

    def __getattr__(self, attr):
        """Turn an unknown attribute into an element bound to this page.

        Raises AttributeError for __dunder__ names and for names with a
        leading underscore that are not reserved python keywords."""

        # names that start and end with double underscore are never tags
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        # tag with single underscore should be a reserved keyword
        if attr.startswith('_'):
            attr = attr.lstrip('_')
            if attr not in keyword.kwlist:
                raise AttributeError(attr)

        return element(attr, case=self.case, parent=self)

    def __str__(self):
        """Join header, content and footer with the separator; documents
        started with init( ) in an html mode also get </body></html>."""

        if self._full and (self.mode == 'strict_html' or self.mode == 'loose_html'):
            end = ['</body>', '</html>']
        else:
            end = []

        return self.separator.join(self.header + self.content + self.footer + end)

    def __call__(self, escape=False):
        """Return the document as a string.

        escape --   False   print normally
                    True    replace < and > by &lt; and &gt;
                            the default escape sequences in most browsers"""

        if escape:
            return _escape(self.__str__())
        else:
            return self.__str__()

    def add(self, text):
        """This is an alias to addcontent."""
        self.addcontent(text)

    def addfooter(self, text):
        """Add some text to the bottom of the document"""
        self.footer.append(text)

    def addheader(self, text):
        """Add some text to the top of the document"""
        self.header.append(text)

    def addcontent(self, text):
        """Add some text to the main part of the document"""
        self.content.append(text)

    def init(self, lang='en', css=None, metainfo=None, title=None, header=None,
             footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None, base=None):
        """This method is used for complete documents with appropriate
        doctype, encoding, title, etc information. For an HTML/XML snippet
        omit this method.

        lang --     language, usually a two character string, will appear
                    as <html lang='en'> in html mode (ignored in xml mode)

        css --      Cascading Style Sheet filename as a string or a list of
                    strings for multiple css files (ignored in xml mode)

        metainfo -- a dictionary in the form { 'name':'content' } to be inserted
                    into meta element(s) as <meta name='name' content='content'>
                    (ignored in xml mode)

        base     -- set the <base href="..."> tag in <head>

        bodyattrs -- a dictionary in the form { 'key':'value', ... } which will be added
                    as attributes of the <body> element as <body key='value' ... >
                    (ignored in xml mode)

        script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
                    or a list of [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for all

        title --    the title of the document as a string to be inserted into
                    a title element as <title>my title</title> (ignored in xml mode)

        header --   some text to be inserted right after the <body> element
                    (ignored in xml mode)

        footer --   some text to be inserted right before the </body> element
                    (ignored in xml mode)

        charset --  a string defining the character set, will be inserted into a
                    <meta http-equiv='Content-Type' content='text/html; charset=myset'>
                    element (ignored in xml mode)

        encoding -- a string defining the encoding, will be put into the first line of
                    the document as <?xml version='1.0' encoding='myencoding' ?> in
                    xml mode (ignored in html mode)

        doctype --  the document type string, defaults to
                    <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
                    in html mode (ignored in xml mode)"""

        self._full = True

        if self.mode == 'strict_html' or self.mode == 'loose_html':
            if doctype is None:
                doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
            self.header.append(doctype)
            self.html(lang=lang)
            self.head()
            if charset is not None:
                self.meta(http_equiv='Content-Type', content="text/html; charset=%s" % charset)
            if metainfo is not None:
                self.metainfo(metainfo)
            if css is not None:
                self.css(css)
            if title is not None:
                self.title(title)
            if script is not None:
                self.scripts(script)
            if base is not None:
                self.base(href='%s' % base)
            self.head.close()
            if bodyattrs is not None:
                self.body(**bodyattrs)
            else:
                self.body()
            if header is not None:
                self.content.append(header)
            if footer is not None:
                self.footer.append(footer)

        elif self.mode == 'xml':
            if doctype is None:
                if encoding is not None:
                    doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
                else:
                    doctype = "<?xml version='1.0' ?>"
            self.header.append(doctype)

    def css(self, filelist):
        """This convenience function is only useful for html.
        It adds css stylesheet(s) to the document via the <link> element.

        filelist -- a single filename or an iterable of filenames"""

        if isinstance(filelist, six.string_types):
            filelist = [filelist]
        for href in filelist:
            self.link(href=href, rel='stylesheet', type='text/css', media='all')

    def metainfo(self, mydict):
        """This convenience function is only useful for html.
        It adds meta information via the <meta> element, the argument is
        a dictionary of the form { 'name':'content' }.

        Raises TypeError when the argument is not a dictionary."""

        if not isinstance(mydict, dict):
            raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.")
        for name, content in mydict.items():
            self.meta(name=name, content=content)

    def scripts(self, mydict):
        """Only useful in html, mydict is dictionary of src:type pairs or a list
        of script sources [ 'src1', 'src2', ... ] in which case 'javascript' is assumed for type.
        Will be rendered as <script type='text/type' src=src></script>

        Raises TypeError when the argument is neither a dictionary nor
        iterable; errors raised while adding a script element propagate
        unchanged."""

        if isinstance(mydict, dict):
            for src, kind in mydict.items():
                self.script('', src=src, type='text/%s' % kind)
        else:
            # Only the iterability check is guarded, so unrelated failures
            # (or KeyboardInterrupt) are not disguised as a TypeError.
            try:
                sources = iter(mydict)
            except TypeError:
                raise TypeError("Script should be given a dictionary of src:type pairs or a list of javascript src's.")
            for src in sources:
                self.script('', src=src, type='text/javascript')

378 379

class _oneliner:
    """Factory for stand-alone elements.

    Attribute access on an instance yields an element without a parent,
    so calling it returns the markup string immediately and no page has
    to be instantiated."""

    def __init__(self, case='lower'):
        # forwarded to every element created through this instance
        self.case = case

    def __getattr__(self, attr):
        """Return a parentless element named after the attribute.

        Raises AttributeError for __dunder__ names and for underscore
        prefixed names that are not reserved python keywords."""

        if attr[:2] == "__" and attr[-2:] == "__":
            raise AttributeError(attr)

        name = attr.lstrip('_')
        # a leading underscore is only allowed to escape a python keyword
        if name != attr and name not in keyword.kwlist:
            raise AttributeError(name)

        return element(name, case=self.case, parent=None)


oneliner = _oneliner(case='lower')
upper_oneliner = _oneliner(case='upper')
given_oneliner = _oneliner(case='given')

def _argsdicts(args, mydict):
    """Generate ( argument, attribute dict ) pairs, one per rendered element.

    args is a tuple holding zero or one positional argument and mydict the
    keyword arguments.  Every value is converted with _totuple and values
    shorter than the longest one are padded by repeating their last item.
    Raises Exception when more than one positional argument is supplied."""

    count = len(args)
    if count > 1:
        raise Exception("We should have never gotten here.")
    positional = _totuple(args[0]) if count == 1 else (None,)

    padded = [(key, _totuple(value)) for key, value in mydict.items()]
    longest = max([len(positional)] + [len(values) for _, values in padded])

    for index in range(longest):
        attributes = {}
        for key, values in padded:
            # past the end of a value its last item is repeated
            attributes[key] = values[index] if index < len(values) else values[-1]
        argument = positional[index] if index < len(positional) else positional[-1]

        yield argument, attributes

433

def _totuple(x):
    """Wrap or convert the argument so that it can be indexed as a tuple.

    Strings and None become a one element tuple, numbers ( int, long,
    float ) a one element tuple holding their string form, and anything
    else is handed to tuple( )."""

    if x is None or isinstance(x, six.string_types):
        return (x,)
    if isinstance(x, (int, long, float)):
        return (str(x),)
    return tuple(x)

447

def escape(text, newline=False):
    """Escape special html characters.

    text -- the value to escape; anything that is not a string is
            returned unchanged
    newline -- when True every newline is replaced by <br>

    Returns the text with &, >, <, double and single quotes replaced by
    character references."""

    if isinstance(text, six.string_types):
        # '&' has to be first, otherwise the ampersands introduced by the
        # replacements below would be escaped a second time
        text = text.replace('&', '&amp;')
        text = text.replace('>', '&gt;')
        text = text.replace('<', '&lt;')
        text = text.replace('"', '&quot;')
        # numeric reference: keeps the apostrophe an apostrophe instead of
        # turning it into a double quote
        text = text.replace("'", '&#39;')
        if newline:
            text = text.replace('\n', '<br>')

    return text


_escape = escape

def unescape(text):
    """Inverse of escape.

    Replaces the character references &gt;, &lt;, &quot;, &#39; and &amp;
    by the characters they stand for; anything that is not a string is
    returned unchanged."""

    if isinstance(text, six.string_types):
        text = text.replace('&gt;', '>')
        text = text.replace('&lt;', '<')
        text = text.replace('&quot;', '"')
        text = text.replace('&#39;', "'")
        # '&amp;' has to be last, otherwise an escaped reference such as
        # '&amp;lt;' would be decoded twice and end up as '<'
        text = text.replace('&amp;', '&')

    return text

484

class dummy:
    """An empty class whose instances serve as attribute containers."""


# the HTML 4.01 document type declarations, reachable as doctype.<name>
doctype = dummy()
setattr(doctype, 'frameset',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">')
setattr(doctype, 'strict',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">')
setattr(doctype, 'loose',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">')

class russell:
    """A container stand-in that claims to hold every item.

    page uses instances of it as onetags / twotags in 'xml' mode when no
    element lists are supplied."""

    def __contains__(self, item):
        # membership tests succeed regardless of the item
        return True

499 500

class MarkupError(Exception):
    """All our exceptions subclass this.

    Subclasses store their text in the ``message`` attribute, which is
    what str( ) returns."""

    def __str__(self):
        try:
            return self.message
        except AttributeError:
            # no message attribute was set (e.g. the base class was raised
            # directly): fall back to the standard exception text
            return Exception.__str__(self)

505

class ClosingError(MarkupError):
    """An element without a closing tag was given text or asked to close."""

    def __init__(self, tag):
        template = "The element '%s' does not accept non-keyword arguments (has no closing tag)."
        self.message = template % tag

509

class OpeningError(MarkupError):
    """An element can not be opened."""

    def __init__(self, tag):
        template = "The element '%s' can not be opened."
        self.message = template % tag

513

class ArgumentError(MarkupError):
    """An element was called with more than one positional argument."""

    def __init__(self, tag):
        template = "The element '%s' was called with more than one non-keyword argument."
        self.message = template % tag

517

class InvalidElementError(MarkupError):
    """An element is not among those valid for the document's mode."""

    def __init__(self, tag, mode):
        template = "The element '%s' is not valid for your mode '%s'."
        self.message = template % (tag, mode)

521

class DeprecationError(MarkupError):
    """A deprecated element was used in 'strict_html' mode."""

    def __init__(self, tag):
        template = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it."
        self.message = template % tag

525

class ModeError(MarkupError):
    """A page was requested with an unknown mode."""

    def __init__(self, mode):
        template = "Mode '%s' is invalid, possible values: strict_html, html (alias for strict_html), loose_html, xml."
        self.message = template % mode

529

class CustomizationError(MarkupError):
    """Only one of 'onetags' and 'twotags' was customized."""

    def __init__(self):
        text = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
        self.message = text

if __name__ == '__main__':
    # run as a script: print the module documentation and nothing else
    import sys
    write = sys.stdout.write
    write( __doc__ )

Source Code for Module glue.markup