245 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			245 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| """
 | |
| module for generating and serializing xml and html structures
 | |
| by using simple python objects.
 | |
| 
 | |
| (c) holger krekel, holger at merlinux eu. 2009
 | |
| """
 | |
| import py
 | |
| import sys, re
 | |
| 
 | |
# Text compatibility shims.  ``u`` turns a literal into the text type of the
# running interpreter and ``unicode`` converts an arbitrary object to text.
if sys.version_info < (3, 0):
    def u(s):
        return unicode(s)

    # re-export the builtin under the module-level name
    unicode = unicode
else:
    def u(s):
        return s

    def unicode(x):
        # honour an explicit ``__unicode__`` hook, otherwise use ``str``
        try:
            hook = x.__unicode__
        except AttributeError:
            return str(x)
        return hook()
 | |
| 
 | |
| 
 | |
class NamespaceMetaclass(type):
    """Metaclass that builds tag classes on first attribute access.

    Looking up a missing public attribute on a namespace class creates a
    subclass of that namespace's ``__tagclass__`` named after the attribute,
    stores it on the namespace and returns it.
    """

    def __getattr__(self, name):
        # names with a leading underscore never produce tag classes
        if name.startswith('_'):
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        allowed = self.__tagspec__
        # a tagspec of None accepts every name
        if not (allowed is None or name in allowed):
            raise AttributeError(name)
        members = {}
        if self.__stickyname__:
            members.update(xmlname=name)
        tagclass = type(name, (self.__tagclass__,), members)
        # store the class so later lookups bypass __getattr__
        setattr(self, name, tagclass)
        return tagclass
 | |
| 
 | |
class Tag(list):
    """A list of child nodes plus an ``attr`` object holding tag attributes."""

    class Attr(object):
        """Attribute holder: every keyword becomes an instance attribute."""
        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    def __init__(self, *args, **kwargs):
        # positional arguments are the children, keywords the attributes
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def unicode(self, indent=2):
        """Serialize this tag via ``SimpleUnicodeVisitor`` using ``indent``."""
        pieces = []
        visitor = SimpleUnicodeVisitor(pieces.append, indent)
        visitor.visit(self)
        return "".join(pieces)

    def __unicode__(self):
        return self.unicode(indent=0)
    __str__ = __unicode__

    def __repr__(self):
        return "<%r tag object %d>" % (type(self).__name__, id(self))
 | |
| 
 | |
# Root namespace class.  NamespaceMetaclass refuses to create tags on it
# directly ("Namespace class is abstract"); subclasses override the
# three settings below.
Namespace = NamespaceMetaclass(
    'Namespace',
    (object,),
    dict(__tagspec__=None, __tagclass__=Tag, __stickyname__=False)
)
 | |
| 
 | |
class HtmlTag(Tag):
    """Tag that serializes itself through ``HtmlVisitor``."""

    def unicode(self, indent=2):
        """Return this tag as text, rendered by an ``HtmlVisitor`` created
        with ``shortempty=False`` and the given ``indent``."""
        fragments = []
        emitter = HtmlVisitor(fragments.append, indent, shortempty=False)
        emitter.visit(self)
        return u("").join(fragments)
 | |
| 
 | |
| # exported plain html namespace
 | |
class html(Namespace):
    """Namespace restricted to the HTML tag names listed in ``__tagspec__``;
    its tag classes derive from ``HtmlTag`` and keep their name as
    ``xmlname``."""
    __tagclass__ = HtmlTag
    __stickyname__ = True
    # mapping of every permitted tag name to 1
    __tagspec__ = dict.fromkeys(filter(None, (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',')), 1)

    class Style(object):
        """Style holder: keyword names are stored as attributes with
        underscores replaced by hyphens."""
        def __init__(self, **kw):
            for key in kw:
                setattr(self, key.replace('_', '-'), kw[key])
 | |
| 
 | |
| 
 | |
class raw(object):
    """Wrapper around a unicode string that is to be placed in the
    output exactly as given."""

    def __init__(self, uniobj):
        # the wrapped text, kept on the instance as ``uniobj``
        self.uniobj = uniobj
 | |
| 
 | |
class SimpleUnicodeVisitor(object):
    """Recursive visitor that serializes a node tree as XML text.

    Output is produced by calling ``write`` with successive text fragments.
    Each node is dispatched to the visitor method named after the first
    class in the node's MRO for which this visitor has an attribute
    (``Tag``, ``list``, ``raw``, ...); ``object`` is the catch-all.
    """

    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        """Set up the visitor.

        write      -- callable receiving each output fragment
        indent     -- number of spaces added per nesting level
        curindent  -- indentation in effect for the first node
        shortempty -- whether childless tags are written as ``<name/>``
        """
        self.write = write
        self.cache = {}    # node class -> visit method
        self.visited = {}  # ids of containers already seen (recursion check)
        self.indent = indent
        self.curindent = curindent
        self.parents = []  # stack of the tags enclosing the current node
        self.shortempty = shortempty  # short empty tags or not

    def visit(self, node):
        """Dispatch ``node`` to the method named after its class or a base."""
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def object(self, obj):
        """Write any other object as escaped text."""
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """Write the wrapped string of a ``raw`` node without escaping."""
        self.write(obj.uniobj)

    def list(self, obj):
        """Visit every element of a plain list, in order."""
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        # Explicit loop: map() returns a lazy iterator on Python 3, so
        # calling it only for its side effects would visit nothing.
        for elem in obj:
            self.visit(elem)

    def Tag(self, tag):
        """Write a tag, its attributes and, recursively, its children.

        Sets ``tag.parent`` to the enclosing tag (or None at the top).
        """
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        # non-inline tags start on a fresh, indented line once nested
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            # childless tag: short form or explicit open/close pair
            nameattr = tagname + self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """Return the serialized attributes of ``tag`` (sorted by name),
        followed by its style attribute if any."""
        l = []
        for name in sorted(dir(tag.attr)):
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                l.append(res)
        l.extend(self.getstyle(tag))
        return u("").join(l)

    def repr_attribute(self, attrs, name):
        """Return `` name="value"`` for one attribute, or None for names
        starting with a double underscore.  One trailing underscore is
        stripped from the name (e.g. ``class_`` becomes ``class``)."""
        if name[:2] != '__':
            value = getattr(attrs, name)
            if name.endswith('_'):
                name = name[:-1]
            return ' %s="%s"' % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling. """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            # tag carries no style object
            return []
        else:
            stylelist = [x + ': ' + y for x, y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False
 | |
| 
 | |
class HtmlVisitor(SimpleUnicodeVisitor):
    """Visitor that applies HTML-specific singleton and inline tag rules."""

    # tags reported as singletons by ``_issingleton``
    single = dict.fromkeys(
        ('br,img,area,param,col,hr,meta,link,base,'
         'input,frame').split(','), 1)
    # tags reported as inline by ``_isinline``
    inline = dict.fromkeys(
        ('a abbr acronym b basefont bdo big br cite code dfn em font '
         'i img input kbd label q s samp select small span strike '
         'strong sub sup textarea tt u var').split(' '), 1)

    def repr_attribute(self, attrs, name):
        # a ``class_`` attribute set to None is left out of the output
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        return tagname in self.single

    def _isinline(self, tagname):
        return tagname in self.inline
 | |
| 
 | |
| 
 | |
class _escape:
    """Callable that replaces XML-special characters by entity references.

    ``escape`` maps each special character to its replacement and
    ``charef_rex`` is the compiled pattern matching any of those characters.
    """

    def __init__(self):
        # The five predefined XML entities.  Replacements are assembled
        # from the entity names (quot, lt, gt, amp, apos) rather than
        # written out, so a tool that decodes character references in
        # source text cannot turn the table into an identity mapping.
        entity_names = (
            ('"', 'quot'), ('<', 'lt'), ('>', 'gt'),
            ('&', 'amp'), ("'", 'apos'),
        )
        self.escape = dict([(u(char), u('&%s;') % name)
                            for char, name in entity_names])
        self.charef_rex = re.compile(u("|").join(self.escape.keys()))

    def _replacer(self, match):
        # look up the replacement for the single matched character
        return self.escape[match.group(0)]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        ustring = unicode(ustring)
        return self.charef_rex.sub(self._replacer, ustring)


# module-level escaper instance
escape = _escape()
 |