244 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			244 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Python
		
	
	
	
| """
 | |
| module for generating and serializing xml and html structures
 | |
| by using simple python objects. 
 | |
| 
 | |
| (c) holger krekel, holger at merlinux eu. 2009
 | |
| """ 
 | |
| import py
 | |
| import sys, re
 | |
| 
 | |
# Text-type compatibility helpers: ``u(s)`` yields a text literal and
# ``unicode(x)`` converts an object to text on both Python 2 and Python 3.
if sys.version_info < (3, 0):
    # Python 2: re-export the builtin so the name is a module global too.
    unicode = unicode

    def u(s):
        return unicode(s)
else:
    def u(s):
        # str literals are already text on Python 3
        return s

    def unicode(x):
        # honour a Python-2 style __unicode__ hook if the object has one
        try:
            convert = x.__unicode__
        except AttributeError:
            return str(x)
        return convert()
 | |
|     
 | |
| 
 | |
class NamespaceMetaclass(type):
    """ metaclass creating tag classes lazily on attribute access.

    Looking up an unknown public attribute on a namespace class builds a
    new subclass of the namespace's ``__tagclass__`` named after the
    attribute, stores it on the namespace and returns it.
    """
    def __getattr__(self, name):
        # private/dunder names are never turned into tags
        if name.startswith('_'):
            raise AttributeError(name)
        if self == Namespace:
            raise ValueError("Namespace class is abstract")
        allowed = self.__tagspec__
        # a tagspec of None means "any tag name is accepted"
        if not (allowed is None or name in allowed):
            raise AttributeError(name)
        # sticky namespaces pin the serialized name on the class itself
        members = {'xmlname': name} if self.__stickyname__ else {}
        tagcls = type(name, (self.__tagclass__,), members)
        # store it so later lookups bypass __getattr__
        setattr(self, name, tagcls)
        return tagcls
 | |
| 
 | |
class Tag(list):
    """ an xml tag: a list of child nodes plus an ``attr`` attribute holder.

    Positional arguments become the children, keyword arguments become
    attributes stored on ``self.attr``.
    """
    class Attr(object):
        """ plain container whose instance attributes are the tag attributes. """
        def __init__(self, **kwargs):
            vars(self).update(kwargs)

    def __init__(self, *args, **kwargs):
        super(Tag, self).__init__(args)
        self.attr = self.Attr(**kwargs)

    def __unicode__(self):
        # default string conversion renders without indentation
        return self.unicode(indent=0)
    __str__ = __unicode__

    def unicode(self, indent=2):
        """ return the serialized tag, using ``indent`` spaces per level. """
        chunks = []
        visitor = SimpleUnicodeVisitor(chunks.append, indent)
        visitor.visit(self)
        return "".join(chunks)

    def __repr__(self):
        return "<%r tag object %d>" % (type(self).__name__, id(self))
 | |
|     
 | |
# Abstract base namespace, created by calling the metaclass directly so the
# same statement works on Python 2 and 3.  Defaults: no tag whitelist,
# plain Tag classes, no sticky xmlname.
Namespace = NamespaceMetaclass(
    'Namespace',
    (object,),
    dict(__tagspec__=None, __tagclass__=Tag, __stickyname__=False),
)
 | |
| 
 | |
class HtmlTag(Tag):
    """ tag that serializes itself through HtmlVisitor. """
    def unicode(self, indent=2):
        """ return the html serialization, using ``indent`` spaces per level. """
        chunks = []
        visitor = HtmlVisitor(chunks.append, indent, shortempty=False)
        visitor.visit(self)
        return u("").join(chunks)
 | |
| 
 | |
| # exported plain html namespace 
 | |
class html(Namespace):
    """ namespace of html tags; only names listed in ``__tagspec__`` resolve. """
    __tagclass__ = HtmlTag
    __stickyname__ = True
    # maps every accepted tag name to 1
    __tagspec__ = dict.fromkeys(filter(None, (
        'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
        'blockquote,body,br,button,caption,center,cite,code,col,'
        'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
        'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
        'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
        'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
        'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
        'select,small,span,strike,strong,style,sub,sup,table,'
        'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
        'base,basefont,frame,hr,isindex,param,samp,var'
    ).split(',')), 1)

    class Style(object):
        """ holder for css properties given as keyword arguments. """
        def __init__(self, **kw):
            for key, value in kw.items():
                # underscores in keyword names stand for css dashes
                setattr(self, key.replace('_', '-'), value)
 | |
| 
 | |
| 
 | |
class raw(object):
    """ wrapper around a unicode string that is meant to be emitted
    into the output directly; the string is kept on ``uniobj``. """
    def __init__(self, uniobj):
        self.uniobj = uniobj
 | |
| 
 | |
class SimpleUnicodeVisitor(object):
    """ recursive visitor to write unicode.

    Walks a tree of tags, lists, ``raw`` boxes and arbitrary objects and
    passes the serialized pieces to the ``write`` callable.

    ``write``      callable receiving each output fragment.
    ``indent``     number of spaces added per nesting level.
    ``curindent``  indentation in effect for the first visited node.
    ``shortempty`` whether empty tags are written as ``<name/>``
                   (consulted by the default ``_issingleton``).
    """
    def __init__(self, write, indent=0, curindent=0, shortempty=True):
        self.write = write
        self.cache = {}    # node class -> bound visit method
        self.visited = {}  # ids of lists/tags seen so far (recursion check)
        self.indent = indent
        self.curindent = curindent
        self.parents = []  # stack of the tags currently being serialized
        self.shortempty = shortempty  # short empty tags or not

    def visit(self, node):
        """ dispatcher on node's class/bases name.

        The first class in the node's MRO whose ``__name__`` matches an
        attribute of this visitor selects the handler; the choice is
        cached per node class.
        """
        cls = node.__class__
        try:
            visitmethod = self.cache[cls]
        except KeyError:
            for subclass in cls.__mro__:
                visitmethod = getattr(self, subclass.__name__, None)
                if visitmethod is not None:
                    break
            else:
                visitmethod = self.object
            self.cache[cls] = visitmethod
        visitmethod(node)

    def object(self, obj):
        """ fallback handler: write the escaped text form of obj. """
        self.write(escape(unicode(obj)))

    def raw(self, obj):
        """ write the wrapped string of a ``raw`` box without escaping. """
        self.write(obj.uniobj)

    def list(self, obj):
        """ visit every item of a plain list in order. """
        assert id(obj) not in self.visited
        self.visited[id(obj)] = 1
        # An explicit loop is required: map() is lazy on Python 3, so
        # calling it only for its side effects would skip all children.
        for item in obj:
            self.visit(item)

    def Tag(self, tag):
        """ serialize a tag together with its attributes and children.

        As a side effect ``tag.parent`` is set to the enclosing tag, or to
        None for a tag visited outside of any other tag.
        """
        assert id(tag) not in self.visited
        try:
            tag.parent = self.parents[-1]
        except IndexError:
            tag.parent = None
        self.visited[id(tag)] = 1
        tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
        # nested non-inline tags start on a fresh, indented line
        if self.curindent and not self._isinline(tagname):
            self.write("\n" + u(' ') * self.curindent)
        if tag:
            self.curindent += self.indent
            self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
            self.parents.append(tag)
            for x in tag:
                self.visit(x)
            self.parents.pop()
            self.write(u('</%s>') % tagname)
            self.curindent -= self.indent
        else:
            # tag without children: short form or explicit open/close pair
            nameattr = tagname + self.attributes(tag)
            if self._issingleton(tagname):
                self.write(u('<%s/>') % (nameattr,))
            else:
                self.write(u('<%s></%s>') % (nameattr, tagname))

    def attributes(self, tag):
        """ return the serialized attributes of tag, sorted by name and
        followed by the style attribute if the tag has one. """
        parts = []
        for name in sorted(dir(tag.attr)):
            res = self.repr_attribute(tag.attr, name)
            if res is not None:
                parts.append(res)
        parts.extend(self.getstyle(tag))
        return u("").join(parts)

    def repr_attribute(self, attrs, name):
        """ return ' name="value"' with the value escaped, or None for
        names starting with a double underscore. """
        if name[:2] != '__':
            value = getattr(attrs, name)
            # a trailing underscore lets callers pass reserved words
            # such as ``class_``; it is dropped from the output name
            if name.endswith('_'):
                name = name[:-1]
            return ' %s="%s"' % (name, escape(unicode(value)))

    def getstyle(self, tag):
        """ return attribute list suitable for styling.

        The list is empty when the tag has no ``style`` attribute.
        Style values are concatenated as-is, without escaping.
        """
        try:
            styledict = tag.style.__dict__
        except AttributeError:
            return []
        else:
            stylelist = [x + ': ' + y for x, y in styledict.items()]
            return [u(' style="%s"') % u('; ').join(stylelist)]

    def _issingleton(self, tagname):
        """can (and will) be overridden in subclasses"""
        return self.shortempty

    def _isinline(self, tagname):
        """can (and will) be overridden in subclasses"""
        return False
 | |
| 
 | |
class HtmlVisitor(SimpleUnicodeVisitor):
    """ visitor with html-specific rules for empty and inline tags. """

    # tags written in the short ``<name/>`` form when they have no children
    single = dict.fromkeys(
        ('br,img,area,param,col,hr,meta,link,base,'
         'input,frame').split(','), 1)
    # tags that do not get a line break and indentation before them
    inline = dict.fromkeys(
        ('a abbr acronym b basefont bdo big br cite code dfn em font '
         'i img input kbd label q s samp select small span strike '
         'strong sub sup textarea tt u var').split(' '), 1)

    def repr_attribute(self, attrs, name):
        # a ``class_`` attribute set to None is left out of the output
        if name == 'class_' and getattr(attrs, name) is None:
            return None
        return super(HtmlVisitor, self).repr_attribute(attrs, name)

    def _issingleton(self, tagname):
        return tagname in self.single

    def _isinline(self, tagname):
        return tagname in self.inline
 | |
| 
 | |
|        
 | |
class _escape:
    """ callable replacing the characters ``"``, ``<``, ``>``, ``&`` and
    ``'`` with their predefined xml entities. """
    def __init__(self):
        # special character -> entity reference
        self.escape = {
            u('"') : u('&quot;'), u('<') : u('&lt;'), u('>') : u('&gt;'),
            u('&') : u('&amp;'), u("'") : u('&apos;'),
            }
        # one alternation matching any of the special characters
        self.charef_rex = re.compile(u("|").join(self.escape.keys()))

    def _replacer(self, match):
        # substitution callback: look up the entity for the matched char
        return self.escape[match.group(0)]

    def __call__(self, ustring):
        """ xml-escape the given unicode string. """
        return self.charef_rex.sub(self._replacer, ustring)

# module-level escaping function used by the visitors
escape = _escape()
 |