#!/usr/bin/python """ $Id: XMLElement.py,v 1.7 2001年12月20日 15:36:47 connolly Exp $ @@ review differences between this API and the DOM spec, python DOM implementations """ from string import find import StringIO import XMLWriter from XMLAttrs import Attrs class Element: def __init__(self, name, nsName=None, prefix=''): """if name contains a colon, nsName is assumed to be not None, and prefix is assumed to match the substring before the colon.""" self._name = name self._attrs = {} # (ln, ns) -> (value, prefix) self._content = [] self._bindings = {} self._nsName = nsName ln = name if nsName and prefix: ln = name[len(prefix)+1:] self._ln = ln if nsName None: self._bindings[prefix] = nsName def namespaceName(self): return self._nsName def localName(self): return self._ln def bindNS(self, nsName, prefix): n = self._bindings.get(prefix) if n is None: self._bindings[prefix] = nsName else: if n nsName: raise AmbiguousNamespacePrefix() # else already declared as such. def writeTo(self, xwr, bindings={}): attrs = {} subBindings = None # add namespace declarations for all bindings # that aren't already declared for prefix, nsName in self._bindings.items(): if not bindings.get(prefix) == nsName: if prefix == '': attrs['xmlns'] = nsName else: attrs['xmlns:' + prefix] = nsName if not subBindings: subBindings = bindings.copy() subBindings[prefix] = nsName for qn, pv in self._attrs.items(): ln, ns = qn value, prefix = pv if prefix: attrs[prefix+':'+ln] = value else: attrs[ln] = value xwr.startElement(self._name, Attrs(attrs.items())) self.writeContentTo(xwr, subBindings or bindings) xwr.endElement(self._name) def writeContentTo(self, xwr, bindings={}): for c in self._content: if isinstance(c, Element): c.writeTo(xwr, bindings) else: xwr.characters(c) def __str__(self): strFp = StringIO.StringIO() xwr = XMLWriter.T(strFp) self.writeTo(xwr) return strFp.getvalue() def content(self): strFp = StringIO.StringIO() xwr = XMLWriter.T(strFp) self.writeContentTo(xwr) return strFp.getvalue() 
def appendData(self, c): self._content.append(c) def appendChild(self, c): self._content.append(c) def nextChild(self, after=None): """raises IndexError if c isn't a child of this elt. pass after=None to get the first child, if any. returns None if there are no more. """ raise RuntimeError, """OOPS! a string can occur more than once in the content; endless loop! this interface is borken.""" sc = self._content if after is None: i = 0 else: i = sc.index(after) + 1 if i with ", len(self._content), "children; looking for ", name r = [] # per DOM, probably should be some NodeList class for p in self._content: if isinstance(p, Element) and p._name == name: r.append(p) return r def getText(self): r = '' for c in self._content: if isinstance(c, Element): r = r + c.getText() else: r = r + c return r def addAttribute(self, n, v, nsName=None, prefix=''): ln = n if nsName and prefix: ln = n[len(prefix)+1:] if self._attrs.has_key((ln, nsName)): raise DuplicateAttribute() if nsName None: self.bindNS(nsName, prefix) self._attrs[(ln, nsName)] = (v, prefix) def getAttribute(self, n, default=None, nsName=None, prefix=''): ln = n if nsName and prefix: ln = n[len(prefix)+1:] vp = self._attrs.get((ln, nsName)) if vp: return vp[0] else: return default class DuplicateAttribute: pass class AmbiguousNamespacePrefix: """This is an exception that results from trying to bind the same previx to two different namespace names when adding an attribute""" pass from xml.sax.saxlib import DocumentHandler, AttributeList class Sink(DocumentHandler): def __init__(self): self._elts = [] self._bindings = [] self._root = None def startElement(self, n, attrs): if self._bindings: b = self._bindings[-1].copy() else: b = {} for i in range(len(attrs)): an = attrs.getName(i) av = attrs.getValue(i) if an == 'xmlns': b[''] = av elif an[:len('xmlns:')] == 'xmlns:': pfx = an[len('xmlns:'):] #print "@bind: n" , an, "pfx", pfx b[pfx] = av self._bindings.append(b) c = find(n, ':') if c>0: pfx = n[:c] #print "@elt: n" 
, n, "pfx", pfx e = Element(n, b[pfx], pfx) else: e = Element(n) for i in range(len(attrs)): an = attrs.getName(i) av = attrs.getValue(i) c = find(an, ':') if c>0: pfx = an[:c] if pfx != 'xmlns': e.addAttribute(an, av, b[pfx], pfx) else: e.addAttribute(an, av) eStack = self._elts if len(eStack): eStack[-1].appendChild(e) eStack.append(e) def endElement(self, n): if len(self._elts) == 1: self._root = self._elts[0] del self._elts[-1] del self._bindings[-1] def characters(self, ch, start=0, length=-1): if length == -1: end = len(ch) else: end = start+length self._elts[-1].appendChild(ch[start:end]) def root(self): return self._root def test(): import traceback foo = Element('foo') print "== simple empty element" print foo print "== with child" bar = Element('bar') foo.appendChild(bar) print foo print "== with mixed content" foo.appendChild("abcde") foo.appendChild(Element('baz')) print foo foo.addAttribute('attr', 'val') print "== with attr" print foo print "== Namespace test: my XSLT template" xslt = 'http://www.w3.org/1999/XSL/Transform' rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' xhtml = 'http://www.w3.org/1999/xhtml' transform = Element('xsl:transform', xslt, 'xsl') transform.addAttribute('version', '1.0') transform.bindNS(rdf, 'web') transform.bindNS(xhtml, 'h') div = Element('div', xhtml) transform.appendChild(div) addr = Element('address', xhtml) addr.appendChild('Dan Connolly ') br = Element('br', xhtml) br.addAttribute('class', '') addr.appendChild(br) addr.appendChild("$Id: XMLElement.py,v 1.7 2001年12月20日 15:36:47 connolly Exp $") div.appendChild(addr) template = Element('xsl:template', xslt, 'xsl') transform.appendChild(template) template.addAttribute('match', 'text()|@*') print transform print "===== Exceptions" print "== testing duplicate attr error" try: foo.addAttribute('attr', 'val2') except DuplicateAttribute: traceback.print_exc() print print "== testing ns redecl" try: transform.addAttribute('h:previous', 'v0.9', 
'http://example.org/history', 'h') print transform except AmbiguousNamespacePrefix: traceback.print_exc() def fromFile(fp, p=None): if p is None: from xml.sax.drivers.drv_pyexpat import create_parser p = create_parser() s = Sink() p.setDocumentHandler(s) # xml-howto.html says setContentHandler. bug. p.parseFile(fp) e = s.root() return e def test2(): import sys e = fromFile(sys.stdin) print "text: ", e.getText() print "all=======" e.writeTo(XMLWriter.T(sys.stdout)) if __name__ == '__main__': test2()

# (Web-proxy footer, not part of the module: "Page converted by AltStyle (-> original)")