[Python-checkins] CVS: python/dist/src/Lib/xml/dom minidom.py,NONE,1.1 pulldom.py,NONE,1.1

Fred L. Drake python-dev@python.org
2000年6月29日 12:39:59 -0700


Update of /cvsroot/python/python/dist/src/Lib/xml/dom
In directory slayer.i.sourceforge.net:/tmp/cvs-serv5599
Added Files:
	minidom.py pulldom.py 
Log Message:
Paul Prescod <paul@prescod.net>:
W3C DOM implementation for Python.
--- NEW FILE ---
import pulldom
import string
from StringIO import StringIO
import types
"""
minidom.py -- a lightweight DOM implementation based on SAX.
Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
 interface
 * SAX 2 namespaces
"""
class Node:
    """Base class shared by all node classes in this module.

    Carries the numeric node-type constants, the list of child nodes and
    a __getattr__ hook that maps attribute reads onto "_get_<name>"
    methods (and "_get_<name>" reads onto plain attributes).
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # Registry with one "<id><class>" string per node that has been
    # created and not yet unlinked.
    allnodes = []

    def __init__(self):
        self.childNodes = []
        Node.allnodes.append(repr(id(self)) + repr(self.__class__))

    def _clearGetAttrGuard(self):
        "Remove the recursion guard if it is set; return 1 if it was set."
        try:
            del self.__dict__["inGetAttr"]
        except KeyError:
            return 0
        return 1

    def __getattr__(self, key):
        """Resolve attributes that normal lookup did not find.

        Reading "_get_foo" yields a callable returning the attribute
        "foo"; reading "foo" calls a "_get_foo" method if one exists.
        Anything else raises AttributeError carrying the requested name.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        # getattr should never call getattr!  A set guard means this call
        # was triggered by the probe further down, so give up at once.
        if self._clearGetAttrGuard():
            raise AttributeError(key)
        prefix, attrname = key[:5], key[5:]
        self.__dict__["inGetAttr"] = 1
        if prefix == "_get_":
            found = hasattr(self, attrname)
            # A failed probe already removed the guard; a successful one
            # did not.  Clearing tolerantly covers both cases.
            self._clearGetAttrGuard()
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))
        try:
            func = getattr(self, "_get_" + key)
        except AttributeError:
            self._clearGetAttrGuard()
            raise AttributeError(key)
        self._clearGetAttrGuard()
        return func()

    def __nonzero__(self):
        return 1

    def toxml(self):
        "Return the text that writexml() produces for this node."
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        "Return 1 if the node has at least one child, else 0."
        if self.childNodes:
            return 1
        else:
            return 0

    def insertBefore(self, newChild, refChild):
        """Insert newChild in front of refChild and return newChild.

        A refChild of None appends newChild at the end.  ValueError is
        raised if refChild is not one of this node's children.
        """
        if refChild is None:
            self.childNodes.append(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
        return newChild

    def appendChild(self, node):
        "Add node as the last child and return it."
        self.childNodes.append(node)
        return node

    def unlink(self):
        """Drop the references this node holds so it can be reclaimed.

        Unlinks children and attribute nodes recursively, clears
        parentNode, childNodes and attributes, and removes the node from
        Node.allnodes.  Calling it a second time is harmless.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1]  # probably not most efficient!
        self.childNodes = None
        if self.attributes:
            for attr in self.attributes.values():
                attr.unlink()
        self.attributes = None
        try:
            Node.allnodes.remove(repr(id(self)) + repr(self.__class__))
        except ValueError:
            # already unlinked earlier
            pass
def _write_data(writer, data):
    "Writes datachars to writer."
    # "&" is replaced first so the entities produced by the later
    # substitutions are not escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        ("\"", "&quot;"),
        (">", "&gt;"),
    )
    for raw, entity in substitutions:
        data = data.replace(raw, entity)
    writer.write(data)
def _closeElement(element):
    """Delete the parentNode reference of element and of every element
    below it.

    The children are taken from childNodes; nothing in this module
    defines the "elements" attribute that was read here before, so the
    old loop could only raise AttributeError.
    NOTE(review): restricting the walk to element children is an
    assumption based on the function's name -- confirm the intent.
    """
    del element.parentNode
    for node in element.childNodes:
        if node.nodeType == node.ELEMENT_NODE:
            _closeElement(node)
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect, in document order, the elements below parent whose
    tagName equals name ("*" matches every element).

    Matches are appended to rc, which is also the return value.
    """
    match_all = (name == "*")
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if match_all or child.tagName == name:
                rc.append(child)
        # descend into every child, whatever its type
        _getElementsByTagNameHelper(child, name, rc)
    return rc
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect the elements below parent that match a namespace URI and
    a local name; "*" is a wildcard for either argument.

    Matches are appended to rc in document order and rc is returned.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare against the local part of the name; tagName holds
            # the qualified name and would never match "prefix:local".
            if ((localName == "*" or node.localName == localName) and
                    (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
class Attr(Node):
    """Attribute node.

    "value" and "nodeValue" are kept identical: assigning either one
    stores the new value under both names.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        assert qName
        # Write straight into the instance dictionary; this skips
        # __setattr__ for performance.
        fields = self.__dict__
        fields["nodeName"] = fields["name"] = qName
        fields["localName"] = localName or qName
        fields["prefix"] = prefix
        fields["namespaceURI"] = namespaceURI
        # nodeValue and value are set elsewhere
        self.attributes = None

    def __setattr__(self, name, value):
        fields = self.__dict__
        if name not in ("value", "nodeValue"):
            fields[name] = value
        else:
            fields["value"] = fields["nodeValue"] = value
class AttributeList:
    """Transient view of an element's attributes.

    The view wraps the two dictionaries it is given rather than copying
    them: attrs is keyed by attribute name and attrsNS by a
    (namespaceURI, localName) tuple, both mapping to attribute nodes.
    "length" is computed once, when the view is created.
    """

    def __init__(self, attrs, attrsNS):
        self.__attrs = attrs
        self.__attrsNS = attrsNS
        self.length = len(self.__attrs.keys())

    def item(self, index):
        "Return the value of the index-th attribute, or None if out of range."
        try:
            return self[list(self.keys())[index]]
        except IndexError:
            return None

    def items(self):
        "Return (name, value) pairs for all attributes."
        return map(lambda node: (node.nodeName, node.value),
                   self.__attrs.values())

    def itemsNS(self):
        "Return ((namespaceURI, localName), value) pairs for all attributes."
        return map(lambda node: ((node.namespaceURI, node.localName),
                                 node.value),
                   self.__attrs.values())

    def keys(self):
        "Return the attribute names."
        return self.__attrs.keys()

    def keysNS(self):
        "Return the (namespaceURI, localName) keys."
        return self.__attrsNS.keys()

    def values(self):
        "Return the attribute nodes."
        return self.__attrs.values()

    def __len__(self):
        return self.length

    def __cmp__(self, other):
        # Two views are equal when they wrap the same name dictionary.
        if self.__attrs is other.__attrs:
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        "Look up a value by name or by (namespaceURI, localName) tuple."
        if isinstance(attname_or_tuple, type(())):
            return self.__attrsNS[attname_or_tuple].value
        else:
            return self.__attrs[attname_or_tuple].value

    def __setitem__(self, attname, value=None):
        "Item assignment is not supported; always raises TypeError."
        raise TypeError("object does not support item assignment")
 
class Element(Node):
    """Element node with a doubly indexed attribute table."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        # attributes are double-indexed:
        self.__attrs = {}    # name -> Attribute
        self.__attrsNS = {}  # (URI, localName) -> Attribute
        # in the future: consider lazy generation of attribute objects
        #                this is too tricky for now because of headaches
        #                with namespaces.

    def getAttribute(self, attname):
        "Return the value of the named attribute; KeyError if absent."
        return self.__attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        "Return the value stored for (namespaceURI, localName); KeyError if absent."
        return self.__attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        "Create an attribute node named attname holding value and store it."
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute node holding value and store it.

        qualifiedName is split at its first colon into prefix and local
        name; without a colon the prefix is None.
        """
        fields = qualifiedName.split(":", 1)
        if len(fields) == 2:
            prefix, localName = fields
        else:
            prefix, localName = None, fields[0]
        attr = Attr(qualifiedName, namespaceURI, prefix, localName)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNode(self, attr):
        "Store attr under its name and under (namespaceURI, localName)."
        self.__attrs[attr.name] = attr
        self.__attrsNS[(attr.namespaceURI, attr.localName)] = attr

    def removeAttribute(self, name):
        "Remove the attribute called name; KeyError if absent."
        attr = self.__attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        "Remove the attribute stored for (namespaceURI, localName); KeyError if absent."
        attr = self.__attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        "Remove node from both attribute indexes."
        del self.__attrs[node.name]
        del self.__attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        "Return the descendant elements whose tagName is name (or all, for '*')."
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Return the descendant elements matching namespaceURI and localName."
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element:" + self.tagName + " at " + repr(id(self)) + " >"

    def writexml(self, writer):
        """Write the element, its attributes (sorted by name) and its
        children to writer; an element without children is written as an
        empty-element tag.
        """
        writer.write("<" + self.tagName)

        # Build the attribute view once instead of once per attribute.
        attributes = self._get_attributes()
        a_names = attributes.keys()
        a_names.sort()

        for a_name in a_names:
            writer.write(" " + a_name + "=\"")
            _write_data(writer, attributes[a_name])
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</" + self.tagName + ">")
        else:
            writer.write("/>")

    def _get_attributes(self):
        "Return a fresh AttributeList over the attribute dictionaries."
        return AttributeList(self.__attrs, self.__attrsNS)
class Comment(Node):
    """Comment node; the comment text is held in data and nodeValue."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.nodeName = "#comment"
        self.attributes = None
        self.data = self.nodeValue = data

    def writexml(self, writer):
        "Write the comment, wrapped in its delimiters, to writer."
        opening = "<!--" + self.data
        writer.write(opening + "-->")
class ProcessingInstruction(Node):
    """Processing-instruction node made of a target and a data string."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        self.target = self.nodeName = target
        self.data = self.nodeValue = data

    def writexml(self, writer):
        "Write the instruction as target, one space, then data, inside its delimiters."
        text = "<?" + self.target + " " + self.data
        writer.write(text + "?>")
class Text(Node):
    """Text node; the character data is held in data and nodeValue."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = self.nodeValue = data

    def __repr__(self):
        # Show at most ten characters, marking longer text with "...".
        dotdotdot = ""
        if len(self.data) > 10:
            dotdotdot = "..."
        preview = self.data[0:10]
        return "<DOM Text node \"" + preview + dotdotdot + "\">"

    def writexml(self, writer):
        "Write the escaped character data to writer."
        _write_data(writer, self.data)
class Document(Node):
    """Document node: the root of a tree and the factory for its nodes."""
    nodeType = Node.DOCUMENT_NODE

    def __init__(self):
        Node.__init__(self)
        self.documentElement = None
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    # The non-namespace factories are the node classes themselves.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Return a new Element for a namespace-qualified name.

        qualifiedName is split at its first colon into prefix and local
        name; without a colon the prefix is ''.
        """
        fields = qualifiedName.split(":", 1)
        if len(fields) == 2:
            prefix, localName = fields
        else:
            prefix, localName = '', fields[0]
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Return a new Attr for a namespace-qualified name.

        qualifiedName is split at its first colon into prefix and local
        name; without a colon the prefix is None.
        """
        fields = qualifiedName.split(":", 1)
        if len(fields) == 2:
            prefix, localName = fields
        else:
            prefix, localName = None, fields[0]
        return Attr(qualifiedName, namespaceURI, prefix, localName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        "Return the elements in the document matching namespaceURI and localName."
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def close(self):
        """Run _closeElement over the element children of the document.

        The children come from childNodes; nothing in this module
        defines the "elements" attribute that was read here before.
        NOTE(review): limiting this to element children is an assumption
        based on the helper's name -- confirm the intent.
        """
        for node in self.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                _closeElement(node)

    def unlink(self):
        "Clear documentElement, then unlink like any other node."
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        "Return the elements in the document whose tagName is name (or all, for '*')."
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        "Write every child node to writer, in order."
        for node in self.childNodes:
            node.writexml(writer)
def _doparse(func, args, kwargs):
    """Call func with the given positional and keyword arguments, take
    the first event from the resulting stream, expand that event's node
    and return the node.
    """
    stream = func(*args, **kwargs)
    first_event = stream.getEvent()
    (toktype, rootNode) = first_event
    stream.expandNode(rootNode)
    return rootNode
def parse(*args, **kwargs):
    "Hand the arguments to pulldom.parse and return the expanded root node."
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
def parseString(*args, **kwargs):
    "Hand the arguments to pulldom.parseString and return the expanded root node."
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)
--- NEW FILE ---
import minidom
import types
import string
import sys
import pyexpat
from xml.sax import ExpatParser
#todo: SAX2/namespace handling
# Event type tags; each one is paired with a node in the event queue.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
class PullDOM:
    """Content handler that turns parser callbacks into a queue of
    (event type, node) pairs.

    The queue is a singly linked list of two-item lists [event, next].
    firstEvent is a dummy head cell and lastEvent is the current tail.
    """

    def __init__(self):
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent

    def _pushEvent(self, token, node):
        "Append the event (token, node) at the tail of the queue."
        self.lastEvent[1] = [(token, node), None]
        self.lastEvent = self.lastEvent[1]

    def _attach(self, node):
        "Set node's parent to the current node and link it to the parent's last child."
        parent = self.curNode
        node.parentNode = parent
        if parent.childNodes:
            previous = parent.childNodes[-1]
            node.previousSibling = previous
            previous.nextSibling = node

    def setDocumentLocator(self, locator):
        pass

    def startElement(self, tagName, attrs):
        "Create an element with its attributes, make it current and queue START_ELEMENT."
        if not hasattr(self, "curNode"):
            # FIXME: hack!
            self.startDocument()

        node = self.document.createElement(tagName)  #FIXME namespaces!
        for attr in attrs.keys():
            node.setAttribute(attr, attrs[attr])

        self._attach(node)
        self.curNode = node
        # FIXME: do I have to screen namespace attributes
        self._pushEvent(START_ELEMENT, node)

    def endElement(self, name):
        "Queue END_ELEMENT for the current node and step back to its parent."
        node = self.curNode
        self._pushEvent(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        "Create a comment node and queue COMMENT."
        node = self.document.createComment(s)
        self._attach(node)
        self._pushEvent(COMMENT, node)

    def processingInstruction(self, target, data):
        "Create a processing-instruction node and queue PROCESSING_INSTRUCTION."
        node = self.document.createProcessingInstruction(target, data)
        self._attach(node)
        self._pushEvent(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        "Create a text node for the whitespace and queue IGNORABLE_WHITESPACE."
        # chars is the complete string; this method receives no offset or
        # length arguments to slice it with.
        node = self.document.createTextNode(chars)
        self._attach(node)
        self._pushEvent(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        "Create a text node and queue CHARACTERS."
        node = self.document.createTextNode(chars)
        # Only the parent is recorded here; sibling links are not set.
        node.parentNode = self.curNode
        self._pushEvent(CHARACTERS, node)

    def startDocument(self):
        "Create the document, make it the current node and queue START_DOCUMENT."
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._pushEvent(START_DOCUMENT, node)

    def endDocument(self):
        "Record the document element and queue END_DOCUMENT for the document."
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node
        #if not self.document.documentElement:
        #    raise Error, "No document element"

        # The event carries the document itself, not whichever child the
        # loop above happened to visit last.
        self._pushEvent(END_DOCUMENT, self.document)
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        "Print the warning and carry on."
        print(exception)

    def error(self, exception):
        "Raise the reported error."
        raise exception

    def fatalError(self, exception):
        "Raise the reported fatal error."
        raise exception
class DOMEventStream:
    """Pull-style source of (event type, node) pairs.

    Data is read from stream in pieces of bufsize and fed to parser,
    whose content handler is a PullDOM that queues the events.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        self.reset()

    def reset(self):
        "Install a fresh PullDOM as the parser's content handler."
        handler = PullDOM()
        self.pulldom = handler
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        # pos is ignored: each call simply yields the next event, and
        # IndexError is raised once there are none left.
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events, appending each node to its parent's childNodes
        (END_ELEMENT events excepted), until an event for node itself is
        seen or the events run out.

        If the events run out and node is a document, its documentElement
        is set from its element children.
        """
        while 1:
            event = self.getEvent()
            if not event:
                break
            token, cur_node = event
            if cur_node is node:
                return

            if token != END_ELEMENT:
                cur_node.parentNode.childNodes.append(cur_node)
        if node.nodeType == minidom.Node.DOCUMENT_NODE:
            element_type = minidom.Node.ELEMENT_NODE
            for child in node.childNodes:
                if child.nodeType == element_type:
                    node.documentElement = child

    def getEvent(self):
        "Return the next queued event, feeding the parser as needed; None at end of input."
        queue = self.pulldom
        head = queue.firstEvent
        if not head[1]:
            # Queue is empty: new events are appended right after the head.
            queue.lastEvent = head
        while not head[1]:
            chunk = self.stream.read(self.bufsize)
            if not chunk:
                #FIXME: why doesn't Expat close work?
                #self.parser.close()
                return None
            self.parser.feed(chunk)
        pending = head[1]
        head[1] = pending[1]
        return pending[0]
# FIXME: sax2
#def _getParser( ):
 # from xml.sax.saxexts import make_parser
 # expat doesn't report errors properly! Figure it out
 # return make_parser()
 # return make_parser("xml.sax.drivers.drv_xmllib")
 
def _getParser():
    "Return a new ExpatParser instance."
    parser = ExpatParser()
    return parser
default_bufsize = (2 ** 14) - 20

# FIXME: move into sax package for common usage
def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an open stream.

    A plain string argument is opened as a file; anything else is used
    as the stream directly.  A parser is created when none is supplied.
    """
    stream = stream_or_string
    if type(stream_or_string) is type(""):
        stream = open(stream_or_string)
    parser = parser or _getParser()
    return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
    """Return a DOMEventStream that reads its input from string.

    The buffer size is the length of the string.  A parser is created
    when none is supplied.
    """
    try:
        import cStringIO
        stringio = cStringIO.StringIO
    except ImportError:
        import StringIO
        stringio = StringIO.StringIO

    bufsize = len(string)
    # Keep the in-memory stream: it is what the event stream reads from.
    buf = stringio(string)
    if not parser:
        parser = _getParser()
    return DOMEventStream(buf, parser, bufsize)
#FIXME: Use Lars' instead!!!
class SAX_expat:
    "SAX driver for the Pyexpat C module."

    def __init__(self):
        self.parser = pyexpat.ParserCreate()
        self.started = 0
        # No handler yet; reset() re-registers it once one has been set.
        self.doc_handler = None

    def setDocumentHandler(self, handler):
        "Route the expat callbacks to handler and remember it."
        self.parser.StartElementHandler = handler.startElement
        self.parser.EndElementHandler = handler.endElement
        # Prefer "characters"; fall back to "datachars" for handlers
        # that only provide that name.
        if hasattr(handler, "characters"):
            self.parser.CharacterDataHandler = handler.characters
        else:
            self.parser.CharacterDataHandler = handler.datachars
        self.parser.ProcessingInstructionHandler = handler.processingInstruction
        self.doc_handler = handler

    def setErrorHandler(self, handler):
        "Remember the handler that receives fatal parse errors."
        self.err_handler = handler

    # --- Locator methods. Only usable after errors.

    def getLineNumber(self):
        return self.parser.ErrorLineNumber

    def getColumnNumber(self):
        return self.parser.ErrorColumnNumber

    # --- Internal

    def __report_error(self):
        msg = pyexpat.ErrorString(self.parser.ErrorCode)
        self.err_handler.fatalError(msg)

    # --- EXPERIMENTAL PYTHON SAX EXTENSIONS

    def get_parser_name(self):
        return "pyexpat"

    def get_parser_version(self):
        return "Unknown"

    def get_driver_version(self):
        "Return the module-level 'version' if one is defined, else 'Unknown'."
        try:
            return version
        except NameError:
            return "Unknown"

    def is_validating(self):
        return 0

    def is_dtd_reading(self):
        return 0

    def reset(self):
        "Start over with a new expat parser, keeping the document handler."
        self.parser = pyexpat.ParserCreate()
        self.started = 0
        # The driver defines no callback methods of its own, so the
        # callbacks are bound to the registered document handler.
        if self.doc_handler is not None:
            self.setDocumentHandler(self.doc_handler)

    def feed(self, data):
        "Parse a piece of input, announcing the document start on first use."
        if not self.started:
            self.doc_handler.startDocument()
            self.started = 1
        if not self.parser.Parse(data):
            self.__report_error()

    def close(self):
        "Finish parsing, announce the document end and drop the parser."
        if not self.parser.Parse("", 1):
            self.__report_error()
        self.doc_handler.endDocument()
        self.parser = None

AltStyle によって変換されたページ (->オリジナル) /