[Python-checkins] r78123 - in python/branches/py3k: Lib/test/test_xml_etree.py Lib/xml/etree/ElementTree.py Misc/ACKS Misc/NEWS

antoine.pitrou python-checkins at python.org
Tue Feb 9 17:51:16 CET 2010


Author: antoine.pitrou
Date: Tue Feb 9 17:51:16 2010
New Revision: 78123
Log:
Issue #6233: ElementTree failed converting unicode characters to XML
entities when they couldn't be represented in the requested output
encoding. Patch by Jerry Chen.
Modified:
 python/branches/py3k/Lib/test/test_xml_etree.py
 python/branches/py3k/Lib/xml/etree/ElementTree.py
 python/branches/py3k/Misc/ACKS
 python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Lib/test/test_xml_etree.py
==============================================================================
--- python/branches/py3k/Lib/test/test_xml_etree.py	(original)
+++ python/branches/py3k/Lib/test/test_xml_etree.py	Tue Feb 9 17:51:16 2010
@@ -210,6 +210,17 @@
 """
 ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
 
+def check_issue6233():
+ """
+ >>> from xml.etree import ElementTree as ET
+
+ >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
+ >>> ET.tostring(e, 'ascii')
+ b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+ >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
+ >>> ET.tostring(e, 'ascii')
+ b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+ """
 
 #
 # xinclude tests (samples from appendix C of the xinclude specification)
Modified: python/branches/py3k/Lib/xml/etree/ElementTree.py
==============================================================================
--- python/branches/py3k/Lib/xml/etree/ElementTree.py	(original)
+++ python/branches/py3k/Lib/xml/etree/ElementTree.py	Tue Feb 9 17:51:16 2010
@@ -662,9 +662,9 @@
 # write XML to file
 tag = node.tag
 if tag is Comment:
- file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
+ file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
 elif tag is ProcessingInstruction:
- file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
+ file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
 else:
 items = list(node.items())
 xmlns_items = [] # new namespaces in this scope
@@ -696,7 +696,7 @@
 if node.text or len(node):
 file.write(_encode(">", encoding))
 if node.text:
- file.write(_encode(_escape_cdata(node.text), encoding))
+ file.write(_encode_cdata(node.text, encoding))
 for n in node:
 self._write(file, n, encoding, namespaces)
 file.write(_encode("</" + tag + ">", encoding))
@@ -705,7 +705,7 @@
 for k, v in xmlns_items:
 del namespaces[v]
 if node.tail:
- file.write(_encode(_escape_cdata(node.tail), encoding))
+ file.write(_encode_cdata(node.tail, encoding))
 
 # --------------------------------------------------------------------
 # helpers
@@ -788,13 +788,16 @@
 # the following functions assume an ascii-compatible encoding
 # (or "utf-16")
 
-def _escape_cdata(text):
+def _encode_cdata(text, encoding):
 # escape character data
 try:
 text = text.replace("&", "&amp;")
 text = text.replace("<", "&lt;")
 text = text.replace(">", "&gt;")
- return text
+ if encoding:
+ return text.encode(encoding, "xmlcharrefreplace")
+ else:
+ return text
 except (TypeError, AttributeError):
 _raise_serialization_error(text)
 
Modified: python/branches/py3k/Misc/ACKS
==============================================================================
--- python/branches/py3k/Misc/ACKS	(original)
+++ python/branches/py3k/Misc/ACKS	Tue Feb 9 17:51:16 2010
@@ -131,6 +131,7 @@
 Brad Chapman
 David Chaum
 Nicolas Chauvat
+Jerry Chen
 Michael Chermside
 Albert Chin-A-Young
 Adal Chiriliuc
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Tue Feb 9 17:51:16 2010
@@ -242,6 +242,10 @@
 Library
 -------
 
+- Issue #6233: ElementTree failed converting unicode characters to XML
+ entities when they couldn't be represented in the requested output
+ encoding. Patch by Jerry Chen.
+
 - Issue #6003: add an argument to ``zipfile.Zipfile.writestr`` to
 specify the compression type.
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /