[Python-checkins] r85531 - in python/branches/py3k: Doc/howto/webservers.rst Doc/library/cgi.rst Doc/library/html.rst Doc/library/markup.rst Lib/cgi.py Lib/html/__init__.py Lib/http/server.py Lib/lib2to3/tests/test_util.py Lib/test/test_html.py Lib/test/test_xml_etree.py Misc/NEWS

georg.brandl python-checkins at python.org
Fri Oct 15 17:57:45 CEST 2010


Author: georg.brandl
Date: Fri Oct 15 17:57:45 2010
New Revision: 85531
Log:
#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it. It defaults to quote=True and also escapes single quotes, which makes casual use safer. The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.
Added:
 python/branches/py3k/Doc/library/html.rst
 python/branches/py3k/Lib/test/test_html.py (contents, props changed)
Modified:
 python/branches/py3k/Doc/howto/webservers.rst
 python/branches/py3k/Doc/library/cgi.rst
 python/branches/py3k/Doc/library/markup.rst
 python/branches/py3k/Lib/cgi.py
 python/branches/py3k/Lib/html/__init__.py
 python/branches/py3k/Lib/http/server.py
 python/branches/py3k/Lib/lib2to3/tests/test_util.py
 python/branches/py3k/Lib/test/test_xml_etree.py
 python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Doc/howto/webservers.rst
==============================================================================
--- python/branches/py3k/Doc/howto/webservers.rst	(original)
+++ python/branches/py3k/Doc/howto/webservers.rst	Fri Oct 15 17:57:45 2010
@@ -293,7 +293,7 @@
 # -*- coding: UTF-8 -*-
 
 import sys, os
- from cgi import escape
+ from html import escape
 from flup.server.fcgi import WSGIServer
 
 def app(environ, start_response):
Modified: python/branches/py3k/Doc/library/cgi.rst
==============================================================================
--- python/branches/py3k/Doc/library/cgi.rst	(original)
+++ python/branches/py3k/Doc/library/cgi.rst	Fri Oct 15 17:57:45 2010
@@ -328,9 +328,9 @@
 attribute value delimited by double quotes, as in ``<a href="...">``. Note
 that single quotes are never translated.
 
- If the value to be quoted might include single- or double-quote characters,
- or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the
- :mod:`xml.sax.saxutils` module instead.
+ .. deprecated:: 3.2
+ This function is unsafe because *quote* is false by default, and therefore
+ deprecated. Use :func:`html.escape` instead.
 
 
 .. _cgi-security:
@@ -508,8 +508,8 @@
 
 .. rubric:: Footnotes
 
-.. [#] Note that some recent versions of the HTML specification do state what order the
- field values should be supplied in, but knowing whether a request was
- received from a conforming browser, or even from a browser at all, is tedious
- and error-prone.
+.. [#] Note that some recent versions of the HTML specification do state what
+ order the field values should be supplied in, but knowing whether a request
+ was received from a conforming browser, or even from a browser at all, is
+ tedious and error-prone.
 
Added: python/branches/py3k/Doc/library/html.rst
==============================================================================
--- (empty file)
+++ python/branches/py3k/Doc/library/html.rst	Fri Oct 15 17:57:45 2010
@@ -0,0 +1,18 @@
+:mod:`html` --- HyperText Markup Language support
+=================================================
+
+.. module:: html
+ :synopsis: Helpers for manipulating HTML.
+
+.. versionadded:: 3.2
+
+
+This module defines utilities to manipulate HTML.
+
+.. function:: escape(s, quote=True)
+
+ Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
+ sequences. Use this if you need to display text that might contain such
+ characters in HTML. If the optional flag *quote* is true, the characters
+ (``"``) and (``'``) are also translated; this helps for inclusion in an HTML
+ attribute value delimited by quotes, as in ``<a href="...">``.
Modified: python/branches/py3k/Doc/library/markup.rst
==============================================================================
--- python/branches/py3k/Doc/library/markup.rst	(original)
+++ python/branches/py3k/Doc/library/markup.rst	Fri Oct 15 17:57:45 2010
@@ -20,6 +20,7 @@
 
 .. toctree::
 
+ html.rst
 html.parser.rst
 html.entities.rst
 pyexpat.rst
Modified: python/branches/py3k/Lib/cgi.py
==============================================================================
--- python/branches/py3k/Lib/cgi.py	(original)
+++ python/branches/py3k/Lib/cgi.py	Fri Oct 15 17:57:45 2010
@@ -31,13 +31,13 @@
 # Imports
 # =======
 
-from operator import attrgetter
 from io import StringIO
 import sys
 import os
 import urllib.parse
 import email.parser
 from warnings import warn
+import html
 
 __all__ = ["MiniFieldStorage", "FieldStorage",
 "parse", "parse_qs", "parse_qsl", "parse_multipart",
@@ -800,8 +800,8 @@
 list = traceback.format_tb(tb, limit) + \
 traceback.format_exception_only(type, value)
 print("<PRE>%s<B>%s</B></PRE>" % (
- escape("".join(list[:-1])),
- escape(list[-1]),
+ html.escape("".join(list[:-1])),
+ html.escape(list[-1]),
 ))
 del tb
 
@@ -812,7 +812,7 @@
 print("<H3>Shell Environment:</H3>")
 print("<DL>")
 for key in keys:
- print("<DT>", escape(key), "<DD>", escape(environ[key]))
+ print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
 print("</DL>")
 print()
 
@@ -825,10 +825,10 @@
 print("<P>No form fields.")
 print("<DL>")
 for key in keys:
- print("<DT>" + escape(key) + ":", end=' ')
+ print("<DT>" + html.escape(key) + ":", end=' ')
 value = form[key]
- print("<i>" + escape(repr(type(value))) + "</i>")
- print("<DD>" + escape(repr(value)))
+ print("<i>" + html.escape(repr(type(value))) + "</i>")
+ print("<DD>" + html.escape(repr(value)))
 print("</DL>")
 print()
 
@@ -839,9 +839,9 @@
 try:
 pwd = os.getcwd()
 except os.error as msg:
- print("os.error:", escape(str(msg)))
+ print("os.error:", html.escape(str(msg)))
 else:
- print(escape(pwd))
+ print(html.escape(pwd))
 print()
 
 def print_arguments():
@@ -899,9 +899,9 @@
 # =========
 
 def escape(s, quote=None):
- '''Replace special characters "&", "<" and ">" to HTML-safe sequences.
- If the optional flag quote is true, the quotation mark character (")
- is also translated.'''
+ """Deprecated API."""
+ warn("cgi.escape is deprecated, use html.escape instead",
+ PendingDeprecationWarning, stacklevel=2)
 s = s.replace("&", "&amp;") # Must be done first!
 s = s.replace("<", "&lt;")
 s = s.replace(">", "&gt;")
@@ -909,6 +909,7 @@
 s = s.replace('"', "&quot;")
 return s
 
+
 def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
 import re
 return re.match(_vb_pattern, s)
Modified: python/branches/py3k/Lib/html/__init__.py
==============================================================================
--- python/branches/py3k/Lib/html/__init__.py	(original)
+++ python/branches/py3k/Lib/html/__init__.py	Fri Oct 15 17:57:45 2010
@@ -1 +1,20 @@
-# This directory is a Python package.
+"""
+General functions for HTML manipulation.
+"""
+
+
+_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
+_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
+ ord('"'): '&quot;', ord('\''): '&#x27;'}
+
+# NB: this is a candidate for a bytes/string polymorphic interface
+
+def escape(s, quote=True):
+ """
+ Replace special characters "&", "<" and ">" to HTML-safe sequences.
+ If the optional flag quote is true (the default), the quotation mark
+ characters (") and (') are also translated.
+ """
+ if quote:
+ return s.translate(_escape_map_full)
+ return s.translate(_escape_map)
Modified: python/branches/py3k/Lib/http/server.py
==============================================================================
--- python/branches/py3k/Lib/http/server.py	(original)
+++ python/branches/py3k/Lib/http/server.py	Fri Oct 15 17:57:45 2010
@@ -84,7 +84,7 @@
 
 __all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
 
-import cgi
+import html
 import email.message
 import email.parser
 import http.client
@@ -705,7 +705,7 @@
 return None
 list.sort(key=lambda a: a.lower())
 r = []
- displaypath = cgi.escape(urllib.parse.unquote(self.path))
+ displaypath = html.escape(urllib.parse.unquote(self.path))
 r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
 r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
 r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@@ -721,7 +721,7 @@
 displayname = name + "@"
 # Note: a link to a directory displays with @ and links with /
 r.append('<li><a href="%s">%s</a>\n'
- % (urllib.parse.quote(linkname), cgi.escape(displayname)))
+ % (urllib.parse.quote(linkname), html.escape(displayname)))
 r.append("</ul>\n<hr>\n</body>\n</html>\n")
 enc = sys.getfilesystemencoding()
 encoded = ''.join(r).encode(enc)
Modified: python/branches/py3k/Lib/lib2to3/tests/test_util.py
==============================================================================
--- python/branches/py3k/Lib/lib2to3/tests/test_util.py	(original)
+++ python/branches/py3k/Lib/lib2to3/tests/test_util.py	Fri Oct 15 17:57:45 2010
@@ -568,8 +568,8 @@
 
 def test_from_import(self):
 node = parse('bar()')
- fixer_util.touch_import("cgi", "escape", node)
- self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n')
+ fixer_util.touch_import("html", "escape", node)
+ self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
 
 def test_name_import(self):
 node = parse('bar()')
Added: python/branches/py3k/Lib/test/test_html.py
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/test/test_html.py	Fri Oct 15 17:57:45 2010
@@ -0,0 +1,24 @@
+"""
+Tests for the html module functions.
+"""
+
+import html
+import unittest
+from test.support import run_unittest
+
+
+class HtmlTests(unittest.TestCase):
+ def test_escape(self):
+ self.assertEqual(
+ html.escape('\'<script>"&foo;"</script>\''),
+ '&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;')
+ self.assertEqual(
+ html.escape('\'<script>"&foo;"</script>\'', False),
+ '\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\'')
+
+
+def test_main():
+ run_unittest(HtmlTests)
+
+if __name__ == '__main__':
+ test_main()
Modified: python/branches/py3k/Lib/test/test_xml_etree.py
==============================================================================
--- python/branches/py3k/Lib/test/test_xml_etree.py	(original)
+++ python/branches/py3k/Lib/test/test_xml_etree.py	Fri Oct 15 17:57:45 2010
@@ -12,7 +12,7 @@
 # except if the test is specific to the Python implementation.
 
 import sys
-import cgi
+import html
 import unittest
 
 from test import support
@@ -1328,7 +1328,7 @@
 <p>Example.</p>
 <xi:include href="{}"/>
 </document>
-""".format(cgi.escape(SIMPLE_XMLFILE, True))
+""".format(html.escape(SIMPLE_XMLFILE, True))
 
 def xinclude_loader(href, parse="xml", encoding=None):
 try:
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Fri Oct 15 17:57:45 2010
@@ -24,6 +24,9 @@
 Library
 -------
 
+- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
+ characters by default. Deprecate ``cgi.escape()``. 
+
 - Issue 9409: Fix the regex to match all kind of filenames, for interactive
 debugging in doctests.
 


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /