[Python-checkins] bpo-34866: Add max_num_fields to cgi.FieldStorage (GH-9660) (GH-9969)

Victor Stinner webhook-mailer at python.org
Tue Oct 30 17:16:33 EDT 2018


https://github.com/python/cpython/commit/bc6f74a520112d25ef40324e3de4e8187ff2835d
commit: bc6f74a520112d25ef40324e3de4e8187ff2835d
branch: 2.7
author: matthewbelisle-wf <matthew.belisle at workiva.com>
committer: Victor Stinner <vstinner at redhat.com>
date: 2018年10月30日T22:16:26+01:00
summary:
bpo-34866: Add max_num_fields to cgi.FieldStorage (GH-9660) (GH-9969)
Adding `max_num_fields` to `cgi.FieldStorage` to make DOS attacks harder by
limiting the number of `MiniFieldStorage` objects created by `FieldStorage`.
(cherry picked from commit 209144831b0a19715bda3bd72b14a3e6192d9cc1)
files:
A Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst
M Doc/library/cgi.rst
M Doc/library/urlparse.rst
M Lib/cgi.py
M Lib/test/test_cgi.py
M Lib/urlparse.py
diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
index 1bfdb3906784..ecd62c8c0194 100644
--- a/Doc/library/cgi.rst
+++ b/Doc/library/cgi.rst
@@ -292,12 +292,12 @@ algorithms implemented in this module in other circumstances.
 passed to :func:`urlparse.parse_qs` unchanged.
 
 
-.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]])
+.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
 
 This function is deprecated in this module. Use :func:`urlparse.parse_qs`
 instead. It is maintained here only for backward compatibility.
 
-.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]])
+.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
 
 This function is deprecated in this module. Use :func:`urlparse.parse_qsl`
 instead. It is maintained here only for backward compatibility.
diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst
index b933dda3d242..22249da54fbb 100644
--- a/Doc/library/urlparse.rst
+++ b/Doc/library/urlparse.rst
@@ -126,7 +126,7 @@ The :mod:`urlparse` module defines the following functions:
 Added IPv6 URL parsing capabilities.
 
 
-.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]])
+.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
 
 Parse a query string given as a string argument (data of type
 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a
@@ -143,14 +143,20 @@ The :mod:`urlparse` module defines the following functions:
 parsing errors. If false (the default), errors are silently ignored. If true,
 errors raise a :exc:`ValueError` exception.
 
+ The optional argument *max_num_fields* is the maximum number of fields to
+ read. If set, then throws a :exc:`ValueError` if there are more than
+ *max_num_fields* fields read.
+
 Use the :func:`urllib.urlencode` function to convert such dictionaries into
 query strings.
 
 .. versionadded:: 2.6
 Copied from the :mod:`cgi` module.
 
+ .. versionchanged:: 2.7.16
+ Added *max_num_fields* parameter.
 
-.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]])
+.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
 
 Parse a query string given as a string argument (data of type
 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
@@ -166,12 +172,18 @@ The :mod:`urlparse` module defines the following functions:
 parsing errors. If false (the default), errors are silently ignored. If true,
 errors raise a :exc:`ValueError` exception.
 
+ The optional argument *max_num_fields* is the maximum number of fields to
+ read. If set, then throws a :exc:`ValueError` if there are more than
+ *max_num_fields* fields read.
+
 Use the :func:`urllib.urlencode` function to convert such lists of pairs into
 query strings.
 
 .. versionadded:: 2.6
 Copied from the :mod:`cgi` module.
 
+ .. versionchanged:: 2.7.16
+ Added *max_num_fields* parameter.
 
 .. function:: urlunparse(parts)
 
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 7c51b44db1a9..5b903e034773 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -184,11 +184,12 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
 return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
 
 
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
 """Parse a query given as a string argument."""
 warn("cgi.parse_qsl is deprecated, use urlparse.parse_qsl instead",
 PendingDeprecationWarning, 2)
- return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing)
+ return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing,
+ max_num_fields)
 
 def parse_multipart(fp, pdict):
 """Parse multipart input.
@@ -393,7 +394,8 @@ class FieldStorage:
 """
 
 def __init__(self, fp=None, headers=None, outerboundary="",
- environ=os.environ, keep_blank_values=0, strict_parsing=0):
+ environ=os.environ, keep_blank_values=0, strict_parsing=0,
+ max_num_fields=None):
 """Constructor. Read multipart/* until last part.
 
 Arguments, all optional:
@@ -420,10 +422,14 @@ def __init__(self, fp=None, headers=None, outerboundary="",
 If false (the default), errors are silently ignored.
 If true, errors raise a ValueError exception.
 
+ max_num_fields: int. If set, then __init__ throws a ValueError
+ if there are more than n fields read by parse_qsl().
+
 """
 method = 'GET'
 self.keep_blank_values = keep_blank_values
 self.strict_parsing = strict_parsing
+ self.max_num_fields = max_num_fields
 if 'REQUEST_METHOD' in environ:
 method = environ['REQUEST_METHOD'].upper()
 self.qs_on_post = None
@@ -606,10 +612,9 @@ def read_urlencoded(self):
 qs = self.fp.read(self.length)
 if self.qs_on_post:
 qs += '&' + self.qs_on_post
- self.list = list = []
- for key, value in urlparse.parse_qsl(qs, self.keep_blank_values,
- self.strict_parsing):
- list.append(MiniFieldStorage(key, value))
+ query = urlparse.parse_qsl(qs, self.keep_blank_values,
+ self.strict_parsing, self.max_num_fields)
+ self.list = [MiniFieldStorage(key, value) for key, value in query]
 self.skip_lines()
 
 FieldStorageClass = None
@@ -621,19 +626,38 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
 raise ValueError, 'Invalid boundary in multipart form: %r' % (ib,)
 self.list = []
 if self.qs_on_post:
- for key, value in urlparse.parse_qsl(self.qs_on_post,
- self.keep_blank_values, self.strict_parsing):
- self.list.append(MiniFieldStorage(key, value))
+ query = urlparse.parse_qsl(self.qs_on_post,
+ self.keep_blank_values,
+ self.strict_parsing,
+ self.max_num_fields)
+ self.list.extend(MiniFieldStorage(key, value)
+ for key, value in query)
 FieldStorageClass = None
 
+ # Propagate max_num_fields into the sub class appropriately
+ max_num_fields = self.max_num_fields
+ if max_num_fields is not None:
+ max_num_fields -= len(self.list)
+
 klass = self.FieldStorageClass or self.__class__
 part = klass(self.fp, {}, ib,
- environ, keep_blank_values, strict_parsing)
+ environ, keep_blank_values, strict_parsing,
+ max_num_fields)
+
 # Throw first part away
 while not part.done:
 headers = rfc822.Message(self.fp)
 part = klass(self.fp, headers, ib,
- environ, keep_blank_values, strict_parsing)
+ environ, keep_blank_values, strict_parsing,
+ max_num_fields)
+
+ if max_num_fields is not None:
+ max_num_fields -= 1
+ if part.list:
+ max_num_fields -= len(part.list)
+ if max_num_fields < 0:
+ raise ValueError('Max number of fields exceeded')
+
 self.list.append(part)
 self.skip_lines()
 
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index c9cf09525d71..743c2afbd4cd 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -1,3 +1,4 @@
+from io import BytesIO
 from test.test_support import run_unittest, check_warnings
 import cgi
 import os
@@ -316,6 +317,60 @@ def testQSAndUrlEncode(self):
 v = gen_result(data, environ)
 self.assertEqual(self._qs_result, v)
 
+ def test_max_num_fields(self):
+ # For application/x-www-form-urlencoded
+ data = '&'.join(['a=a']*11)
+ environ = {
+ 'CONTENT_LENGTH': str(len(data)),
+ 'CONTENT_TYPE': 'application/x-www-form-urlencoded',
+ 'REQUEST_METHOD': 'POST',
+ }
+
+ with self.assertRaises(ValueError):
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=10,
+ )
+
+ # For multipart/form-data
+ data = """---123
+Content-Disposition: form-data; name="a"
+
+3
+---123
+Content-Type: application/x-www-form-urlencoded
+
+a=4
+---123
+Content-Type: application/x-www-form-urlencoded
+
+a=5
+---123--
+"""
+ environ = {
+ 'CONTENT_LENGTH': str(len(data)),
+ 'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
+ 'QUERY_STRING': 'a=1&a=2',
+ 'REQUEST_METHOD': 'POST',
+ }
+
+ # 2 GET entities
+ # 1 top level POST entities
+ # 1 entity within the second POST entity
+ # 1 entity within the third POST entity
+ with self.assertRaises(ValueError):
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=4,
+ )
+ cgi.FieldStorage(
+ fp=BytesIO(data.encode()),
+ environ=environ,
+ max_num_fields=5,
+ )
+
 def testQSAndFormData(self):
 data = """
 ---123
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 4cd3d6743a96..f7c2b032b097 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -361,7 +361,7 @@ def unquote(s):
 append(item)
 return ''.join(res)
 
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
 """Parse a query given as a string argument.
 
 Arguments:
@@ -378,16 +378,20 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
 strict_parsing: flag indicating what to do with parsing errors.
 If false (the default), errors are silently ignored.
 If true, errors raise a ValueError exception.
+
+ max_num_fields: int. If set, then throws a ValueError if there
+ are more than n fields read by parse_qsl().
 """
 dict = {}
- for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
+ for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
+ max_num_fields):
 if name in dict:
 dict[name].append(value)
 else:
 dict[name] = [value]
 return dict
 
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
 """Parse a query given as a string argument.
 
 Arguments:
@@ -404,8 +408,19 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
 false (the default), errors are silently ignored. If true,
 errors raise a ValueError exception.
 
+ max_num_fields: int. If set, then throws a ValueError if there
+ are more than n fields read by parse_qsl().
+
 Returns a list, as G-d intended.
 """
+ # If max_num_fields is defined then check that the number of fields
+ # is less than max_num_fields. This prevents a memory exhaustion DOS
+ # attack via post bodies with many fields.
+ if max_num_fields is not None:
+ num_fields = 1 + qs.count('&') + qs.count(';')
+ if max_num_fields < num_fields:
+ raise ValueError('Max number of fields exceeded')
+
 pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 r = []
 for name_value in pairs:
diff --git a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst
new file mode 100644
index 000000000000..90c146ce834e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst
@@ -0,0 +1,2 @@
+Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by
+limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /