# Runnable reconstruction of a whitespace-collapsed unified diff that touched
# Lib/gettext.py (new ``_PluralAST`` checker, rewritten ``c2py``) and
# Lib/test/test_gettext.py (new ``GettextPluralTest``).  Only the post-patch
# state of the definitions that are fully visible in the diff is kept;
# truncated context (``__all__``, ``_expand_lang``) is not reproduced.

import ast
import copy
import io
import locale
import os
import re
import struct
import sys
import unittest
from errno import ENOENT


# ---------------------------------------------------------------------------
# Lib/gettext.py
# ---------------------------------------------------------------------------

_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')

# Since Python 3.8 the parser emits ast.Constant for every literal; older
# interpreters emit ast.Num, which has been deprecated ever since.  Use
# whichever node type this interpreter actually produces so that ast.Num is
# never touched on modern versions.
if sys.version_info >= (3, 8):
    _NUMBER_NODE = ast.Constant
else:
    _NUMBER_NODE = ast.Num


class _PluralAST(ast.NodeVisitor):
    """AST checker for plural forms, based on gettext-runtime/intl/plural.y.

    allowed:
      ternary op: if else (C: ? :)
      unary ops: not (C: !)
      logical ops: and, or (C: &&, ||)
      compare ops: ==, !=, <, <=, >, >=
      operators: +, -, %
      names: limited to variable 'n'
      numbers: limited to integers
      other: parentheses, bool check

    forbidden:
      unary ops: +, -, ~
      operators: **, <<, >>, &, |, ^
      everything else: strings, loops, function calls, attribute access...

    deviations from plural.y:
      '*' and '/' are not allowed as they are not used in real-life plurals
      no more than ``max_visits`` nodes are allowed

    Every violation is reported as ValueError; a SyntaxError raised by
    ast.parse() is propagated unchanged to the caller of check().
    """

    allowed_classes = (
        # name, number
        ast.Name, _NUMBER_NODE,
        # ops
        ast.BinOp, ast.UnaryOp, ast.BoolOp,
        # cmp, if else, and, or
        ast.Compare, ast.IfExp, ast.And, ast.Or,
        # ==, !=, >, >=, <, <=
        ast.Eq, ast.NotEq, ast.Gt, ast.GtE, ast.Lt, ast.LtE,
        # %, +, -
        ast.Mod, ast.Add, ast.Sub,
    )

    # Safeguard against deeply nested rules.  According to the original
    # comment the most complex known example needs 53 visits.
    max_visits = 200

    def __init__(self, plural):
        self.plural = plural
        self.visits = 0

    def check(self):
        """Parse ``self.plural`` and validate every node of the expression.

        Raises SyntaxError if the text is not a Python expression and
        ValueError if it uses anything outside the whitelist.
        """
        self.visits = 0  # make repeated check() calls independent
        expr = ast.parse(self.plural, mode='eval')
        # Start below the ast.Expression wrapper, which is deliberately not
        # part of allowed_classes.
        return self.generic_visit(expr)

    def visit(self, node):
        if not isinstance(node, self.allowed_classes):
            raise ValueError('%r not allowed in plural form at %s.\n%s' %
                             (type(node).__name__,
                              getattr(node, 'col_offset', '?'),
                              self.plural))
        self.visits += 1
        if self.visits > self.max_visits:
            raise ValueError('Expression %r is too complex' % self.plural)
        return super().visit(node)

    def visit_Name(self, node):
        # Returning without generic_visit() keeps the Load context node from
        # being checked against the whitelist.
        if node.id != 'n':
            raise ValueError('Variable name %r not allowed' % node.id)
        return node

    def _check_integer(self, node, value):
        # bool is a subclass of int, so True/False must be excluded
        # explicitly; str, float, None... fail the isinstance() test.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ValueError('Only integers are allowed, got %r at offset %i'
                             % (value, node.col_offset))
        return node

    def visit_Constant(self, node):
        return self._check_integer(node, node.value)

    def visit_Num(self, node):
        # Only reached on interpreters whose parser still emits ast.Num.
        return self._check_integer(node, node.n)

    def visit_UnaryOp(self, node):
        # filter UAdd, USub, Invert
        if not isinstance(node.op, ast.Not):
            raise ValueError('unary op %r is not supported' %
                             type(node.op).__name__)
        # The operand MUST be validated as well, otherwise anything placed
        # behind a "!" (e.g. a function call) would bypass the whitelist and
        # reach eval().  The ast.Not operator node itself is not visited
        # because it is not part of allowed_classes.
        self.visit(node.operand)
        return node


# A "0" that starts a number and is followed by more digits or a base prefix
# (either case): C octal such as 0123, or 0b.. / 0o.. / 0x.. literals.
_RE_NON_DECIMAL = re.compile(r'(^|[^\d])0[\dbBoOxX]+')
# "!" that is not part of "!="
_RE_NOT = re.compile(r'!([^=])')
# "a?b:c"
_RE_TERNARY = re.compile(r'(.*?)\?(.*?):(.*)')


def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python lambda function that implements an equivalent expression.

    The returned function takes the number ``n`` and returns the result of
    the expression converted with int().

    Raises ValueError if the expression contains non-decimal numbers,
    unbalanced parentheses, invalid syntax, or any construct that the
    _PluralAST checker does not allow.
    """
    # standard doesn't allow binary, octal or hexadecimal representation
    if _RE_NON_DECIMAL.search(plural):
        raise ValueError('Numbers in binary, octal or hex representation '
                         'are not allowed.')

    # Replace some C operators by their Python equivalents
    plural = plural.replace('&&', ' and ')
    plural = plural.replace('||', ' or ')
    # "!n" to "not n"
    # NOTE(review): C's "!" binds tighter than comparisons while Python's
    # "not" binds looser, so e.g. "!n == 0" is not translated faithfully.
    plural = _RE_NOT.sub(' not \\1', plural)

    # Replacement function used to transform "a?b:c" to "b if a else c";
    # it recurses into the else-branch to handle chained ternaries.
    def repl(x):
        return "(%s if %s else %s)" % (x.group(2), x.group(1),
                                       _RE_TERNARY.sub(repl, x.group(3)))

    # Code to transform the plural expression, taking care of parentheses:
    # each "(" opens a new buffer, each ")" converts the finished buffer and
    # appends it, re-wrapped in parentheses, to the enclosing one.
    stack = ['']
    for c in plural:
        if c == '(':
            stack.append('')
        elif c == ')':
            if len(stack) == 1:
                raise ValueError('unbalanced parenthesis in plural form')
            s = _RE_TERNARY.sub(repl, stack.pop())
            stack[-1] += '(%s)' % s
        else:
            stack[-1] += c
    # More than one buffer left means a "(" was never closed.  Without this
    # check the outer buffers would be dropped silently.
    if len(stack) != 1:
        raise ValueError('unbalanced parenthesis in plural form')
    plural = _RE_TERNARY.sub(repl, stack.pop()).strip()

    try:
        _PluralAST(plural).check()
    except SyntaxError as e:
        raise ValueError('invalid plural form expression %r' % plural) from e
    # eval() is only reached after the whitelist check above has accepted
    # every node of the expression.
    return eval('lambda n: int(%s)' % plural)


# ---------------------------------------------------------------------------
# Lib/test/test_gettext.py
# ---------------------------------------------------------------------------

class GettextPluralTest(unittest.TestCase):
    # Examples from http://www.gnu.org/software/gettext/manual/gettext.html
    plural_formulas = [
        '1',
        '!n',
        'n>1',
        'n!=1',
        'n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2',
        'n==1 ? 0 : n==2 ? 1 : 2',
        'n==1 ? 0 : (n==0 || (n%100> 0 && n%100 < 20)) ? 1 : 2',
        'n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2',
        'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2',
        '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2',
        'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2',
        'n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3',
        # Not from the manual: "+" is an allowed operator, so this rule is
        # valid (it used to sit in invalid_rules, hidden by missing commas).
        'n+1',
    ]

    invalid_rules = [
        'x>1',
        '(n>1',
        'n>1)',
        '42**42**42',
        'int(n)',
        'import os',
        'os.chmod("/etc/passwd", 0o777)',
        '"egg"',
        # the operand of "!" has to be checked, too
        '!int(n)',
    ]
    extra_invalid_rules = [
        '0123',
        '0xa',
        '0XA',
        'n>0x1',
        '1.0',
        '+n',
        '-n',
        '2*3',
    ]

    def test_plural_formula(self):
        for pf in self.plural_formulas:
            f = c2py(pf)
            for i in range(100):
                f(i)
        for pf in self.invalid_rules + self.extra_invalid_rules:
            with self.subTest(rule=pf):
                self.assertRaises(ValueError, c2py, pf)

        f = c2py('0')
        self.assertEqual(f(0), 0)
        self.assertEqual(f(1), 0)
        self.assertEqual(f(2), 0)

        f = c2py('n>1')
        self.assertEqual(f(0), 0)
        self.assertEqual(f(1), 0)
        self.assertEqual(f(2), 1)

        f = c2py('n!=1')
        self.assertEqual(f(0), 1)
        self.assertEqual(f(1), 0)
        self.assertEqual(f(2), 1)

        f = c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2')
        self.assertEqual(f(0), 2)
        self.assertEqual(f(1), 0)
        self.assertEqual(f(2), 1)
        self.assertEqual(f(10), 1)
        self.assertEqual(f(11), 1)
        self.assertEqual(f(101), 0)
        self.assertEqual(f(111), 1)
        self.assertEqual(f(121), 0)


if __name__ == '__main__':
    unittest.main()

AltStyle によって変換されたページ (->オリジナル) /