changeset: 88615:eb7565c212f1 branch: 3.3 parent: 88611:da35a4e724e5 user: Serhiy Storchaka date: Tue Jan 21 22:26:52 2014 +0200 files: Lib/test/test_exceptions.py Lib/test/test_traceback.py Misc/NEWS Python/pythonrun.c description: Issue #2382: SyntaxError cursor "^" is now written at the correct position in most cases when multibyte characters are in the line (before "^"). This still does not work correctly with wide East Asian characters. diff -r da35a4e724e5 -r eb7565c212f1 Lib/test/test_exceptions.py --- a/Lib/test/test_exceptions.py Tue Jan 21 13:49:22 2014 -0600 +++ b/Lib/test/test_exceptions.py Tue Jan 21 22:26:52 2014 +0200 @@ -148,6 +148,19 @@ ckmsg(s, "'continue' not properly in loop") ckmsg("continue\n", "'continue' not properly in loop") + def testSyntaxErrorOffset(self): + def check(src, lineno, offset): + with self.assertRaises(SyntaxError) as cm: + compile(src, '', 'exec') + self.assertEqual(cm.exception.lineno, lineno) + self.assertEqual(cm.exception.offset, offset) + + check('def fact(x):\n\treturn x!\n', 2, 10) + check('1 +\n', 1, 4) + check('def spam():\n print(1)\n print(2)', 3, 10) + check('Python = "Python" +', 1, 20) + check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20) + @cpython_only def testSettingException(self): # test that setting an exception at the C level works even if the diff -r da35a4e724e5 -r eb7565c212f1 Lib/test/test_traceback.py --- a/Lib/test/test_traceback.py Tue Jan 21 13:49:22 2014 -0600 +++ b/Lib/test/test_traceback.py Tue Jan 21 22:26:52 2014 +0200 @@ -32,6 +32,9 @@ def syntax_error_bad_indentation(self): compile("def spam():\n print(1)\n print(2)", "?", "exec") + def syntax_error_with_caret_non_ascii(self): + compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec") + def test_caret(self): err = self.get_exception_format(self.syntax_error_with_caret, SyntaxError) @@ -46,6 +49,12 @@ self.assertTrue(err[2].count('\n') == 1) # and no additional newline self.assertTrue(err[1].find("+") == err[2].find("^")) # in the
right place + err = self.get_exception_format(self.syntax_error_with_caret_non_ascii, + SyntaxError) + self.assertIn("^", err[2]) # third line has caret + self.assertTrue(err[2].count('\n') == 1) # and no additional newline + self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place + def test_nocaret(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) err = traceback.format_exception_only(SyntaxError, exc) diff -r da35a4e724e5 -r eb7565c212f1 Misc/NEWS --- a/Misc/NEWS Tue Jan 21 13:49:22 2014 -0600 +++ b/Misc/NEWS Tue Jan 21 22:26:52 2014 +0200 @@ -10,6 +10,10 @@ Core and Builtins ----------------- +- Issue #2382: SyntaxError cursor "^" is now written at correct position in most + cases when multibyte characters are in line (before "^"). This still does + not work correctly with wide East Asian characters. + - Issue #18960: The first line of Python script could be executed twice when the source encoding was specified on the second line. Now the source encoding declaration on the second line isn't effective if the first line contains diff -r da35a4e724e5 -r eb7565c212f1 Python/pythonrun.c --- a/Python/pythonrun.c Tue Jan 21 13:49:22 2014 -0600 +++ b/Python/pythonrun.c Tue Jan 21 22:26:52 2014 +0200 @@ -2226,6 +2226,7 @@ PyObject *v, *w, *errtype, *errtext; PyObject *msg_obj = NULL; char *msg = NULL; + int offset = err->offset; errtype = PyExc_SyntaxError; switch (err->error) { @@ -2310,11 +2311,20 @@ errtext = Py_None; Py_INCREF(Py_None); } else { - errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), + errtext = PyUnicode_DecodeUTF8(err->text, err->offset, "replace"); + if (errtext != NULL) { + Py_ssize_t len = strlen(err->text); + offset = (int)PyUnicode_GET_LENGTH(errtext); + if (len != err->offset) { + Py_DECREF(errtext); + errtext = PyUnicode_DecodeUTF8(err->text, len, + "replace"); + } + } } v = Py_BuildValue("(OiiN)", err->filename, - err->lineno, err->offset, errtext); + err->lineno, offset, errtext); if (v != NULL) { if 
(msg_obj) w = Py_BuildValue("(OO)", msg_obj, v);