[Python-checkins] cpython: Issue #28822: Adjust indices handling of PyUnicode_FindChar().

Tue Dec 20 10:03:56 EST 2016

https://hg.python.org/cpython/rev/ce6a6cc3765d
changeset: 105763:ce6a6cc3765d
parent: 105761:31df7d9863f3
user: Xiang Zhang <angwerzx at 126.com>
date: Tue Dec 20 22:52:33 2016 +0800
summary:
 Issue #28822: Adjust indices handling of PyUnicode_FindChar().
files:
 Doc/c-api/unicode.rst | 3 +++
 Lib/test/test_unicode.py | 23 +++++++++++++++++++++++
 Misc/NEWS | 3 +++
 Modules/_testcapimodule.c | 22 ++++++++++++++++++++++
 Objects/unicodeobject.c | 12 ++++--------
 5 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1625,6 +1625,9 @@
 
 .. versionadded:: 3.3
 
+ .. versionchanged:: 3.7
+ *start* and *end* are now adjusted to behave like ``str[start:end]``.
+
 
 .. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \
 Py_ssize_t start, Py_ssize_t end)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2728,6 +2728,29 @@
 self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
 self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
 
+ # Test PyUnicode_FindChar()
+ @support.cpython_only
+ def test_findchar(self):
+ from _testcapi import unicode_findchar
+
+ for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+ for i, ch in enumerate(str):
+ self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
+ self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
+
+ str = "!>_<!"
+ self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
+ self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
+ # start < end
+ self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
+ self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
+ # start >= end
+ self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
+ self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
+ # negative
+ self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
+ self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
+
 # Test PyUnicode_CopyCharacters()
 @support.cpython_only
 def test_copycharacters(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -545,6 +545,9 @@
 C API
 -----
 
+- Issue #28822: The indices parameters *start* and *end* of PyUnicode_FindChar()
+ are now adjusted to behave like ``str[start:end]``.
+
 - Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
 
 - Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef,
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1888,6 +1888,27 @@
 }
 
 static PyObject *
+unicode_findchar(PyObject *self, PyObject *args)
+{
+ PyObject *str;
+ int direction;
+ unsigned int ch;
+ Py_ssize_t result;
+ Py_ssize_t start, end;
+
+ if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
+ &start, &end, &direction)) {
+ return NULL;
+ }
+
+ result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
+ if (result == -2)
+ return NULL;
+ else
+ return PyLong_FromSsize_t(result);
+}
+
+static PyObject *
 unicode_copycharacters(PyObject *self, PyObject *args)
 {
 PyObject *from, *to, *to_copy;
@@ -4121,6 +4142,7 @@
 {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
 {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
 {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
+ {"unicode_findchar", unicode_findchar, METH_VARARGS},
 {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
 {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
 {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9461,16 +9461,12 @@
 int direction)
 {
 int kind;
- Py_ssize_t result;
+ Py_ssize_t len, result;
 if (PyUnicode_READY(str) == -1)
 return -2;
- if (start < 0 || end < 0) {
- PyErr_SetString(PyExc_IndexError, "string index out of range");
- return -2;
- }
- if (end > PyUnicode_GET_LENGTH(str))
- end = PyUnicode_GET_LENGTH(str);
- if (start >= end)
+ len = PyUnicode_GET_LENGTH(str);
+ ADJUST_INDICES(start, end, len);
+ if (end - start < 1)
 return -1;
 kind = PyUnicode_KIND(str);
 result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
-- 
Repository URL: https://hg.python.org/cpython