[Python-checkins] gh-94808: Cover `PyUnicode_Count` in CAPI (#96929)
encukou
webhook-mailer at python.org
Thu Oct 6 11:20:33 EDT 2022
https://github.com/python/cpython/commit/e63d7dae90d15957303688285daeebc2e931e04b
commit: e63d7dae90d15957303688285daeebc2e931e04b
branch: main
author: Nikita Sobolev <mail at sobolevn.me>
committer: encukou <encukou at gmail.com>
date: 2022-10-06T17:20:22+02:00
summary:
gh-94808: Cover `PyUnicode_Count` in CAPI (#96929)
files:
M Lib/test/test_unicode.py
M Modules/_testcapi/unicode.c
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 63bccb72e046..30faaaf83bec 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -2945,6 +2945,44 @@ def test_asutf8andsize(self):
self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
+ # Test PyUnicode_Count() through the _testcapi.unicode_count wrapper
+ @support.cpython_only
+ @unittest.skipIf(_testcapi is None, 'need _testcapi module')
+ def test_count(self):
+ from _testcapi import unicode_count
+ # whole string: start=0, end=len(st)
+ st = 'abcabd'
+ self.assertEqual(unicode_count(st, 'a', 0, len(st)), 2)
+ self.assertEqual(unicode_count(st, 'ab', 0, len(st)), 2)
+ self.assertEqual(unicode_count(st, 'abc', 0, len(st)), 1)
+ self.assertEqual(unicode_count(st, 'а', 0, len(st)), 0) # cyrillic "a": looks like ASCII "a" but must not match
+ # start < end: only [start, end) is searched; an end past len(st) is expected to act like len(st)
+ self.assertEqual(unicode_count(st, 'a', 3, len(st)), 1)
+ self.assertEqual(unicode_count(st, 'a', 4, len(st)), 0)
+ self.assertEqual(unicode_count(st, 'a', 0, sys.maxsize), 2)
+ # start >= end: empty range, so the expected count is 0
+ self.assertEqual(unicode_count(st, 'abc', 0, 0), 0)
+ self.assertEqual(unicode_count(st, 'a', 3, 2), 0)
+ self.assertEqual(unicode_count(st, 'a', sys.maxsize, 5), 0)
+ # negative start/end: expected counts match the slice st[start:end]
+ self.assertEqual(unicode_count(st, 'ab', -len(st), -1), 2)
+ self.assertEqual(unicode_count(st, 'a', -len(st), -3), 1)
+ # wrong args: missing start/end, or a non-str str/substr, must raise TypeError
+ self.assertRaises(TypeError, unicode_count, 'a', 'a')
+ self.assertRaises(TypeError, unicode_count, 'a', 'a', 1)
+ self.assertRaises(TypeError, unicode_count, 1, 'a', 0, 1)
+ self.assertRaises(TypeError, unicode_count, 'a', 1, 0, 1)
+ # empty string: an empty substr is expected to match once per position in range, end included
+ self.assertEqual(unicode_count('abc', '', 0, 3), 4)
+ self.assertEqual(unicode_count('abc', '', 1, 3), 3)
+ self.assertEqual(unicode_count('', '', 0, 1), 1)
+ self.assertEqual(unicode_count('', 'a', 0, 1), 0)
+ # different unicode kinds: Latin-1, BMP, lone-surrogate and non-BMP characters; each occurs once in its own string and never in the ASCII st
+ for uni in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
+ for ch in uni:
+ self.assertEqual(unicode_count(uni, ch, 0, len(uni)), 1)
+ self.assertEqual(unicode_count(st, ch, 0, len(st)), 0)
+
# Test PyUnicode_FindChar()
@support.cpython_only
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c
index d0f1e2abdc82..d5c4a9e5b95e 100644
--- a/Modules/_testcapi/unicode.c
+++ b/Modules/_testcapi/unicode.c
@@ -223,6 +223,26 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, utf8_len);
}
+static PyObject *
+unicode_count(PyObject *self, PyObject *args)
+{ /* Test wrapper: exposes PyUnicode_Count(str, substr, start, end) to Python code. */
+ PyObject *str;
+ PyObject *substr;
+ Py_ssize_t result;
+ Py_ssize_t start, end;
+ /* Expect exactly four arguments: two str objects ("UU") and two Py_ssize_t bounds ("nn"). */
+ if (!PyArg_ParseTuple(args, "UUnn:unicode_count", &str, &substr,
+ &start, &end)) {
+ return NULL;
+ }
+ /* A result of -1 is treated as the error return; NULL propagates the pending exception. */
+ result = PyUnicode_Count(str, substr, start, end);
+ if (result == -1)
+ return NULL;
+ else
+ return PyLong_FromSsize_t(result);
+}
+
static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
@@ -696,6 +716,7 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
+ {"unicode_count", unicode_count, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{NULL},
More information about the Python-checkins
mailing list