Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 33.9k
Closed
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs) · type-feature (A feature request or enhancement)
Description
Feature or enhancement
Right now `PyUnicode_Count` (cpython/Objects/unicodeobject.c, lines 8968 to 9040 at commit cbdeda8):
```c
Py_ssize_t
PyUnicode_Count(PyObject *str,
                PyObject *substr,
                Py_ssize_t start,
                Py_ssize_t end)
{
    Py_ssize_t result;
    int kind1, kind2;
    const void *buf1 = NULL, *buf2 = NULL;
    Py_ssize_t len1, len2;

    if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
        return -1;

    kind1 = PyUnicode_KIND(str);
    kind2 = PyUnicode_KIND(substr);
    if (kind1 < kind2)
        return 0;

    len1 = PyUnicode_GET_LENGTH(str);
    len2 = PyUnicode_GET_LENGTH(substr);
    ADJUST_INDICES(start, end, len1);
    if (end - start < len2)
        return 0;

    buf1 = PyUnicode_DATA(str);
    buf2 = PyUnicode_DATA(substr);
    if (kind2 != kind1) {
        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            goto onError;
    }

    switch (kind1) {
    case PyUnicode_1BYTE_KIND:
        if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr))
            result = asciilib_count(
                ((const Py_UCS1 *)buf1) + start, end - start,
                buf2, len2, PY_SSIZE_T_MAX
                );
        else
            result = ucs1lib_count(
                ((const Py_UCS1 *)buf1) + start, end - start,
                buf2, len2, PY_SSIZE_T_MAX
                );
        break;
    case PyUnicode_2BYTE_KIND:
        result = ucs2lib_count(
            ((const Py_UCS2 *)buf1) + start, end - start,
            buf2, len2, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_4BYTE_KIND:
        result = ucs4lib_count(
            ((const Py_UCS4 *)buf1) + start, end - start,
            buf2, len2, PY_SSIZE_T_MAX
            );
        break;
    default:
        Py_UNREACHABLE();
    }

    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
    if (kind2 != kind1)
        PyMem_Free((void *)buf2);
    return result;

  onError:
    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
    if (kind2 != kind1)
        PyMem_Free((void *)buf2);
    return -1;
}
```
`unicode_count` (cpython/Objects/unicodeobject.c, lines 10854 to 10916 at commit cbdeda8):
```c
static PyObject *
unicode_count(PyObject *self, PyObject *args)
{
    PyObject *substring = NULL;   /* initialize to fix a compiler warning */
    Py_ssize_t start = 0;
    Py_ssize_t end = PY_SSIZE_T_MAX;
    PyObject *result;
    int kind1, kind2;
    const void *buf1, *buf2;
    Py_ssize_t len1, len2, iresult;

    if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
        return NULL;

    kind1 = PyUnicode_KIND(self);
    kind2 = PyUnicode_KIND(substring);
    if (kind1 < kind2)
        return PyLong_FromLong(0);

    len1 = PyUnicode_GET_LENGTH(self);
    len2 = PyUnicode_GET_LENGTH(substring);
    ADJUST_INDICES(start, end, len1);
    if (end - start < len2)
        return PyLong_FromLong(0);

    buf1 = PyUnicode_DATA(self);
    buf2 = PyUnicode_DATA(substring);
    if (kind2 != kind1) {
        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }

    switch (kind1) {
    case PyUnicode_1BYTE_KIND:
        iresult = ucs1lib_count(
            ((const Py_UCS1 *)buf1) + start, end - start,
            buf2, len2, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_2BYTE_KIND:
        iresult = ucs2lib_count(
            ((const Py_UCS2 *)buf1) + start, end - start,
            buf2, len2, PY_SSIZE_T_MAX
            );
        break;
    case PyUnicode_4BYTE_KIND:
        iresult = ucs4lib_count(
            ((const Py_UCS4 *)buf1) + start, end - start,
            buf2, len2, PY_SSIZE_T_MAX
            );
        break;
    default:
        Py_UNREACHABLE();
    }

    result = PyLong_FromSsize_t(iresult);

    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
    if (kind2 != kind1)
        PyMem_Free((void *)buf2);
    return result;
}
```
They can be unified, because they do the same thing.
Pitch
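Apparently `unicode_count` missed an optimization in 2011: the `asciilib_count` fast path that `PyUnicode_Count` takes when both strings are ASCII. Otherwise the two functions are equivalent, apart from argument parsing and conversion of the return value. Merging them could add the optimization to `unicode_count`.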
If you want to work on that, note that there's also anylib_count that duplicates the main switch.
Previous discussion
Link: #96929
PR in the works.
Metadata
Metadata
Assignees
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs) · type-feature (A feature request or enhancement)