Unify `PyUncode_Count` and `unicode_count`

# Feature or enhancement

Right now `PyUnicode_Count` from https://github.com/python/cpython/blob/cbdeda8ce7a3543cb3376d70e4cd46fcf24f42a7/Objects/unicodeobject.c#L8968-L9040 and `unicode_count` from https://github.com/python/cpython/blob/cbdeda8ce7a3543cb3376d70e4cd46fcf24f42a7/Objects/unicodeobject.c#L10854-L10916 share a lot of code.

They can be unified, because the do the same thing.

# Pitch

[Citing](https://github.com/python/cpython/pull/96929#issuecomment-1270262538) @encukou:

> Apparently unicode_count missed [an optimization in 2011](https://github.com/python/cpython/commit/c3cec7868bf1019c0987f1e9aadb56d73fa93d61), otherwise they're equivalent (except arg parsing & converting the return value). Merging them could add the optimization to unicode_count.
If you want to work on that, note that there's also anylib_count that duplicates the main switch.

# Previous discussion



Link: https://github.com/python/cpython/pull/96929



PR in the works.

	Py_ssize_t
	PyUnicode_Count(PyObject*str,
	PyObject*substr,
	Py_ssize_tstart,
	Py_ssize_tend)
	{
	Py_ssize_tresult;
	intkind1, kind2;
	constvoidbuf1=NULL, buf2=NULL;
	Py_ssize_tlen1, len2;

	if (ensure_unicode(str) <0\|\|ensure_unicode(substr) <0)
	return-1;

	kind1=PyUnicode_KIND(str);
	kind2=PyUnicode_KIND(substr);
	if (kind1<kind2)
	return0;

	len1=PyUnicode_GET_LENGTH(str);
	len2=PyUnicode_GET_LENGTH(substr);
	ADJUST_INDICES(start, end, len1);
	if (end-start<len2)
	return0;

	buf1=PyUnicode_DATA(str);
	buf2=PyUnicode_DATA(substr);
	if (kind2!=kind1){
	buf2=unicode_askind(kind2, buf2, len2, kind1);
	if (!buf2)
	goto onError;
	}

	switch (kind1){
	casePyUnicode_1BYTE_KIND:
	if (PyUnicode_IS_ASCII(str) &&PyUnicode_IS_ASCII(substr))
	result=asciilib_count(
	((constPy_UCS1*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	else
	result=ucs1lib_count(
	((constPy_UCS1*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_2BYTE_KIND:
	result=ucs2lib_count(
	((constPy_UCS2*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_4BYTE_KIND:
	result=ucs4lib_count(
	((constPy_UCS4*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	default:
	Py_UNREACHABLE();
	}

	assert((kind2!=kind1) == (buf2!=PyUnicode_DATA(substr)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);

	returnresult;
	onError:
	assert((kind2!=kind1) == (buf2!=PyUnicode_DATA(substr)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);
	return-1;
	}

	staticPyObject*
	unicode_count(PyObjectself, PyObjectargs)
	{
	PyObjectsubstring=NULL; / initialize to fix a compiler warning */
	Py_ssize_tstart=0;
	Py_ssize_tend=PY_SSIZE_T_MAX;
	PyObject*result;
	intkind1, kind2;
	constvoidbuf1, buf2;
	Py_ssize_tlen1, len2, iresult;

	if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
	returnNULL;

	kind1=PyUnicode_KIND(self);
	kind2=PyUnicode_KIND(substring);
	if (kind1<kind2)
	returnPyLong_FromLong(0);

	len1=PyUnicode_GET_LENGTH(self);
	len2=PyUnicode_GET_LENGTH(substring);
	ADJUST_INDICES(start, end, len1);
	if (end-start<len2)
	returnPyLong_FromLong(0);

	buf1=PyUnicode_DATA(self);
	buf2=PyUnicode_DATA(substring);
	if (kind2!=kind1){
	buf2=unicode_askind(kind2, buf2, len2, kind1);
	if (!buf2)
	returnNULL;
	}
	switch (kind1){
	casePyUnicode_1BYTE_KIND:
	iresult=ucs1lib_count(
	((constPy_UCS1*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_2BYTE_KIND:
	iresult=ucs2lib_count(
	((constPy_UCS2*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	casePyUnicode_4BYTE_KIND:
	iresult=ucs4lib_count(
	((constPy_UCS4*)buf1) +start, end-start,
	buf2, len2, PY_SSIZE_T_MAX
	);
	break;
	default:
	Py_UNREACHABLE();
	}

	result=PyLong_FromSsize_t(iresult);

	assert((kind2==kind1) == (buf2==PyUnicode_DATA(substring)));
	if (kind2!=kind1)
	PyMem_Free((void*)buf2);

	returnresult;
	}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Unify `PyUncode_Count` and `unicode_count`#97982

Feature or enhancement

Pitch

Previous discussion

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Uh oh!

Unify PyUncode_Count and unicode_count#97982

Description

Feature or enhancement

Pitch

Previous discussion

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions

Unify `PyUncode_Count` and `unicode_count`#97982