Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 34k
gh-119182: Add PyUnicodeWriter_DecodeUTF8Stateful() #120639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes from all commits
8aa73b7 788a85f e67a8b4 de56475 e48eec7 75fa8ba 3f284f8 1e018d2 6f29c53
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -374,6 +374,119 @@ test_unicodewriter_recover_error(PyObject *self, PyObject *Py_UNUSED(args)) | ||
| } | ||
| /* Exercise PyUnicodeWriter_DecodeUTF8Stateful() with consumed=NULL and the "ignore" and "replace" error handlers; returns None on success, NULL on failure. */ static PyObject * | |
| test_unicodewriter_decode_utf8(PyObject *self, PyObject *Py_UNUSED(args)) | |
| { | |
| // test PyUnicodeWriter_DecodeUTF8Stateful(): every call below passes consumed=NULL and size -1 (presumably "use the NUL-terminated length" -- TODO confirm) | |
| PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); | |
| if (writer == NULL){ | |
| return NULL; | |
| } | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "ign\xFFore", -1, "ignore", NULL) < 0){ /* invalid byte 0xFF in the middle of the input, "ignore" handler */ | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_WriteChar(writer, '-') < 0){ /* '-' separates the decoded chunks in the final string */ | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "replace\xFF", -1, "replace", NULL) < 0){ /* invalid trailing byte 0xFF, "replace" handler */ | |
vstinner marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading. Please reload this page. | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_WriteChar(writer, '-') < 0){ | |
| goto error; | |
| } | |
| // incomplete trailing UTF-8 sequence: lone lead byte 0xC3 with no continuation byte, "replace" handler | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "replace", NULL) < 0){ | |
| goto error; | |
| } | |
| PyObject *result = PyUnicodeWriter_Finish(writer); | |
| if (result == NULL){ /* writer is not discarded on this path; presumably Finish() has already released it -- TODO confirm */ | |
| return NULL; | |
| } | |
| /* expected: the 0xFF handled by "ignore" is dropped; each "replace" error shows up as "\xef\xbf\xbd" (U+FFFD encoded as UTF-8) */ assert(PyUnicode_EqualToUTF8(result, | |
| "ignore-replace\xef\xbf\xbd" | |
| "-incomplete\xef\xbf\xbd")); | |
| Py_DECREF(result); | |
| Py_RETURN_NONE; | |
| error: /* a decode or write call failed: discard the writer and report the failure */ | |
| PyUnicodeWriter_Discard(writer); | |
| return NULL; | |
| } | |
| /* Check the value PyUnicodeWriter_DecodeUTF8Stateful() stores through its consumed argument for valid, invalid and truncated input; returns None on success, NULL on failure. */ static PyObject * | |
| test_unicodewriter_decode_utf8_consumed(PyObject *self, PyObject *Py_UNUSED(args)) | |
| { | |
| // test PyUnicodeWriter_DecodeUTF8Stateful() with a non-NULL consumed pointer | |
| PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); | |
| if (writer == NULL){ | |
| return NULL; | |
| } | |
| Py_ssize_t consumed; /* reset to 12345 before each call; every assert below expects a different value afterwards */ | |
| // valid string: plain ASCII, all 4 bytes are expected to be consumed | |
| consumed = 12345; | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "text", -1, NULL, &consumed) < 0){ | |
vstinner marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading. Please reload this page. | |
| goto error; | |
| } | |
| assert(consumed == 4); | |
| if (PyUnicodeWriter_WriteChar(writer, '-') < 0){ | |
| goto error; | |
| } | |
| // non-ASCII: a 2-byte sequence, '-', and a 3-byte sequence (6 bytes, size passed explicitly) | |
| consumed = 12345; | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "\xC3\xA9-\xE2\x82\xAC", 6, NULL, &consumed) < 0){ | |
| goto error; | |
| } | |
| assert(consumed == 6); | |
| if (PyUnicodeWriter_WriteChar(writer, '-') < 0){ | |
| goto error; | |
| } | |
| // consumed is 0 if write fails (errors=NULL and invalid byte 0xFF: the call is expected to return < 0) | |
| consumed = 12345; | |
| assert(PyUnicodeWriter_DecodeUTF8Stateful(writer, "invalid\xFF", -1, NULL, &consumed) < 0); /* the call lives inside assert(): it only runs when assertions are compiled in */ | |
Member There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This do nothing in non-debug build. MemberAuthor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assertions are always built in _testcapi.c: the NDEBUG macro is undefined early in parts.h. | |
| PyErr_Clear(); /* drop the exception presumably set by the failed decode so the test can continue */ | |
| assert(consumed == 0); | |
| // "ignore" error handler: the skipped invalid byte still counts as consumed (4 valid bytes + 0xFF = 5) | |
| consumed = 12345; | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "more\xFF", -1, "ignore", &consumed) < 0){ | |
vstinner marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading. Please reload this page. | |
| goto error; | |
| } | |
| assert(consumed == 5); | |
| if (PyUnicodeWriter_WriteChar(writer, '-') < 0){ | |
| goto error; | |
| } | |
| // incomplete trailing UTF-8 sequence: the lone lead byte 0xC3 is expected to stay unconsumed (10 of 11 bytes) | |
| consumed = 12345; | |
| if (PyUnicodeWriter_DecodeUTF8Stateful(writer, "incomplete\xC3", -1, "ignore", &consumed) < 0){ | |
| goto error; | |
| } | |
| assert(consumed == 10); | |
| PyObject *result = PyUnicodeWriter_Finish(writer); | |
| if (result == NULL){ /* writer is not discarded on this path; presumably Finish() has already released it -- TODO confirm */ | |
| return NULL; | |
| } | |
| /* expected: nothing from the failed "invalid\xFF" call, and no trace of the ignored 0xFF or the unconsumed 0xC3 */ assert(PyUnicode_EqualToUTF8(result, | |
| "text-\xC3\xA9-\xE2\x82\xAC-" | |
| "more-incomplete")); | |
| Py_DECREF(result); | |
| Py_RETURN_NONE; | |
| error: /* a decode or write call failed: discard the writer and report the failure */ | |
| PyUnicodeWriter_Discard(writer); | |
| return NULL; | |
| } | |
| static PyObject * | ||
| test_unicodewriter_format(PyObject *self, PyObject *Py_UNUSED(args)) | ||
| { | ||
| @@ -436,6 +549,42 @@ test_unicodewriter_format_recover_error(PyObject *self, PyObject *Py_UNUSED(args | ||
| } | ||
| /* Exercise PyUnicodeWriter_WriteWideChar() with an explicit length and with length -1; returns None on success, NULL on failure. */ static PyObject * | |
| test_unicodewriter_widechar(PyObject *self, PyObject *Py_UNUSED(args)) | |
| { | |
| PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); | |
| if (writer == NULL){ | |
| return NULL; | |
| } | |
| if (PyUnicodeWriter_WriteWideChar(writer, L"latin1=\xE9 IGNORED", 8) < 0){ /* length 8 covers only "latin1=" plus U+00E9; the rest of the literal is absent from the expected result */ | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_WriteWideChar(writer, L"-", 1) < 0){ | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_WriteWideChar(writer, L"euro=\u20AC", -1) < 0){ /* length -1: presumably the whole NUL-terminated wide string -- the expected result contains all of it */ | |
Member There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also test surrogate pairs and non-BMP characters. Since the code depends on the kind of the buffer string, you need to test different combinations: write different strings after writing a UCS2 or UCS4 string. I suggest to implement in C a function which creates a PyUnicodeWriter, write the first argument as a Python string, then covert the second argument to the | |
| goto error; | |
| } | |
| if (PyUnicodeWriter_WriteChar(writer, '.') < 0){ /* a plain WriteChar() after the wide-character writes */ | |
| goto error; | |
| } | |
| PyObject *result = PyUnicodeWriter_Finish(writer); | |
| if (result == NULL){ /* writer is not discarded on this path; presumably Finish() has already released it -- TODO confirm */ | |
| return NULL; | |
| } | |
| /* expected result spelled as UTF-8: U+00E9 is "\xC3\xA9", U+20AC is "\xE2\x82\xAC" */ assert(PyUnicode_EqualToUTF8(result, | |
| "latin1=\xC3\xA9-euro=\xE2\x82\xAC.")); | |
| Py_DECREF(result); | |
| Py_RETURN_NONE; | |
| error: /* a write call failed: discard the writer and report the failure */ | |
| PyUnicodeWriter_Discard(writer); | |
| return NULL; | |
| } | |
| static PyMethodDef TestMethods[] ={ | ||
| {"unicode_new", unicode_new, METH_VARARGS}, | ||
| {"unicode_fill", unicode_fill, METH_VARARGS}, | ||
| @@ -448,8 +597,11 @@ static PyMethodDef TestMethods[] ={ | ||
| {"test_unicodewriter_utf8", test_unicodewriter_utf8, METH_NOARGS}, | ||
| {"test_unicodewriter_invalid_utf8", test_unicodewriter_invalid_utf8, METH_NOARGS}, | ||
| {"test_unicodewriter_recover_error", test_unicodewriter_recover_error, METH_NOARGS}, | ||
| {"test_unicodewriter_decode_utf8", test_unicodewriter_decode_utf8, METH_NOARGS}, | ||
| {"test_unicodewriter_decode_utf8_consumed", test_unicodewriter_decode_utf8_consumed, METH_NOARGS}, | ||
| {"test_unicodewriter_format", test_unicodewriter_format, METH_NOARGS}, | ||
| {"test_unicodewriter_format_recover_error", test_unicodewriter_format_recover_error, METH_NOARGS}, | ||
| {"test_unicodewriter_widechar", test_unicodewriter_widechar, METH_NOARGS}, | ||
| {NULL}, | ||
| }; | ||
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.