Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 34k
gh-111495: Add tests for PyCodec_* C API (#123343)
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Uh oh!
There was an error while loading. Please reload this page.
Merged
Changes from all commits
Commits
Show all changes
23 commits Select commit Hold shift + click to select a range
ce7c135 add tests for C API `codecs`
picnixz f9e350a add Python tests for `_codecs`
picnixz 15b6811 fix size bug
picnixz 8048ae1 rename test class
picnixz 8487b46 Revert "fix size bug"
picnixz 2dbe09a Merge branch 'main' into test/c-api-codec-111495
picnixz 0097f2a Disable tests that are known to crash.
picnixz 303b13c address Victor's review
picnixz 4f474dd update tests to reflect user errors
picnixz d49743c Merge remote-tracking branch 'upstream/main' into test/c-api-codec-11…
picnixz 87ee0d2 fix C API codec tests
picnixz 6a36eb0 small hack to make the test suite correct
picnixz 145b285 remove un-necessary imports
picnixz dc9af16 Merge remote-tracking branch 'upstream/main' into test/c-api-codec-11…
picnixz 7be1f55 use `_codecs._unregister_error` to cleanup test state
picnixz f72be5c indicate some semantics for NULL case being tested
picnixz 4d02c6c revert a cosmetic change
picnixz 0f26ca7 Move `PyCodec_NameReplaceErrors` test to the `_testlimitedcapi` module
picnixz 1399779 add comment for why we do not test `_PyCodec_UnregisterError`
picnixz 914151e update a comment
picnixz 8dd7e8d revert one cosmetic change
picnixz 1e6a5ce Fix Windows compilation
picnixz 2ba5f03 address Victor's review
picnixz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Jump to file
Failed to load files.
Loading
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,20 @@ | ||
| import unittest | ||
| import codecs | ||
| import contextlib | ||
| import io | ||
| import re | ||
| import sys | ||
| import unittest | ||
| import unittest.mock as mock | ||
| import _testcapi | ||
| from test.support import import_helper | ||
| _testlimitedcapi = import_helper.import_module('_testlimitedcapi') | ||
| NULL = None | ||
| BAD_ARGUMENT = re.escape('bad argument type for built-in operation') | ||
| class CAPITest(unittest.TestCase): | ||
| class CAPIUnicodeTest(unittest.TestCase): | ||
| # TODO: Test the following functions: | ||
| # | ||
| # PyUnicode_BuildEncodingMap | ||
| @@ -516,5 +523,291 @@ def test_asrawunicodeescapestring(self): | ||
| # CRASHES asrawunicodeescapestring(NULL) | ||
class CAPICodecs(unittest.TestCase):
    # Exercises the codec registry helpers exposed by _testcapi
    # (codec_register, codec_unregister, codec_known_encoding, codec_encode,
    # codec_decode, codec_encoder, codec_decoder, codec_incremental_*,
    # codec_stream_*) against the built-in codecs and a custom "reversing"
    # codec created in setUp().
    #
    # Comments are used instead of docstrings on purpose: unittest prints a
    # test method's docstring in place of its name in verbose mode.

    def setUp(self):
        # Encoding names are normalized internally by converting them
        # to lowercase and their hyphens are replaced by underscores.
        self.encoding_name = 'test.test_capi.test_codecs.codec_reversed'
        # Make sure that our custom codec is not already registered (that
        # way we know whether we correctly unregistered the custom codec
        # after a test or not).
        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
        # create the search function without registering yet
        self._create_custom_codec()

    def _create_custom_codec(self):
        # Build (but do not register) a codec whose encoder and decoder both
        # reverse their input.  Sets ``self.codec_info`` (the CodecInfo) and
        # ``self.search_function`` (a search function returning that
        # CodecInfo for ``self.encoding_name`` and None otherwise).

        def codec_encoder(m, errors='strict'):
            # Slice reversal works for str as well as bytes-like input,
            # whereas ``type(m)().join(reversed(m))`` fails on bytes since
            # iterating bytes yields integers.
            return (m[::-1], len(m))

        def codec_decoder(c, errors='strict'):
            return (c[::-1], len(c))

        class IncrementalEncoder(codecs.IncrementalEncoder):
            def encode(self, input, final=False):
                # Incremental encoders return the encoded object only,
                # not the (output, length) pair of the stateless encoder.
                return codec_encoder(input)[0]

        class IncrementalDecoder(codecs.IncrementalDecoder):
            def decode(self, input, final=False):
                # Same contract as above: return the decoded object only.
                return codec_decoder(input)[0]

        class StreamReader(codecs.StreamReader):
            def encode(self, input, errors='strict'):
                return codec_encoder(input, errors=errors)

            def decode(self, input, errors='strict'):
                return codec_decoder(input, errors=errors)

        class StreamWriter(codecs.StreamWriter):
            def encode(self, input, errors='strict'):
                return codec_encoder(input, errors=errors)

            def decode(self, input, errors='strict'):
                return codec_decoder(input, errors=errors)

        info = codecs.CodecInfo(
            encode=codec_encoder,
            decode=codec_decoder,
            streamreader=StreamReader,
            streamwriter=StreamWriter,
            incrementalencoder=IncrementalEncoder,
            incrementaldecoder=IncrementalDecoder,
            name=self.encoding_name,
        )

        def search_function(encoding):
            if encoding == self.encoding_name:
                return info
            return None

        self.codec_info = info
        self.search_function = search_function

    @contextlib.contextmanager
    def use_custom_encoder(self):
        # Register the custom search function for the duration of the
        # ``with`` block and check that it is gone again afterwards.
        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)
        codecs.register(self.search_function)
        try:
            yield
        finally:
            # Unregister even when the body raised (e.g. a failed
            # assertion); otherwise the codec would leak into the next
            # test and make its setUp() fail for an unrelated reason.
            codecs.unregister(self.search_function)
        self.assertRaises(LookupError, codecs.lookup, self.encoding_name)

    def test_codec_register(self):
        search_function, encoding = self.search_function, self.encoding_name
        # Schedule the cleanup before registering: codecs.unregister() does
        # nothing for an unknown search function, and this way the codec
        # is removed even if an assertion below fails.
        self.addCleanup(codecs.unregister, search_function)
        # register the search function using the C API
        self.assertIsNone(_testcapi.codec_register(search_function))
        self.assertIs(codecs.lookup(encoding), search_function(encoding))
        self.assertEqual(codecs.encode('123', encoding=encoding), '321')
        # unregister the search function using the regular API
        codecs.unregister(search_function)
        self.assertRaises(LookupError, codecs.lookup, encoding)

    def test_codec_unregister(self):
        search_function, encoding = self.search_function, self.encoding_name
        self.assertRaises(LookupError, codecs.lookup, encoding)
        # in case the test fails before unregistering through the C API
        self.addCleanup(codecs.unregister, search_function)
        # register the search function using the regular API
        codecs.register(search_function)
        self.assertIsNotNone(codecs.lookup(encoding))
        # unregister the search function using the C API
        self.assertIsNone(_testcapi.codec_unregister(search_function))
        self.assertRaises(LookupError, codecs.lookup, encoding)

    def test_codec_known_encoding(self):
        self.assertRaises(LookupError, codecs.lookup, 'unknown-codec')
        self.assertFalse(_testcapi.codec_known_encoding('unknown-codec'))
        self.assertFalse(_testcapi.codec_known_encoding('unknown_codec'))
        self.assertFalse(_testcapi.codec_known_encoding('UNKNOWN-codec'))

        encoding_name = self.encoding_name
        self.assertRaises(LookupError, codecs.lookup, encoding_name)

        self.addCleanup(codecs.unregister, self.search_function)
        codecs.register(self.search_function)

        # The name must be recognized regardless of case and of the
        # hyphen/underscore spelling (see the normalization note in setUp).
        for name in [
            encoding_name,
            encoding_name.upper(),
            encoding_name.replace('_', '-'),
        ]:
            with self.subTest(name):
                self.assertTrue(_testcapi.codec_known_encoding(name))

    def test_codec_encode(self):
        encode = _testcapi.codec_encode
        self.assertEqual(encode('a', 'utf-8', NULL), b'a')
        self.assertEqual(encode('a', 'utf-8', 'strict'), b'a')
        self.assertEqual(encode('[é]', 'ascii', 'ignore'), b'[]')

        self.assertRaises(TypeError, encode, NULL, 'ascii', 'strict')
        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
            encode('a', NULL, 'strict')

    def test_codec_decode(self):
        decode = _testcapi.codec_decode

        s = 'a\xa1\u4f60\U0001f600'
        b = s.encode()

        self.assertEqual(decode(b, 'utf-8', 'strict'), s)
        self.assertEqual(decode(b, 'utf-8', NULL), s)
        self.assertEqual(decode(b, 'latin1', 'strict'), b.decode('latin1'))
        self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', 'strict')
        self.assertRaises(UnicodeDecodeError, decode, b, 'ascii', NULL)
        self.assertEqual(decode(b, 'ascii', 'replace'), 'a' + '\ufffd'*9)

        # _codecs.decode() only reports an unknown error handling name when
        # the corresponding error handling function is used; this differs
        # from PyUnicode_Decode() which checks that both the encoding and
        # the error handling name are recognized before even attempting to
        # call the decoder.
        self.assertEqual(decode(b'', 'utf-8', 'unknown-error-handler'), '')
        self.assertEqual(decode(b'a', 'utf-8', 'unknown-error-handler'), 'a')

        self.assertRaises(TypeError, decode, NULL, 'ascii', 'strict')
        with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
            decode(b, NULL, 'strict')

    def test_codec_encoder(self):
        codec_encoder = _testcapi.codec_encoder

        with self.use_custom_encoder():
            encoder = codec_encoder(self.encoding_name)
            self.assertIs(encoder, self.codec_info.encode)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_encoder(NULL)

    def test_codec_decoder(self):
        codec_decoder = _testcapi.codec_decoder

        with self.use_custom_encoder():
            decoder = codec_decoder(self.encoding_name)
            self.assertIs(decoder, self.codec_info.decode)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_decoder(NULL)

    def test_codec_incremental_encoder(self):
        codec_incremental_encoder = _testcapi.codec_incremental_encoder

        with self.use_custom_encoder():
            encoding = self.encoding_name

            for errors in ['strict', NULL]:
                with self.subTest(errors):
                    encoder = codec_incremental_encoder(encoding, errors)
                    self.assertIsInstance(encoder, self.codec_info.incrementalencoder)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_incremental_encoder(NULL, 'strict')

    def test_codec_incremental_decoder(self):
        codec_incremental_decoder = _testcapi.codec_incremental_decoder

        with self.use_custom_encoder():
            encoding = self.encoding_name

            for errors in ['strict', NULL]:
                with self.subTest(errors):
                    decoder = codec_incremental_decoder(encoding, errors)
                    self.assertIsInstance(decoder, self.codec_info.incrementaldecoder)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_incremental_decoder(NULL, 'strict')

    def test_codec_stream_reader(self):
        codec_stream_reader = _testcapi.codec_stream_reader

        with self.use_custom_encoder():
            encoding, stream = self.encoding_name, io.StringIO()
            for errors in ['strict', NULL]:
                with self.subTest(errors):
                    reader = codec_stream_reader(encoding, stream, errors)
                    self.assertIsInstance(reader, self.codec_info.streamreader)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_stream_reader(NULL, stream, 'strict')

    def test_codec_stream_writer(self):
        codec_stream_writer = _testcapi.codec_stream_writer

        with self.use_custom_encoder():
            encoding, stream = self.encoding_name, io.StringIO()
            for errors in ['strict', NULL]:
                with self.subTest(errors):
                    writer = codec_stream_writer(encoding, stream, errors)
                    self.assertIsInstance(writer, self.codec_info.streamwriter)

            with self.assertRaisesRegex(TypeError, BAD_ARGUMENT):
                codec_stream_writer(NULL, stream, 'strict')
class CAPICodecErrors(unittest.TestCase):
    # Checks the error-handler helpers exposed by _testcapi and
    # _testlimitedcapi: registering a handler, looking handlers up by
    # name, and calling the built-in handlers directly.

    def test_codec_register_error(self):
        # Only needed to undo the registration once the test is over.
        from _codecs import _unregister_error as unregister_error

        with self.assertRaises(LookupError):
            _testcapi.codec_lookup_error('custom')

        def reraise(exc):
            raise exc

        # Wrap the handler in a mock so that calls to it can be counted.
        spy = mock.Mock(wraps=reraise)
        _testcapi.codec_register_error('custom', spy)
        self.addCleanup(unregister_error, 'custom')

        # Encoding side: the handler is consulted exactly once.
        with self.assertRaises(UnicodeEncodeError):
            codecs.encode('\xff', 'ascii', errors='custom')
        spy.assert_called_once()
        spy.reset_mock()

        # Decoding side: same expectation after resetting the counter.
        with self.assertRaises(UnicodeDecodeError):
            codecs.decode(b'\xff', 'ascii', errors='custom')
        spy.assert_called_once()

    # _codecs._unregister_error directly delegates to the internal C
    # function so a Python-level function test is sufficient (it is
    # tested in test_codeccallbacks).

    def test_codec_lookup_error(self):
        lookup = _testcapi.codec_lookup_error

        # (name passed to the lookup, handler object expected back);
        # NULL is expected to resolve to the same handler as 'strict'.
        expected_handlers = [
            (NULL, codecs.strict_errors),
            ('strict', codecs.strict_errors),
            ('ignore', codecs.ignore_errors),
            ('replace', codecs.replace_errors),
            ('xmlcharrefreplace', codecs.xmlcharrefreplace_errors),
            ('namereplace', codecs.namereplace_errors),
        ]
        for handler_name, expected in expected_handlers:
            self.assertIs(lookup(handler_name), expected)

        with self.assertRaises(LookupError):
            lookup('unknown')

    def test_codec_error_handlers(self):
        samples = [
            # The case below is disabled because it is known to crash,
            # see https://github.com/python/cpython/issues/123378
            # UnicodeEncodeError('bad', '', 0, 1, 'reason'),
            UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
            UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
            UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
        ]

        # The strict handler re-raises whatever it is given.
        strict = _testcapi.codec_strict_errors
        for exc in samples:
            with self.subTest(handler=strict, exc=exc):
                with self.assertRaises(UnicodeEncodeError):
                    strict(exc)

        # Every other handler is expected to return a tuple.
        tuple_returning_handlers = (
            _testcapi.codec_ignore_errors,
            _testcapi.codec_replace_errors,
            _testcapi.codec_xmlcharrefreplace_errors,
            _testlimitedcapi.codec_namereplace_errors,
        )
        for handler in tuple_returning_handlers:
            for exc in samples:
                with self.subTest(handler=handler, exc=exc):
                    outcome = handler(exc)
                    self.assertIsInstance(outcome, tuple)
# Run the test suite when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Oops, something went wrong.
Uh oh!
There was an error while loading. Please reload this page.
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.