Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 33.9k
gh-103861: Fix Zip64 extensions not being properly applied in some cases#103863
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Uh oh!
There was an error while loading. Please reload this page.
Merged
Changes from all commits
Commits
Show all changes
6 commits Select commit Hold shift + click to select a range
c42700d Fix Zip64 extensions not being properly applied in some cases
pR0Ps d24c6a4 Fix edge cases in checking if zip64 extensions are required
pR0Ps 9b66476 SQUASHME: Add zip64 default back to FileHeader
pR0Ps b68e70f SQUASHME: Added comments to tests
pR0Ps ee7c78b Restore ZipInfo.FileHeader zip64=None default to maintain backwards c…
gpshead 0276ce9 Merge branch 'main' into bugfix/force-zip64
gpshead File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Jump to file
Failed to load files.
Loading
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1080,6 +1080,159 @@ def test_generated_valid_zip64_extra(self): | ||
| self.assertEqual(zinfo.header_offset, expected_header_offset) | ||
| self.assertEqual(zf.read(zinfo), expected_content) | ||
| def test_force_zip64(self): | ||
| """Test that forcing zip64 extensions correctly notes this in the zip file""" | ||
| # GH-103861 describes an issue where forcing a small file to use zip64 | ||
| # extensions would add a zip64 extra record, but not change the data | ||
| # sizes to 0xFFFFFFFF to indicate to the extractor that the zip64 | ||
| # record should be read. Additionally, it would not set the required | ||
| # version to indicate that zip64 extensions are required to extract it. | ||
| # This test replicates the situation and reads the raw data to specifically ensure: | ||
| # - The required extract version is always >= ZIP64_VERSION | ||
| # - The compressed and uncompressed size in the file headers are both | ||
| # 0xFFFFFFFF (ie. point to zip64 record) | ||
| # - The zip64 record is provided and has the correct sizes in it | ||
| # Other aspects of the zip are checked as well, but verifying the above is the main goal. | ||
| # Because this is hard to verify by parsing the data as a zip, the raw | ||
| # bytes are checked to ensure that they line up with the zip spec. | ||
| # The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT | ||
| # The relevent sections for this test are: | ||
| # - 4.3.7 for local file header | ||
| # - 4.5.3 for zip64 extra field | ||
| data = io.BytesIO() | ||
| with zipfile.ZipFile(data, mode="w", allowZip64=True) as zf: | ||
| with zf.open("text.txt", mode="w", force_zip64=True) as zi: | ||
| zi.write(b"_") | ||
| zipdata = data.getvalue() | ||
| # pull out and check zip information | ||
| ( | ||
| header, vers, os, flags, comp, csize, usize, fn_len, | ||
| ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, cd_sig | ||
| ) = struct.unpack("<4sBBHH8xIIHH8shhQQx4s", zipdata[:63]) | ||
| self.assertEqual(header, b"PK\x03\x04") # local file header | ||
| self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract | ||
gpshead marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading. Please reload this page. | ||
| self.assertEqual(os, 0) # compatible with MS-DOS | ||
| self.assertEqual(flags, 0) # no flags | ||
| self.assertEqual(comp, 0) # compression method = stored | ||
| self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra | ||
| self.assertEqual(usize, 0xFFFFFFFF) | ||
| self.assertEqual(fn_len, 8) # filename len | ||
| self.assertEqual(ex_total_len, 20) # size of extra records | ||
| self.assertEqual(ex_id, 1) # Zip64 extra record | ||
| self.assertEqual(ex_len, 16) # 16 bytes of data | ||
| self.assertEqual(ex_usize, 1) # uncompressed size | ||
| self.assertEqual(ex_csize, 1) # compressed size | ||
| self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next | ||
| z = zipfile.ZipFile(io.BytesIO(zipdata)) | ||
| zinfos = z.infolist() | ||
| self.assertEqual(len(zinfos), 1) | ||
| self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract | ||
| def test_unseekable_zip_unknown_filesize(self): | ||
| """Test that creating a zip with/without seeking will raise a RuntimeError if zip64 was required but not used""" | ||
| def make_zip(fp): | ||
| with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf: | ||
| with zf.open("text.txt", mode="w", force_zip64=False) as zi: | ||
| zi.write(b"_" * (zipfile.ZIP64_LIMIT + 1)) | ||
| self.assertRaises(RuntimeError, make_zip, io.BytesIO()) | ||
| self.assertRaises(RuntimeError, make_zip, Unseekable(io.BytesIO())) | ||
| def test_zip64_required_not_allowed_fail(self): | ||
| """Test that trying to add a large file to a zip that doesn't allow zip64 extensions fails on add""" | ||
| def make_zip(fp): | ||
| with zipfile.ZipFile(fp, mode="w", allowZip64=False) as zf: | ||
| # pretend zipfile.ZipInfo.from_file was used to get the name and filesize | ||
| info = zipfile.ZipInfo("text.txt") | ||
| info.file_size = zipfile.ZIP64_LIMIT + 1 | ||
| zf.open(info, mode="w") | ||
| self.assertRaises(zipfile.LargeZipFile, make_zip, io.BytesIO()) | ||
| self.assertRaises(zipfile.LargeZipFile, make_zip, Unseekable(io.BytesIO())) | ||
| def test_unseekable_zip_known_filesize(self): | ||
| """Test that creating a zip without seeking will use zip64 extensions if the file size is provided up-front""" | ||
| # This test ensures that the zip will use a zip64 data descriptor (same | ||
| # as a regular data descriptor except the sizes are 8 bytes instead of | ||
| # 4) record to communicate the size of a file if the zip is being | ||
| # written to an unseekable stream. | ||
| # Because this sort of thing is hard to verify by parsing the data back | ||
| # in as a zip, this test looks at the raw bytes created to ensure that | ||
| # the correct data has been generated. | ||
| # The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT | ||
| # The relevent sections for this test are: | ||
| # - 4.3.7 for local file header | ||
| # - 4.3.9 for the data descriptor | ||
| # - 4.5.3 for zip64 extra field | ||
| file_size = zipfile.ZIP64_LIMIT + 1 | ||
| def make_zip(fp): | ||
| with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf: | ||
| # pretend zipfile.ZipInfo.from_file was used to get the name and filesize | ||
| info = zipfile.ZipInfo("text.txt") | ||
| info.file_size = file_size | ||
| with zf.open(info, mode="w", force_zip64=False) as zi: | ||
| zi.write(b"_" * file_size) | ||
| return fp | ||
| # check seekable file information | ||
| seekable_data = make_zip(io.BytesIO()).getvalue() | ||
| ( | ||
| header, vers, os, flags, comp, csize, usize, fn_len, | ||
| ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, | ||
| cd_sig | ||
| ) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data[:62 + file_size]) | ||
gpshead marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading. Please reload this page. | ||
| self.assertEqual(header, b"PK\x03\x04") # local file header | ||
| self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract | ||
| self.assertEqual(os, 0) # compatible with MS-DOS | ||
| self.assertEqual(flags, 0) # no flags set | ||
| self.assertEqual(comp, 0) # compression method = stored | ||
| self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra | ||
| self.assertEqual(usize, 0xFFFFFFFF) | ||
| self.assertEqual(fn_len, 8) # filename len | ||
| self.assertEqual(ex_total_len, 20) # size of extra records | ||
| self.assertEqual(ex_id, 1) # Zip64 extra record | ||
| self.assertEqual(ex_len, 16) # 16 bytes of data | ||
| self.assertEqual(ex_usize, file_size) # uncompressed size | ||
| self.assertEqual(ex_csize, file_size) # compressed size | ||
| self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next | ||
| # check unseekable file information | ||
| unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue() | ||
| ( | ||
| header, vers, os, flags, comp, csize, usize, fn_len, | ||
| ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, | ||
| dd_header, dd_usize, dd_csize, cd_sig | ||
| ) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data[:86 + file_size]) | ||
| self.assertEqual(header, b"PK\x03\x04") # local file header | ||
| self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract | ||
| self.assertEqual(os, 0) # compatible with MS-DOS | ||
| self.assertEqual("{:b}".format(flags), "1000") # streaming flag set | ||
| self.assertEqual(comp, 0) # compression method = stored | ||
| self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra | ||
| self.assertEqual(usize, 0xFFFFFFFF) | ||
| self.assertEqual(fn_len, 8) # filename len | ||
| self.assertEqual(ex_total_len, 20) # size of extra records | ||
| self.assertEqual(ex_id, 1) # Zip64 extra record | ||
| self.assertEqual(ex_len, 16) # 16 bytes of data | ||
| self.assertEqual(ex_usize, 0) # uncompressed size - 0 to defer to data descriptor | ||
| self.assertEqual(ex_csize, 0) # compressed size - 0 to defer to data descriptor | ||
| self.assertEqual(dd_header, b"PK\07\x08") # data descriptor | ||
| self.assertEqual(dd_usize, file_size) # file size (8 bytes because zip64) | ||
| self.assertEqual(dd_csize, file_size) # compressed size (8 bytes because zip64) | ||
| self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next | ||
| @requires_zlib() | ||
| class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles, | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
2 changes: 2 additions & 0 deletions 2 Misc/NEWS.d/next/Library/2023-04-25-19-58-13.gh-issue-103861.JeozgD.rst
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| Fix ``zipfile.Zipfile`` creating invalid zip files when ``force_zip64`` was | ||
| used to add files to them. Patch by Carey Metcalfe. |
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.