Skip to content

Commit 5136ac0

Browse files
committed
Issue #13645: pyc files now contain the size of the corresponding source
code, to avoid timestamp collisions (especially on filesystems with a low timestamp resolution) when checking for freshness of the bytecode.
1 parent 1f918c1 commit 5136ac0

File tree

14 files changed

+166
-48
lines changed

14 files changed

+166
-48
lines changed

‎Doc/library/importlib.rst‎

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,30 @@ are also provided to help in implementing the core ABCs.
239239
optimization to speed up loading by removing the parsing step of Python's
240240
compiler, and so no bytecode-specific API is exposed.
241241

242+
.. method:: path_stats(self, path)
243+
244+
Optional abstract method which returns a :class:`dict` containing
245+
metadata about the specifed path. Supported dictionary keys are:
246+
247+
- ``'mtime'`` (mandatory): an integer or floating-point number
248+
representing the modification time of the source code;
249+
- ``'size'`` (optional): the size in bytes of the source code.
250+
251+
Any other keys in the dictionary are ignored, to allow for future
252+
extensions.
253+
254+
.. versionadded:: 3.3
255+
242256
.. method:: path_mtime(self, path)
243257

244258
Optional abstract method which returns the modification time for the
245259
specified path.
246260

261+
.. deprecated:: 3.3
262+
This method is deprecated in favour of :meth:`path_stats`. You don't
263+
have to implement it, but it is still available for compatibility
264+
purposes.
265+
247266
.. method:: set_data(self, path, data)
248267

249268
Optional abstract method which writes the specified bytes to a file

‎Lib/importlib/_bootstrap.py‎

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -331,25 +331,40 @@ def is_package(self, fullname):
331331
filename=self.get_filename(fullname).rpartition(path_sep)[2]
332332
returnfilename.rsplit('.', 1)[0] =='__init__'
333333

334-
def_bytes_from_bytecode(self, fullname, data, source_mtime):
334+
def_bytes_from_bytecode(self, fullname, data, source_stats):
335335
"""Return the marshalled bytes from bytecode, verifying the magic
336-
numberand timestamp along the way.
336+
number, timestamp and source size along the way.
337337
338-
If source_mtime is None then skip the timestamp check.
338+
If source_stats is None then skip the timestamp check.
339339
340340
"""
341341
magic=data[:4]
342342
raw_timestamp=data[4:8]
343+
raw_size=data[8:12]
343344
iflen(magic) !=4ormagic!=imp.get_magic():
344345
raiseImportError("bad magic number in{}".format(fullname))
345346
eliflen(raw_timestamp) !=4:
346347
raiseEOFError("bad timestamp in{}".format(fullname))
347-
elifsource_mtimeisnotNone:
348-
ifmarshal._r_long(raw_timestamp) !=source_mtime:
349-
raiseImportError("bytecode is stale for{}".format(fullname))
348+
eliflen(raw_size) !=4:
349+
raiseEOFError("bad size in{}".format(fullname))
350+
ifsource_statsisnotNone:
351+
try:
352+
source_mtime=int(source_stats['mtime'])
353+
exceptKeyError:
354+
pass
355+
else:
356+
ifmarshal._r_long(raw_timestamp) !=source_mtime:
357+
raiseImportError("bytecode is stale for{}".format(fullname))
358+
try:
359+
source_size=source_stats['size'] &0xFFFFFFFF
360+
exceptKeyError:
361+
pass
362+
else:
363+
ifmarshal._r_long(raw_size) !=source_size:
364+
raiseImportError("bytecode is stale for{}".format(fullname))
350365
# Can't return the code object as errors from marshal loading need to
351366
# propagate even when source is available.
352-
returndata[8:]
367+
returndata[12:]
353368

354369
@module_for_loader
355370
def_load_module(self, module, *, sourceless=False):
@@ -377,11 +392,20 @@ class SourceLoader(_LoaderBasics):
377392
defpath_mtime(self, path):
378393
"""Optional method that returns the modification time (an int) for the
379394
specified path, where path is a str.
395+
"""
396+
raiseNotImplementedError
380397

381-
Implementing this method allows the loader to read bytecode files.
398+
defpath_stats(self, path):
399+
"""Optional method returning a metadata dict for the specified path
400+
to by the path (str).
401+
Possible keys:
402+
- 'mtime' (mandatory) is the numeric timestamp of last source
403+
code modification;
404+
- 'size' (optional) is the size in bytes of the source code.
382405
406+
Implementing this method allows the loader to read bytecode files.
383407
"""
384-
raiseNotImplementedError
408+
return{'mtime': self.path_mtime(path)}
385409

386410
defset_data(self, path, data):
387411
"""Optional method which writes data (bytes) to a file path (a str).
@@ -407,7 +431,7 @@ def get_source(self, fullname):
407431
defget_code(self, fullname):
408432
"""Concrete implementation of InspectLoader.get_code.
409433
410-
Reading of bytecode requires path_mtime to be implemented. To write
434+
Reading of bytecode requires path_stats to be implemented. To write
411435
bytecode, set_data must also be implemented.
412436
413437
"""
@@ -416,18 +440,19 @@ def get_code(self, fullname):
416440
source_mtime=None
417441
ifbytecode_pathisnotNone:
418442
try:
419-
source_mtime=self.path_mtime(source_path)
443+
st=self.path_stats(source_path)
420444
exceptNotImplementedError:
421445
pass
422446
else:
447+
source_mtime=int(st['mtime'])
423448
try:
424449
data=self.get_data(bytecode_path)
425450
exceptIOError:
426451
pass
427452
else:
428453
try:
429454
bytes_data=self._bytes_from_bytecode(fullname, data,
430-
source_mtime)
455+
st)
431456
except (ImportError, EOFError):
432457
pass
433458
else:
@@ -448,6 +473,7 @@ def get_code(self, fullname):
448473
# throw an exception.
449474
data=bytearray(imp.get_magic())
450475
data.extend(marshal._w_long(source_mtime))
476+
data.extend(marshal._w_long(len(source_bytes)))
451477
data.extend(marshal.dumps(code_object))
452478
try:
453479
self.set_data(bytecode_path, data)
@@ -492,9 +518,10 @@ class _SourceFileLoader(_FileLoader, SourceLoader):
492518

493519
"""Concrete implementation of SourceLoader using the file system."""
494520

495-
defpath_mtime(self, path):
496-
"""Return the modification time for the path."""
497-
returnint(_os.stat(path).st_mtime)
521+
defpath_stats(self, path):
522+
"""Return the metadat for the path."""
523+
st=_os.stat(path)
524+
return{'mtime': st.st_mtime, 'size': st.st_size}
498525

499526
defset_data(self, path, data):
500527
"""Write bytes data to a file."""

‎Lib/importlib/abc.py‎

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
123123

124124
defpath_mtime(self, path):
125125
"""Return the (int) modification time for the path (str)."""
126-
raiseNotImplementedError
126+
ifself.path_stats.__func__isSourceLoader.path_stats:
127+
raiseNotImplementedError
128+
returnint(self.path_stats(path)['mtime'])
129+
130+
defpath_stats(self, path):
131+
"""Return a metadata dict for the source pointed to by the path (str).
132+
Possible keys:
133+
- 'mtime' (mandatory) is the numeric timestamp of last source
134+
code modification;
135+
- 'size' (optional) is the size in bytes of the source code.
136+
"""
137+
ifself.path_mtime.__func__isSourceLoader.path_mtime:
138+
raiseNotImplementedError
139+
return{'mtime': self.path_mtime(path)}
127140

128141
defset_data(self, path, data):
129142
"""Write the bytes to the path (if possible).

‎Lib/importlib/test/source/test_abc_loader.py‎

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .. importutil
66
from . importutilassource_util
77

8+
importcollections
89
importimp
910
importinspect
1011
importio
@@ -40,8 +41,10 @@ class SourceLoaderMock(SourceOnlyLoaderMock):
4041
def__init__(self, path, magic=imp.get_magic()):
4142
super().__init__(path)
4243
self.bytecode_path=imp.cache_from_source(self.path)
44+
self.source_size=len(self.source)
4345
data=bytearray(magic)
4446
data.extend(marshal._w_long(self.source_mtime))
47+
data.extend(marshal._w_long(self.source_size))
4548
code_object=compile(self.source, self.path, 'exec',
4649
dont_inherit=True)
4750
data.extend(marshal.dumps(code_object))
@@ -56,9 +59,9 @@ def get_data(self, path):
5659
else:
5760
raiseIOError
5861

59-
defpath_mtime(self, path):
62+
defpath_stats(self, path):
6063
assertpath==self.path
61-
returnself.source_mtime
64+
return{'mtime': self.source_mtime, 'size': self.source_size}
6265

6366
defset_data(self, path, data):
6467
self.written[path] =bytes(data)
@@ -657,6 +660,7 @@ def verify_code(self, code_object, *, bytecode_written=False):
657660
self.assertIn(self.cached, self.loader.written)
658661
data=bytearray(imp.get_magic())
659662
data.extend(marshal._w_long(self.loader.source_mtime))
663+
data.extend(marshal._w_long(self.loader.source_size))
660664
data.extend(marshal.dumps(code_object))
661665
self.assertEqual(self.loader.written[self.cached], bytes(data))
662666

@@ -847,7 +851,7 @@ def test_SourceLoader(self):
847851
# Required abstractmethods.
848852
self.raises_NotImplementedError(ins, 'get_filename', 'get_data')
849853
# Optional abstractmethods.
850-
self.raises_NotImplementedError(ins,'path_mtime', 'set_data')
854+
self.raises_NotImplementedError(ins,'path_stats', 'set_data')
851855

852856
deftest_PyLoader(self):
853857
self.raises_NotImplementedError(self.PyLoader(), 'source_path',

‎Lib/importlib/test/source/test_file_loader.py‎

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,6 @@ def test_module_reuse(self):
7070
module_dict_id=id(module.__dict__)
7171
withopen(mapping['_temp'], 'w') asfile:
7272
file.write("testing_var = 42\n")
73-
# For filesystems where the mtime is only to a second granularity,
74-
# everything that has happened above can be too fast;
75-
# force an mtime on the source that is guaranteed to be different
76-
# than the original mtime.
77-
loader.path_mtime=self.fake_mtime(loader.path_mtime)
7873
module=loader.load_module('_temp')
7974
self.assertTrue('testing_var'inmodule.__dict__,
8075
"'testing_var' not in "
@@ -190,10 +185,17 @@ def _test_partial_timestamp(self, test, *, del_source=False):
190185
del_source=del_source)
191186
test('_temp', mapping, bc_path)
192187

188+
def_test_partial_size(self, test, *, del_source=False):
189+
withsource_util.create_modules('_temp') asmapping:
190+
bc_path=self.manipulate_bytecode('_temp', mapping,
191+
lambdabc: bc[:11],
192+
del_source=del_source)
193+
test('_temp', mapping, bc_path)
194+
193195
def_test_no_marshal(self, *, del_source=False):
194196
withsource_util.create_modules('_temp') asmapping:
195197
bc_path=self.manipulate_bytecode('_temp', mapping,
196-
lambdabc: bc[:8],
198+
lambdabc: bc[:12],
197199
del_source=del_source)
198200
file_path=mapping['_temp'] ifnotdel_sourceelsebc_path
199201
withself.assertRaises(EOFError):
@@ -202,7 +204,7 @@ def _test_no_marshal(self, *, del_source=False):
202204
def_test_non_code_marshal(self, *, del_source=False):
203205
withsource_util.create_modules('_temp') asmapping:
204206
bytecode_path=self.manipulate_bytecode('_temp', mapping,
205-
lambdabc: bc[:8] +marshal.dumps(b'abcd'),
207+
lambdabc: bc[:12] +marshal.dumps(b'abcd'),
206208
del_source=del_source)
207209
file_path=mapping['_temp'] ifnotdel_sourceelsebytecode_path
208210
withself.assertRaises(ImportError):
@@ -211,7 +213,7 @@ def _test_non_code_marshal(self, *, del_source=False):
211213
def_test_bad_marshal(self, *, del_source=False):
212214
withsource_util.create_modules('_temp') asmapping:
213215
bytecode_path=self.manipulate_bytecode('_temp', mapping,
214-
lambdabc: bc[:8] +b'<test>',
216+
lambdabc: bc[:12] +b'<test>',
215217
del_source=del_source)
216218
file_path=mapping['_temp'] ifnotdel_sourceelsebytecode_path
217219
withself.assertRaises(EOFError):
@@ -235,15 +237,15 @@ def test_empty_file(self):
235237
deftest(name, mapping, bytecode_path):
236238
self.import_(mapping[name], name)
237239
withopen(bytecode_path, 'rb') asfile:
238-
self.assertGreater(len(file.read()), 8)
240+
self.assertGreater(len(file.read()), 12)
239241

240242
self._test_empty_file(test)
241243

242244
deftest_partial_magic(self):
243245
deftest(name, mapping, bytecode_path):
244246
self.import_(mapping[name], name)
245247
withopen(bytecode_path, 'rb') asfile:
246-
self.assertGreater(len(file.read()), 8)
248+
self.assertGreater(len(file.read()), 12)
247249

248250
self._test_partial_magic(test)
249251

@@ -254,7 +256,7 @@ def test_magic_only(self):
254256
deftest(name, mapping, bytecode_path):
255257
self.import_(mapping[name], name)
256258
withopen(bytecode_path, 'rb') asfile:
257-
self.assertGreater(len(file.read()), 8)
259+
self.assertGreater(len(file.read()), 12)
258260

259261
self._test_magic_only(test)
260262

@@ -276,10 +278,21 @@ def test_partial_timestamp(self):
276278
deftest(name, mapping, bc_path):
277279
self.import_(mapping[name], name)
278280
withopen(bc_path, 'rb') asfile:
279-
self.assertGreater(len(file.read()), 8)
281+
self.assertGreater(len(file.read()), 12)
280282

281283
self._test_partial_timestamp(test)
282284

285+
@source_util.writes_bytecode_files
286+
deftest_partial_size(self):
287+
# When the size is partial, regenerate the .pyc, else
288+
# raise EOFError.
289+
deftest(name, mapping, bc_path):
290+
self.import_(mapping[name], name)
291+
withopen(bc_path, 'rb') asfile:
292+
self.assertGreater(len(file.read()), 12)
293+
294+
self._test_partial_size(test)
295+
283296
@source_util.writes_bytecode_files
284297
deftest_no_marshal(self):
285298
# When there is only the magic number and timestamp, raise EOFError.
@@ -375,6 +388,13 @@ def test(name, mapping, bytecode_path):
375388

376389
self._test_partial_timestamp(test, del_source=True)
377390

391+
deftest_partial_size(self):
392+
deftest(name, mapping, bytecode_path):
393+
withself.assertRaises(EOFError):
394+
self.import_(bytecode_path, name)
395+
396+
self._test_partial_size(test, del_source=True)
397+
378398
deftest_no_marshal(self):
379399
self._test_no_marshal(del_source=True)
380400

‎Lib/pkgutil.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def read_code(stream):
2121
ifmagic!=imp.get_magic():
2222
returnNone
2323

24-
stream.read(4) # Skip timestamp
24+
stream.read(8) # Skip timestamp and size
2525
returnmarshal.load(stream)
2626

2727

‎Lib/py_compile.py‎

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
110110
"""
111111
withtokenize.open(file) asf:
112112
try:
113-
timestamp=int(os.fstat(f.fileno()).st_mtime)
113+
st=os.fstat(f.fileno())
114114
exceptAttributeError:
115-
timestamp=int(os.stat(file).st_mtime)
115+
st=os.stat(file)
116+
timestamp=int(st.st_mtime)
117+
size=st.st_size&0xFFFFFFFF
116118
codestring=f.read()
117119
try:
118120
codeobject=builtins.compile(codestring, dfileorfile, 'exec',
@@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
139141
withopen(cfile, 'wb') asfc:
140142
fc.write(b'\0\0\0\0')
141143
wr_long(fc, timestamp)
144+
wr_long(fc, size)
142145
marshal.dump(codeobject, fc)
143146
fc.flush()
144147
fc.seek(0, 0)

‎Lib/test/test_import.py‎

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,7 @@ def test_module_without_source(self):
380380
deftest_foreign_code(self):
381381
py_compile.compile(self.file_name)
382382
withopen(self.compiled_name, "rb") asf:
383-
header=f.read(8)
383+
header=f.read(12)
384384
code=marshal.load(f)
385385
constants=list(code.co_consts)
386386
foreign_code=test_main.__code__
@@ -644,6 +644,16 @@ def cleanup():
644644
self.assertEqual(sys.modules['pep3147.foo'].__cached__,
645645
os.path.join(os.curdir, foo_pyc))
646646

647+
deftest_recompute_pyc_same_second(self):
648+
# Even when the source file doesn't change timestamp, a change in
649+
# source size is enough to trigger recomputation of the pyc file.
650+
__import__(TESTFN)
651+
unload(TESTFN)
652+
withopen(self.source, 'a') asfp:
653+
print("x = 5", file=fp)
654+
m=__import__(TESTFN)
655+
self.assertEqual(m.x, 5)
656+
647657

648658
classRelativeImportFromImportlibTests(test_relative_imports.RelativeImports):
649659

0 commit comments

Comments
(0)