Fix non-deterministic python compilation
Timestamps in pyc files and zip archives were causing cache misses in RBE.
Bug: b/322788229
Test: Ran a few local builds and verified improved cache hit rate for metalava actions.
Change-Id: I9375694d26a107f5c0c43bbd685bd8e86349d187
diff --git a/python/scripts/precompile_python.py b/python/scripts/precompile_python.py
index e12e7d2..80e7c76 100644
--- a/python/scripts/precompile_python.py
+++ b/python/scripts/precompile_python.py
@@ -16,6 +16,7 @@
import argparse
import py_compile
import os
+import sys
import shutil
import tempfile
import zipfile
@@ -23,22 +24,31 @@
# This file needs to support both python 2 and 3.
-def process_one_file(name, inf, outzip):
- if not name.endswith('.py'):
- outzip.writestr(name, inf.read())
+def process_one_file(info, infile, outzip):
+ if not info.filename.endswith('.py'):
+ outzip.writestr(info, infile.read())
return
# Unfortunately py_compile requires the input/output files to be written
# out to disk.
with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp:
- shutil.copyfileobj(inf, tmp)
+ shutil.copyfileobj(infile, tmp)
in_name = tmp.name
with tempfile.NamedTemporaryFile(prefix="Soong_precompile_", delete=False) as tmp:
out_name = tmp.name
try:
- py_compile.compile(in_name, out_name, name, doraise=True)
+ # Ensure deterministic pyc by using the hash rather than timestamp.
+ # This is required to improve caching in accelerated builds.
+ # Only works on Python 3.7+ (see https://docs.python.org/3/library/py_compile.html#py_compile.PycInvalidationMode)
+ # which should cover most updated branches and developer machines.
+ if sys.version_info >= (3, 7):
+ py_compile.compile(in_name, out_name, info.filename, doraise=True, invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH)
+ else:
+ py_compile.compile(in_name, out_name, info.filename, doraise=True)
with open(out_name, 'rb') as f:
- outzip.writestr(name + 'c', f.read())
+ info.filename = info.filename + 'c'
+ # Use ZipInfo rather than str to reuse timestamps for deterministic zip files.
+ outzip.writestr(info, f.read())
finally:
os.remove(in_name)
os.remove(out_name)
@@ -52,9 +62,9 @@
with open(args.dst_zip, 'wb') as outf, open(args.src_zip, 'rb') as inf:
with zipfile.ZipFile(outf, mode='w') as outzip, zipfile.ZipFile(inf, mode='r') as inzip:
- for name in inzip.namelist():
- with inzip.open(name, mode='r') as inzipf:
- process_one_file(name, inzipf, outzip)
+ for info in inzip.infolist():
+ with inzip.open(info.filename, mode='r') as inzipf:
+ process_one_file(info, inzipf, outzip)
if __name__ == "__main__":