Skip to content

GH-135904: Add tests for the JIT build process #136766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
78 changes: 78 additions & 0 deletions Lib/test/test_jit_stencils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import asyncio
import pathlib
import shlex
import sysconfig
import tempfile
import test.support
import test.test_tools
import test.support.script_helper
import unittest

# Resolved root of the CPython checkout, plus the JIT tooling paths under it:
_CPYTHON = pathlib.Path(test.support.REPO_ROOT).resolve()
_TOOLS_JIT = _CPYTHON.joinpath("Tools", "jit")
_TOOLS_JIT_TEST = _TOOLS_JIT.joinpath("test")
_TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H = _TOOLS_JIT_TEST.joinpath(
    "test_executor_cases.c.h"
)
_TOOLS_JIT_BUILD_PY = _TOOLS_JIT.joinpath("build.py")

# NOTE(review): presumably this skips the whole module when the "jit" tool
# directory is unavailable, and the context manager makes the tool's own
# modules importable -- confirm against test.test_tools.
test.test_tools.skip_if_missing("jit")
with test.test_tools.imports_under_tool("jit"):
    import _llvm

@test.support.cpython_only
@unittest.skipIf(test.support.Py_DEBUG, "Debug stencils aren't tested.")
@unittest.skipIf(test.support.Py_GIL_DISABLED, "Free-threaded stencils aren't tested.")
class TestJITStencils(unittest.TestCase):
    """Rebuild JIT stencils and compare them with the checked-in expected headers."""

    def _build_jit_stencils(self, target: str) -> str:
        """Run the JIT build script for *target* and return the generated header.

        The script runs in a child interpreter and writes its output into a
        temporary directory. If the child exits with a nonzero return code, the
        running test (or subtest) is skipped with the command line in the
        message. The first two lines of the generated file are dropped from the
        returned text.
        """
        with tempfile.TemporaryDirectory() as scratch:
            generated_h = (pathlib.Path(scratch) / f"jit_stencils-{target}.h").resolve()
            config_h = pathlib.Path(sysconfig.get_config_h_filename()).resolve()
            build_args = (
                _TOOLS_JIT_BUILD_PY,
                "--input-file", _TOOLS_JIT_TEST_TEST_EXECUTOR_CASES_C_H,
                "--output-dir", generated_h.parent,
                "--pyconfig-dir", config_h.parent,
                target,
            )
            result, command = test.support.script_helper.run_python_until_end(
                *build_args,
                __isolated=False,
                # Windows leaks temporary files on failure because the JIT build
                # process is async. This forces it to be "sync" for this test:
                PYTHON_CPU_COUNT="1",
            )
            if result.rc:
                self.skipTest(f"Build failed: {shlex.join(map(str, command))}")
            text = generated_h.read_text()
            # Strip out two lines of header comments:
            _first, _second, remainder = text.split("\n", 2)
            return remainder

    def _check_jit_stencils(
        self, expected: str, actual: str, test_jit_stencils_h: pathlib.Path
    ) -> None:
        """Assert that *expected* equals *actual*, ignoring edge newlines.

        On a mismatch, the AssertionError is re-raised with two notes attached:
        a banner naming the expected-output file (relative to the repository
        root) and the full *actual* text.
        """
        try:
            self.assertEqual(expected.strip("\n"), actual.strip("\n"))
        except AssertionError as failure:
            # Make it easy to re-validate the expected output:
            relative = test_jit_stencils_h.relative_to(_CPYTHON)
            message = f"If this is expected, replace {relative} with:"
            rule = "=" * len(message)
            failure.add_note(f"{rule}\n{message}\n{rule}")
            failure.add_note(actual)
            raise

    def test_jit_stencils(self):
        """Build and check stencils for every expected header under Tools/jit/test."""
        clang = asyncio.run(_llvm._find_tool("clang"))
        if not clang:
            self.skipTest(f"LLVM {_llvm._LLVM_VERSION} isn't installed.")
        # Show complete diffs when the headers differ:
        self.maxDiff = None
        built_any = False
        for expected_h in _TOOLS_JIT_TEST.glob("test_jit_stencils-*.h"):
            # The target triple is encoded in the expected file's name:
            target = expected_h.stem.removeprefix("test_jit_stencils-")
            with self.subTest(target):
                expected = expected_h.read_text()
                actual = self._build_jit_stencils(target)
                # Only reached when the build finished (a failed build raises
                # a skip before returning):
                built_any = True
                self._check_jit_stencils(expected, actual, expected_h)
        self.assertTrue(built_any, "No JIT stencils built!")


# Run the tests when this file is executed directly as a script:
if __name__ == "__main__":
    unittest.main()
5 changes: 4 additions & 1 deletion Tools/jit/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ class ELFSection(typing.TypedDict):
Index: int
Info: int
Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]]
SectionData: dict[typing.Literal["Bytes"], list[int]]
SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
Size: int
Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]]
Type: dict[typing.Literal["Name"], str]

Expand All @@ -117,4 +118,6 @@ class MachOSection(typing.TypedDict):
list[dict[typing.Literal["Relocation"], MachORelocation]]
]
SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]]
Segment: dict[typing.Literal["Value"], str]
Size: int
Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]]
23 changes: 17 additions & 6 deletions Tools/jit/_stencils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ class HoleValue(enum.Enum):
CODE = enum.auto()
# The base address of the read-only data for this uop:
DATA = enum.auto()
# The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET):
ERROR_TARGET = enum.auto()
# The address of the current executor (exposed as _JIT_EXECUTOR):
EXECUTOR = enum.auto()
# The base address of the "global" offset table located in the read-only data.
# Shouldn't be present in the final stencils, since these are all replaced with
# equivalent DATA values:
GOT = enum.auto()
# The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET):
JUMP_TARGET = enum.auto()
# The current uop's oparg (exposed as _JIT_OPARG):
OPARG = enum.auto()
# The current uop's operand0 on 64-bit platforms (exposed as _JIT_OPERAND0):
Expand All @@ -39,10 +43,9 @@ class HoleValue(enum.Enum):
OPERAND1_LO = enum.auto()
# The current uop's target (exposed as _JIT_TARGET):
TARGET = enum.auto()
# The base address of the machine code for the jump target (exposed as _JIT_JUMP_TARGET):
JUMP_TARGET = enum.auto()
# The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET):
ERROR_TARGET = enum.auto()
# Writable data, which we don't support! Optimistically remove their data
# from the stencil, and raise later if they're actually used:
WRITABLE = enum.auto()
# A hardcoded value of zero (used for symbol lookups):
ZERO = enum.auto()

Expand Down Expand Up @@ -96,9 +99,11 @@ class HoleValue(enum.Enum):
_HOLE_EXPRS = {
HoleValue.CODE: "(uintptr_t)code",
HoleValue.DATA: "(uintptr_t)data",
HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]",
HoleValue.EXECUTOR: "(uintptr_t)executor",
# These should all have been turned into DATA values by process_relocations:
# HoleValue.GOT: "",
HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]",
HoleValue.OPARG: "instruction->oparg",
HoleValue.OPERAND0: "instruction->operand0",
HoleValue.OPERAND0_HI: "(instruction->operand0 >> 32)",
Expand All @@ -107,8 +112,8 @@ class HoleValue(enum.Enum):
HoleValue.OPERAND1_HI: "(instruction->operand1 >> 32)",
HoleValue.OPERAND1_LO: "(instruction->operand1 & UINT32_MAX)",
HoleValue.TARGET: "instruction->target",
HoleValue.JUMP_TARGET: "state->instruction_starts[instruction->jump_target]",
HoleValue.ERROR_TARGET: "state->instruction_starts[instruction->error_target]",
# These should all have raised an error if they were actually used:
# HoleValue.WRITABLE: "",
HoleValue.ZERO: "",
}

Expand Down Expand Up @@ -246,6 +251,12 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None:
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:
if hole.symbol in self.symbols:
value, _ = self.symbols[hole.symbol]
if value is HoleValue.WRITABLE:
raise ValueError(
f"Writable data ({hole.symbol}) is not supported!"
)
if hole.value is HoleValue.GOT:
assert hole.symbol is not None
hole.value = HoleValue.DATA
Expand Down
95 changes: 63 additions & 32 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class _Target(typing.Generic[_S, _R]):
verbose: bool = False
cflags: str = ""
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
input_file: pathlib.Path = PYTHON_EXECUTOR_CASES_C_H
pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve()

def _get_nop(self) -> bytes:
Expand All @@ -68,7 +69,7 @@ def _compute_digest(self) -> str:
hasher.update(self.debug.to_bytes())
hasher.update(self.cflags.encode())
# These dependencies are also reflected in _JITSources in regen.targets:
hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
hasher.update(self.input_file.read_bytes())
hasher.update((self.pyconfig_dir / "pyconfig.h").read_bytes())
for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
for filename in filenames:
Expand All @@ -82,10 +83,15 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
if output is not None:
# Make sure that full paths don't leak out (for reproducibility):
long, short = str(path), str(path.name)
group.code.disassembly.extend(
line.expandtabs().strip().replace(long, short)
for line in output.splitlines()
)
lines = output.splitlines()
started = False
for line in lines:
if line.lstrip().startswith("0:"):
started = True
if started:
cleaned = line.replace(long, short).expandtabs().strip()
if cleaned:
group.code.disassembly.append(cleaned)
args = [
"--elf-output-style=JSON",
"--expand-relocs",
Expand Down Expand Up @@ -181,10 +187,12 @@ async def _compile(
return await self._parse(o)

async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
generated_cases = self.input_file.read_text()
cases_and_opnames = sorted(
re.findall(
r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
r"^ {8}(case (\w+): \{\n.*?\n {8}\})",
generated_cases,
flags=re.DOTALL | re.MULTILINE,
)
)
tasks = []
Expand Down Expand Up @@ -260,7 +268,7 @@ def _handle_section(
if "SectionData" in section:
section_data_bytes = section["SectionData"]["Bytes"]
else:
# Zeroed BSS data, seen with printf debugging calls:
# Zeroed BSS data:
section_data_bytes = [0] * section["RawDataSize"]
if "IMAGE_SCN_MEM_EXECUTE" in flags:
value = _stencils.HoleValue.CODE
Expand All @@ -270,6 +278,10 @@ def _handle_section(
stencil = group.data
else:
return
if "IMAGE_SCN_MEM_WRITE" in flags:
assert value is _stencils.HoleValue.DATA
value = _stencils.HoleValue.WRITABLE
section_data_bytes = []
base = len(stencil.body)
group.symbols[section["Number"]] = value, base
stencil.body.extend(section_data_bytes)
Expand Down Expand Up @@ -372,29 +384,39 @@ def _handle_section(
if value is _stencils.HoleValue.CODE:
stencil = group.code
else:
assert value is _stencils.HoleValue.DATA
assert value in (_stencils.HoleValue.DATA, _stencils.HoleValue.WRITABLE)
stencil = group.data
for wrapped_relocation in section["Relocations"]:
relocation = wrapped_relocation["Relocation"]
hole = self._handle_relocation(base, relocation, stencil.body)
stencil.holes.append(hole)
elif section_type == "SHT_PROGBITS":
elif section_type in {"SHT_PROGBITS", "SHT_NOBITS"}:
if "SHF_ALLOC" not in flags:
return
if "SectionData" in section:
section_data_bytes = section["SectionData"]["Bytes"]
else:
# Zeroed BSS data:
section_data_bytes = [0] * section["Size"]
if "SHF_EXECINSTR" in flags:
value = _stencils.HoleValue.CODE
stencil = group.code
else:
value = _stencils.HoleValue.DATA
stencil = group.data
group.symbols[section["Index"]] = value, len(stencil.body)
if "SHF_WRITE" in flags:
assert value is _stencils.HoleValue.DATA
value = _stencils.HoleValue.WRITABLE
section_data_bytes = []
base = len(stencil.body)
group.symbols[section["Index"]] = value, base
stencil.body.extend(section_data_bytes)
for wrapped_symbol in section["Symbols"]:
symbol = wrapped_symbol["Symbol"]
offset = len(stencil.body) + symbol["Value"]
offset = base + symbol["Value"]
name = symbol["Name"]["Name"]
name = name.removeprefix(self.symbol_prefix)
group.symbols[name] = value, offset
stencil.body.extend(section["SectionData"]["Bytes"])
assert not section["Relocations"]
else:
assert section_type in {
Expand Down Expand Up @@ -452,33 +474,35 @@ class _MachO(
def _handle_section(
self, section: _schema.MachOSection, group: _stencils.StencilGroup
) -> None:
assert section["Address"] >= len(group.code.body)
assert "SectionData" in section
if "SectionData" in section:
section_data_bytes = section["SectionData"]["Bytes"]
else:
# Zeroed BSS data:
section_data_bytes = [0] * section["Size"]
flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
name = section["Name"]["Value"]
name = name.removeprefix(self.symbol_prefix)
if "Debug" in flags:
return
if "PureInstructions" in flags:
value = _stencils.HoleValue.CODE
stencil = group.code
start_address = 0
group.symbols[name] = value, section["Address"] - start_address
else:
value = _stencils.HoleValue.DATA
stencil = group.data
start_address = len(group.code.body)
group.symbols[name] = value, len(group.code.body)
base = section["Address"] - start_address
segment = section["Segment"]["Value"]
assert segment in {"__DATA", "__TEXT"}, segment
if segment == "__DATA":
value = _stencils.HoleValue.WRITABLE
section_data_bytes = []
base = len(stencil.body)
group.symbols[section["Index"]] = value, base
stencil.body.extend(
[0] * (section["Address"] - len(group.code.body) - len(group.data.body))
)
stencil.body.extend(section["SectionData"]["Bytes"])
stencil.body.extend(section_data_bytes)
name = section["Name"]["Value"]
name = name.removeprefix(self.symbol_prefix)
group.symbols[name] = value, base
assert "Symbols" in section
for wrapped_symbol in section["Symbols"]:
symbol = wrapped_symbol["Symbol"]
offset = symbol["Value"] - start_address
offset = base + symbol["Value"] - section["Address"]
name = symbol["Name"]["Name"]
name = name.removeprefix(self.symbol_prefix)
group.symbols[name] = value, offset
Expand Down Expand Up @@ -557,38 +581,45 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
optimizer: type[_optimizers.Optimizer]
target: _COFF32 | _COFF64 | _ELF | _MachO
if re.fullmatch(r"aarch64-apple-darwin.*", host):
host = "aarch64-apple-darwin"
condition = "defined(__aarch64__) && defined(__APPLE__)"
optimizer = _optimizers.OptimizerAArch64
target = _MachO(host, condition, optimizer=optimizer)
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
args = ["-fms-runtime-lib=dll", "-fplt"]
host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)"
args = ["-fms-runtime-lib=dll", "-fplt"]
optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)"
# -mno-outline-atomics: Keep intrinsics from being emitted.
args = ["-fpic", "-mno-outline-atomics"]
condition = "defined(__aarch64__) && defined(__linux__)"
optimizer = _optimizers.OptimizerAArch64
target = _ELF(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"i686-pc-windows-msvc", host):
host = "i686-pc-windows-msvc"
condition = "defined(_M_IX86)"
# -Wno-ignored-attributes: __attribute__((preserve_none)) is not supported here.
args = ["-DPy_NO_ENABLE_SHARED", "-Wno-ignored-attributes"]
optimizer = _optimizers.OptimizerX86
condition = "defined(_M_IX86)"
target = _COFF32(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"x86_64-apple-darwin.*", host):
host = "x86_64-apple-darwin"
condition = "defined(__x86_64__) && defined(__APPLE__)"
optimizer = _optimizers.OptimizerX86
target = _MachO(host, condition, optimizer=optimizer)
elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
args = ["-fms-runtime-lib=dll"]
host = "x86_64-pc-windows-msvc"
condition = "defined(_M_X64)"
args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerX86
target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)"
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
optimizer = _optimizers.OptimizerX86
target = _ELF(host, condition, args=args, optimizer=optimizer)
else:
Expand Down
Loading
Loading