Skip to content

Commit 3ab3bd4

Browse files
committed
Regenerate uop ids/metadata/tier2 cases per target
1 parent ab3e9bf commit 3ab3bd4

8 files changed

Lines changed: 65015 additions & 8333 deletions

File tree

Include/internal/pycore_uop_ids.h

Lines changed: 1998 additions & 19 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 12098 additions & 4797 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/executor_cases.c.h

Lines changed: 50674 additions & 3389 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Tools/cases_generator/analyzer.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1368,15 +1368,17 @@ def is_large(uop: Uop) -> bool:
13681368
return len(list(uop.body.tokens())) > 120
13691369

13701370

1371-
def get_uop_cache_depths(uop: Uop) -> Iterator[tuple[int, int, int]]:
1371+
def get_uop_cache_depths(
1372+
uop: Uop, max_cached_register: int = MAX_GENERATED_CACHED_REGISTER
1373+
) -> Iterator[tuple[int, int, int]]:
13721374
if uop.name == "_SPILL_OR_RELOAD":
1373-
for inputs in range(MAX_GENERATED_CACHED_REGISTER + 1):
1374-
for outputs in range(MAX_GENERATED_CACHED_REGISTER + 1):
1375+
for inputs in range(max_cached_register + 1):
1376+
for outputs in range(max_cached_register + 1):
13751377
if inputs != outputs:
13761378
yield inputs, outputs, inputs
13771379
return
13781380
if uop.name in ("_DEOPT", "_HANDLE_PENDING_AND_DEOPT", "_EXIT_TRACE", "_DYNAMIC_EXIT"):
1379-
for i in range(MAX_GENERATED_CACHED_REGISTER + 1):
1381+
for i in range(max_cached_register + 1):
13801382
yield i, 0, 0
13811383
return
13821384
if uop.name in ("_START_EXECUTOR", "_JUMP_TO_TOP", "_COLD_EXIT"):
@@ -1398,20 +1400,20 @@ def get_uop_cache_depths(uop: Uop) -> Iterator[tuple[int, int, int]]:
13981400
has_array = True
13991401
break
14001402
ideal_outputs += 1
1401-
if ideal_inputs > MAX_GENERATED_CACHED_REGISTER:
1402-
ideal_inputs = MAX_GENERATED_CACHED_REGISTER
1403-
if ideal_outputs > MAX_GENERATED_CACHED_REGISTER:
1404-
ideal_outputs = MAX_GENERATED_CACHED_REGISTER
1403+
if ideal_inputs > max_cached_register:
1404+
ideal_inputs = max_cached_register
1405+
if ideal_outputs > max_cached_register:
1406+
ideal_outputs = max_cached_register
14051407
at_end = uop.properties.sync_sp or uop.properties.side_exit_at_end
14061408
exit_depth = ideal_outputs if at_end else ideal_inputs
14071409
if uop.properties.escapes or uop.properties.sync_sp or has_array or is_large(uop):
14081410
yield ideal_inputs, ideal_outputs, exit_depth
14091411
return
1410-
for inputs in range(MAX_GENERATED_CACHED_REGISTER + 1):
1412+
for inputs in range(max_cached_register + 1):
14111413
outputs = ideal_outputs - ideal_inputs + inputs
14121414
if outputs < ideal_outputs:
14131415
outputs = ideal_outputs
1414-
elif outputs > MAX_GENERATED_CACHED_REGISTER:
1416+
elif outputs > max_cached_register:
14151417
continue
14161418
yield inputs, outputs, outputs if at_end else inputs
14171419

Tools/cases_generator/tier2_generator.py

Lines changed: 45 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
analysis_error,
1717
get_uop_cache_depths,
1818
is_large,
19+
MAX_CACHED_REGISTER,
1920
MAX_GENERATED_CACHED_REGISTER,
2021
)
2122

@@ -264,45 +265,52 @@ def generate_tier2(
264265
out = CWriter(outfile, 2, lines)
265266
out.emit("\n")
266267

267-
for name, uop in analysis.uops.items():
268-
if uop.properties.tier == 1:
269-
continue
270-
if uop.is_super():
271-
continue
272-
if uop.properties.records_value:
273-
continue
274-
why_not_viable = uop.why_not_viable()
275-
if why_not_viable is not None:
276-
out.emit(
277-
f"/* {uop.name} is not a viable micro-op for tier 2 because it {why_not_viable} */\n\n"
278-
)
279-
continue
280-
for inputs, outputs, exit_depth in get_uop_cache_depths(uop):
281-
emitter = Tier2Emitter(out, analysis.labels, exit_depth)
282-
opname = f"{uop.name}_r{inputs}{outputs}"
283-
needed_cached_registers = max(inputs, outputs)
284-
if needed_cached_registers:
285-
out.start_line()
286-
out.out.write(f"#if MAX_CACHED_REGISTER >= {needed_cached_registers}\n")
287-
out.emit(f"case {opname}: {{\n")
288-
out.emit(f"CHECK_CURRENT_CACHED_VALUES({inputs});\n")
289-
out.emit("assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());\n")
290-
declare_variables(uop, out)
291-
stack = Stack()
292-
stack.push_cache([f"_tos_cache{i}" for i in range(inputs)], out)
293-
stack._print(out)
294-
reachable, stack = write_uop(uop, emitter, stack, outputs)
295-
out.start_line()
296-
if reachable:
268+
first = True
269+
for target_depth in range(
270+
MAX_CACHED_REGISTER, MAX_GENERATED_CACHED_REGISTER + 1
271+
):
272+
directive = "#if" if first else "#elif"
273+
out.start_line()
274+
out.out.write(f"{directive} MAX_CACHED_REGISTER == {target_depth}\n")
275+
for name, uop in analysis.uops.items():
276+
if uop.properties.tier == 1:
277+
continue
278+
if uop.is_super():
279+
continue
280+
if uop.properties.records_value:
281+
continue
282+
why_not_viable = uop.why_not_viable()
283+
if why_not_viable is not None:
284+
out.emit(
285+
f"/* {uop.name} is not a viable micro-op for tier 2 because it {why_not_viable} */\n\n"
286+
)
287+
continue
288+
for inputs, outputs, exit_depth in get_uop_cache_depths(
289+
uop, max_cached_register=target_depth
290+
):
291+
emitter = Tier2Emitter(out, analysis.labels, exit_depth)
292+
opname = f"{uop.name}_r{inputs}{outputs}"
293+
out.emit(f"case {opname}: {{\n")
294+
out.emit(f"CHECK_CURRENT_CACHED_VALUES({inputs});\n")
297295
out.emit("assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());\n")
298-
if not uop.properties.always_exits:
299-
out.emit("break;\n")
300-
out.start_line()
301-
out.emit("}")
302-
if needed_cached_registers:
296+
declare_variables(uop, out)
297+
stack = Stack()
298+
stack.push_cache([f"_tos_cache{i}" for i in range(inputs)], out)
299+
stack._print(out)
300+
reachable, stack = write_uop(uop, emitter, stack, outputs)
301+
out.start_line()
302+
if reachable:
303+
out.emit("assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());\n")
304+
if not uop.properties.always_exits:
305+
out.emit("break;\n")
303306
out.start_line()
304-
out.out.write(f"#endif\n")
305-
out.emit("\n\n")
307+
out.emit("}\n\n")
308+
first = False
309+
out.start_line()
310+
out.out.write("#else\n")
311+
out.emit('#error "Unsupported MAX_CACHED_REGISTER value"\n')
312+
out.start_line()
313+
out.out.write("#endif\n")
306314
out.emit("\n")
307315
outfile.write("#undef TIER_TWO\n")
308316

Tools/cases_generator/uop_id_generator.py

Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
analyze_files,
1212
get_uop_cache_depths,
1313
MAX_CACHED_REGISTER,
14+
MAX_GENERATED_CACHED_REGISTER,
1415
)
1516
from generators_common import (
1617
DEFAULT_INPUT,
@@ -49,29 +50,37 @@ def generate_uop_ids(
4950
out.emit(f"#define {name} {next_id}\n")
5051
next_id += 1
5152

52-
out.emit(f"#define MAX_UOP_ID {next_id-1}\n")
53+
base_max_uop_id = next_id - 1
54+
out.emit(f"#define MAX_UOP_ID {base_max_uop_id}\n")
5355
out.emit(f"#define MAX_CACHED_REGISTER {MAX_CACHED_REGISTER}\n")
54-
register_groups: dict[int, list[tuple[str, int, int]]] = defaultdict(list)
55-
for name, uop in sorted(uops):
56-
if uop.properties.tier == 1:
57-
continue
58-
if uop.properties.records_value:
59-
continue
60-
for inputs, outputs, _ in sorted(get_uop_cache_depths(uop)):
61-
register_groups[max(inputs, outputs)].append((name, inputs, outputs))
62-
first_group = True
63-
for level in sorted(register_groups):
64-
if level > 0:
65-
out.emit(f"#if MAX_CACHED_REGISTER >= {level}\n")
66-
for name, inputs, outputs in register_groups[level]:
67-
out.emit(f"#define {name}_r{inputs}{outputs} {next_id}\n")
68-
next_id += 1
69-
if not first_group:
70-
out.emit(f"#undef MAX_UOP_REGS_ID\n")
71-
out.emit(f"#define MAX_UOP_REGS_ID {next_id-1}\n")
72-
first_group = False
73-
if level > 0:
74-
out.emit(f"#endif\n")
56+
first = True
57+
for target_depth in range(
58+
MAX_CACHED_REGISTER, MAX_GENERATED_CACHED_REGISTER + 1
59+
):
60+
directive = "#if" if first else "#elif"
61+
out.emit(f"{directive} MAX_CACHED_REGISTER == {target_depth}\n")
62+
target_next_id = base_max_uop_id + 1
63+
register_groups: dict[int, list[tuple[str, int, int]]] = defaultdict(list)
64+
for name, uop in sorted(uops):
65+
if uop.properties.tier == 1:
66+
continue
67+
if uop.properties.records_value:
68+
continue
69+
for inputs, outputs, _ in sorted(
70+
get_uop_cache_depths(uop, max_cached_register=target_depth)
71+
):
72+
register_groups[max(inputs, outputs)].append(
73+
(name, inputs, outputs)
74+
)
75+
for level in sorted(register_groups):
76+
for name, inputs, outputs in register_groups[level]:
77+
out.emit(f"#define {name}_r{inputs}{outputs} {target_next_id}\n")
78+
target_next_id += 1
79+
out.emit(f"#define MAX_UOP_REGS_ID {target_next_id-1}\n")
80+
first = False
81+
out.emit("#else\n")
82+
out.emit('#error "Unsupported MAX_CACHED_REGISTER value"\n')
83+
out.emit("#endif\n")
7584

7685

7786
arg_parser = argparse.ArgumentParser(

0 commit comments

Comments
 (0)