73
73
// Parameter list used for the _TAIL_CALL_<op> functions declared through TARGET(op).
// The names here are the ones TAIL_CALL_ARGS forwards, in the same order.
#define TAIL_CALL_PARAMS _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate, _Py_CODEUNIT *next_instr, int oparg
74
74
// Argument list that matches TAIL_CALL_PARAMS name-for-name and in order.
// DISPATCH_GOTO() passes it to next_op_f when tail-calling the next handler.
#define TAIL_CALL_ARGS frame, stack_pointer, tstate, next_instr, oparg
75
75
76
+ // The first version loaded the handler directly from the code unit:
77
+ //     next_op_f = INSTRUCTION_TABLE[next_instr->op.code];
78
+ // The second version follows NEXTOPARG: it first reads the code unit with a relaxed atomic 16-bit load.
76
79
#ifdef Py_TAIL_CALL_INTERP
77
80
// LOAD_NEXT_OP_F: set next_op_f to the INSTRUCTION_TABLE entry for the opcode
// of the code unit that next_instr points at.
// In this diff view the '-' line is the earlier direct lookup through
// next_instr->op.code; the two '+' lines replace it by copying the code unit
// into a local `word` with FT_ATOMIC_LOAD_UINT16_RELAXED and indexing the
// table with word.op.code.
// NOTE(review): the space between the macro name and "(" looks like an
// extraction artifact; in C it would make the macro object-like -- confirm
// against the real file.
# define LOAD_NEXT_OP_F () \
78
81
do { \
79
- next_op_f = INSTRUCTION_TABLE[next_instr->op.code]; \
82
+ _Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \
83
+ next_op_f = INSTRUCTION_TABLE[word.op.code]; \
80
84
} while (0)
81
85
// Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
// Py_MUSTTAIL is written directly in front of the `return` in DISPATCH_GOTO().
82
86
# define Py_MUSTTAIL [[clang::musttail]]
86
90
// TARGET(op): expands to the declarator of a function named _TAIL_CALL_<op>
// (built by token-pasting `op`) that takes TAIL_CALL_PARAMS, returns
// PyObject *, and is marked Py_PRESERVE_NONE_CC.
// NOTE(review): the spaces in "TARGET (op )" look like an extraction
// artifact -- confirm against the real file.
# define TARGET (op ) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
87
91
// DISPATCH_GOTO: return the result of calling next_op_f with TAIL_CALL_ARGS,
// with Py_MUSTTAIL applied to the return statement.
// The added assert ('+' line) checks that next_op_f is the INSTRUCTION_TABLE
// entry for the current `opcode` before the call is made, so next_op_f must
// already have been loaded by the time this macro runs.
// NOTE(review): the space before "()" looks like an extraction artifact --
// confirm against the real file.
# define DISPATCH_GOTO () \
88
92
do { \
93
+ assert(next_op_f == INSTRUCTION_TABLE[opcode]); \
89
94
Py_MUSTTAIL return next_op_f(TAIL_CALL_ARGS); \
90
95
} while (0)
91
96
# define JUMP_TO_LABEL (name ) \
@@ -148,9 +153,11 @@ do { \
148
153
DISPATCH_GOTO(); \
149
154
}
150
155
156
+ // TODO: find a better way to do this than reloading next_op_f by hand below.
151
157
// DISPATCH_SAME_OPARG: re-read `opcode` from next_instr->op.code, then run
// PRE_DISPATCH_GOTO() and DISPATCH_GOTO(). It does not assign oparg.
// The added '+' line also sets next_op_f from INSTRUCTION_TABLE[opcode].
// Both reads here are plain member/array accesses; unlike LOAD_NEXT_OP_F
// they do not go through FT_ATOMIC_LOAD_UINT16_RELAXED.
// NOTE(review): INSTRUCTION_TABLE / next_op_f are only visibly used under
// Py_TAIL_CALL_INTERP elsewhere in this view -- confirm this macro is only
// expanded where they exist.
#define DISPATCH_SAME_OPARG () \
152
158
{ \
153
159
opcode = next_instr->op.code; \
160
+ next_op_f = INSTRUCTION_TABLE[opcode]; \
154
161
PRE_DISPATCH_GOTO(); \
155
162
DISPATCH_GOTO(); \
156
163
}
@@ -347,13 +354,13 @@ do { \
347
354
} else { \
348
355
_PyFrame_SetStackPointer(frame, stack_pointer); \
349
356
next_instr = _Py_call_instrumentation_jump(this_instr, tstate, event, frame, src, dest); \
350
- LOAD_NEXT_OP_F(); \
351
357
stack_pointer = _PyFrame_GetStackPointer(frame); \
352
358
if (next_instr == NULL) { \
353
359
next_instr = (dest)+1; \
354
360
JUMP_TO_LABEL(error); \
355
361
} \
356
362
} \
363
+ LOAD_NEXT_OP_F(); \
357
364
} while (0);
358
365
359
366
@@ -373,7 +380,7 @@ static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
373
380
374
381
// LOAD_IP(OFFSET): set next_instr to frame->instr_ptr + (OFFSET), then
// refresh next_op_f for the new next_instr through LOAD_NEXT_OP_F().
// The '-' and '+' lines read the same in this view, so the change is
// presumably whitespace-only -- confirm against the real diff.
// NOTE(review): the spaces in "LOAD_IP (OFFSET )" look like an extraction
// artifact -- confirm against the real file.
#define LOAD_IP (OFFSET ) do { \
375
382
next_instr = frame->instr_ptr + (OFFSET); \
376
- LOAD_NEXT_OP_F(); \
383
+ LOAD_NEXT_OP_F(); \
377
384
} while (0)
378
385
379
386
/* There's no STORE_IP(), it's inlined by the code generator. */
0 commit comments