Commit 060a96f
gh-116968: Reimplement Tier 2 counters (#117144)
Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``), shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The API used for adaptive specialization counters is changed but the behavior is (supposed to be) identical.

The behavior of the Tier 2 counters is changed:

- There are no longer dynamic thresholds (we never varied these).
- All counters now use the same exponential backoff.
- The counter for ``JUMP_BACKWARD`` starts counting down from 16.
- The ``temperature`` in side exits starts counting down from 64.
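As a quick illustration of the arithmetic, here is a standalone C simulation of the restart rule the unified counter uses (a sketch mirroring ``restart_backoff_counter`` from the new ``pycore_backoff.h`` below, not interpreter code). Each restart bumps the 4-bit exponent, capped at 12, and resets the 12-bit countdown to 2**backoff - 1, so the advertised sequence 16, 32, 64, ... is the power-of-two rounding of the actual reset values 16, 31, 63, ...:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Mirrors JUMP_BACKWARD's starting point: countdown 16, backoff 4. */
        uint16_t value = 16, backoff = 4;
        printf("initial countdown: %u\n", value);
        for (int restart = 1; restart <= 10; restart++) {
            if (backoff < 12) {       /* exponent is capped at the 12 value bits */
                backoff++;
            }
            value = (uint16_t)((1 << backoff) - 1);
            printf("after restart %2d: countdown %4u (backoff %u)\n",
                   restart, value, backoff);
        }
        return 0;
    }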

19 files changed (+313 −235 lines)

Include/cpython/code.h (+11)
@@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors {
     uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS];
 } _Py_GlobalMonitors;
 
+typedef struct {
+    union {
+        struct {
+            uint16_t backoff : 4;
+            uint16_t value : 12;
+        };
+        uint16_t as_counter;  // For printf("%#x", ...)
+    };
+} _Py_BackoffCounter;
+
 /* Each instruction in a code object is a fixed-width value,
  * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG
  * opcode allows for larger values but the current limit is 3 uses
@@ -39,6 +49,7 @@ typedef union {
         uint8_t code;
         uint8_t arg;
     } op;
+    _Py_BackoffCounter counter;  // First cache entry of specializable op
 } _Py_CODEUNIT;
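The anonymous union is what lets one counter occupy a single 16-bit cache entry and still be read back as a raw integer. A minimal standalone sketch of the same layout (a hypothetical demo file, not CPython code; note that bitfield ordering is implementation-defined in C, so the exact bit positions within ``as_counter`` depend on the ABI):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same shape as _Py_BackoffCounter in Include/cpython/code.h. */
    typedef struct {
        union {
            struct {
                uint16_t backoff : 4;   /* exponent used for the next restart */
                uint16_t value : 12;    /* the countdown itself */
            };
            uint16_t as_counter;        /* raw view, e.g. for printf("%#x", ...) */
        };
    } BackoffCounter;

    int main(void)
    {
        /* No padding: one counter is exactly one 16-bit code unit. */
        static_assert(sizeof(BackoffCounter) == sizeof(uint16_t),
                      "counter must fit one cache entry");
        BackoffCounter c = {.value = 16, .backoff = 4};
        printf("value=%u backoff=%u raw=%#x\n",
               (unsigned)c.value, (unsigned)c.backoff, (unsigned)c.as_counter);
        return 0;
    }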

Include/cpython/optimizer.h (+1 −14)
@@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
 
 typedef struct _exit_data {
     uint32_t target;
-    int16_t temperature;
+    _Py_BackoffCounter temperature;
     const struct _PyExecutorObject *executor;
 } _PyExitData;
 
@@ -115,11 +115,6 @@ typedef int (*optimize_func)(
 struct _PyOptimizerObject {
     PyObject_HEAD
     optimize_func optimize;
-    /* These thresholds are treated as signed so do not exceed INT16_MAX
-     * Use INT16_MAX to indicate that the optimizer should never be called */
-    uint16_t resume_threshold;
-    uint16_t side_threshold;
-    uint16_t backedge_threshold;
     /* Data needed by the optimizer goes here, but is opaque to the VM */
 };
 
@@ -151,14 +146,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void);
 PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void);
 
-#define OPTIMIZER_BITS_IN_COUNTER 4
-/* Minimum of 16 additional executions before retry */
-#define MIN_TIER2_BACKOFF 4
-#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER)
-#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1)
-/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */
-#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX
-
 #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
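The retyped ``temperature`` drives side exits like any other backoff counter: count down while cold, attempt a trace on trigger, back off on failure. A hedged sketch of that flow using the helpers from the new ``pycore_backoff.h`` below (``ExitData``, ``take_side_exit`` and ``try_make_trace`` are illustrative stand-ins for the interpreter's real machinery, and this only compiles inside CPython core):

    #include "Python.h"
    #include "pycore_backoff.h"        /* internal: requires Py_BUILD_CORE */

    typedef struct Executor Executor;  /* stands in for _PyExecutorObject */
    typedef struct {
        _Py_BackoffCounter temperature;  /* starts at initial_temperature_backoff_counter() */
        Executor *executor;
    } ExitData;                        /* stands in for _PyExitData */

    extern Executor *try_make_trace(void);  /* hypothetical; NULL on failure */

    static Executor *
    take_side_exit(ExitData *edata)
    {
        if (!backoff_counter_triggers(edata->temperature)) {
            /* Still cold: count down by one and resume in Tier 1. */
            edata->temperature = advance_backoff_counter(edata->temperature);
            return NULL;
        }
        Executor *exec = try_make_trace();
        if (exec == NULL) {
            /* Failed: wait exponentially longer before the next attempt. */
            edata->temperature = restart_backoff_counter(edata->temperature);
            return NULL;
        }
        return edata->executor = exec;
    }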

Include/internal/pycore_backoff.h (+128, new file)
@@ -0,0 +1,128 @@
+
+#ifndef Py_INTERNAL_BACKOFF_H
+#define Py_INTERNAL_BACKOFF_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/* 16-bit countdown counters using exponential backoff.
+
+   These are used by the adaptive specializer to count down until
+   it is time to specialize an instruction. If specialization fails
+   the counter is reset using exponential backoff.
+
+   Another use is for the Tier 2 optimizer to decide when to create
+   a new Tier 2 trace (executor). Again, exponential backoff is used.
+
+   The 16-bit counter is structured as a 12-bit unsigned 'value'
+   and a 4-bit 'backoff' field. When resetting the counter, the
+   backoff field is incremented (until it reaches a limit) and the
+   value is set to a bit mask representing the value 2**backoff - 1.
+   The maximum backoff is 12 (the number of value bits).
+
+   There is an exceptional value which must not be updated, 0xFFFF.
+*/
+
+#define UNREACHABLE_BACKOFF 0xFFFF
+
+static inline bool
+is_unreachable_backoff_counter(_Py_BackoffCounter counter)
+{
+    return counter.as_counter == UNREACHABLE_BACKOFF;
+}
+
+static inline _Py_BackoffCounter
+make_backoff_counter(uint16_t value, uint16_t backoff)
+{
+    assert(backoff <= 15);
+    assert(value <= 0xFFF);
+    return (_Py_BackoffCounter){.value = value, .backoff = backoff};
+}
+
+static inline _Py_BackoffCounter
+forge_backoff_counter(uint16_t counter)
+{
+    return (_Py_BackoffCounter){.as_counter = counter};
+}
+
+static inline _Py_BackoffCounter
+restart_backoff_counter(_Py_BackoffCounter counter)
+{
+    assert(!is_unreachable_backoff_counter(counter));
+    if (counter.backoff < 12) {
+        return make_backoff_counter((1 << (counter.backoff + 1)) - 1,
+                                    counter.backoff + 1);
+    }
+    else {
+        return make_backoff_counter((1 << 12) - 1, 12);
+    }
+}
+
+static inline _Py_BackoffCounter
+pause_backoff_counter(_Py_BackoffCounter counter)
+{
+    return make_backoff_counter(counter.value | 1, counter.backoff);
+}
+
+static inline _Py_BackoffCounter
+advance_backoff_counter(_Py_BackoffCounter counter)
+{
+    if (!is_unreachable_backoff_counter(counter)) {
+        return make_backoff_counter((counter.value - 1) & 0xFFF,
+                                    counter.backoff);
+    }
+    else {
+        return counter;
+    }
+}
+
+static inline bool
+backoff_counter_triggers(_Py_BackoffCounter counter)
+{
+    return counter.value == 0;
+}
+
+/* Initial JUMP_BACKWARD counter.
+ * This determines when we create a trace for a loop.
+ * Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
+#define JUMP_BACKWARD_INITIAL_VALUE 16
+#define JUMP_BACKWARD_INITIAL_BACKOFF 4
+static inline _Py_BackoffCounter
+initial_jump_backoff_counter(void)
+{
+    return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
+                                JUMP_BACKWARD_INITIAL_BACKOFF);
+}
+
+/* Initial exit temperature.
+ * Must be larger than ADAPTIVE_COOLDOWN_VALUE,
+ * otherwise when a side exit warms up we may construct
+ * a new trace before the Tier 1 code has properly re-specialized.
+ * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
+#define COLD_EXIT_INITIAL_VALUE 64
+#define COLD_EXIT_INITIAL_BACKOFF 6
+static inline _Py_BackoffCounter
+initial_temperature_backoff_counter(void)
+{
+    return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
+                                COLD_EXIT_INITIAL_BACKOFF);
+}
+
+/* Unreachable backoff counter. */
+static inline _Py_BackoffCounter
+initial_unreachable_backoff_counter(void)
+{
+    return forge_backoff_counter(UNREACHABLE_BACKOFF);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_BACKOFF_H */
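The intended calling pattern, with ``JUMP_BACKWARD`` as the example: check for a trigger, attempt loop optimization on trigger, restart the counter if that fails. A hedged sketch assuming in-core compilation (``try_optimize_loop`` is an illustrative placeholder, not a real interpreter function). ``pause_backoff_counter`` is the remaining primitive: it only sets the low bit, postponing the next trigger briefly without paying a full exponential restart.

    #include "Python.h"
    #include "pycore_backoff.h"        /* internal: requires Py_BUILD_CORE */

    extern int try_optimize_loop(void);   /* hypothetical; 0 on failure */

    /* The counter lives in the instruction's inline cache and starts at
     * initial_jump_backoff_counter(), i.e. value 16 with backoff 4. */
    static void
    jump_backward_step(_Py_BackoffCounter *counter)
    {
        if (backoff_counter_triggers(*counter)) {
            if (!try_optimize_loop()) {
                /* No trace created: wait 2**backoff more iterations. */
                *counter = restart_backoff_counter(*counter);
            }
        }
        else {
            /* Warming up: decrement and keep interpreting. */
            *counter = advance_backoff_counter(*counter);
        }
    }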

Include/internal/pycore_code.h (+27 −37)
@@ -31,7 +31,7 @@ extern "C" {
 #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t module_keys_version;
     uint16_t builtin_keys_version;
     uint16_t index;
@@ -40,44 +40,44 @@ typedef struct {
 #define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinaryOpCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyUnpackSequenceCache;
 
 #define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
     CACHE_ENTRIES(_PyUnpackSequenceCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyCompareOpCache;
 
 #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyBinarySubscrCache;
 
 #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySuperAttrCache;
 
 #define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
     uint16_t index;
 } _PyAttrCache;
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t type_version[2];
     union {
         uint16_t keys_version[2];
@@ -93,39 +93,39 @@ typedef struct {
 #define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t func_version[2];
 } _PyCallCache;
 
 #define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyStoreSubscrCache;
 
 #define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyForIterCache;
 
 #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PySendCache;
 
 #define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
     uint16_t version[2];
 } _PyToBoolCache;
 
 #define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache)
 
 typedef struct {
-    uint16_t counter;
+    _Py_BackoffCounter counter;
 } _PyContainsOpCache;
 
 #define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache)
@@ -451,18 +451,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 
 /** Counters
  * The first 16-bit value in each inline cache is a counter.
- * When counting misses, the counter is treated as a simple unsigned value.
 *
 * When counting executions until the next specialization attempt,
 * exponential backoff is used to reduce the number of specialization failures.
- * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
- * On a specialization failure, the backoff exponent is incremented and the
- * counter set to (2**backoff - 1).
- * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
+ * See pycore_backoff.h for more details.
+ * On a specialization failure, the backoff counter is restarted.
 */
 
-/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
-#define ADAPTIVE_BACKOFF_BITS 4
+#include "pycore_backoff.h"
 
 // A value of 1 means that we attempt to specialize the *second* time each
 // instruction is executed. Executing twice is a much better indicator of
@@ -480,36 +476,30 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 #define ADAPTIVE_COOLDOWN_VALUE 52
 #define ADAPTIVE_COOLDOWN_BACKOFF 0
 
-#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
+// Can't assert this in pycore_backoff.h because of header order dependencies
+static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
+              "Cold exit value should be larger than adaptive cooldown value");
 
-
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_bits(uint16_t value, uint16_t backoff) {
-    return ((value << ADAPTIVE_BACKOFF_BITS)
-            | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
+    return make_backoff_counter(value, backoff);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_warmup(void) {
     return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
                                  ADAPTIVE_WARMUP_BACKOFF);
 }
 
-static inline uint16_t
+static inline _Py_BackoffCounter
 adaptive_counter_cooldown(void) {
     return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
                                  ADAPTIVE_COOLDOWN_BACKOFF);
 }
 
-static inline uint16_t
-adaptive_counter_backoff(uint16_t counter) {
-    uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
-    backoff++;
-    if (backoff > MAX_BACKOFF_VALUE) {
-        backoff = MAX_BACKOFF_VALUE;
-    }
-    uint16_t value = (uint16_t)(1 << backoff) - 1;
-    return adaptive_counter_bits(value, backoff);
+static inline _Py_BackoffCounter
+adaptive_counter_backoff(_Py_BackoffCounter counter) {
+    return restart_backoff_counter(counter);
 }
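Tying the retyped cache fields to the helpers above, a hedged sketch of the Tier 1 adaptive flow (again assuming in-core compilation; ``specialize_attempt`` is an illustrative placeholder, not a real specializer entry point): warm up, specialize on trigger, cool down on success, back off on failure.

    #include "Python.h"
    #include "pycore_code.h"       /* internal: adaptive_counter_*() helpers */

    extern int specialize_attempt(void);   /* hypothetical; 0 on failure */

    /* counter is the first cache entry of a specializable instruction,
     * initialized to adaptive_counter_warmup(). */
    static void
    adaptive_step(_Py_BackoffCounter *counter)
    {
        if (backoff_counter_triggers(*counter)) {
            if (specialize_attempt()) {
                /* Success: run specialized for ADAPTIVE_COOLDOWN_VALUE executions. */
                *counter = adaptive_counter_cooldown();
            }
            else {
                /* Failure: exponential backoff before the next attempt. */
                *counter = adaptive_counter_backoff(*counter);
            }
        }
        else {
            *counter = advance_backoff_counter(*counter);
        }
    }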

Include/internal/pycore_interp.h (−6)
@@ -239,12 +239,6 @@ struct _is {
     _PyOptimizerObject *optimizer;
     _PyExecutorObject *executor_list_head;
 
-    /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */
-    uint32_t optimizer_resume_threshold;
-    uint32_t optimizer_backedge_threshold;
-
-    uint16_t optimizer_side_threshold;
-
     _rare_events rare_events;
     PyDict_WatchCallback builtins_dict_watcher;
