Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings
This repository was archived by the owner on Feb 13, 2025. It is now read-only.

Commit 9ab9a2f

Browse files
author
Anselm Kruis
committed
Merge branch main into main-slp
2 parents 0feca7a + 91234a1 commit 9ab9a2f

File tree

13 files changed

+285
-16
lines changed

13 files changed

+285
-16
lines changed

‎Doc/whatsnew/3.8.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,10 @@ Optimizations
860860
methods up to 20--50%. (Contributed by Serhiy Storchaka in :issue:`23867`,
861861
:issue:`35582` and :issue:`36127`.)
862862

863+
* The ``LOAD_GLOBAL`` instruction now uses the new "per opcode cache" mechanism.
864+
It is about 40% faster now. (Contributed by Yury Selivanov and Inada Naoki in
865+
:issue:`26219`.)
866+
863867

864868
Build and C API Changes
865869
=======================

‎Include/code.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ typedef uint16_t _Py_CODEUNIT;
1717
# define _Py_OPARG(word) ((word) >> 8)
1818
#endif
1919

20+
typedef struct _PyOpcache _PyOpcache;
21+
2022
/* Bytecode object */
2123
typedef struct {
2224
PyObject_HEAD
@@ -49,6 +51,21 @@ typedef struct {
4951
Type is a void* to keep the format private in codeobject.c to force
5052
people to go through the proper APIs. */
5153
void *co_extra;
54+
55+
/* Per opcodes just-in-time cache
56+
*
57+
* To reduce cache size, we use indirect mapping from opcode index to
58+
* cache object:
59+
* cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1]
60+
*/
61+
62+
// co_opcache_map is indexed by (next_instr - first_instr).
63+
// * 0 means there is no cache for this opcode.
64+
// * n > 0 means there is cache in co_opcache[n-1].
65+
unsigned char *co_opcache_map;
66+
_PyOpcache *co_opcache;
67+
int co_opcache_flag; // used to determine when create a cache.
68+
unsigned char co_opcache_size; // length of co_opcache.
5269
} PyCodeObject;
5370

5471
/* Masks for co_flags above */

‎Include/internal/pycore_ceval.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ PyAPI_FUNC(void) _PyEval_SignalAsyncExc(
3131
PyAPI_FUNC(void) _PyEval_ReInitThreads(
3232
_PyRuntimeState *runtime);
3333

34+
/* Private function */
35+
void _PyEval_Fini(void);
36+
3437
#ifdef __cplusplus
3538
}
3639
#endif

‎Include/internal/pycore_code.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef Py_INTERNAL_CODE_H
2+
#define Py_INTERNAL_CODE_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
/* Cache payload for a LOAD_GLOBAL instruction (stored as the `lg` member of
 * struct _PyOpcache): the looked-up object together with the dict version
 * tags described on the fields below. */
typedef struct {
8+
PyObject *ptr; /* Cached pointer (borrowed reference) */
9+
uint64_t globals_ver; /* ma_version of global dict */
10+
uint64_t builtins_ver; /* ma_version of builtin dict */
11+
} _PyOpcache_LoadGlobal;
12+
13+
struct _PyOpcache {
14+
union {
15+
_PyOpcache_LoadGlobal lg;
16+
} u;
17+
char optimized;
18+
};
19+
20+
/* Private API */
21+
int _PyCode_InitOpcache(PyCodeObject *co);
22+
23+
24+
#ifdef __cplusplus
25+
}
26+
#endif
27+
#endif /* !Py_INTERNAL_CODE_H */

‎Lib/test/test_dict_version.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,14 @@ def test_setitem_same_value(self):
8080

8181
# setting a key to the same value with dict.__setitem__
8282
# must not change the version
83-
self.check_version_changed(d, d.__setitem__, 'key', value)
83+
self.check_version_dont_change(d, d.__setitem__, 'key', value)
8484

8585
# setting a key to the same value with dict.update
8686
# must not change the version
87-
self.check_version_changed(d, d.update, key=value)
87+
self.check_version_dont_change(d, d.update, key=value)
8888

8989
d2 = self.new_dict(key=value)
90-
self.check_version_changed(d, d.update, d2)
90+
self.check_version_dont_change(d, d.update, d2)
9191

9292
def test_setitem_equal(self):
9393
class AlwaysEqual:

‎Makefile.pre.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,7 @@ PYTHON_HEADERS= \
11221122
$(srcdir)/Include/internal/pycore_accu.h \
11231123
$(srcdir)/Include/internal/pycore_atomic.h \
11241124
$(srcdir)/Include/internal/pycore_ceval.h \
1125+
$(srcdir)/Include/internal/pycore_code.h \
11251126
$(srcdir)/Include/internal/pycore_condvar.h \
11261127
$(srcdir)/Include/internal/pycore_context.h \
11271128
$(srcdir)/Include/internal/pycore_fileutils.h \
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Implemented a per-opcode cache mechanism, and the ``LOAD_GLOBAL`` instruction now uses
2+
it. ``LOAD_GLOBAL`` is now about 40% faster. Contributed by Yury Selivanov,
3+
and Inada Naoki.

‎Objects/codeobject.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
#include "Python.h"
44
#include "code.h"
5+
#include "opcode.h"
56
#include "structmember.h"
7+
#include "pycore_code.h"
68
#include "pycore_pystate.h"
79
#include "pycore_tupleobject.h"
810
#include "clinic/codeobject.c.h"
@@ -233,9 +235,56 @@ PyCode_New(int argcount, int posonlyargcount, int kwonlyargcount,
233235
co->co_zombieframe = NULL;
234236
co->co_weakreflist = NULL;
235237
co->co_extra = NULL;
238+
239+
co->co_opcache_map = NULL;
240+
co->co_opcache = NULL;
241+
co->co_opcache_flag = 0;
242+
co->co_opcache_size = 0;
236243
return co;
237244
}
238245

246+
int
247+
_PyCode_InitOpcache(PyCodeObject *co)
248+
{
249+
Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
250+
co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1);
251+
if (co->co_opcache_map == NULL) {
252+
return -1;
253+
}
254+
255+
_Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
256+
Py_ssize_t opts = 0;
257+
258+
for (Py_ssize_t i = 0; i < co_size;) {
259+
unsigned char opcode = _Py_OPCODE(opcodes[i]);
260+
i++; // 'i' is now aligned to (next_instr - first_instr)
261+
262+
// TODO: LOAD_METHOD, LOAD_ATTR
263+
if (opcode == LOAD_GLOBAL) {
264+
co->co_opcache_map[i] = ++opts;
265+
if (opts > 254) {
266+
break;
267+
}
268+
}
269+
}
270+
271+
if (opts) {
272+
co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
273+
if (co->co_opcache == NULL) {
274+
PyMem_FREE(co->co_opcache_map);
275+
return -1;
276+
}
277+
}
278+
else {
279+
PyMem_FREE(co->co_opcache_map);
280+
co->co_opcache_map = NULL;
281+
co->co_opcache = NULL;
282+
}
283+
284+
co->co_opcache_size = opts;
285+
return 0;
286+
}
287+
239288
PyCodeObject *
240289
PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
241290
{
@@ -458,6 +507,15 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
458507
static void
459508
code_dealloc(PyCodeObject *co)
460509
{
510+
if (co->co_opcache != NULL) {
511+
PyMem_FREE(co->co_opcache);
512+
}
513+
if (co->co_opcache_map != NULL) {
514+
PyMem_FREE(co->co_opcache_map);
515+
}
516+
co->co_opcache_flag = 0;
517+
co->co_opcache_size = 0;
518+
461519
if (co->co_extra != NULL) {
462520
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
463521
_PyCodeObjectExtra *co_extra = co->co_extra;
@@ -504,6 +562,13 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args))
504562
res += sizeof(_PyCodeObjectExtra) +
505563
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
506564
}
565+
if (co->co_opcache != NULL) {
566+
assert(co->co_opcache_map != NULL);
567+
// co_opcache_map
568+
res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
569+
// co_opcache
570+
res += co->co_opcache_size * sizeof(_PyOpcache);
571+
}
507572
return PyLong_FromSsize_t(res);
508573
}
509574

‎Objects/dictobject.c

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,20 +1080,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
10801080
return 0;
10811081
}
10821082

1083-
if (_PyDict_HasSplitTable(mp)) {
1084-
mp->ma_values[ix] = value;
1085-
if (old_value == NULL) {
1086-
/* pending state */
1087-
assert(ix == mp->ma_used);
1088-
mp->ma_used++;
1083+
if (old_value != value) {
1084+
if (_PyDict_HasSplitTable(mp)) {
1085+
mp->ma_values[ix] = value;
1086+
if (old_value == NULL) {
1087+
/* pending state */
1088+
assert(ix == mp->ma_used);
1089+
mp->ma_used++;
1090+
}
10891091
}
1092+
else {
1093+
assert(old_value != NULL);
1094+
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1095+
}
1096+
mp->ma_version_tag = DICT_NEXT_VERSION();
10901097
}
1091-
else {
1092-
assert(old_value != NULL);
1093-
DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1094-
}
1095-
1096-
mp->ma_version_tag = DICT_NEXT_VERSION();
10971098
Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
10981099
ASSERT_CONSISTENT(mp);
10991100
Py_DECREF(key);

‎PCbuild/pythoncore.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@
164164
<ClInclude Include="..\Include\import.h" />
165165
<ClInclude Include="..\Include\internal\pycore_accu.h" />
166166
<ClInclude Include="..\Include\internal\pycore_atomic.h" />
167+
<ClInclude Include="..\Include\internal\pycore_code.h" />
167168
<ClInclude Include="..\Include\internal\pycore_ceval.h" />
168169
<ClInclude Include="..\Include\internal\pycore_condvar.h" />
169170
<ClInclude Include="..\Include\internal\pycore_context.h" />

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /