From d6f6bfc679ee664bce2abac3f94aaba516d44725 Mon Sep 17 00:00:00 2001 From: ausaki Date: 2021年2月18日 14:39:32 +0800 Subject: [PATCH 1/4] update --- README.md | 68 +-- ch15.md | 1 - "code 345円257円271円350円261円241円.md" | 242 ++++++++ codes/README.md | 9 - codes/python_scripts/ch09/simple_obj.py | 4 - codes/python_scripts/ch09/simple_obj.pyc | Bin 170 -> 0 bytes codes/python_scripts/ch10/for_control.py | 3 - codes/python_scripts/ch10/if_control.py | 11 - codes/python_scripts/ch10/while_control.py | 8 - codes/python_scripts/ch11/func_00.py | 4 - codes/python_scripts/ch11/func_01.py | 4 - codes/python_scripts/ch11/func_02.py | 4 - codes/python_scripts/ch11/func_03.py | 4 - codes/python_scripts/ch11/func_04.py | 8 - codes/python_scripts/ch12/class_00.py | 15 - codes/python_scripts/ch12/test.py | 15 - codes/python_scripts/ch14/foo/m.py | 4 - codes/python_scripts/ch14/foo/m2.py | 2 - codes/python_scripts/ch14/test.py | 3 - codes/python_scripts/get_codeobj_info.py | 58 -- codes/python_scripts/parse_pyc.py | 230 ------- codes/small_python/Makefile | 92 --- codes/small_python/include/engine.h | 147 ----- codes/small_python/include/intObject.h | 15 - codes/small_python/include/object.h | 31 - codes/small_python/include/strObject.h | 15 - codes/small_python/samll_python.ncb | Bin 27648 -> 0 bytes codes/small_python/samll_python.sln | 21 - codes/small_python/samll_python.suo | Bin 8192 -> 0 bytes codes/small_python/samll_python.vcproj | 157 ----- codes/small_python/src/engine.cpp | 1 - codes/small_python/src/intObject.cpp | 41 -- codes/small_python/src/main.cpp | 14 - codes/small_python/src/object.cpp | 9 - codes/small_python/src/strObject.cpp | 47 -- docker.md | 30 - gil.md | 407 +++++++++++++ .../ch14/foo/__init__.py => opcache.md | 0 ...25347円232円204円346円212円200円345円267円247円.md" | 26 + "345円207円275円346円225円260円.md" | 570 ++++++++++++++++++ "345円215円217円347円250円213円.md" | 321 ++++++++++ ...02345円270円270円345円244円204円347円220円206円.md" | 377 ++++++++++++ 
"346円240円210円345円270円247円.md" | 136 +++++ "347円224円237円346円210円220円345円231円250円.md" | 445 ++++++++++++++ "347円272円277円347円250円213円.md" | 2 + "350円231円232円346円213円237円346円234円272円.md" | 544 +++++++++++++++++ 46 files changed, 3075 insertions(+), 1070 deletions(-) create mode 100644 "code 345円257円271円350円261円241円.md" delete mode 100644 codes/README.md delete mode 100644 codes/python_scripts/ch09/simple_obj.py delete mode 100644 codes/python_scripts/ch09/simple_obj.pyc delete mode 100644 codes/python_scripts/ch10/for_control.py delete mode 100644 codes/python_scripts/ch10/if_control.py delete mode 100644 codes/python_scripts/ch10/while_control.py delete mode 100644 codes/python_scripts/ch11/func_00.py delete mode 100644 codes/python_scripts/ch11/func_01.py delete mode 100644 codes/python_scripts/ch11/func_02.py delete mode 100644 codes/python_scripts/ch11/func_03.py delete mode 100644 codes/python_scripts/ch11/func_04.py delete mode 100644 codes/python_scripts/ch12/class_00.py delete mode 100644 codes/python_scripts/ch12/test.py delete mode 100644 codes/python_scripts/ch14/foo/m.py delete mode 100644 codes/python_scripts/ch14/foo/m2.py delete mode 100644 codes/python_scripts/ch14/test.py delete mode 100644 codes/python_scripts/get_codeobj_info.py delete mode 100644 codes/python_scripts/parse_pyc.py delete mode 100755 codes/small_python/Makefile delete mode 100755 codes/small_python/include/engine.h delete mode 100755 codes/small_python/include/intObject.h delete mode 100755 codes/small_python/include/object.h delete mode 100755 codes/small_python/include/strObject.h delete mode 100755 codes/small_python/samll_python.ncb delete mode 100755 codes/small_python/samll_python.sln delete mode 100755 codes/small_python/samll_python.suo delete mode 100755 codes/small_python/samll_python.vcproj delete mode 100755 codes/small_python/src/engine.cpp delete mode 100755 codes/small_python/src/intObject.cpp delete mode 100755 codes/small_python/src/main.cpp delete mode 100755 
codes/small_python/src/object.cpp delete mode 100755 codes/small_python/src/strObject.cpp delete mode 100644 docker.md create mode 100644 gil.md rename codes/python_scripts/ch14/foo/__init__.py => opcache.md (100%) create mode 100644 "344円275円277円347円224円250円 printf 350円277円233円350円241円214円346円265円213円350円257円225円347円232円204円346円212円200円345円267円247円.md" create mode 100644 "345円207円275円346円225円260円.md" create mode 100644 "345円215円217円347円250円213円.md" create mode 100644 "345円274円202円345円270円270円345円244円204円347円220円206円.md" create mode 100644 "346円240円210円345円270円247円.md" create mode 100644 "347円224円237円346円210円220円345円231円250円.md" create mode 100644 "347円272円277円347円250円213円.md" create mode 100644 "350円231円232円346円213円237円346円234円272円.md" diff --git a/README.md b/README.md index f663f4a..6fc12f6 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,12 @@ -# 《Python 源码剖析》学习笔记 +# Python 3.9 源代码阅读笔记 -> 《Python 源码剖析》 -> 作者:陈儒 Robert Chen -> 出版年份:2008 年 -> Python 版本:2.5 +之前看了 "Python 源码剖析", 这本书是基于 Python 2.5 的, 现在的 Python 已经发生了很大的改变. 因此, 在这里记录下阅读 Python 3.9 的源代码的笔记, 当然阅读内容主要是 Python3 某些新加的特性. 
-在阅读《Python 源码剖析》的过程中记录的一些笔记,不是特别详细,简单记录了一些关键的地方,方便以后查看。 +- [Python 虚拟机](ceval.md) -## 编译代码 +- [GIL](gil.md) -使用 Docker 编译 Python 源代码,使用说明参考 [Docker 使用说明](docker.md)。 - -## 源代码 - -在阅读《Python 源码剖析》过程中,为了验证一些想法,对 Python2.5的源代码进行了不少修改。修改过的代码在[这里](https://github.com/ausaki/python25)。 - -master 分支是原始代码。 - -每个 chxx 分支对应书中相应的章节,基于 master 分支修改而来。 - -## 其它资源 - -- [作者在 CSDN 的博客](https://blog.csdn.net/balabalamerobert)(不再更新)。 - -- [Extending and Embedding the Python Interpreter](https://docs.python.org/2.7/extending/index.html) - - 扩展和嵌入 Python 解析器,介绍了如何用 C/C++ 编写 Python 的扩展模块,如何在其它语言中嵌入 Python 解释器。 - -- [C API](https://docs.python.org/2.7/c-api/index.html) - - 详细介绍了 Python 内部的 C API。 - -- [Python Developer’s Guide](https://devguide.python.org/) - - Python 开发者指南。 - - - -## 目录 - -- 第一部分 - - - [ch01 - Pyhton 对象初探](ch01.md) - - [ch02 - Pyhton 中的整数对象](ch02.md) - - [ch03 - Pyhton 中的字符串对象](ch03.md) - - [ch04 - Python 中的 List 对象](ch04.md) - - [ch05 - Python 中的 Dict 对象](ch05.md) - - [ch06 - 最简单的Python模拟——Small Python](ch06.md) - -- 第二部分 - - - [ch07 - Python的编译结果——Code对象与pyc文件](ch07.md) - - [ch08 - Python 虚拟机框架](ch08.md) - - [ch09 - Python虚拟机中的一般表达式](ch09.md) - - [ch010 - Python虚拟机中的控制流](ch10.md) - - [ch011 - Python虚拟机中的函数机制](ch11.md) - - [ch012 - Python虚拟机中的类机制](ch12.md) - - [ch013 - Python运行环境初始化](ch13.md) - - [ch014 - Python模块的动态加载机制](ch14.md) - - [ch015 - Python多线程机制](ch15.md) - - [ch016 - Python的内存管理机制](ch16.md) - - -## THE END - -大概花了一个月的时间(2018年8月14日 ~ 2018年9月13日 )看完本书,收获颇多,初步了解了Python 的底层细节,也增加了阅读源码的信心。《Python 源码剖析》这本书没有办法把 Python 源码的各个方面都介绍到,自己有时间的话还应该多阅读源码。 +[源码注释分支](https://github.com/ausaki/python) diff --git a/ch15.md b/ch15.md index c255be5..5530b31 100644 --- a/ch15.md +++ b/ch15.md @@ -32,4 +32,3 @@ Python 中的线程是对操作系统的原生线程的封装,具体实现在 当持有GIL的线程发现标志后,会释放掉 GIL。 - \ No newline at end of file diff --git "a/code 345円257円271円350円261円241円.md" "b/code 345円257円271円350円261円241円.md" new file mode 100644 index 0000000..8cd4b91 --- /dev/null +++ "b/code 345円257円271円350円261円241円.md" @@ 
-0,0 +1,242 @@ +# code 对象 + +```c +struct PyCodeObject { + PyObject_HEAD + int co_argcount; /* #arguments, except *args */ + int co_posonlyargcount; /* #positional only arguments */ + int co_kwonlyargcount; /* #keyword only arguments */ + int co_nlocals; /* #local variables */ + int co_stacksize; /* #entries needed for evaluation stack */ + int co_flags; /* CO_..., see below */ + int co_firstlineno; /* first source line number */ + PyObject *co_code; /* instruction opcodes */ + PyObject *co_consts; /* list (constants used) */ + PyObject *co_names; /* list of strings (names used) */ + PyObject *co_varnames; /* tuple of strings (local variable names) */ + PyObject *co_freevars; /* tuple of strings (free variable names) */ + PyObject *co_cellvars; /* tuple of strings (cell variable names) */ + /* The rest aren't used in either hash or comparisons, except for co_name, + used in both. This is done to preserve the name and line number + for tracebacks and debuggers; otherwise, constant de-duplication + would collapse identical functions/lambdas defined on different lines. + */ + Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */ + PyObject *co_filename; /* unicode (where it was loaded from) */ + PyObject *co_name; /* unicode (name, for reference) */ + PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See + Objects/lnotab_notes.txt for details. */ + void *co_zombieframe; /* for optimization only (see frameobject.c) */ + PyObject *co_weakreflist; /* to support weakrefs to code objects */ + /* Scratch space for extra data relating to the code object. + Type is a void* to keep the format private in codeobject.c to force + people to go through the proper APIs. */ + void *co_extra; + + /* Per opcodes just-in-time cache + * + * To reduce cache size, we use indirect mapping from opcode index to + * cache object: + * cache = co_opcache[co_opcache_map[next_instr - first_instr] - 1] + */ + + // co_opcache_map is indexed by (next_instr - first_instr). 
+ // * 0 means there is no cache for this opcode. + // * n> 0 means there is cache in co_opcache[n-1]. + unsigned char *co_opcache_map; + _PyOpcache *co_opcache; + int co_opcache_flag; // used to determine when create a cache. + unsigned char co_opcache_size; // length of co_opcache. +}; +``` + + +## 字节码缓存(opcache) + +注意到 PyCodeObject 对象中有一个 co_opcache 属性, 似乎支持字节码缓存, 查看了其它代码发现字节码缓存功能目前只支持 LOAD_GLOBAL. + +字节码缓存的基本原理是保存字节码执行的结果, 当再次执行该字节码可以直接返回缓存的结果, 从而提高字节码的执行效率. + +从定义 PyCodeObject 的结构体的代码注释中可以看出字节码缓存的实现原理, co_opcache_map 是一个 char 类型的数组, 索引是字节码的偏移量(`offset = next_instr - first_instr`), 如果 `co_opcache_map[offset]` 等于 0 说明该字节码没有缓存, 如果大于 0, 说明该字节码的缓存保存在 `co_opcache[co_opcache_map[offset] - 1]`. + +co_opcache 是一个 _PyOpcache 类型的数组, 代码如下: + +```c +typedef struct { + PyObject *ptr; /* Cached pointer (borrowed reference) */ + uint64_t globals_ver; /* ma_version of global dict */ + uint64_t builtins_ver; /* ma_version of builtin dict */ +} _PyOpcache_LoadGlobal; + +struct _PyOpcache { + union { + _PyOpcache_LoadGlobal lg; + } u; + char optimized; +}; +``` + +`_PyOpcache_LoadGlobal.ptr` 指向缓存的数据, `_PyOpcache_LoadGlobal.globals_ver` 表示缓存数据时 globals(全局变量字典) 的版本, `_PyOpcache_LoadGlobal.builtins_ver` 表示缓存数据时 builtins 的版本. + +字典类型内部有一个版本字段 `ma_version_tag`, 每次字典被修改时, 都会增加版本字段. 代码如下: + +```c +/*Global counter used to set ma_version_tag field of dictionary. + * It is incremented each time that a dictionary is created and each + * time that a dictionary is modified. */ +static uint64_t pydict_global_version = 0; + +#define DICT_NEXT_VERSION() (++pydict_global_version) +``` + +关于 `ma_version_tag` 的更多信息可以查看 [PEP 509 -- Add a private version to dict](https://www.python.org/dev/peps/pep-0509/). + +当执行 `LOAD_GLOBAL` 时, 如果缓存存在并且缓存的版本号和当前版本号一致, 那么直接返回缓存的数据. 
+ +### 初始化 opcache + +```c +int +_PyCode_InitOpcache(PyCodeObject *co) +{ + Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT); + co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size, 1); + if (co->co_opcache_map == NULL) { + return -1; + } + + _Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code); + Py_ssize_t opts = 0; + + for (Py_ssize_t i = 0; i < co_size;) { + unsigned char opcode = _Py_OPCODE(opcodes[i]); + i++; // 'i' is now aligned to (next_instr - first_instr) + + // TODO: LOAD_METHOD, LOAD_ATTR + if (opcode == LOAD_GLOBAL) { + opts++; + co->co_opcache_map[i] = (unsigned char)opts; + if (opts> 254) { + break; + } + } + } + + if (opts) { + co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache)); + if (co->co_opcache == NULL) { + PyMem_FREE(co->co_opcache_map); + return -1; + } + } + else { + PyMem_FREE(co->co_opcache_map); + co->co_opcache_map = NULL; + co->co_opcache = NULL; + } + + co->co_opcache_size = (unsigned char)opts; + return 0; +} +``` + +### LOAD_GLOBAL 检查 opcache + +```c +case TARGET(LOAD_GLOBAL): { + PyObject *name; + PyObject *v; + if (PyDict_CheckExact(f->f_globals) + && PyDict_CheckExact(f->f_builtins)) + { + OPCACHE_CHECK(); + if (co_opcache != NULL && co_opcache->optimized> 0) { + _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; + + if (lg->globals_ver == + ((PyDictObject *)f->f_globals)->ma_version_tag + && lg->builtins_ver == + ((PyDictObject *)f->f_builtins)->ma_version_tag) + { + PyObject *ptr = lg->ptr; + OPCACHE_STAT_GLOBAL_HIT(); + assert(ptr != NULL); + Py_INCREF(ptr); + PUSH(ptr); + DISPATCH(); + } + } + + name = GETITEM(names, oparg); + v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals, + (PyDictObject *)f->f_builtins, + name); + if (v == NULL) { + if (!_PyErr_OCCURRED()) { + /* _PyDict_LoadGlobal() returns NULL without raising + * an exception if the key doesn't exist */ + format_exc_check_arg(tstate, PyExc_NameError, + NAME_ERROR_MSG, name); + } + goto error; + } + + if 
(co_opcache != NULL) { + _PyOpcache_LoadGlobal *lg = &co_opcache->u.lg; + + if (co_opcache->optimized == 0) { + /* Wasn't optimized before. */ + OPCACHE_STAT_GLOBAL_OPT(); + } else { + OPCACHE_STAT_GLOBAL_MISS(); + } + + co_opcache->optimized = 1; + lg->globals_ver = + ((PyDictObject *)f->f_globals)->ma_version_tag; + lg->builtins_ver = + ((PyDictObject *)f->f_builtins)->ma_version_tag; + lg->ptr = v; /* borrowed */ + } + + Py_INCREF(v); + } +``` + +网上搜 "Python opcache" 发现都是关于 PHP 的, 唯一比较有用的信息是一个 [issue](https://bugs.python.org/issue26219). 这个 issue 在 2016 年提出, 2019 年才合并到 python 3.8. 到目前为止只支持 LOAD_GLOBAL, 未来应该会支持 LOAD_ATTR 和 LOAD_METHOD. + +突然想到一个手动优化读取全局变量的性能的方法, 在函数内使用一个局部变量保存全局变量的引用, 然后在之后代码都使用该局部变量. 这招对于比较长的属性访问也有帮助, 例如 `foo = obj.a.b.c.d` 可以提高属性访问的速度. + +一个例子: + +```py +class A: + def __init__(self) -> None: + self.a = 1 + +class B: + def __init__(self) -> None: + self.a = A() + +class C: + def __init__(self) -> None: + self.b = B() + +c = C() +print(c.b.a.a) +``` + +属性访问的字节码: + +``` + 15 48 LOAD_NAME 4 (print) + 50 LOAD_NAME 3 (c) + 52 LOAD_ATTR 5 (b) + 54 LOAD_ATTR 6 (a) + 56 LOAD_ATTR 6 (a) + 58 CALL_FUNCTION 1 +``` + + + + diff --git a/codes/README.md b/codes/README.md deleted file mode 100644 index c8fe0bb..0000000 --- a/codes/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# 书中涉及到的一些代码 - -- small_python - - 非常简单的 python 解释器。 - -- pytho_scripts - - 一些有用的 python 脚本,例如解析 pyc 文件的脚本。 \ No newline at end of file diff --git a/codes/python_scripts/ch09/simple_obj.py b/codes/python_scripts/ch09/simple_obj.py deleted file mode 100644 index a2fa51b..0000000 --- a/codes/python_scripts/ch09/simple_obj.py +++ /dev/null @@ -1,4 +0,0 @@ -i = 1 -s = "Python" -d = {} -l = [] diff --git a/codes/python_scripts/ch09/simple_obj.pyc b/codes/python_scripts/ch09/simple_obj.pyc deleted file mode 100644 index d5c09cbdc4d3c44986b4dd4c08a2587c15032e3b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 170 
zcmdn|iI=Oru0A@M0SXv_v;z0w!}bfWiz2qL>%REzZm>$VrXQPs-9Os4V6H WirD1lr diff --git a/codes/python_scripts/ch10/for_control.py b/codes/python_scripts/ch10/for_control.py deleted file mode 100644 index 688d474..0000000 --- a/codes/python_scripts/ch10/for_control.py +++ /dev/null @@ -1,3 +0,0 @@ -lst = [1, 2] -for i in lst: - print i \ No newline at end of file diff --git a/codes/python_scripts/ch10/if_control.py b/codes/python_scripts/ch10/if_control.py deleted file mode 100644 index 80501d8..0000000 --- a/codes/python_scripts/ch10/if_control.py +++ /dev/null @@ -1,11 +0,0 @@ -a = 1 -if a> 10: - print "a> 10" -elif a <= -2: - print "a <= -2" -elif a != 1: - print "a != 1" -elif a == 1: - print "a == 1" -else: - print "Unknown a" \ No newline at end of file diff --git a/codes/python_scripts/ch10/while_control.py b/codes/python_scripts/ch10/while_control.py deleted file mode 100644 index 64f7f52..0000000 --- a/codes/python_scripts/ch10/while_control.py +++ /dev/null @@ -1,8 +0,0 @@ -i = 0 -while i < 10: - i += 1 - if i>= 5: - continue - if i == 20: - break - print i \ No newline at end of file diff --git a/codes/python_scripts/ch11/func_00.py b/codes/python_scripts/ch11/func_00.py deleted file mode 100644 index 565f1cb..0000000 --- a/codes/python_scripts/ch11/func_00.py +++ /dev/null @@ -1,4 +0,0 @@ -def f(): - print 'hello world' - -f() \ No newline at end of file diff --git a/codes/python_scripts/ch11/func_01.py b/codes/python_scripts/ch11/func_01.py deleted file mode 100644 index fb584b5..0000000 --- a/codes/python_scripts/ch11/func_01.py +++ /dev/null @@ -1,4 +0,0 @@ -def f(a, b): - print a, b - -f(1, 2) \ No newline at end of file diff --git a/codes/python_scripts/ch11/func_02.py b/codes/python_scripts/ch11/func_02.py deleted file mode 100644 index 8fc5b40..0000000 --- a/codes/python_scripts/ch11/func_02.py +++ /dev/null @@ -1,4 +0,0 @@ -def f(a, b): - print a, b - -f(1, b=2) \ No newline at end of file diff --git a/codes/python_scripts/ch11/func_03.py 
b/codes/python_scripts/ch11/func_03.py deleted file mode 100644 index bbe85b6..0000000 --- a/codes/python_scripts/ch11/func_03.py +++ /dev/null @@ -1,4 +0,0 @@ -def f(a, b, *args, **kwargs): - print a, b, args, kwargs - -f(1, 2, 3, 4, c=1, d=2) \ No newline at end of file diff --git a/codes/python_scripts/ch11/func_04.py b/codes/python_scripts/ch11/func_04.py deleted file mode 100644 index f67a1a0..0000000 --- a/codes/python_scripts/ch11/func_04.py +++ /dev/null @@ -1,8 +0,0 @@ -def f(a): - v = 'value' - def g(): - print v - return g - -g = f(1) -g() \ No newline at end of file diff --git a/codes/python_scripts/ch12/class_00.py b/codes/python_scripts/ch12/class_00.py deleted file mode 100644 index 2093f79..0000000 --- a/codes/python_scripts/ch12/class_00.py +++ /dev/null @@ -1,15 +0,0 @@ -class A(object): - name = 'Python' - def __init__(self): - print 'A::__init__' - - def f(self): - print 'A::f' - - def g(self, aValue): - self.value = aValue - print self.value - -a = A() -a.f() -a.g(10) \ No newline at end of file diff --git a/codes/python_scripts/ch12/test.py b/codes/python_scripts/ch12/test.py deleted file mode 100644 index da5ab5e..0000000 --- a/codes/python_scripts/ch12/test.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- -""" 测试类的创建流程 -""" -class MyTestClass(object): - def __init__(self, name): - self.name = name - - def f(self): - print self.name - - def g(self, a): - print a - -c = MyTestClass('jack') -c.f() \ No newline at end of file diff --git a/codes/python_scripts/ch14/foo/m.py b/codes/python_scripts/ch14/foo/m.py deleted file mode 100644 index a64619f..0000000 --- a/codes/python_scripts/ch14/foo/m.py +++ /dev/null @@ -1,4 +0,0 @@ -a = 1 -b = 2 - -from . 
import m2 \ No newline at end of file diff --git a/codes/python_scripts/ch14/foo/m2.py b/codes/python_scripts/ch14/foo/m2.py deleted file mode 100644 index eae66cb..0000000 --- a/codes/python_scripts/ch14/foo/m2.py +++ /dev/null @@ -1,2 +0,0 @@ -a = 1 -b = 2 \ No newline at end of file diff --git a/codes/python_scripts/ch14/test.py b/codes/python_scripts/ch14/test.py deleted file mode 100644 index 21396e0..0000000 --- a/codes/python_scripts/ch14/test.py +++ /dev/null @@ -1,3 +0,0 @@ -import sys -sys._debug = True -import foo as f diff --git a/codes/python_scripts/get_codeobj_info.py b/codes/python_scripts/get_codeobj_info.py deleted file mode 100644 index a9e35b9..0000000 --- a/codes/python_scripts/get_codeobj_info.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -import sys -import os -import dis - -pyfile = sys.argv[1] -f_name = os.path.basename(pyfile) - -fp = open(pyfile) -src = fp.read() -fp.close() - -codeobj = compile(src, f_name, 'exec') - -def print_indent(s, indent): - if indent == 0: - print s - else: - print ' ' * (4 * indent), s - -def display(codeobj, indent=0): - info_keys = [ - 'co_filename', - 'co_name', - 'co_firstlineno', - 'co_flags', - 'co_lnotab', - 'co_names', - 'co_argcount', - 'co_nlocals', - 'co_varnames', - 'co_consts', - 'co_cellvars', - 'co_freevars', - 'co_code', - 'co_stacksize', - ] - print_indent('[code obj info] - [%s]' % codeobj.co_name, indent) - for k in info_keys: - v = getattr(codeobj, k) - if k == 'co_code': - print_indent(k + ': ', indent) - codes = dis.dis(codeobj) - if codes is not None: - for l in codes.splitlines(): - print_indent(l, indent) - elif k == 'co_consts': - print_indent(k + ': ', indent) - for const in v: - if hasattr(const, 'co_code'): - display(const, indent + 1) - else: - print_indent(const, indent) - else: - print_indent(k + ': ' + str(v), indent) - -display(codeobj) - diff --git a/codes/python_scripts/parse_pyc.py b/codes/python_scripts/parse_pyc.py deleted file mode 100644 index 
acdfaf3..0000000 --- a/codes/python_scripts/parse_pyc.py +++ /dev/null @@ -1,230 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -解析.pyc 文件 -用法:python parse_pyc.py xxx.pyc -""" - -import sys -import struct - -TYPE_NULL = '0' -TYPE_NONE = 'N' -TYPE_FALSE = 'F' -TYPE_TRUE = 'T' -TYPE_STOPITER = 'S' -TYPE_ELLIPSIS = '.' -TYPE_INT = 'i' -TYPE_INT64 = 'I' -TYPE_FLOAT = 'f' -TYPE_BINARY_FLOAT = 'g' -TYPE_COMPLEX = 'x' -TYPE_BINARY_COMPLEX = 'y' -TYPE_LONG = 'l' -TYPE_STRING = 's' -TYPE_INTERNED = 't' -TYPE_STRINGREF = 'R' -TYPE_TUPLE = '(' -TYPE_LIST = '[' -TYPE_DICT = '{' -TYPE_CODE = 'c' -TYPE_UNICODE = 'u' -TYPE_UNKNOWN = '?' -TYPE_SET = '<' -TYPE_FROZENSET = '>' - -strlist = [] - - -class NULL(object): - pass - - -null = NULL() - - -class Code(object): - - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - - def __str__(self): - keys = [ - 'argcount', - 'nlocals', - 'stacksize', - 'flags', - 'code', - 'consts', - 'names', - 'varnames', - 'freevars', - 'cellvars', - 'filename', - 'name', - 'firstlineno', - 'lnotab' - ] - s = 'code(\n' - for k in keys: - v = getattr(self, k) - if k == 'code': - v = '0x' + v.encode('hex') - s += '\t{} = {}'.format(k, v) - s += '\n' - s += ')' - return s - -def r_byte(fp): - return ord(fp.read(1)) - - -def r_short(fp): - p = struct.unpack(' -#include -#include
    - -char* info = "********** Python Research **********\nInput 'exit' to exit\n"; -char* prompt = ">>> "; - -class ExcuteEngine -{ -public: - void Excute() - { - cout << info; - cout << prompt; - while(getline(cin, m_Command)) - { - if(m_Command.size() == 0) - { - cout << prompt; - continue; - } - else if(m_Command == "exit") - { - return; - } - else - { - ExcuteCommand(m_Command); - } - cout << prompt; - } - } - -private: - void ExcuteCommand(string& command) - { - string::size_type pos = 0; - if((pos = command.find("print ")) != string::npos) - { - ExcutePrint(command.substr(6)); - } - else if((pos = command.find(" = ")) != string::npos) - { - string target = command.substr(0, pos); - string source = command.substr(pos+3); - ExcuteAdd(target, source); - } - else { - cout << "[Error] : Can not recognize : " << command << endl; - } - } - - void ExcutePrint(string symbol) - { - PyObject* object = GetObjectFromSymbol(symbol); - if(object != NULL) - { - PyTypeObject* type = object->type; - type->print(object); - } - } - - void ExcuteAdd(string& target, string& source) - { - string::size_type pos; - if(IsSourceAllDigit(source)) - { - PyObject* intObject = CreatePyIntObject(atoi(source.c_str())); - if(m_Symbol2Object.find(target) == m_Symbol2Object.end()) { - m_Symbol2Object.insert(map::value_type(target, intObject)); - } - else { - m_Symbol2Object[target] = intObject; - } - } - else if(source.find("\"") != string::npos || source.find("'") != string::npos) - { - PyObject* strObject = CreatePyStrObject(source.substr(1, source.size()-2).c_str()); - if(m_Symbol2Object.find(target) == m_Symbol2Object.end()) { - m_Symbol2Object.insert(map::value_type(target, strObject)); - } - else { - m_Symbol2Object[target] = strObject; - } - } - else if((pos = source.find("+")) != string::npos) - { - PyObject* leftObject = GetObjectFromSymbol(Trim(source.substr(0, pos))); - PyObject* rightObject = GetObjectFromSymbol(Trim(source.substr(pos+1))); - if(leftObject != NULL && right != NULL 
&& leftObject->type == rightObject->type) - { - PyObject* result = (leftObject->type)->add(leftObject, rightObject); - m_Symbol2Object.insert(map::value_type(target, result)); - } - } - } - - bool IsSourceAllDigit(string& source) - { - string::size_type len = source.size(); - for(string::size_type i = 0; i < len; ++i) - { - if(!isdigit(source[i])) - { - return false; - } - } - return true; - } - - PyObject* GetObjectFromSymbol(string symbol) - { - map::iterator it = m_Symbol2Object.find(symbol); - if(it == m_Symbol2Object.end()) - { - cout << "[Error] : " << symbol << " is not defined!!" << endl; - return NULL; - } - return it->second; - } - - string Trim(string symbol) { - int length = symbol.length(); - - int start = 0; - while(symbol[start] == ' ') { - ++start; - } - - int end = length-1; - while(symbol[end] == ' ') { - --end; - } - return symbol.substr(start, end+1); - } -private: - string m_Command; - map m_Symbol2Object; -}; -#endif diff --git a/codes/small_python/include/intObject.h b/codes/small_python/include/intObject.h deleted file mode 100755 index 6b939f9..0000000 --- a/codes/small_python/include/intObject.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __PYTHONREADING_INTOBJECT_H_ -#define __PYTHONREADING_INTOBJECT_H_ - -#include "object.h" - -typedef struct tagPyIntObject -{ - PyObject_HEAD; - int value; -}PyIntObject; - -extern PyTypeObject PyInt_Type; - -PyObject* CreatePyIntObject(int value); -#endif diff --git a/codes/small_python/include/object.h b/codes/small_python/include/object.h deleted file mode 100755 index eaca759..0000000 --- a/codes/small_python/include/object.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef __PYTHONREADING_OBJECT_H_ -#define __PYTHONREADING_OBJECT_H_ - -//definition of PyObject -#define PyObject_HEAD \ - int refCount;\ - struct tagPyTypeObject *type - -#define PyObject_HEAD_INIT(typePtr)\ - 0, typePtr - -typedef struct tagPyObject -{ - PyObject_HEAD; -}PyObject; - -//definition of PyTypeObject -typedef void (*PrintFun)(PyObject* 
object); -typedef PyObject* (*AddFun)(PyObject* left, PyObject* right); - -typedef struct tagPyTypeObject -{ - PyObject_HEAD; - char* name; - PrintFun print; - AddFun add; -}PyTypeObject; - -extern PyTypeObject PyType_Type; - -#endif diff --git a/codes/small_python/include/strObject.h b/codes/small_python/include/strObject.h deleted file mode 100755 index 1232f3f..0000000 --- a/codes/small_python/include/strObject.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __PYTHONREADING_STROBJECT_H_ -#define __PYTHONREADING_STROBJECT_H_ - -#include "object.h" - -typedef struct tagPyStrObject -{ - PyObject_HEAD; - char value[50]; -}PyStrObject; - -extern PyTypeObject PyStr_Type; - -PyObject* CreatePyStrObject(const char* value); -#endif diff --git a/codes/small_python/samll_python.ncb b/codes/small_python/samll_python.ncb deleted file mode 100755 index c86058b306c0581b8e0932f5e721263ec3d3f8b5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27648 zcmeHPZHQb~8GdK(?9A-SmG+|172-I->- zn7OkgP-Fgxu_~<~+8qy-wj0cjsls9`tbe57fqtgh4@03m76pixvvih|irq+2c6klf zXqr9maOS@Eo%eh_=kvbrxpO#^Diz9w1=k+kJ-TzJeRz7p-X9wt?(Nz^_qsWba8!`wO1lA-1o7QBlO{EP21_2^)A2#>vL@tS4+sHhm zW#Y>Mcmnt$WPw1?lLf_+E_o3$JRW#5OSZ~(*-5>xUa*2Hy)e?Yebl~ftb8XJ!&Yuuxylv<)@vw;#$b}2@j{xIn-`gipM*F{-JL>f4J5WA>I#~oBfU6kOzK?-% zJMnq1NfyN(V*|=EFcOr+Z~__u8N@tsmaMb6648e~0^ma<%lkoczq`g%}vt7xps%iax{;j^njhxzua^lg>@ zgn3NkHz?is1dv{60ドルiTcb?|`nNboNa34W;)vt*arPX#F(?*`wC(1*~ke4q4xejD*$ zMIb-nU5mKVPs;0W5%_yA%ENnJKZ9@`WnnO65NH>HucO~YfAHco`!Bi|MsTP0^?75R zGLCiS|EC@X6rTus@p%n#Bh|PTVs^>}$Of@bNDNaigpnAP;5(tJzyA&VZv27mwvN#= z2;5l&j=|6$_4f5#I=ZnIy+zqWm-O&R7L zE$dXW1Tv_}y6}#|1JczX>&A1(MzI@YJ+cX(KQO)S>mi+9uiTH%-zY=zw5(6bg7081 zdmU@|Wq-})`i*rQYx*nD{~XeG5xT%nA{;^Z3hp`IJqw-~a(yOlz6@CyG6;NN2)vKJ z812?)eb7x?uK(&;ziIt{3vnaf+!4SYf$RT2A@j#!b^X7r*MC0Szs=v(ApgLsY%(?o z7z8{7IOg2ドルA695X1_6V>T0?;LxHs>zN>6+zHi<_j`7_gjcmpy)07cdj%3s)@yc(5s z4A^n}|7cQ`D_3%viO6Nm^$Q9l493&yw#cT;B!2b926P&|kHBO>Tw5@gEhI{B1l+g0_+c|MN 
zAk}61d4(73&L|3>@B7iiB6zNEO)bmZI*LdFKf+)b93W={O%xZX+Ym2LGhVq~LYes7 zu?k*ZB2~QF>xj!|U6tAbRGa9eSFR?pxitvfbqMgkJZP7qeV@QX%NBinl=pGw@N@>jvtE>kk*|865j@XB`R5oqa7X)~ zjvEoaLq(n?A%2?gP`HxkY^lyWfDa?CdPfR;2)}Usp-w;YCBEb>d=o$Nj6uIB`3Q9A zb7g%8-n(egzDapEexa{Z^Bt-kw0=aj75NZ!*iPgvU`xL$`6$w4JCSD&^xtUxuxexS z9gy?<2yh_ip#pp8cw`%qmv6xcwwhbncx5kw+=4#)5p_5}vodb6n_=urZK3MX- z&|;q<&s5p3w7#y6zt`u*m;ph&vd$&=3eqo7@2(*!x%a-67vwy!>4ls=X!7(?bLFyB zP4tSvHJ0aDw3MJR56)4m0U!GV#%4SWU_87pPG}g_{^Atvw(%-OgWj$W@n3M+;kz|a1%r1QsL=m z#t?7unI|*(y?bse-V;vNNtPWi=6tIhOQ{(rznIB8u~e}rE2(Aj?q}zoc2cg=+*-R$ zZl7WQy;OQa+A@LlXRV_ zGl%^A8N*&mW-AWTf^_!inIc(UbfzoTBbUtNRTU0o3S}3G=SF0!&MSthZ1vy(OHF3B zVsc(gqNj_Xi?b4v$W@n9BU!uENCJyRLg`n0+EPO+N|MQ^vXyzqZ~inJktvMGfn2i4mRu-^&P&h}b46FiPp2xbGp_ne!%aM{e_B>J zTFB*+`FYXF+YhI_`})-U1g2C+@HES3a&v|3vS}Vf7xU9|lchz+t=^Ukl~T%Sigl$khW<7egavj#^&iq5ok42li%hr`(dric0kfi?c!9eg^d# z@wiGD6Z?=m`^*e6^VN(DtEiJGT4a+C#hS8oxuQSc~6$-<)?%bnt$^@q9ap+?t9w zd8#wBa80F-2ドルmiCwzHcGyQ5kUcC%`;c_7t`Y9(8>DG^O45Mygj`*#gF)TFAct#1AP zrKW{#$M+4Nkgs8Rr2U(|4B9Yg>!R(3=e%f#A<-5^qd_rpe1m~ymf|ttpj;m`(n0c1 z2yHXxanEz!T}X@P!D(}$ZHed0>WSxRNjz6ZyAp}^D-zF+lj!Go7BbqPNVJ#Jp3_Wz zk!Tm=xiy|E=NUGhRik}|wmA~*Z#HSeG4X%Pc&>?dIoj-a9-lVD<66ah8wboz z1XlY0p9%Eo-JJQ$xTNNG>m7*NFsb`oVZM@eYWHQgP^!DlWU!5!cNVOJ&9#^LKYg~Ck`zZweRn^B4XtU=^-HpJsIl=!Gp%>wo2t5e<(5vsm z)esR_kHD6>2~>QVA^e~jz(+)f(O!6)P0_%Xuo&b_!Be5j_ diff --git a/codes/small_python/samll_python.sln b/codes/small_python/samll_python.sln deleted file mode 100755 index 34f306f..0000000 --- a/codes/small_python/samll_python.sln +++ /dev/null @@ -1,21 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 8.00 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "samll_python", "samll_python.vcproj", "{CE93E66B-1A83-4AEB-850A-7E07AF661E62}" - ProjectSection(ProjectDependencies) = postProject - EndProjectSection -EndProject -Global - GlobalSection(SolutionConfiguration) = preSolution - Debug = Debug - Release = Release - EndGlobalSection - GlobalSection(ProjectConfiguration) = 
postSolution - {CE93E66B-1A83-4AEB-850A-7E07AF661E62}.Debug.ActiveCfg = Debug|Win32 - {CE93E66B-1A83-4AEB-850A-7E07AF661E62}.Debug.Build.0 = Debug|Win32 - {CE93E66B-1A83-4AEB-850A-7E07AF661E62}.Release.ActiveCfg = Release|Win32 - {CE93E66B-1A83-4AEB-850A-7E07AF661E62}.Release.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - EndGlobalSection - GlobalSection(ExtensibilityAddIns) = postSolution - EndGlobalSection -EndGlobal diff --git a/codes/small_python/samll_python.suo b/codes/small_python/samll_python.suo deleted file mode 100755 index 089a942ef57c625fbbe78b39a25cf59b7e9e7f3a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmeHLO-x)>6g~rJ`EP4mRMa|_Vr!*Hq1sx~Du1OUOsNHdMPoBC5197NJTmiWOO(W& zi3{V>7>zc%HzuZTj7C@L2IE#2vM?@nVU=oI{J#6%op}s1@MeZ;iJZxqckg}o-E;1} z=X~ef``!HZpRRsfzaSZOLQ3WKQia^>X%C=GRYWi7H|c)3S0@U2G@XV!Q5~1CT;u`I4c%@wgja|;&>)x7CY>p>R>T?6}(uH zZG`t*-|e{Z>DPf$ynLPp1v&$XmRyiNNS{EdN3J+8eE!z~g|vJ=eSRi@FGWKVmZV&i zeu?5~%ZvCopp$M-t})|{3N&ud=T6W_R8eEH>t1{IREV@U;bcT3%z4+Eop1I zK98l%i+<~(g%np-t+mx09p(^~qomxb;p{jgq`mu8xxowsgftu`z6(-ytgv!vnnrnm z7z>`X5;5hS70YUsT~=(`iW_lTy{s9D#v|6{WV(CVsi9Cm2dnKeqtjznI1ve+jm9V0 zw58XG8;P*VvOl6P7eoBM72GYyumvr85392V?X3GmM}N+E1z-lL=O3ke?GbB@3!EbK~R%_s!<0i7%qoj+xn<{ebp`lnqm#>CZ~ zv5@(1h5TImAHv$lAeDL0h`Sb+&g*-@&#T}rg+mS&OhPN-Ptc>)Iba2I6J~8m z@C0-V0z;TR&8Qg!VLLX;5wux&*DR-z)eWK4is;@VZ9uPKjBo@`j$rU2*<;763-!uow!p-q&qb+xe zSlRRp5L};aM2@TYB+@R2(f?(5S&uvmPw0_n@asfYdq_IuC|VApM+e&3c2u5*zjUB< z1T8K2?|>)ri$m(mR+)o$MSxE9xU}2>6+OICiFnKVXx+NuqvOy&<|2z1^uf$ake)q5 z>&S+IEx;e*jwU~Qu>+kLGf1n+*JrVA%#HKQM$M3~&nMI&Tc6A}V!y^e#yJ!7*U{F& z*$M3BBwC4o`c392P96BKa6g~<@4}}qxehjf7I*l<|?lc0*oqv{{v0oevc>Q|%YC4ドル>N$KziEqj<35e z>D(nBJ!q{R@HyHQbNw#p&BF(GfM}E+wH>v!h;h62^x%)5|5_b-z45ItO5WSG7~n_0 z>}>r0qlSjwYdmTV>#5QoZmCKZRjE||#FJl@ z%8gX7zfz^YQ>B}!(k-`CB};Cp3E%fZmpH7?RKAo{Ih=u5k{=4;{wtu*Q-1N+d;ZgL zhP^WD`+-@oO9zmH+aGUX3~`@Xo68U|VzV6Bg1>)9gSa((s`fWO}P 
z=kkQ*QgYYovHqM2BfQ`BZ$w|yjXzv#o;$p8Z>5zwc07z;ov_+*#4DbVjnv&G_2X?F z?~o(Q#&XtE-zr44ドルcC-6fw__V?)RGiX4SB0D`<3{@uycl_utsadr;jl@l?jfap_+n qN~QlZ2c|jswZEs(SKmLNSTt$h_bb0>ff)n*-{og47ドルXc{t-wE<`2-38 diff --git a/codes/small_python/samll_python.vcproj b/codes/small_python/samll_python.vcproj deleted file mode 100755 index fc5f58f..0000000 --- a/codes/small_python/samll_python.vcproj +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/codes/small_python/src/engine.cpp b/codes/small_python/src/engine.cpp deleted file mode 100755 index d3f5a12..0000000 --- a/codes/small_python/src/engine.cpp +++ /dev/null @@ -1 +0,0 @@ - diff --git a/codes/small_python/src/intObject.cpp b/codes/small_python/src/intObject.cpp deleted file mode 100755 index da6126f..0000000 --- a/codes/small_python/src/intObject.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "intObject.h" -#include - -PyObject* CreatePyIntObject(int value) -{ - PyIntObject* object = new PyIntObject; - object->refCount = 1; - object->type = &PyInt_Type; - object->value = value; - return (PyObject*)object; -} - -static void int_print(PyObject* object) -{ - PyIntObject* intObject = (PyIntObject*)object; - printf("%d\n", intObject->value); -} - -static PyObject* int_add(PyObject* left, PyObject* right) -{ - PyIntObject* leftInt = (PyIntObject*)left; - PyIntObject* rightInt = (PyIntObject*)right; - PyIntObject* result = (PyIntObject*)CreatePyIntObject(0); - if(result == NULL) - { - printf("We have not enough memory!!"); - } - else - { - result->value = leftInt->value + rightInt->value; - } - return (PyObject*)result; -} - -PyTypeObject PyInt_Type = -{ - PyObject_HEAD_INIT(&PyType_Type), - "int", - int_print, - int_add -}; diff --git a/codes/small_python/src/main.cpp b/codes/small_python/src/main.cpp deleted file mode 100755 index 6de1237..0000000 --- a/codes/small_python/src/main.cpp +++ /dev/null @@ -1,14 
+0,0 @@ -#include -using namespace std; - -#include "object.h" -#include "intObject.h" -#include "engine.h" - -int main() -{ - ExcuteEngine engine; - engine.Excute(); - return 0; -} - diff --git a/codes/small_python/src/object.cpp b/codes/small_python/src/object.cpp deleted file mode 100755 index 1a1f098..0000000 --- a/codes/small_python/src/object.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "object.h" - -PyTypeObject PyType_Type = -{ - PyObject_HEAD_INIT(&PyType_Type), - "type", - 0, - 0 -}; diff --git a/codes/small_python/src/strObject.cpp b/codes/small_python/src/strObject.cpp deleted file mode 100755 index b8c24ac..0000000 --- a/codes/small_python/src/strObject.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "strObject.h" -#include -#include - -PyObject* CreatePyStrObject(const char* value) -{ - PyStrObject* object = new PyStrObject; - object->refCount = 1; - object->type = &PyStr_Type; - memset(object->value, 0, 50); - if(value != NULL) - { - strcpy(object->value, value); - } - return (PyObject*)object; -} - -static void string_print(PyObject* object) -{ - PyStrObject* strObject = (PyStrObject*)object; - printf("%s\n", strObject->value); -} - -static PyObject* string_add(PyObject* left, PyObject* right) -{ - PyStrObject* leftStr = (PyStrObject*)left; - PyStrObject* rightStr = (PyStrObject*)right; - PyStrObject* result = (PyStrObject*)CreatePyStrObject(NULL); - if(result == NULL) - { - printf("We have not enough memory!!"); - } - else - { - strcpy(result->value, leftStr->value); - strcat(result->value, rightStr->value); - } - return (PyObject*)result; -} - -PyTypeObject PyStr_Type = -{ - PyObject_HEAD_INIT(&PyType_Type), - "str", - string_print, - string_add -}; diff --git a/docker.md b/docker.md deleted file mode 100644 index 82a17ab..0000000 --- a/docker.md +++ /dev/null @@ -1,30 +0,0 @@ -# 使用 Docker 编译 Python 源码 - -- 项目目录结构 - - - python_source_code_study - - build # build 目录,即 configure --prefix - - Python-2.5 # 源码 - - -- Dockerfile - -``` -FROM ubuntu -RUN 
apt-get install -y gcc make vim -``` - -- build image - -`docker build -t ubuntu:pyc .` - -- run ubuntu:pyc - -`docker run -it --name ubuntu_pyc -v /Users/jiaminlu/Workspace/python/python_source_code_study:/data/python_source_code_study ubuntu:latest` - -- 编译 Python - -``` -cd /data/python_source_code_study/Pyhton-2.5 -./configure --prefix /data/python_source_code_study/build -``` \ No newline at end of file diff --git a/gil.md b/gil.md new file mode 100644 index 0000000..13bb0f3 --- /dev/null +++ b/gil.md @@ -0,0 +1,407 @@ +# GIL + +## 数据结构 + +### _gil_runtime_state + +_gil_runtime_state 保存 GIL 相关的信息. + +```c +// Include/internal/pycore_gil.h + +struct _gil_runtime_state { + /* microseconds (the Python API uses seconds, though) */ + unsigned long interval; + /* Last PyThreadState holding / having held the GIL. This helps us + know whether anyone else was scheduled after we dropped the GIL. */ + _Py_atomic_address last_holder; + /* Whether the GIL is already taken (-1 if uninitialized). This is + atomic because it can be read without any lock taken in ceval.c. */ + _Py_atomic_int locked; + /* Number of GIL switches since the beginning. */ + unsigned long switch_number; + /* This condition variable allows one or several threads to wait + until the GIL is released. In addition, the mutex also protects + the above variables. */ + PyCOND_T cond; + PyMUTEX_T mutex; +#ifdef FORCE_SWITCHING + /* This condition variable helps the GIL-releasing thread wait for + a GIL-awaiting thread to be scheduled and take the GIL. */ + PyCOND_T switch_cond; + PyMUTEX_T switch_mutex; +#endif +}; + +// Include/internal/pycore_runtime.h + +typedef struct pyruntimestate { + struct _ceval_runtime_state ceval; +} _PyRuntimeState; + +struct _ceval_runtime_state { + /* Request for checking signals. It is shared by all interpreters (see + bpo-40513). 
Any thread of any interpreter can receive a signal, but only + the main thread of the main interpreter can handle signals: see + _Py_ThreadCanHandleSignals(). */ + _Py_atomic_int signals_pending; + struct _gil_runtime_state gil; +}; +``` + +关系链: `_PyRuntimeState` -> `_ceval_runtime_state` -> `_gil_runtime_state`. + +### _ceval_state + +_ceval_state 保存有 `gil_drop_request` 属性, `gil_drop_request` 和 GIL 有一点关系. + +```c +// Include/internal/pycore_interp.h + +struct _ceval_state { + int recursion_limit; + /* Records whether tracing is on for any thread. Counts the number + of threads for which tstate->c_tracefunc is non-NULL, so if the + value is 0, we know we don't have to check this thread's + c_tracefunc. This speeds up the if statement in + _PyEval_EvalFrameDefault() after fast_next_opcode. */ + int tracing_possible; + /* This single variable consolidates all requests to break out of + the fast path in the eval loop. */ + _Py_atomic_int eval_breaker; + /* Request for dropping the GIL */ + _Py_atomic_int gil_drop_request; + struct _pending_calls pending; +}; + +struct _is { + struct _ceval_state ceval +} +``` + +PyInterpreterState 包含 _ceval_state. + + + +## 获取和释放 GIL + + +### ceval 执行字节码时检查 GIL + +```c +//Python/ceval.c + +PyObject* _Py_HOT_FUNCTION +_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) +{ +main_loop: + for (;;) { + assert(stack_pointer>= f->f_valuestack); /* else underflow */ + assert(STACK_LEVEL() <= co->co_stacksize); /* else overflow */ + assert(!_PyErr_Occurred(tstate)); + + /* Do periodic things. Doing this every time through + the loop would add too much overhead, so we do it + only every Nth instruction. We also do it if + ``pending.calls_to_do'' is set, i.e. when an asynchronous + event needs attention (e.g. a signal handler or + async I/O handler); see Py_AddPendingCall() and + Py_MakePendingCalls() above. 
*/ + + if (_Py_atomic_load_relaxed(eval_breaker)) { + opcode = _Py_OPCODE(*next_instr); + if (opcode == SETUP_FINALLY || + opcode == SETUP_WITH || + opcode == BEFORE_ASYNC_WITH || + opcode == YIELD_FROM) { + /* Few cases where we skip running signal handlers and other + pending calls: + - If we're about to enter the 'with:'. It will prevent + emitting a resource warning in the common idiom + 'with open(path) as file:'. + - If we're about to enter the 'async with:'. + - If we're about to enter the 'try:' of a try/finally (not + *very* useful, but might help in some cases and it's + traditional) + - If we're resuming a chain of nested 'yield from' or + 'await' calls, then each frame is parked with YIELD_FROM + as its next opcode. If the user hit control-C we want to + wait until we've reached the innermost frame before + running the signal handler and raising KeyboardInterrupt + (see bpo-30039). + */ + goto fast_next_opcode; + } + + if (eval_frame_handle_pending(tstate) != 0) { + goto error; + } + } + } +} + +static int +eval_frame_handle_pending(PyThreadState *tstate) +{ + _PyRuntimeState * const runtime = &_PyRuntime; + struct _ceval_runtime_state *ceval = &runtime->ceval; + + /* GIL drop request */ + if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) { + /* Give another thread a chance */ + if (_PyThreadState_Swap(&runtime->gilstate, NULL) != tstate) { + Py_FatalError("tstate mix-up"); + } + drop_gil(ceval, ceval2, tstate); + + /* Other threads may run now */ + + take_gil(tstate); + + if (_PyThreadState_Swap(&runtime->gilstate, tstate) != NULL) { + Py_FatalError("orphan tstate"); + } + } + return 0; +} +``` + +### take_gil 和 drop_gil + +```c +// Python/ceval_gil.h + +static void +drop_gil(struct _ceval_runtime_state *ceval, struct _ceval_state *ceval2, + PyThreadState *tstate) +{ + struct _gil_runtime_state *gil = &ceval->gil; + if (!_Py_atomic_load_relaxed(&gil->locked)) { + Py_FatalError("drop_gil: GIL is not locked"); + } + + /* tstate is allowed to be 
NULL (early interpreter init) */ + if (tstate != NULL) { + /* Sub-interpreter support: threads might have been switched + under our feet using PyThreadState_Swap(). Fix the GIL last + holder variable so that our heuristics work. */ + _Py_atomic_store_relaxed(&gil->last_holder, (uintptr_t)tstate); + } + + MUTEX_LOCK(gil->mutex); + _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1); + _Py_atomic_store_relaxed(&gil->locked, 0); + COND_SIGNAL(gil->cond); + MUTEX_UNLOCK(gil->mutex); + +#ifdef FORCE_SWITCHING + if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request) && tstate != NULL) { + MUTEX_LOCK(gil->switch_mutex); + /* Not switched yet => wait */ + if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate) + { + assert(is_tstate_valid(tstate)); + RESET_GIL_DROP_REQUEST(tstate->interp); + /* NOTE: if COND_WAIT does not atomically start waiting when + releasing the mutex, another thread can run through, take + the GIL and drop it again, and reset the condition + before we even had a chance to wait for it. */ + COND_WAIT(gil->switch_cond, gil->switch_mutex); + } + MUTEX_UNLOCK(gil->switch_mutex); + } +#endif +} + +/* Take the GIL. + + The function saves errno at entry and restores its value at exit. + + tstate must be non-NULL. */ +static void +take_gil(PyThreadState *tstate) +{ + int err = errno; + + assert(tstate != NULL); + + if (tstate_must_exit(tstate)) { + /* bpo-39877: If Py_Finalize() has been called and tstate is not the + thread which called Py_Finalize(), exit immediately the thread. + + This code path can be reached by a daemon thread after Py_Finalize() + completes. In this case, tstate is a dangling pointer: points to + PyThreadState freed memory. 
*/ + PyThread_exit_thread(); + } + + assert(is_tstate_valid(tstate)); + PyInterpreterState *interp = tstate->interp; + struct _ceval_runtime_state *ceval = &interp->runtime->ceval; + struct _ceval_state *ceval2 = &interp->ceval; + struct _gil_runtime_state *gil = &ceval->gil; + + /* Check that _PyEval_InitThreads() was called to create the lock */ + assert(gil_created(gil)); + + MUTEX_LOCK(gil->mutex); + + if (!_Py_atomic_load_relaxed(&gil->locked)) { + goto _ready; + } + + while (_Py_atomic_load_relaxed(&gil->locked)) { + unsigned long saved_switchnum = gil->switch_number; + + unsigned long interval = (gil->interval>= 1 ? gil->interval : 1); + int timed_out = 0; + COND_TIMED_WAIT(gil->cond, gil->mutex, interval, timed_out); + + /* If we timed out and no switch occurred in the meantime, it is time + to ask the GIL-holding thread to drop it. */ + + /* 我觉得这里检查 gil->switch_number == saved_switchnum 的原因: + 在当前线程阻塞在 COND_TIMED_WAIT 期间, 有可能其它线程获取了 GIL, 导致 gil->switch_number 增加 1. + 如果此时当前线程不检查 gil->switch_number == saved_switchnum 而直接设置 gil_drop_request, 那么可能导致刚刚获得 GIL 的线程立刻又释放 GIL. + 频繁的 GIL 切换会影响解释器的吞吐量, + */ + if (timed_out && + _Py_atomic_load_relaxed(&gil->locked) && + gil->switch_number == saved_switchnum) + { + if (tstate_must_exit(tstate)) { + MUTEX_UNLOCK(gil->mutex); + PyThread_exit_thread(); + } + assert(is_tstate_valid(tstate)); + + SET_GIL_DROP_REQUEST(interp); + } + } + +_ready: +#ifdef FORCE_SWITCHING + /* This mutex must be taken before modifying gil->last_holder: + see drop_gil(). 
*/ + MUTEX_LOCK(gil->switch_mutex); +#endif + /* We now hold the GIL */ + _Py_atomic_store_relaxed(&gil->locked, 1); + _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil->locked, /*is_write=*/1); + + if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) { + _Py_atomic_store_relaxed(&gil->last_holder, (uintptr_t)tstate); + ++gil->switch_number; + } + +#ifdef FORCE_SWITCHING + COND_SIGNAL(gil->switch_cond); + MUTEX_UNLOCK(gil->switch_mutex); +#endif + + if (tstate_must_exit(tstate)) { + /* bpo-36475: If Py_Finalize() has been called and tstate is not + the thread which called Py_Finalize(), exit immediately the + thread. + + This code path can be reached by a daemon thread which was waiting + in take_gil() while the main thread called + wait_for_thread_shutdown() from Py_Finalize(). */ + MUTEX_UNLOCK(gil->mutex); + drop_gil(ceval, ceval2, tstate); + PyThread_exit_thread(); + } + assert(is_tstate_valid(tstate)); + + if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) { + RESET_GIL_DROP_REQUEST(interp); + } + else { + /* bpo-40010: eval_breaker should be recomputed to be set to 1 if there + is a pending signal: signal received by another thread which cannot + handle signals. + + Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */ + COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + } + + /* Don't access tstate if the thread must exit */ + if (tstate->async_exc != NULL) { + _PyEval_SignalAsyncExc(tstate); + } + + MUTEX_UNLOCK(gil->mutex); + + errno = err; +} +``` + +## 总结 + +假设当前持有 GIL 正在运行的线程为 A, 而另一个阻塞在 GIL 的线程为 B. + +A 在执行字节码的过程中会不断检查 `gil_drop_request`, 如果 `gil_drop_request` 为真, 那么说明有其它线程想要获取 GIL. + +A 发现 `gil_drop_request` 为真, 立马调用 `drop_gil` 释放 GIL, 其实释放 GIL 的内部过程就是将 `gil->locked` 设为 0, 同时通知阻塞在 `gil->cond` 的线程. A 释放 GIL 后, 接着调用 `take_gil` 从而阻塞在 GIL 上. + +B 被唤醒, 获取到 GIL 然后开始执行. + +一个线程在尝试获取 GIL 时, 会等待 `gil->cond` 并设置超时时间, 如果超时说明当前持有 GIL 的线程没有主动释放 GIL, 那么该线程会设置 `gil_drop_request`, 然后又开始等待 `gil->cond`. 
+ +等待时间等于 `gil->interval`, 该值可以通过 `sys.setswitchinterval` 设置. + +线程阻塞在 GIL 的时间并不是准确(或大致)等于 `gil->interval`, 有几个原因会影响阻塞时间: + +- 每个字节码需要的时间不一样. 线程刚设置 `gil_drop_request`, 当前持有 GIL 的线程正好已经开始执行下一条字节码了, 错过了检查 `gil_drop_request` 的时机. 因此该线程需要多等待一个字节码的时间. + +- 解释器并不是每执行完一条字节码就回到循环的开头. 解释器为了提升执行效率对执行字节码的流程进行了各种优化. 例如, 当执行完当前字节码后, 立马跳转到下一条字节码的地址, 跳过了检查 `gil_drop_request` 的代码. + +- 如果有多个线程在等待 GIL, 此时哪个线程能够获取 GIL 是不确定的, 取决于操作系统的进程调度. 可能一个线程等待时间远小于 `gil->interval` 却获取了 GIL, 而另一个等待了很久的线程却没有获取 GIL. + +由于 GIL 的存在, 同一时刻只能有一个线程拥有解释器的执行权. 另外, GIL 还影响了操作系统的进程调度, 然而 Python 却没有实现自己的调度系统. + +### 来自 Python/ceval_gil.h 的注释 + +``` +/* + Notes about the implementation: + + - The GIL is just a boolean variable (locked) whose access is protected + by a mutex (gil_mutex), and whose changes are signalled by a condition + variable (gil_cond). gil_mutex is taken for short periods of time, + and therefore mostly uncontended. + + - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be + able to release the GIL on demand by another thread. A volatile boolean + variable (gil_drop_request) is used for that purpose, which is checked + at every turn of the eval loop. That variable is set after a wait of + `interval` microseconds on `gil_cond` has timed out. + + [Actually, another volatile boolean variable (eval_breaker) is used + which ORs several conditions into one. Volatile booleans are + sufficient as inter-thread signalling means since Python is run + on cache-coherent architectures only.] + + - A thread wanting to take the GIL will first let pass a given amount of + time (`interval` microseconds) before setting gil_drop_request. This + encourages a defined switching period, but doesn't enforce it since + opcodes can take an arbitrary time to execute. + + The `interval` value is available for the user to read and modify + using the Python API `sys.{get,set}switchinterval()`.
+ + - When a thread releases the GIL and gil_drop_request is set, that thread + ensures that another GIL-awaiting thread gets scheduled. + It does so by waiting on a condition variable (switch_cond) until + the value of last_holder is changed to something else than its + own thread state pointer, indicating that another thread was able to + take the GIL. + + This is meant to prohibit the latency-adverse behaviour on multi-core + machines where one thread would speculatively release the GIL, but still + run and end up being the first to re-acquire it, making the "timeslices" + much longer than expected. + (Note: this mechanism is enabled with FORCE_SWITCHING above) +*/ +``` + diff --git a/codes/python_scripts/ch14/foo/__init__.py b/opcache.md similarity index 100% rename from codes/python_scripts/ch14/foo/__init__.py rename to opcache.md diff --git "a/344円275円277円347円224円250円 printf 350円277円233円350円241円214円346円265円213円350円257円225円347円232円204円346円212円200円345円267円247円.md" "b/344円275円277円347円224円250円 printf 350円277円233円350円241円214円346円265円213円350円257円225円347円232円204円346円212円200円345円267円247円.md" new file mode 100644 index 0000000..36cf7da --- /dev/null +++ "b/344円275円277円347円224円250円 printf 350円277円233円350円241円214円346円265円213円350円257円225円347円232円204円346円212円200円345円267円247円.md" @@ -0,0 +1,26 @@ +# 使用 printf 进行测试的技巧 + +如果直接在代码中使用 printf 输出测试信息, 那么在编译时会产生大量 printf 的信息. 更好的做法是使用一个 debug 变量来判断是否要进行测试. + +我的做法是: + +```c +int debug = PySys_GetObject('_debug') != NULL ? 1 : 0; + +if(debug){ + // 输出信息 +} +``` + +在 Python 代码中使用 `sys._debug = 1` 来设置 `_debug` 属性. + +当然这个方法有局限性, 需要执行 Python 代码, 在 CPython 还没有真正执行 Python 代码时, debug 变量一直是 0. + +其它方法: + +- 向 CPython 添加一个命令行参数. + +- 环境变量. + +打印对象信息: `_PyObject_Dump`. 
+ diff --git "a/345円207円275円346円225円260円.md" "b/345円207円275円346円225円260円.md" new file mode 100644 index 0000000..6d4397f --- /dev/null +++ "b/345円207円275円346円225円260円.md" @@ -0,0 +1,570 @@ +# 函数和方法(method) + +PyFunctionObject: + +```c +typedef struct { + PyObject_HEAD + PyObject *func_code; /* A code object, the __code__ attribute */ + PyObject *func_globals; /* A dictionary (other mappings won't do) */ + // func_defaults 保存函数参数的默认值, 例如 def func(a, b, c=1, d=2) + PyObject *func_defaults; /* NULL or a tuple */ + // func_kwdefaults 保存函数参数的默认值, 不过和 func_defaults 不同, 当参数中定义了 *args 时才会使用 func_kwdefaults + // 例如 def func(a, b, *args, c=1, d=2) + PyObject *func_closure; /* NULL or a tuple of cell objects */ + PyObject *func_doc; /* The __doc__ attribute, can be anything */ + PyObject *func_name; /* The __name__ attribute, a string object */ + PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */ + PyObject *func_weakreflist; /* List of weak references */ + PyObject *func_module; /* The __module__ attribute, can be anything */ + PyObject *func_annotations; /* Annotations, a dict or NULL */ + PyObject *func_qualname; /* The qualified name */ + vectorcallfunc vectorcall; + + /* Invariant: + * func_closure contains the bindings for func_code->co_freevars, so + * PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code) + * (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0). + */ +} PyFunctionObject; +``` + +每次遇到 def 语句时, 就会创建一个 PyFunctionObject 对象.
+ +## 创建函数对象 + +```c +case TARGET(MAKE_FUNCTION): { + PyObject *qualname = POP(); + PyObject *codeobj = POP(); + PyFunctionObject *func = (PyFunctionObject *) + PyFunction_NewWithQualName(codeobj, f->f_globals, qualname); + + Py_DECREF(codeobj); + Py_DECREF(qualname); + if (func == NULL) { + goto error; + } + + if (oparg & 0x08) { + assert(PyTuple_CheckExact(TOP())); + func ->func_closure = POP(); + } + if (oparg & 0x04) { + assert(PyDict_CheckExact(TOP())); + func->func_annotations = POP(); + } + /* def func(a, b, *args, c = 1, d = 2, **kwargs): + LOAD_CONST 14 (1) + LOAD_CONST 15 (2) + LOAD_CONST 16 (('c', 'd')) + BUILD_CONST_KEY_MAP 2 + LOAD_CONST 17 () + LOAD_CONST 18 ('func') + MAKE_FUNCTION 2 (kwdefaults) + */ + if (oparg & 0x02) { + assert(PyDict_CheckExact(TOP())); + func->func_kwdefaults = POP(); + } + /* def func(a, b, c='c', d='d') + LOAD_CONST 29 (('c', 'd')) + LOAD_CONST 6 () + LOAD_CONST 7 ('func') + MAKE_FUNCTION 1 (defaults) + */ + if (oparg & 0x01) { + assert(PyTuple_CheckExact(TOP())); + func->func_defaults = POP(); + } + + PUSH((PyObject *)func); + DISPATCH(); +} +``` + +`MAKE_FUNCTION` 的参数 oparg 表示函数的形参是否有默认值, 以及函数是否有类型注解. + +```c +PyObject * +PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname) +{ + PyFunctionObject *op; + PyObject *doc, *consts, *module; + static PyObject *__name__ = NULL; + + if (__name__ == NULL) { + __name__ = PyUnicode_InternFromString("__name__"); + if (__name__ == NULL) + return NULL; + } + + /* __module__: If module name is in globals, use it. + Otherwise, use None. */ + module = PyDict_GetItemWithError(globals, __name__); + if (module) { + Py_INCREF(module); + } + else if (PyErr_Occurred()) { + return NULL; + } + + op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); + if (op == NULL) { + Py_XDECREF(module); + return NULL; + } + /* Note: No failures from this point on, since func_dealloc() does not + expect a partially-created object. 
*/ + + op->func_weakreflist = NULL; + Py_INCREF(code); + op->func_code = code; + Py_INCREF(globals); + op->func_globals = globals; + op->func_name = ((PyCodeObject *)code)->co_name; + Py_INCREF(op->func_name); + op->func_defaults = NULL; /* No default arguments */ + op->func_kwdefaults = NULL; /* No keyword only defaults */ + op->func_closure = NULL; + op->vectorcall = _PyFunction_Vectorcall; + op->func_module = module; + + consts = ((PyCodeObject *)code)->co_consts; + if (PyTuple_Size(consts)>= 1) { + doc = PyTuple_GetItem(consts, 0); + if (!PyUnicode_Check(doc)) + doc = Py_None; + } + else + doc = Py_None; + Py_INCREF(doc); + op->func_doc = doc; + + op->func_dict = NULL; + op->func_annotations = NULL; + + if (qualname) + op->func_qualname = qualname; + else + op->func_qualname = op->func_name; + Py_INCREF(op->func_qualname); + + _PyObject_GC_TRACK(op); + return (PyObject *)op; +} +``` + +### 调用函数 + +```c +// Python/ceval.c + +case TARGET(CALL_FUNCTION): { + PREDICTED(CALL_FUNCTION); + PyObject **sp, *res; + sp = stack_pointer; + res = call_function(tstate, &sp, oparg, NULL); + stack_pointer = sp; + PUSH(res); + if (res == NULL) { + goto error; + } + DISPATCH(); +} + +case TARGET(CALL_FUNCTION_KW): { + PyObject **sp, *res, *names; + + names = POP(); + assert(PyTuple_Check(names)); + assert(PyTuple_GET_SIZE(names) <= oparg); + /* We assume without checking that names contains only strings */ + sp = stack_pointer; + res = call_function(tstate, &sp, oparg, names); + stack_pointer = sp; + PUSH(res); + Py_DECREF(names); + + if (res == NULL) { + goto error; + } + DISPATCH(); +} + +case TARGET(CALL_FUNCTION_EX): { + PREDICTED(CALL_FUNCTION_EX); + PyObject *func, *callargs, *kwargs = NULL, *result; + if (oparg & 0x01) { + kwargs = POP(); + if (!PyDict_CheckExact(kwargs)) { + PyObject *d = PyDict_New(); + if (d == NULL) + goto error; + if (_PyDict_MergeEx(d, kwargs, 2) < 0) { + Py_DECREF(d); + format_kwargs_error(tstate, SECOND(), kwargs); + Py_DECREF(kwargs); + goto 
error; + } + Py_DECREF(kwargs); + kwargs = d; + } + assert(PyDict_CheckExact(kwargs)); + } + callargs = POP(); + func = TOP(); + if (!PyTuple_CheckExact(callargs)) { + if (check_args_iterable(tstate, func, callargs) < 0) { + Py_DECREF(callargs); + goto error; + } + Py_SETREF(callargs, PySequence_Tuple(callargs)); + if (callargs == NULL) { + goto error; + } + } + assert(PyTuple_CheckExact(callargs)); + + result = do_call_core(tstate, func, callargs, kwargs); + Py_DECREF(func); + Py_DECREF(callargs); + Py_XDECREF(kwargs); + + SET_TOP(result); + if (result == NULL) { + goto error; + } + DISPATCH(); +} +``` + + +从上面的代码可以看出, 有三个调用函数的字节码, 分别是 `CALL_FUNCTION`, `CALL_FUNCTION_KW` 和 `CALL_FUNCTION_EX`. + +`CALL_FUNCTION` 和 `CALL_FUNCTION_KW` 会调用 `call_function` 进行函数调用, 而 `CALL_FUNCTION_EX` 会调用 `do_call_core` 进行函数. + +### call_function + +```c +// Python/ceval.c + +Py_LOCAL_INLINE(PyObject *) _Py_HOT_FUNCTION +call_function(PyThreadState *tstate, PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames) +{ + PyObject **pfunc = (*pp_stack) - oparg - 1; + PyObject *func = *pfunc; + PyObject *x, *w; + Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames); + Py_ssize_t nargs = oparg - nkwargs; + PyObject **stack = (*pp_stack) - nargs - nkwargs; + + if (tstate->use_tracing) { + x = trace_call_function(tstate, func, stack, nargs, kwnames); + } + else { + x = PyObject_Vectorcall(func, stack, nargs | PY_VECTORCALL_ARGUMENTS_OFFSET, kwnames); + } + + assert((x != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + + /* Clear the stack of the function object. */ + while ((*pp_stack)> pfunc) { + w = EXT_POP(*pp_stack); + Py_DECREF(w); + } + + return x; +} +``` + +`call_function` 最终会调用 `_PyObject_VectorcallTstate`, 而 `_PyObject_VectorcallTstate` 最终会调用 callable 对象中的 vectorcallfunc 函数. + +```c +/* Call the callable object 'callable' with the "vectorcall" calling + convention. + + args is a C array for positional arguments. 
+ + nargsf is the number of positional arguments plus optionally the flag + PY_VECTORCALL_ARGUMENTS_OFFSET which means that the caller is allowed to + modify args[-1]. + + kwnames is a tuple of keyword names. The values of the keyword arguments + are stored in "args" after the positional arguments (note that the number + of keyword arguments does not change nargsf). kwnames can also be NULL if + there are no keyword arguments. + + keywords must only contain strings and all keys must be unique. + + Return the result on success. Raise an exception and return NULL on + error. */ +static inline PyObject * +_PyObject_VectorcallTstate(PyThreadState *tstate, PyObject *callable, + PyObject *const *args, size_t nargsf, + PyObject *kwnames) +{ + vectorcallfunc func; + PyObject *res; + + assert(kwnames == NULL || PyTuple_Check(kwnames)); + assert(args != NULL || PyVectorcall_NARGS(nargsf) == 0); + + func = PyVectorcall_Function(callable); + if (func == NULL) { + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + return _PyObject_MakeTpCall(tstate, callable, args, nargs, kwnames); + } + res = func(callable, args, nargsf, kwnames); + return _Py_CheckFunctionResult(tstate, callable, res, NULL); +} +``` + +关于 vectorcall 的信息可以参考 [PEP 590 -- Vectorcall: a fast calling protocol for CPython](https://www.python.org/dev/peps/pep-0590/). + +vectorcall 看来简化了早期 tp_call 函数调用约定的复杂过程, 上面的 PEP 590 说 vectorcall 一定程度减少了分配对象的次数, 理论上性能有所提升, 但是实际上并不会带来明显可见的性能提升. 主要还是简化函数调用的流程. 
+ +支持 vectorcall 的 callable 对象的类型需要在 tp_flags 中设置 `Py_TPFLAGS_HAVE_VECTORCALL` 标志位, 并且 callable 对象要设置一个被调用的函数, 该函数的原型是 + +``` +typedef PyObject *(*vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); +``` + +普通函数对象 PyFunctionObject 的 vectorcallfunc 设置为 _PyFunction_Vectorcall: + +```c +PyObject * +PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname) +{ + op->vectorcall = _PyFunction_Vectorcall; +} +``` + +_PyFunction_Vectorcall: + +```c +// Objects/call.c +PyObject * +_PyFunction_Vectorcall(PyObject *func, PyObject* const* stack, + size_t nargsf, PyObject *kwnames) +{ + assert(PyFunction_Check(func)); + assert(kwnames == NULL || PyTuple_CheckExact(kwnames)); + + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + assert(nargs>= 0); + Py_ssize_t nkwargs = (kwnames == NULL) ? 0 : PyTuple_GET_SIZE(kwnames); + assert((nargs == 0 && nkwargs == 0) || stack != NULL); + /* kwnames must only contain strings and all keys must be unique */ + + PyThreadState *tstate = _PyThreadState_GET(); + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + + if (co->co_kwonlyargcount == 0 && nkwargs == 0 && + (co->co_flags & ~PyCF_MASK) == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) + { + // CO_NOFREE 说明函数不是嵌套函数. + // nkwargs == 0 说明实参没有关键字参数 + + // 函数的形参没有默认值, 实参都是位置参数(即没有关键字参数), 那么进行快速调用. 
+ if (argdefs == NULL && co->co_argcount == nargs) { + return function_code_fastcall(tstate, co, stack, nargs, globals); + } + // 没有实参, 而且函数的所有形参都有默认值 + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == PyTuple_GET_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + stack = _PyTuple_ITEMS(argdefs); + return function_code_fastcall(tstate, co, + stack, PyTuple_GET_SIZE(argdefs), + globals); + } + } + + PyObject *kwdefs = PyFunction_GET_KW_DEFAULTS(func); + PyObject *closure = PyFunction_GET_CLOSURE(func); + PyObject *name = ((PyFunctionObject *)func) -> func_name; + PyObject *qualname = ((PyFunctionObject *)func) -> func_qualname; + + PyObject **d; + Py_ssize_t nd; + if (argdefs != NULL) { + d = _PyTuple_ITEMS(argdefs); + nd = PyTuple_GET_SIZE(argdefs); + assert(nd <= INT_MAX); + } + else { + d = NULL; + nd = 0; + } + return _PyEval_EvalCode(tstate, + (PyObject*)co, globals, (PyObject *)NULL, + stack, nargs, + nkwargs ? _PyTuple_ITEMS(kwnames) : NULL, + stack + nargs, + nkwargs, 1, + d, (int)nd, kwdefs, + closure, name, qualname); +} +``` + +_PyFunction_Vectorcall 有两个选择, 一是调用 function_code_fastcall 执行函数, 二是调用 _PyEval_EvalCode 执行函数. _PyEval_EvalCode 的代码非常复杂, 后面再讲. + +function_code_fastcall 负责简单的函数调用. + +_PyEval_EvalCode 负责复杂的函数调用, 例如闭包函数, 实参和形参比较复杂的函数调用. + +### do_call_core + +```c +// Python/ceval.c + +static PyObject * +do_call_core(PyThreadState *tstate, PyObject *func, PyObject *callargs, PyObject *kwdict) +{ + PyObject *result; + + if (PyCFunction_CheckExact(func) || PyCMethod_CheckExact(func)) { + C_TRACE(result, PyObject_Call(func, callargs, kwdict)); + return result; + } + else if (Py_IS_TYPE(func, &PyMethodDescr_Type)) { + Py_ssize_t nargs = PyTuple_GET_SIZE(callargs); + if (nargs> 0 && tstate->use_tracing) { + /* We need to create a temporary bound method as argument + for profiling. + + If nargs == 0, then this cannot work because we have no + "self". 
In any case, the call itself would raise + TypeError (foo needs an argument), so we just skip + profiling. */ + PyObject *self = PyTuple_GET_ITEM(callargs, 0); + func = Py_TYPE(func)->tp_descr_get(func, self, (PyObject*)Py_TYPE(self)); + if (func == NULL) { + return NULL; + } + + C_TRACE(result, _PyObject_FastCallDictTstate( + tstate, func, + &_PyTuple_ITEMS(callargs)[1], + nargs - 1, + kwdict)); + Py_DECREF(func); + return result; + } + } + return PyObject_Call(func, callargs, kwdict); +} +``` + +do_call_core 最终调用 PyObject_Call, 而 PyObject_Call 一般最终会调用 callable 对象所属类型的 tp_call. + +### tp_call + +### 几个实例 + + +## _PyEval_EvalCode + +_PyEval_EvalCode 的大部分工作都是在处理各种实参和形参的情况, 并将实参放入 `f->f_localsplus`(fastlocals) 数组. 具体的细节可以查看代码. + + +## 闭包 + +### code 对象 + +闭包函数的 code 对象的 co_flags 中包含 `CO_NESTED`(0x10). + +### 创建函数对象时保存闭包变量 + +例子: + +```py +def f(): + a = 1 + def g(): + b = a + 1 +``` + +字节码如下: + +``` + 2 0 LOAD_CONST 1 (1) + 2 STORE_DEREF 0 (a) + + 3 4 LOAD_CLOSURE 0 (a) + 6 BUILD_TUPLE 1 + 8 LOAD_CONST 2 () + 10 LOAD_CONST 3 ('f..g') + 12 MAKE_FUNCTION 8 (closure) + 14 STORE_FAST 0 (g) +``` +`LOAD_CLOSURE` 和 `MAKE_FUNCTION` 对应的处理器如下: + +```c +case TARGET(LOAD_CLOSURE): { + PyObject *cell = freevars[oparg]; + Py_INCREF(cell); + PUSH(cell); + DISPATCH(); +} + +case TARGET(MAKE_FUNCTION): { + PyObject *qualname = POP(); + PyObject *codeobj = POP(); + PyFunctionObject *func = (PyFunctionObject *) + PyFunction_NewWithQualName(codeobj, f->f_globals, qualname); + + Py_DECREF(codeobj); + Py_DECREF(qualname); + if (func == NULL) { + goto error; + } + + if (oparg & 0x08) { + assert(PyTuple_CheckExact(TOP())); + func ->func_closure = POP(); + } +} +``` + +可见 `LOAD_CLOSURE` 是直接从当前 frame 的 freevars 数组读取值, 速度相较于 `LOAD_NAME` 更快. + +`func ->func_closure` 保存了内层函数引用外层函数的变量. 
+ +在调用函数时, `func ->func_closure` 保存到 `f->f_localsplus` 中, 代码如下: + +```c +PyObject * +_PyEval_EvalCode(PyThreadState *tstate, + PyObject *_co, PyObject *globals, PyObject *locals, + PyObject *const *args, Py_ssize_t argcount, + PyObject *const *kwnames, PyObject *const *kwargs, + Py_ssize_t kwcount, int kwstep, + PyObject *const *defs, Py_ssize_t defcount, + PyObject *kwdefs, PyObject *closure, + PyObject *name, PyObject *qualname) +{ + /* Copy closure variables to free variables */ + for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { + PyObject *o = PyTuple_GET_ITEM(closure, i); + Py_INCREF(o); + freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; + } +} +``` + +## 总结 + +函数调用在 CPython 内部的实现非常复杂, 难以用简单的语言来概括, 想要理清内部的各种调用链还是要仔细看代码. + +这里的函数调用不仅仅指常规意义的形如 `func(a, b, c)` 的函数调用, 在 Python 的世界里, 只要是可调用的对象都可以当成函数来调用, 其内部实现也类似常规函数的调用, 从字节码来看都是使用 `CALL_FUNCTION`. 例如, 类的实例化 `MyClass(a, b, c)` 其实就是 `CALL_FUNCTION`, 甚至类的定义 `class MyClass:` 也是通过 `MAKE_FUNCTION` 和 `CALL_FUNCTION` 实现的. + +从总体的角度来看, 函数调用的难点在于处理复杂的实参和形参, 如果参数简单, 那么 CPython 内部会选择快速调用的方法. 当处理好复杂的参数后, 创建一个新的 PyFrameObject, 参数加载进 PyFrameObject 的 fastlocals, 最后开始执行 PyFrameObject 的字节码. + +对于解释器来说, 它只负责执行 PyFrameObject. 因此, PyFunctionObject 只是中间对象, 最终会被转换成 PyFrameObject. + diff --git "a/345円215円217円347円250円213円.md" "b/345円215円217円347円250円213円.md" new file mode 100644 index 0000000..b59d3c8 --- /dev/null +++ "b/345円215円217円347円250円213円.md" @@ -0,0 +1,321 @@ +# 协程 + +因为协程的实现和生成器有关, 所以关于生成器的实现查看[生成器](./生成器.md) + +## 数据结构 + +PyCoroObject: + +```c +/* _PyGenObject_HEAD defines the initial segment of generator + and coroutine objects. */ +#define _PyGenObject_HEAD(prefix) \ + PyObject_HEAD \ + /* Note: gi_frame can be NULL if the generator is "finished" */ \ + PyFrameObject *prefix##_frame; \ + /* True if generator is being executed. */ \ + char prefix##_running; \ + /* The code object backing the generator */ \ + PyObject *prefix##_code; \ + /* List of weak reference.
*/ \ + PyObject *prefix##_weakreflist; \ + /* Name of the generator. */ \ + PyObject *prefix##_name; \ + /* Qualified name of the generator. */ \ + PyObject *prefix##_qualname; \ + _PyErr_StackItem prefix##_exc_state; + +typedef struct { + _PyGenObject_HEAD(cr) + PyObject *cr_origin; +} PyCoroObject; +``` + +从 PyCoroObject 的定义可以看出它和生成器(PyGenObject)几乎相同的. + + +## 从一个简单的例子开始 + +Python 代码: + +```py +import asyncio + +async def f(): + await asyncio.sleep(5) + return 10 + +print(f.__code__.co_flags) +print(asyncio.run(f())) +``` + +对应的字节码: + +``` + 1 0 LOAD_CONST 0 (0) + 2 LOAD_CONST 1 (None) + 4 IMPORT_NAME 0 (asyncio) + 6 STORE_NAME 0 (asyncio) + + 3 8 LOAD_CONST 2 () + 10 LOAD_CONST 3 ('f') + 12 MAKE_FUNCTION 0 + 14 STORE_NAME 1 (f) + + 7 16 LOAD_NAME 2 (print) + 18 LOAD_NAME 0 (asyncio) + 20 LOAD_METHOD 3 (run) + 22 LOAD_NAME 1 (f) + 24 CALL_FUNCTION 0 + 26 CALL_METHOD 1 + 28 CALL_FUNCTION 1 + 30 POP_TOP + 32 LOAD_CONST 1 (None) + 34 RETURN_VALUE + +Disassembly of : + 4 0 LOAD_GLOBAL 0 (asyncio) + 2 LOAD_METHOD 1 (sleep) + 4 LOAD_CONST 1 (5) + 6 CALL_METHOD 1 + 8 GET_AWAITABLE + 10 LOAD_CONST 0 (None) + 12 YIELD_FROM + 14 POP_TOP + + 5 16 LOAD_CONST 1 (10) + 18 RETURN_VALUE +``` + +code 对象的 co_flags 等于 195(0xc3), 即等于 `CO_COROUTINE | CO_NOFREE | CO_NEWLOCALS | CO_OPTIMIZED`. + +和协程相关的字节码有: `GET_AWAITABLE`. + +### async + +async 关键字的作用其实就是标记函数是返回协程的函数, code 对象的 co_flags 中会添加 CO_COROUTINE. 这样一来就可以区分协程和生成器. + +### async 生成新的协程对象(PyCoro_New) + +调用 `f()` 会创建一个协程对象, 过程和创建生成器一样, 代码如下: + +```c +/* Handle generator/coroutine/asynchronous generator */ +if (co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + PyObject *gen; + int is_coro = co->co_flags & CO_COROUTINE; + + /* Don't need to keep the reference to f_back, it will be set + * when the generator is resumed. */ + Py_CLEAR(f->f_back); + + /* Create a new generator that owns the ready to run frame + * and return that as the value. 
*/ + if (is_coro) { + gen = PyCoro_New(f, name, qualname); + } else if (co->co_flags & CO_ASYNC_GENERATOR) { + gen = PyAsyncGen_New(f, name, qualname); + } else { + gen = PyGen_NewWithQualName(f, name, qualname); + } + if (gen == NULL) { + return NULL; + } + + _PyObject_GC_TRACK(f); + + return gen; +} + +retval = _PyEval_EvalFrame(tstate, f, 0); +``` + +```c +PyObject * +PyCoro_New(PyFrameObject *f, PyObject *name, PyObject *qualname) +{ + // 注意: gen_new_with_qualname 的作用是创建 PyGenObject, 但是因为 PyCoroObject 和 PyGenObject 几乎一样, + // PyCoroObject 在结构体尾部多了一个字段 cr_origin, 所以可以复用 gen_new_with_qualname 用于创建 PyCoroObject + PyObject *coro = gen_new_with_qualname(&PyCoro_Type, f, name, qualname); + if (!coro) { + return NULL; + } + + PyThreadState *tstate = _PyThreadState_GET(); + int origin_depth = tstate->coroutine_origin_tracking_depth; + + if (origin_depth == 0) { + ((PyCoroObject *)coro)->cr_origin = NULL; + } else { + PyObject *cr_origin = compute_cr_origin(origin_depth); + ((PyCoroObject *)coro)->cr_origin = cr_origin; + if (!cr_origin) { + Py_DECREF(coro); + return NULL; + } + } + + return coro; +} +``` + +从上面的代码可以看出, `PyCoro_New` 使用 `gen_new_with_qualname` 创建了一个 PyCoroObject 对象, 接下来将最近 origin_depth 个的 frame 保存到 `coro->cr_origin` 中. + +`compute_cr_origin` 获取最近的 origin_depth 个 frame 的信息. 
+ + +### await + +await 关键字对应的字节码为: + +``` + 8 GET_AWAITABLE +10 LOAD_CONST 0 (None) +12 YIELD_FROM +``` + +GET_AWAITABLE 对应的处理器: + +```c +case TARGET(GET_AWAITABLE): { + PREDICTED(GET_AWAITABLE); + PyObject *iterable = TOP(); + PyObject *iter = _PyCoro_GetAwaitableIter(iterable); + + if (iter == NULL) { + int opcode_at_minus_3 = 0; + if ((next_instr - first_instr)> 2) { + opcode_at_minus_3 = _Py_OPCODE(next_instr[-3]); + } + format_awaitable_error(tstate, Py_TYPE(iterable), + opcode_at_minus_3, + _Py_OPCODE(next_instr[-2])); + } + + Py_DECREF(iterable); + + if (iter != NULL && PyCoro_CheckExact(iter)) { + PyObject *yf = _PyGen_yf((PyGenObject*)iter); + if (yf != NULL) { + /* `iter` is a coroutine object that is being + awaited, `yf` is a pointer to the current awaitable + being awaited on. */ + // yf != NULL 说明 iter 已经被执行过了 + Py_DECREF(yf); + Py_CLEAR(iter); + _PyErr_SetString(tstate, PyExc_RuntimeError, + "coroutine is being awaited already"); + /* The code below jumps to `error` if `iter` is NULL. */ + } + } + + SET_TOP(iter); /* Even if it's NULL */ + + if (iter == NULL) { + goto error; + } + + PREDICT(LOAD_CONST); + DISPATCH(); +} +``` + +可以看出 await 的基本原理就是 `YIELD_FROM`, 这再一次说明协程和生成器的关系. `GET_AWAITABLE` 将协程放入到栈顶, `YIELD_FROM` 的 receiver 就是该协程. 
+ +## asyncio 模块 + + + +### eventloop + +eventloop 的各种方法分类: + +- 启动和关闭 loop + + - run_until_complete + - run_forever + - stop + - close + +- 定时任务 + + - call_soon + - call_soon_threadsafe + - call_later + - call_at + +- 创建 future 和 task + + - create_future + - create_task + +- 网络操作 + + - create_connection + - create_datagram_endpoint + - create_unix_connection + - create_server + - create_unix_server + - connect_accepted_socket + +- 监听文件描述符(I/O 复用) + + - add_reader + - remove_reader + - add_writer + - remove_writer + +- 直接操作 socket + + - sock_recv + - sock_recv_into + - sock_sendall + - sock_connect + - sock_accept + - sock_sendfile + +- DNS + + - getaddrinfo + - getnameinfo + +- pipe + + - connect_read_pipe + - connect_write_pipe + +- 信号 + + - add_signal_handler + - remove_signal_handler + +- 使用线程池和进程池执行任务 + + - run_in_executor + - set_default_executor + +- 异常处理 + + - set_exception_handler + - get_exception_handler + - default_exception_handler + - call_exception_handler + +- 调试模式 + + - get_debug + - set_debug + +- 子进程 + + - subprocess_exec + - subprocess_shell + +[eventloop 的官方文档](https://docs.python.org/3/library/asyncio-eventloop.html) + + +### Future + +### network + + diff --git "a/345円274円202円345円270円270円345円244円204円347円220円206円.md" "b/345円274円202円345円270円270円345円244円204円347円220円206円.md" new file mode 100644 index 0000000..c83c28e --- /dev/null +++ "b/345円274円202円345円270円270円345円244円204円347円220円206円.md" @@ -0,0 +1,377 @@ +# 异常处理 + +当代码中发生异常而又没有被当前的 frame 捕获时, Python 会将异常一直往上传递, 直到被某个 frame 中的代码捕获, 如果最终仍然没有一个 frame 捕获异常, 那么 Python 的入口点(不同的启动方式有不同的入口点, +例如执行文件, 交互式 shell 等等) 会打印出异常信息并退出. + +异常对象形成一个单向链表, 越靠近头部的节点越处于调用链(call frame)的上层, 越靠近尾部的节点越接近异常发生的地方.
+ +## Python 如何处理异常 + +测试的 Python 代码: + +```py +try: + x = 1 / 0 +except ZeroDivisionError as e: + print(e) +``` + +使用 dis 查看对应的字节码: + +``` + 1 0 SETUP_FINALLY 12 (to 14) + + 2 2 LOAD_CONST 0 (1) + 4 LOAD_CONST 1 (0) + 6 BINARY_TRUE_DIVIDE + 8 STORE_NAME 0 (x) + 10 POP_BLOCK + 12 JUMP_FORWARD 44 (to 58) + + 3>> 14 DUP_TOP + 16 LOAD_NAME 1 (ZeroDivisionError) + 18 JUMP_IF_NOT_EXC_MATCH 56 + 20 POP_TOP + 22 STORE_NAME 2 (e) + 24 POP_TOP + 26 SETUP_FINALLY 20 (to 48) + + 4 28 LOAD_NAME 3 (print) + 30 LOAD_NAME 2 (e) + 32 CALL_FUNCTION 1 + 34 POP_TOP + 36 POP_BLOCK + 38 POP_EXCEPT + 40 LOAD_CONST 2 (None) + 42 STORE_NAME 2 (e) + 44 DELETE_NAME 2 (e) + 46 JUMP_FORWARD 10 (to 58) +>> 48 LOAD_CONST 2 (None) + 50 STORE_NAME 2 (e) + 52 DELETE_NAME 2 (e) + 54 RERAISE +>> 56 RERAISE +>> 58 LOAD_CONST 2 (None) + 60 RETURN_VALUE +``` + +和异常处理相关的字节码有: + +- SETUP_FINALLY + +- JUMP_IF_NOT_EXC_MATCH + +- RERAISE + +- POP_EXCEPT + + +### SETUP_FINALLY + +SETUP_FINALLY 对应的处理器: + +```c +case TARGET(SETUP_FINALLY): { + PyFrame_BlockSetup(f, SETUP_FINALLY, INSTR_OFFSET() + oparg, + STACK_LEVEL()); + DISPATCH(); +} +``` + +```c +void +PyFrame_BlockSetup(PyFrameObject *f, int type, int handler, int level) +{ + PyTryBlock *b; + if (f->f_iblock>= CO_MAXBLOCKS) + Py_FatalError("XXX block stack overflow"); + b = &f->f_blockstack[f->f_iblock++]; + b->b_type = type; + b->b_level = level; + b->b_handler = handler; +} +``` + +```c +typedef struct { + int b_type; /* what kind of block this is */ + int b_handler; /* where to jump to find handler, b_handler 其实就是跳转地址 */ + int b_level; /* value stack level to pop to, b_level 保存当前栈顶的位置 */ +} PyTryBlock; +``` + +PyTryBlock 保存了异常处理的重要信息, 当异常发生时可以从 PyTryBlock 得知异常处理的代码的位置. 
+ +### 异常发生时: 除 0 错误 + +`BINARY_TRUE_DIVIDE` 的处理器如下: + +```c +case TARGET(BINARY_TRUE_DIVIDE): { + PyObject *divisor = POP(); + PyObject *dividend = TOP(); + PyObject *quotient = PyNumber_TrueDivide(dividend, divisor); + Py_DECREF(dividend); + Py_DECREF(divisor); + SET_TOP(quotient); + if (quotient == NULL) + goto error; + DISPATCH(); +} +``` + +`PyNumber_TrueDivide` 最终会调用整数对象的除法函数: + +```c +static PyObject * +long_true_divide(PyObject *v, PyObject *w) +{ + if (b_size == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "division by zero"); + goto error; + } + error: + return NULL; +} +``` + +`PyErr_SetString` 会设置 `tstate->curexc_type`, `tstate->curexc_value` 和 `tstate->curexc_traceback`. 这三个变量记录了当前异常的信息. + +`PyThreadState` 中保存着异常相关的信息: + +```c +struct _ts { + /* The exception currently being raised */ + PyObject *curexc_type; + PyObject *curexc_value; + PyObject *curexc_traceback; +} +``` + +- curexc_type, 异常类型. + +- curexc_value, 异常信息. + +- curexc_traceback, PyTracebackObject 对象. + +执行流程最终回到 ceval, quotient 等于 NULL, 跳转到处理异常的代码. + +### ceval 处理异常 + +```c +/* Log traceback info. */ +PyTraceBack_Here(f); + +int +PyTraceBack_Here(PyFrameObject *frame) +{ + PyObject *exc, *val, *tb, *newtb; + PyErr_Fetch(&exc, &val, &tb); + newtb = _PyTraceBack_FromFrame(tb, frame); + if (newtb == NULL) { + _PyErr_ChainExceptions(exc, val, tb); + return -1; + } + PyErr_Restore(exc, val, newtb); + Py_XDECREF(tb); + return 0; +} +``` + +`PyTraceBack_Here` 生成新的 `PyTracebackObject` 对象. 
+ +```c +typedef struct _traceback { + PyObject_HEAD + struct _traceback *tb_next; + struct _frame *tb_frame; + int tb_lasti; + int tb_lineno; +} PyTracebackObject; + + +static PyObject * +tb_create_raw(PyTracebackObject *next, PyFrameObject *frame, int lasti, + int lineno) +{ + PyTracebackObject *tb; + if ((next != NULL && !PyTraceBack_Check(next)) || + frame == NULL || !PyFrame_Check(frame)) { + PyErr_BadInternalCall(); + return NULL; + } + tb = PyObject_GC_New(PyTracebackObject, &PyTraceBack_Type); + if (tb != NULL) { + Py_XINCREF(next); + tb->tb_next = next; + Py_XINCREF(frame); + tb->tb_frame = frame; + tb->tb_lasti = lasti; + tb->tb_lineno = lineno; + PyObject_GC_Track(tb); + } + return (PyObject *)tb; +} +``` + +`PyTracebackObject` 对象记录了异常发生时的信息, 例如 frame, 发生异常的代码所在的行号和字节码地址. 这些信息也就是当 Python 解释器因为异常退出时在终端打印的异常栈信息. + +### 跳转到异常处理代码 + +```c +exception_unwind: + /* Unwind stacks if an exception occurred */ + while (f->f_iblock> 0) { + /* Pop the current block. */ + PyTryBlock *b = &f->f_blockstack[--f->f_iblock]; + + if (b->b_type == EXCEPT_HANDLER) { + // 取出栈上的异常信息 + // exc_info->exc_type = POP(); + // exc_info->exc_value = POP(); + // exc_info->exc_traceback = POP(); + UNWIND_EXCEPT_HANDLER(b); + continue; + } + // 清空栈中在 b->b_level 上面的元素 + // 例如, 如果 b->b_type 等于 SETUP_FINALLY, 那么清空自 SETUP_FINALLY 指令之后栈上新增的元素. + UNWIND_BLOCK(b); + if (b->b_type == SETUP_FINALLY) { + PyObject *exc, *val, *tb; + int handler = b->b_handler; + _PyErr_StackItem *exc_info = tstate->exc_info; + /* Beware, this invalidates all b->b_* fields */ + PyFrame_BlockSetup(f, EXCEPT_HANDLER, -1, STACK_LEVEL()); + // 保存上一个异常信息 + PUSH(exc_info->exc_traceback); + PUSH(exc_info->exc_value); + if (exc_info->exc_type != NULL) { + PUSH(exc_info->exc_type); + } + else { + Py_INCREF(Py_None); + PUSH(Py_None); + } + _PyErr_Fetch(tstate, &exc, &val, &tb); + /* Make the raw exception data + available to the handler, + so a program can emulate the + Python main loop. 
*/ + _PyErr_NormalizeException(tstate, &exc, &val, &tb); + // 设置异常的 traceback 属性. + if (tb != NULL) + PyException_SetTraceback(val, tb); + else + PyException_SetTraceback(val, Py_None); + // 保存当前异常信息到 tstate->exc_info + Py_INCREF(exc); + exc_info->exc_type = exc; + Py_INCREF(val); + exc_info->exc_value = val; + exc_info->exc_traceback = tb; + if (tb == NULL) + tb = Py_None; + Py_INCREF(tb); + // 将异常信息放入栈上, except 代码块需要使用这些异常信息和 except 语句中的异常进行对比. + PUSH(tb); + PUSH(val); + PUSH(exc); + JUMPTO(handler); + if (_Py_TracingPossible(ceval2)) { + int needs_new_execution_window = (f->f_lasti < instr_lb || f->f_lasti>= instr_ub); + int needs_line_update = (f->f_lasti == instr_lb || f->f_lasti < instr_prev); + /* Make sure that we trace line after exception if we are in a new execution + * window or we don't need a line update and we are not in the first instruction + * of the line. */ + if (needs_new_execution_window || (!needs_line_update && instr_lb> 0)) { + instr_prev = INT_MAX; + } + } + /* Resume normal execution */ + goto main_loop; + } + } /* unwind stack */ +``` + +### RERAISE + +如果没有 except 可以捕获异常, 那么最后该异常会被重新触发. RERAISE 的作用就是重新触发异常. + +### 如果处理异常时发生了别的异常 + +例如, 如果在处理异常 A 时, 又发生了异常 B, 那么 Python 内部会记录下这种嵌套异常的关系. 异常对象中有一个 `context` 属性, 通过该 `context` 便可以知道是否发生了嵌套的异常. + +```c +/* exception 是异常类, value 一般是一个字符串, 表示错误信息. + 主要作用是调用 _PyErr_Restore(tstate, exception, value, tb) 记录下该异常信息. + + 如果该异常是嵌套异常, 那么设置该异常的 context 属性, PyException_SetContext(value, exc_value).
+ */ +void +_PyErr_SetObject(PyThreadState *tstate, PyObject *exception, PyObject *value) +{ + PyObject *exc_value; + PyObject *tb = NULL; + + if (exc_value != NULL && exc_value != Py_None) { + /* Implicit exception chaining */ + Py_INCREF(exc_value); + if (value == NULL || !PyExceptionInstance_Check(value)) { + /* We must normalize the value right now */ + PyObject *fixed_value; + + /* Issue #23571: functions must not be called with an + exception set */ + _PyErr_Clear(tstate); + + fixed_value = _PyErr_CreateException(exception, value); + Py_XDECREF(value); + if (fixed_value == NULL) { + Py_DECREF(exc_value); + return; + } + + value = fixed_value; + } + + /* Avoid reference cycles through the context chain. + This is O(chain length) but context chains are + usually very short. Sensitive readers may try + to inline the call to PyException_GetContext. */ + + /* 异常对象的 context 等于上一个异常对象, + 例如在处理异常 A 的 except 代码块中又发生了异常 B, 那么异常 B 的 context 就是异常 A + */ + if (exc_value != value) { + PyObject *o = exc_value, *context; + while ((context = PyException_GetContext(o))) { + Py_DECREF(context); + if (context == value) { + PyException_SetContext(o, NULL); + break; + } + o = context; + } + PyException_SetContext(value, exc_value); // 设置 context + } + else { + Py_DECREF(exc_value); + } + } +} +``` + +## 总结 + +之前一直不清楚 Python 内部是如何实现异常处理的, 通过阅读源码终于弄懂了内部的实现原理. + +当 ceval 执行字节码遇到错误时, 会调用 `PyErr_SetString(PyObject *exception, const char *string)` 设置异常信息, 异常信息保存在线程对象中, 具体是 `tstate->curexc_type`, `tstate->curexc_value`和 `tstate->curexc_traceback`, 然后跳出 switch case 来到错误处理的代码. + +如果错误发生在 try/except 中, 那么通过 `PyTryBlock` 可以知道处理异常的代码的地址, 将异常信息(`tstate->curexc_*`) 保存到 `tstate->exc_info`, 以及栈上, 然后重置 `tstate->curexc_*`, 最后跳转到处理异常的代码. 如果没有匹配的 except, 那么异常会被重新触发, 重复异常处理的流程. + +如果在当前栈帧没有捕获异常, 那么异常会上溯到上一层栈帧, 直到有一个栈帧捕获异常或退出程序. + +如果错误没有被捕获, 那么最终会终止程序, 打印异常栈信息. 
+ + diff --git "a/346円240円210円345円270円247円.md" "b/346円240円210円345円270円247円.md" new file mode 100644 index 0000000..7c51e48 --- /dev/null +++ "b/346円240円210円345円270円247円.md" @@ -0,0 +1,136 @@ +# 栈帧 + +```c +// Include/cpython/frameobject.h + +struct _frame { + PyObject_VAR_HEAD + struct _frame *f_back; /* previous frame, or NULL */ + PyCodeObject *f_code; /* code segment */ + PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ + PyObject *f_globals; /* global symbol table (PyDictObject) */ + PyObject *f_locals; /* local symbol table (any mapping) */ + PyObject **f_valuestack; /* points after the last local */ + /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. + Frame evaluation usually NULLs it, but a frame that yields sets it + to the current stack top. */ + PyObject **f_stacktop; + PyObject *f_trace; /* Trace function */ + char f_trace_lines; /* Emit per-line trace events? */ + char f_trace_opcodes; /* Emit per-opcode trace events? */ + + /* Borrowed reference to a generator, or NULL */ + PyObject *f_gen; + + int f_lasti; /* Last instruction if called */ + /* Call PyFrame_GetLineNumber() instead of reading this field + directly. As of 2.3 f_lineno is only valid when tracing is + active (i.e. when f_trace is set). At other times we use + PyCode_Addr2Line to calculate the line from the current + bytecode index. 
*/ + int f_lineno; /* Current line number */ + int f_iblock; /* index in f_blockstack */ + char f_executing; /* whether the frame is still executing */ + PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ + PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ +}; +``` + +初始化: + +```c +PyFrameObject* _Py_HOT_FUNCTION +_PyFrame_New_NoTrack(PyThreadState *tstate, PyCodeObject *code, + PyObject *globals, PyObject *locals) +{ +#ifdef Py_DEBUG + if (code == NULL || globals == NULL || !PyDict_Check(globals) || + (locals != NULL && !PyMapping_Check(locals))) { + PyErr_BadInternalCall(); + return NULL; + } +#endif + + PyFrameObject *back = tstate->frame; + PyObject *builtins = frame_get_builtins(back, globals); + if (builtins == NULL) { + return NULL; + } + + PyFrameObject *f = frame_alloc(code); + if (f == NULL) { + Py_DECREF(builtins); + return NULL; + } + + f->f_stacktop = f->f_valuestack; + f->f_builtins = builtins; + Py_XINCREF(back); + f->f_back = back; + Py_INCREF(code); + Py_INCREF(globals); + f->f_globals = globals; + /* Most functions have CO_NEWLOCALS and CO_OPTIMIZED set. 
*/ + if ((code->co_flags & (CO_NEWLOCALS | CO_OPTIMIZED)) == + (CO_NEWLOCALS | CO_OPTIMIZED)) + ; /* f_locals = NULL; will be set by PyFrame_FastToLocals() */ + else if (code->co_flags & CO_NEWLOCALS) { + locals = PyDict_New(); + if (locals == NULL) { + Py_DECREF(f); + return NULL; + } + f->f_locals = locals; + } + else { + if (locals == NULL) + locals = globals; + Py_INCREF(locals); + f->f_locals = locals; + } + + f->f_lasti = -1; + f->f_lineno = code->co_firstlineno; + f->f_iblock = 0; + f->f_executing = 0; + f->f_gen = NULL; + f->f_trace_opcodes = 0; + f->f_trace_lines = 1; + + assert(f->f_code != NULL); + + return f; +} + +PyFrameObject* +PyFrame_New(PyThreadState *tstate, PyCodeObject *code, + PyObject *globals, PyObject *locals) +{ + PyFrameObject *f = _PyFrame_New_NoTrack(tstate, code, globals, locals); + if (f) + _PyObject_GC_TRACK(f); + return f; +} +``` + +PyFrameObject 的 f_localsplus 数组的布局: + +``` ++----------+---------+-------+----------+----------+----------+----------+----------+----------+ +| pos args | kw args | *args | **kwargs | cellvars | freevars | locals | stack | ++----------+---------+-------+----------+----------+----------+----------+----------+----------+ +^ ^ +| | +| | +fastlocals f_valuestack + + +``` + +locals 表示除了其它的局部变量. + +cellvars 一般是外层函数使用, 存放着被内层嵌套函数引用的变量 + +freevars 一般是内层嵌套函数使用, 存放着对外层函数中变量的引用 + +`code->co_nlocals` 等于局部变量的数量, 也就是在上面的布局中从开头到 cellvars 开头为止的部分. \ No newline at end of file diff --git "a/347円224円237円346円210円220円345円231円250円.md" "b/347円224円237円346円210円220円345円231円250円.md" new file mode 100644 index 0000000..6bc1aea --- /dev/null +++ "b/347円224円237円346円210円220円345円231円250円.md" @@ -0,0 +1,445 @@ +# 生成器 + +```c +/* _PyGenObject_HEAD defines the initial segment of generator + and coroutine objects. */ +#define _PyGenObject_HEAD(prefix) \ + PyObject_HEAD \ + /* Note: gi_frame can be NULL if the generator is "finished" */ \ + PyFrameObject *prefix##_frame; \ + /* True if generator is being executed. 
*/ \ + char prefix##_running; \ + /* The code object backing the generator */ \ + PyObject *prefix##_code; \ + /* List of weak reference. */ \ + PyObject *prefix##_weakreflist; \ + /* Name of the generator. */ \ + PyObject *prefix##_name; \ + /* Qualified name of the generator. */ \ + PyObject *prefix##_qualname; \ + _PyErr_StackItem prefix##_exc_state; + +typedef struct { + /* The gi_ prefix is intended to remind of generator-iterator. */ + _PyGenObject_HEAD(gi) +} PyGenObject; +``` + +创建 PyGenObject: + +```c +static PyObject * +gen_new_with_qualname(PyTypeObject *type, PyFrameObject *f, + PyObject *name, PyObject *qualname) +{ + PyGenObject *gen = PyObject_GC_New(PyGenObject, type); + if (gen == NULL) { + Py_DECREF(f); + return NULL; + } + gen->gi_frame = f; + f->f_gen = (PyObject *) gen; + Py_INCREF(f->f_code); + gen->gi_code = (PyObject *)(f->f_code); + gen->gi_running = 0; + gen->gi_weakreflist = NULL; + gen->gi_exc_state.exc_type = NULL; + gen->gi_exc_state.exc_value = NULL; + gen->gi_exc_state.exc_traceback = NULL; + gen->gi_exc_state.previous_item = NULL; + if (name != NULL) + gen->gi_name = name; + else + gen->gi_name = ((PyCodeObject *)gen->gi_code)->co_name; + Py_INCREF(gen->gi_name); + if (qualname != NULL) + gen->gi_qualname = qualname; + else + gen->gi_qualname = gen->gi_name; + Py_INCREF(gen->gi_qualname); + _PyObject_GC_TRACK(gen); + return (PyObject *)gen; +} +``` +在 + + +生成器和函数几乎一样, 如果一个函数中出现 yield(yield from), 那么就变成了生成器. + +其实生成器完全可以使用别的关键字来定义, 例如将 `def` 替换为 `gen` 或者 `defgen`, 至于为什么最后还是使用了 `def`, 可以阅读 [pep 255](https://www.python.org/dev/peps/pep-0255/#bdfl-pronouncements). + +生成器的函数对象的创建几乎完全利用了创建普通函数的代码, 字节码都是 `MAKE_FUNCTION`. 当调用生成器的函数对象时, 也利用了调用普通函数的代码, 字节码都是 `CALL_FUNCTION`, 区别是生成器的 PyCodeObject 的 co_flags 包含 CO_GENERATOR 标志, `CALL_FUNCTION` 最终返回的是生成器对象(PyGenObject). 
+ +### 创建生成器 + +```py +def f(): + for i in range(10): + yield i + +g = f() +``` + +对应的字节码为: + +``` + 1 0 LOAD_CONST 0 () + 2 LOAD_CONST 1 ('f') + 4 MAKE_FUNCTION 0 + 6 STORE_NAME 0 (f) + + 5 8 LOAD_NAME 0 (f) + 10 CALL_FUNCTION 0 + 12 STORE_NAME 1 (g) + 14 LOAD_CONST 2 (None) + 16 RETURN_VALUE + +Disassembly of : + 2 0 LOAD_GLOBAL 0 (range) + 2 LOAD_CONST 1 (10) + 4 CALL_FUNCTION 1 + 6 GET_ITER +>> 8 FOR_ITER 10 (to 20) + 10 STORE_FAST 0 (i) + + 3 12 LOAD_FAST 0 (i) + 14 YIELD_VALUE + 16 POP_TOP + 18 JUMP_ABSOLUTE 8 +>> 20 LOAD_CONST 0 (None) + 22 RETURN_VALUE +``` + +在执行 `g = f()` 之前, f 仍然是一个普通的函数, 生成器是在调用 `f()` 时创建的, 创建生成器的代码如下: + +```c +PyObject * +_PyEval_EvalCode(PyThreadState *tstate, + PyObject *_co, PyObject *globals, PyObject *locals, + PyObject *const *args, Py_ssize_t argcount, + PyObject *const *kwnames, PyObject *const *kwargs, + Py_ssize_t kwcount, int kwstep, + PyObject *const *defs, Py_ssize_t defcount, + PyObject *kwdefs, PyObject *closure, + PyObject *name, PyObject *qualname) +{ + // ... + /* Handle generator/coroutine/asynchronous generator */ + if (co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + PyObject *gen; + int is_coro = co->co_flags & CO_COROUTINE; + + /* Don't need to keep the reference to f_back, it will be set + * when the generator is resumed. */ + Py_CLEAR(f->f_back); + + /* Create a new generator that owns the ready to run frame + * and return that as the value. */ + if (is_coro) { + gen = PyCoro_New(f, name, qualname); + } else if (co->co_flags & CO_ASYNC_GENERATOR) { + gen = PyAsyncGen_New(f, name, qualname); + } else { + gen = PyGen_NewWithQualName(f, name, qualname); + } + if (gen == NULL) { + return NULL; + } + + _PyObject_GC_TRACK(f); + + return gen; + } + // ... +} +``` + +`_PyEval_EvalCode` 前面所有的代码都在处理函数的参数(实参, 形参, 位置参数, 关键字参数, 默认参数等等). 处理完之后, 新创建的栈帧就可以投入执行了, 但是发现 code 对象的 co_flags 中包含 `CO_GENERATOR` 标志, 因此, 创建生成器对象并返回. 如果是普通的函数, 那么就执行栈帧并返回结果. + +### 操作生成器 + +生成器支持三个方法: `send`, `throw`, `close`.
+ +``` +static PyMethodDef gen_methods[] = { + {"send",(PyCFunction)_PyGen_Send, METH_O, send_doc}, + {"throw",(PyCFunction)gen_throw, METH_VARARGS, throw_doc}, + {"close",(PyCFunction)gen_close, METH_NOARGS, close_doc}, + {NULL, NULL} /* Sentinel */ +}; +``` + +`next(g)` 是通过 `send` 实现的. + +### send 方法 + +```py +def f(): + for i in range(10): + yield i + +g = f() +# 第一次调用 send 方法, 参数必须是 None +res = g.send(None) +``` + +```c +static PyObject * +gen_send_ex(PyGenObject *gen, PyObject *arg, int exc, int closing) +{ + PyThreadState *tstate = _PyThreadState_GET(); + PyFrameObject *f = gen->gi_frame; + PyObject *result; + + if (gen->gi_running) { + const char *msg = "generator already executing"; + if (PyCoro_CheckExact(gen)) { + msg = "coroutine already executing"; + } + else if (PyAsyncGen_CheckExact(gen)) { + msg = "async generator already executing"; + } + PyErr_SetString(PyExc_ValueError, msg); + return NULL; + } + if (f == NULL || f->f_stacktop == NULL) { + if (PyCoro_CheckExact(gen) && !closing) { + /* `gen` is an exhausted coroutine: raise an error, + except when called from gen_close(), which should + always be a silent method. */ + PyErr_SetString( + PyExc_RuntimeError, + "cannot reuse already awaited coroutine"); + } + else if (arg && !exc) { + /* `gen` is an exhausted generator: + only set exception if called from send(). */ + if (PyAsyncGen_CheckExact(gen)) { + PyErr_SetNone(PyExc_StopAsyncIteration); + } + else { + PyErr_SetNone(PyExc_StopIteration); + } + } + return NULL; + } + + if (f->f_lasti == -1) { + if (arg && arg != Py_None) { + const char *msg = "can't send non-None value to a " + "just-started generator"; + if (PyCoro_CheckExact(gen)) { + msg = NON_INIT_CORO_MSG; + } + else if (PyAsyncGen_CheckExact(gen)) { + msg = "can't send non-None value to a " + "just-started async generator"; + } + PyErr_SetString(PyExc_TypeError, msg); + return NULL; + } + } else { + /* Push arg onto the frame's value stack */ + result = arg ? 
arg : Py_None; + Py_INCREF(result); + *(f->f_stacktop++) = result; + } + + /* Generators always return to their most recent caller, not + * necessarily their creator. */ + Py_XINCREF(tstate->frame); + assert(f->f_back == NULL); + f->f_back = tstate->frame; + + gen->gi_running = 1; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + + if (exc) { + assert(_PyErr_Occurred(tstate)); + _PyErr_ChainStackItem(NULL); + } + + result = _PyEval_EvalFrame(tstate, f, exc); + tstate->exc_info = gen->gi_exc_state.previous_item; + gen->gi_exc_state.previous_item = NULL; + gen->gi_running = 0; + + /* Don't keep the reference to f_back any longer than necessary. It + * may keep a chain of frames alive or it could create a reference + * cycle. */ + assert(f->f_back == tstate->frame); + Py_CLEAR(f->f_back); + + /* If the generator just returned (as opposed to yielding), signal + * that the generator is exhausted. */ + if (result && f->f_stacktop == NULL) { + if (result == Py_None) { + /* Delay exception instantiation if we can */ + if (PyAsyncGen_CheckExact(gen)) { + PyErr_SetNone(PyExc_StopAsyncIteration); + } + else { + PyErr_SetNone(PyExc_StopIteration); + } + } + else { + /* Async generators cannot return anything but None */ + assert(!PyAsyncGen_CheckExact(gen)); + _PyGen_SetStopIterationValue(result); + } + Py_CLEAR(result); + } + else if (!result && PyErr_ExceptionMatches(PyExc_StopIteration)) { + const char *msg = "generator raised StopIteration"; + if (PyCoro_CheckExact(gen)) { + msg = "coroutine raised StopIteration"; + } + else if (PyAsyncGen_CheckExact(gen)) { + msg = "async generator raised StopIteration"; + } + _PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg); + + } + else if (!result && PyAsyncGen_CheckExact(gen) && + PyErr_ExceptionMatches(PyExc_StopAsyncIteration)) + { + /* code in `gen` raised a StopAsyncIteration error: + raise a RuntimeError. 
+ */ + const char *msg = "async generator raised StopAsyncIteration"; + _PyErr_FormatFromCause(PyExc_RuntimeError, "%s", msg); + } + + if (!result || f->f_stacktop == NULL) { + /* generator can't be rerun, so release the frame */ + /* first clean reference cycle through stored exception traceback */ + _PyErr_ClearExcState(&gen->gi_exc_state); + gen->gi_frame->f_gen = NULL; + gen->gi_frame = NULL; + Py_DECREF(f); + } + + return result; +} + +PyObject * +_PyGen_Send(PyGenObject *gen, PyObject *arg) +{ + return gen_send_ex(gen, arg, 0, 0); +} +``` + +`gen_send_ex` 的核心功能就是调用 `_PyEval_EvalFrame` 对生成器进行求值. + +### 恢复栈指针 + +在 `_PyEval_EvalFrameDefault` 函数有下面的代码: + +```c +stack_pointer = f->f_stacktop; +assert(stack_pointer != NULL); +f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */ +f->f_executing = 1; +``` + +`stack_pointer` 是函数局部变量, 指向栈顶. 如果是普通函数, `f->f_stacktop` 的值是在创建 PyFrameObject 时的初始化值, 指向 `f->f_valuestack`. 如果是生成器, 那么 `f->f_stacktop` 指向上一次的栈顶. + + +### YIELD_VALUE + +```c +case TARGET(YIELD_VALUE): { + retval = POP(); + + if (co->co_flags & CO_ASYNC_GENERATOR) { + PyObject *w = _PyAsyncGenValueWrapperNew(retval); + Py_DECREF(retval); + if (w == NULL) { + retval = NULL; + goto error; + } + retval = w; + } + // 保存栈顶指针 + f->f_stacktop = stack_pointer; + goto exiting; +} +``` + +`YIELD_VALUE` 在返回之前将栈顶指针保存到 `f->f_stacktop`, 这样一来当重新执行生成器时, 才可以恢复之前的栈. 
+ +### YIELD_FROM + +```c +case TARGET(YIELD_FROM): { + PyObject *v = POP(); + PyObject *receiver = TOP(); + int err; + if (PyGen_CheckExact(receiver) || PyCoro_CheckExact(receiver)) { + retval = _PyGen_Send((PyGenObject *)receiver, v); + } else { + _Py_IDENTIFIER(send); + if (v == Py_None) + retval = Py_TYPE(receiver)->tp_iternext(receiver); + else + retval = _PyObject_CallMethodIdOneArg(receiver, &PyId_send, v); + } + Py_DECREF(v); + if (retval == NULL) { + // retval == NULL 说明 yield from 的 receiver 执行完了 + PyObject *val; + if (tstate->c_tracefunc != NULL + && _PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) + call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, f); + // 从 StopIteration 对象中取出 value 属性并赋值给 val + err = _PyGen_FetchStopIterationValue(&val); + if (err < 0) + goto error; + Py_DECREF(receiver); + // 将 val 保存到栈顶. 当执行 YIELD_FROM 的下一条字节码时, `ret = yield from receiver` 的 ret 就等于这里的 val. + SET_TOP(val); + DISPATCH(); + } + /* receiver remains on stack, retval is value to be yielded */ + f->f_stacktop = stack_pointer; + /* and repeat... */ + assert(f->f_lasti>= (int)sizeof(_Py_CODEUNIT)); + // f->f_lasti 倒退一步, 回到 YIELD_FROM. 不停执行 YIELD_FROM, 直到 receiver 耗尽. + f->f_lasti -= sizeof(_Py_CODEUNIT); + goto exiting; +} +``` + +`YIELD_FROM` 的前一条字节码 `LOAD_CONST 0 (None)`, `None` 作为 `YIELD_FROM` 第一次执行发送到 receiver 的初始值, 也就是说 `PyObject *v = POP();` 的 v 等于 None. + +当 `YIELD_FROM` 第二次执行时, `PyObject *v = POP();` 的 v 等于上层调用者调用 `g.send(val)` 发送的值. + +### throw 方法 + +throw 方法的基本原理是: + +```c +PyErr_Restore(typ, val, tb); +return gen_send_ex(gen, Py_None, 1, 0); +``` + +具体查看我在代码中的注释. + +### close + +具体查看我在代码中的注释. + +## 总结 + +看完生成器的实现代码, 终于理解了生成器的实现原理, 以及如何在生成器的基础上实现协程. 
+ +## 参考 + +- [PEP 255 -- Simple Generators](https://www.python.org/dev/peps/pep-0255/) + +- [PEP 342 -- Coroutines via Enhanced Generators](https://www.python.org/dev/peps/pep-0342/) + +- [PEP 380 -- Syntax for Delegating to a Subgenerator](https://www.python.org/dev/peps/pep-0380/) + +- [PEP 492 -- Coroutines with async and await syntax](https://www.python.org/dev/peps/pep-0492/) \ No newline at end of file diff --git "a/347円272円277円347円250円213円.md" "b/347円272円277円347円250円213円.md" new file mode 100644 index 0000000..c2188cf --- /dev/null +++ "b/347円272円277円347円250円213円.md" @@ -0,0 +1,2 @@ +# 线程 + diff --git "a/350円231円232円346円213円237円346円234円272円.md" "b/350円231円232円346円213円237円346円234円272円.md" new file mode 100644 index 0000000..0645f7a --- /dev/null +++ "b/350円231円232円346円213円237円346円234円272円.md" @@ -0,0 +1,544 @@ +# Python 虚拟机 + +涉及文件: + +- Python/ceval.c + +- Include/internal/pycore_pystate.h + +- Include/pystate.h + + +## 一些重要的结构体 + +- _PyRuntimeState + + ```c + typedef struct pyruntimestate { + /* Is running Py_PreInitialize()? */ + int preinitializing; + + /* Is Python preinitialized? Set to 1 by Py_PreInitialize() */ + int preinitialized; + + /* Is Python core initialized? Set to 1 by _Py_InitializeCore() */ + int core_initialized; + + /* Is Python fully initialized? Set to 1 by Py_Initialize() */ + int initialized; + + /* Set by Py_FinalizeEx(). Only reset to NULL if Py_Initialize() + is called again. */ + PyThreadState *finalizing; + + struct pyinterpreters { + PyThread_type_lock mutex; + PyInterpreterState *head; + PyInterpreterState *main; + /* _next_interp_id is an auto-numbered sequence of small + integers. It gets initialized in _PyInterpreterState_Init(), + which is called in Py_Initialize(), and used in + PyInterpreterState_New(). A negative interpreter ID + indicates an error occurred. The main interpreter will + always have an ID of 0. Overflow results in a RuntimeError. + If that becomes a problem later then we can adjust, e.g. 
by + using a Python int. */ + int64_t next_id; + } interpreters; + + unsigned long main_thread; + + #define NEXITFUNCS 32 + void (*exitfuncs[NEXITFUNCS])(void); + int nexitfuncs; + + struct _gc_runtime_state gc; + struct _ceval_runtime_state ceval; + struct _gilstate_runtime_state gilstate; + } _PyRuntimeState; + + _PyRuntimeState _PyRuntime = _PyRuntimeState_INIT; + ``` + + _PyRuntime 是一个全局变量, 保存着许多和解释器运行状态相关的信息. + +- PyInterpreterState + + ```c + // Include/internal/pycore_pystate.h + struct _is { + ... + struct _is *next; + struct _ts *tstate_head; + PyObject *modules; + PyObject *modules_by_index; + PyObject *sysdict; + PyObject *builtins; + PyObject *importlib; + ... + }; + ``` + + ```c + // Include/pystate.h + /* struct _is is defined in internal/pycore_pystate.h */ + typedef struct _is PyInterpreterState; + ``` + + PyInterpreterState 通过 next 指针形成一个单向链表, tstate_head 指向线程链表. + +- PyThreadState + + ```c + // Include/cpython/pystate.h + struct _ts { + /* See Python/ceval.c for comments explaining most fields */ + + struct _ts *prev; + struct _ts *next; + PyInterpreterState *interp; + + struct _frame *frame; + int recursion_depth; + char overflowed; /* The stack has overflowed. Allow 50 more calls + to handle the runtime error. */ + char recursion_critical; /* The current calls must not cause + a stack overflow. */ + int stackcheck_counter; + + /* 'tracing' keeps track of the execution depth when tracing/profiling. + This is to prevent the actual trace/profile code from being recorded in + the trace/profile. */ + int tracing; + int use_tracing; + + Py_tracefunc c_profilefunc; + Py_tracefunc c_tracefunc; + PyObject *c_profileobj; + PyObject *c_traceobj; + + /* The exception currently being raised */ + PyObject *curexc_type; + PyObject *curexc_value; + PyObject *curexc_traceback; + + /* The exception currently being handled, if no coroutines/generators + * are present. Always last element on the stack referred to be exc_info. 
+ */ + _PyErr_StackItem exc_state; + + /* Pointer to the top of the stack of the exceptions currently + * being handled */ + _PyErr_StackItem *exc_info; + + PyObject *dict; /* Stores per-thread state */ + + int gilstate_counter; + + PyObject *async_exc; /* Asynchronous exception to raise */ + unsigned long thread_id; /* Thread id where this tstate was created */ + + int trash_delete_nesting; + PyObject *trash_delete_later; + + /* Called when a thread state is deleted normally, but not when it + * is destroyed after fork(). + * Pain: to prevent rare but fatal shutdown errors (issue 18808), + * Thread.join() must wait for the join'ed thread's tstate to be unlinked + * from the tstate chain. That happens at the end of a thread's life, + * in pystate.c. + * The obvious way doesn't quite work: create a lock which the tstate + * unlinking code releases, and have Thread.join() wait to acquire that + * lock. The problem is that we _are_ at the end of the thread's life: + * if the thread holds the last reference to the lock, decref'ing the + * lock will delete the lock, and that may trigger arbitrary Python code + * if there's a weakref, with a callback, to the lock. But by this time + * _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest + * of C code can be allowed to run (in particular it must not be possible to + * release the GIL). + * So instead of holding the lock directly, the tstate holds a weakref to + * the lock: that's the value of on_delete_data below. Decref'ing a + * weakref is harmless. + * on_delete points to _threadmodule.c's static release_sentinel() function. + * After the tstate is unlinked, release_sentinel is called with the + * weakref-to-lock (on_delete_data) argument, and release_sentinel releases + * the indirectly held lock. 
+ */ + void (*on_delete)(void *); + void *on_delete_data; + + int coroutine_origin_tracking_depth; + + PyObject *async_gen_firstiter; + PyObject *async_gen_finalizer; + + PyObject *context; + uint64_t context_ver; + + /* Unique thread state id. */ + uint64_t id; + + /* XXX signal handlers should also be here */ + }; + ``` + + ```c + /* struct _ts is defined in cpython/pystate.h */ + typedef struct _ts PyThreadState; + ``` + + PyThreadState 保存了和线程相关的信息, 通过 next 和 prev 形成一个双向链表. frame 指向当前正在执行的栈帧对象(即调用栈的栈顶), 更早的栈帧通过 f_back 依次相连. + +- PyFrameObject + + ```c + typedef struct _frame { + PyObject_VAR_HEAD + struct _frame *f_back; /* previous frame, or NULL */ + PyCodeObject *f_code; /* code segment */ + PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ + PyObject *f_globals; /* global symbol table (PyDictObject) */ + PyObject *f_locals; /* local symbol table (any mapping) */ + PyObject **f_valuestack; /* points after the last local */ + /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. + Frame evaluation usually NULLs it, but a frame that yields sets it + to the current stack top. */ + PyObject **f_stacktop; + PyObject *f_trace; /* Trace function */ + char f_trace_lines; /* Emit per-line trace events? */ + char f_trace_opcodes; /* Emit per-opcode trace events? */ + + /* Borrowed reference to a generator, or NULL */ + PyObject *f_gen; + + int f_lasti; /* Last instruction if called */ + /* Call PyFrame_GetLineNumber() instead of reading this field + directly. As of 2.3 f_lineno is only valid when tracing is + active (i.e. when f_trace is set). At other times we use + PyCode_Addr2Line to calculate the line from the current + bytecode index.
*/ + int f_lineno; /* Current line number */ + int f_iblock; /* index in f_blockstack */ + char f_executing; /* whether the frame is still executing */ + PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ + PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ + } PyFrameObject; + ``` +PyInterpreterState, PyThreadState 和 frame 的关系图: + +``` ++---------------------+ +| PyInterpreterState | ++---------------------+ + | + | + v ++----------------+ +----------------+ +----------------+ +| PyThreadState | <--> | PyThreadState | <--> | PyThreadState | ... ++----------------+ +----------------+ +----------------+ + | + | + v ++--------+ +--------+ +--------+ +| frame | <-- | frame | <-- | frame | ++--------+ +--------+ +--------+ +``` + +注意 frame 的指针方向(frame->f_back). + +## ceval + +ceval 循环 switch case 的结构: + +```c +for(;;){ + // ... +fast_next_opcode: + // 取下一条指令 + NEXTOPARG(); + + switch(opcode){ + case xxx: + // ... + FAST_DISPATCH() // goto fast_next_opcode + case yyy: + // ... + DISPATCH() // continue + default: + // ... + goto error; + } + +error: + // 错误处理 + + /* Log traceback info. */ + PyTraceBack_Here(f); +} +``` +## sys.settrace 和 sys.setprofile + +```c +// Python/sysmodule.c +static PyObject * +call_trampoline(PyObject* callback, + PyFrameObject *frame, int what, PyObject *arg) +{ + if (PyFrame_FastToLocalsWithError(frame) < 0) { + return NULL; + } + + PyObject *stack[3]; + stack[0] = (PyObject *)frame; + stack[1] = whatstrings[what]; + stack[2] = (arg != NULL) ? 
arg : Py_None; + + /* call the Python-level function */ + PyObject *result = _PyObject_FastCall(callback, stack, 3); + + PyFrame_LocalsToFast(frame, 1); + if (result == NULL) { + PyTraceBack_Here(frame); + } + + return result; +} + +static int +profile_trampoline(PyObject *self, PyFrameObject *frame, + int what, PyObject *arg) +{ + PyObject *result; + + if (arg == NULL) + arg = Py_None; + result = call_trampoline(self, frame, what, arg); + if (result == NULL) { + PyEval_SetProfile(NULL, NULL); + return -1; + } + Py_DECREF(result); + return 0; +} + +static int +trace_trampoline(PyObject *self, PyFrameObject *frame, + int what, PyObject *arg) +{ + /* 这里的 self 名字太具有误导性了, 拜托认真取个符合语义的名字吧. self 其实是用户设置的 tracefunc, 也就是 tstate->c_traceobj. + */ + PyObject *callback; + PyObject *result; + + if (what == PyTrace_CALL) + callback = self; + else + callback = frame->f_trace; + if (callback == NULL) + return 0; + result = call_trampoline(callback, frame, what, arg); + if (result == NULL) { + PyEval_SetTrace(NULL, NULL); + Py_CLEAR(frame->f_trace); + return -1; + } + if (result != Py_None) { + Py_XSETREF(frame->f_trace, result); + } + else { + Py_DECREF(result); + } + return 0; +} + +static PyObject * +sys_settrace(PyObject *self, PyObject *args) +{ + /* self 指向 sys 模块本身 + args 才是 Python 层调用 sys.settrace(tracefunc) 传入的 tracefunc + + 最终的调用形式: trace_trampoline(args, frame, what, arg) + */ + if (trace_init() == -1) + return NULL; + if (args == Py_None) + PyEval_SetTrace(NULL, NULL); + else + PyEval_SetTrace(trace_trampoline, args); + Py_RETURN_NONE; +} + +static PyObject * +sys_setprofile(PyObject *self, PyObject *args) +{ + if (trace_init() == -1) + return NULL; + if (args == Py_None) + PyEval_SetProfile(NULL, NULL); + else + PyEval_SetProfile(profile_trampoline, args); + Py_RETURN_NONE; +} +``` + +```c +// Python/ceval.c + +void +PyEval_SetTrace(Py_tracefunc func, PyObject *arg) +{ + if (PySys_Audit("sys.settrace", NULL) < 0) { + return; + } + + _PyRuntimeState *runtime = 
&_PyRuntime; + PyThreadState *tstate = _PyRuntimeState_GetThreadState(runtime); + PyObject *temp = tstate->c_traceobj; + runtime->ceval.tracing_possible += (func != NULL) - (tstate->c_tracefunc != NULL); + Py_XINCREF(arg); + tstate->c_tracefunc = NULL; + tstate->c_traceobj = NULL; + /* Must make sure that profiling is not ignored if 'temp' is freed */ + tstate->use_tracing = tstate->c_profilefunc != NULL; + Py_XDECREF(temp); + tstate->c_tracefunc = func; + tstate->c_traceobj = arg; + /* Flag that tracing or profiling is turned on */ + tstate->use_tracing = ((func != NULL) + || (tstate->c_profilefunc != NULL)); +} + +void +PyEval_SetProfile(Py_tracefunc func, PyObject *arg) +{ + if (PySys_Audit("sys.setprofile", NULL) < 0) { + return; + } + + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *temp = tstate->c_profileobj; + Py_XINCREF(arg); + tstate->c_profilefunc = NULL; + tstate->c_profileobj = NULL; + /* Must make sure that tracing is not ignored if 'temp' is freed */ + tstate->use_tracing = tstate->c_tracefunc != NULL; + Py_XDECREF(temp); + tstate->c_profilefunc = func; + tstate->c_profileobj = arg; + /* Flag that tracing or profiling is turned on */ + tstate->use_tracing = (func != NULL) || (tstate->c_tracefunc != NULL); +} +``` + +c_tracefunc 和 c_profilefunc 在进入一个 code block 时会被调用. + +```c +// Python/ceval.c + +if (tstate->use_tracing) { + if (tstate->c_tracefunc != NULL) { + /* tstate->c_tracefunc, if defined, is a + function that will be called on *every* entry + to a code block. Its return value, if not + None, is a function that will be called at + the start of each executed line of code. + (Actually, the function must return itself + in order to continue tracing.) The trace + functions are called with three arguments: + a pointer to the current frame, a string + indicating why the function is called, and + an argument which depends on the situation. + The global trace function is also called + whenever an exception is detected. 
*/ + if (call_trace_protected(tstate->c_tracefunc, + tstate->c_traceobj, + tstate, f, PyTrace_CALL, Py_None)) { + /* Trace function raised an error */ + goto exit_eval_frame; + } + } + if (tstate->c_profilefunc != NULL) { + /* Similar for c_profilefunc, except it needn't + return itself and isn't called for "line" events */ + if (call_trace_protected(tstate->c_profilefunc, + tstate->c_profileobj, + tstate, f, PyTrace_CALL, Py_None)) { + /* Profile function raised an error */ + goto exit_eval_frame; + } + } +} +``` + +具体的用法查看文档: + +- [https://docs.python.org/3/library/sys.html#sys.settrace](https://docs.python.org/3/library/sys.html#sys.settrace) + +- [https://docs.python.org/3/library/sys.html#sys.setprofile](https://docs.python.org/3/library/sys.html#sys.setprofile) + + +### 其它 + +sys 模块增加了很多设置 hook 的方法, 具体可以查看 [sys 文档](https://docs.python.org/3/library/sys.html). + +## 优化 + +- 使用 "threaded code" 优化指令分派效率, 详情见 Python/ceval.c 中的源代码注释和[GCC 文档](http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html) + + GCC 文档说: + + ``` + Such an array of label values serves a purpose much like that of the switch statement. The switch statement is cleaner, so use that rather than an array unless the problem does not fit a switch statement very well. + + Another use of label values is in an interpreter for threaded code. The labels within the interpreter function can be stored in the threaded code for super-fast dispatching. + ``` + +- 预测下一条指令提高性能 + + ```c + /* OpCode prediction macros + Some opcodes tend to come in pairs thus making it possible to + predict the second code when the first is run. For example, + COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE. + + Verifying the prediction costs a single high-speed test of a register + variable against a constant. If the pairing was good, then the + processor's own internal branch predication has a high likelihood of + success, resulting in a nearly zero-overhead transition to the + next opcode.
A successful prediction saves a trip through the eval-loop + including its unpredictable switch-case branch. Combined with the + processor's internal branch prediction, a successful PREDICT has the + effect of making the two opcodes run as if they were a single new opcode + with the bodies combined. + + If collecting opcode statistics, your choices are to either keep the + predictions turned-on and interpret the results as if some opcodes + had been combined or turn-off predictions so that the opcode frequency + counter updates for both opcodes. + + Opcode prediction is disabled with threaded code, since the latter allows + the CPU to record separate branch prediction information for each + opcode. + + */ + + #define PREDICT_ID(op) PRED_##op + + #if defined(DYNAMIC_EXECUTION_PROFILE) || USE_COMPUTED_GOTOS + #define PREDICT(op) if (0) goto PREDICT_ID(op) + #else + #define PREDICT(op) \ + do { \ + _Py_CODEUNIT word = *next_instr; \ + opcode = _Py_OPCODE(word); \ + if (opcode == op) { \ + oparg = _Py_OPARG(word); \ + next_instr++; \ + goto PREDICT_ID(op); \ + } \ + } while(0) + #endif + #define PREDICTED(op) PREDICT_ID(op): + ``` + + From 8d0ef469741d49d58287f02417ad0e695e68f1bf Mon Sep 17 00:00:00 2001 From: ausaki Date: 2021年2月20日 20:36:34 +0800 Subject: [PATCH 2/4] rename; add coroutine --- ch01.md | 205 ------- ch06.md | 4 - ch07.md | 155 ------ ch08.md | 127 ----- ch09.md | 11 - ch10.md | 183 ------ ch11.md | 302 ---------- ch14.md => import.md | 0 opcache.md | 0 threadlocal.md | 129 +++++ ...05345円255円230円347円256円241円347円220円206円.md" | 0 ch04.md => "345円210円227円350円241円250円.md" | 0 ...20350円241円214円347円216円257円345円242円203円.md" | 0 "345円215円217円347円250円213円.md" | 522 +++++++++++++++++- ... "345円244円232円347円272円277円347円250円213円.md" | 0 ch05.md => "345円255円227円345円205円270円.md" | 0 ...55347円232円204円346円263円250円351円207円212円.md" | 0 ... 
"345円255円227円347円254円246円344円270円262円.md" | 0 ch02.md => "346円225円264円346円225円260円.md" | 0 ...73345円236円213円347円263円273円347円273円237円.md" | 0 "350円231円232円346円213円237円346円234円272円.md" | 6 + 21 files changed, 651 insertions(+), 993 deletions(-) delete mode 100644 ch01.md delete mode 100644 ch06.md delete mode 100644 ch07.md delete mode 100644 ch08.md delete mode 100644 ch09.md delete mode 100644 ch10.md delete mode 100644 ch11.md rename ch14.md => import.md (100%) delete mode 100644 opcache.md create mode 100644 threadlocal.md rename ch16.md => "345円206円205円345円255円230円347円256円241円347円220円206円.md" (100%) rename ch04.md => "345円210円227円350円241円250円.md" (100%) rename ch13.md => "345円210円235円345円247円213円345円214円226円350円277円220円350円241円214円347円216円257円345円242円203円.md" (100%) rename ch15.md => "345円244円232円347円272円277円347円250円213円.md" (100%) rename ch05.md => "345円255円227円345円205円270円.md" (100%) rename ch05_dict_more_info.md => "345円255円227円345円205円270円347円232円204円346円233円264円345円244円232円344円277円241円346円201円257円:346円235円245円350円207円252円344円273円243円347円240円201円344円270円255円347円232円204円346円263円250円351円207円212円.md" (100%) rename ch03.md => "345円255円227円347円254円246円344円270円262円.md" (100%) rename ch02.md => "346円225円264円346円225円260円.md" (100%) rename ch12.md => "347円261円273円345円236円213円347円263円273円347円273円237円.md" (100%) diff --git a/ch01.md b/ch01.md deleted file mode 100644 index 6253cf5..0000000 --- a/ch01.md +++ /dev/null @@ -1,205 +0,0 @@ -# Python 对象初探 - -## 对象:PyObject 和 PyVarObject - -PyObject 结构体的相关代码: - -```C -# object.h - -#define PyObject_HEAD \ - _PyObject_HEAD_EXTRA \ - Py_ssize_t ob_refcnt; \ - struct _typeobject *ob_type; - -typedef struct _object { - PyObject_HEAD -} PyObject; -``` - -PyObject 主要包含: - -- ob_refcnt 引用计数 -- ob_type 类型信息 - -PyObject 用来表示定长对象(对象占用的内存空间固定),例如整数对象。而其它的变长对象如字符串使用 PyVarObject 来表示。 - -PyVarObject 结构体相关代码: - -```C -#define PyObject_VAR_HEAD \ - PyObject_HEAD \ - Py_ssize_t ob_size; /* Number of items in variable part */ - -typedef struct 
{ - PyObject_VAR_HEAD -} PyVarObject; - -``` - -PyVarObject 比 PyObject 多了一个 ob_size 信息,ob_size 表示元素的数量,而不是切确的字节数。 - -## 类型对象:PyTypeObject - -PyTypeObject 是所有对象的类型元信息。代码如下: - -```C -typedef struct _typeobject { - PyObject_VAR_HEAD - const char *tp_name; /* For printing, in format "." */ - Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ - - /* Methods to implement standard operations */ - - destructor tp_dealloc; - printfunc tp_print; - getattrfunc tp_getattr; - setattrfunc tp_setattr; - cmpfunc tp_compare; - reprfunc tp_repr; - - /* Method suites for standard classes */ - - PyNumberMethods *tp_as_number; - PySequenceMethods *tp_as_sequence; - PyMappingMethods *tp_as_mapping; - - /* More standard operations (here for binary compatibility) */ - - hashfunc tp_hash; - ternaryfunc tp_call; - reprfunc tp_str; - getattrofunc tp_getattro; - setattrofunc tp_setattro; - - /* Functions to access object as input/output buffer */ - PyBufferProcs *tp_as_buffer; - - /* Flags to define presence of optional/expanded features */ - long tp_flags; - - const char *tp_doc; /* Documentation string */ - - /* Assigned meaning in release 2.0 */ - /* call function for all accessible objects */ - traverseproc tp_traverse; - - /* delete references to contained objects */ - inquiry tp_clear; - - /* Assigned meaning in release 2.1 */ - /* rich comparisons */ - richcmpfunc tp_richcompare; - - /* weak reference enabler */ - Py_ssize_t tp_weaklistoffset; - - /* Added in release 2.2 */ - /* Iterators */ - getiterfunc tp_iter; - iternextfunc tp_iternext; - - /* Attribute descriptor and subclassing stuff */ - struct PyMethodDef *tp_methods; - struct PyMemberDef *tp_members; - struct PyGetSetDef *tp_getset; - struct _typeobject *tp_base; - PyObject *tp_dict; - descrgetfunc tp_descr_get; - descrsetfunc tp_descr_set; - Py_ssize_t tp_dictoffset; - initproc tp_init; - allocfunc tp_alloc; - newfunc tp_new; - freefunc tp_free; /* Low-level free-memory routine */ - inquiry tp_is_gc; /* For 
PyObject_IS_GC */ - PyObject *tp_bases; - PyObject *tp_mro; /* method resolution order */ - PyObject *tp_cache; - PyObject *tp_subclasses; - PyObject *tp_weaklist; - destructor tp_del; - -#ifdef COUNT_ALLOCS - /* these must be last and never explicitly initialized */ - Py_ssize_t tp_allocs; - Py_ssize_t tp_frees; - Py_ssize_t tp_maxalloc; - struct _typeobject *tp_prev; - struct _typeobject *tp_next; -#endif -} PyTypeObject; -``` - -PyTypeObject 主要包含以下信息: - -- 类型名,tp_name,主要是 Python 内部以及调试的时候使用; - -- 创建该类型对象时分配内存空间大小的信息,即 tp_basicsize 和 tp_itemsize; - -- 与该类型对象相关联的操作信息(就是诸如 tp_print 这样的许多的函数指针); - -PyTypeObject 可以理解为 PyObject 的类型, 在 Python 层面的话,PyObject 对应为 instance,PyTypeObject 对应为 class。 - -在 PyTypeObject 结构体定义的第一行可以看到 `PyObject_VAR_HEAD`,说明 PyTypeObject 也是一个对象(PyVarObject),这又印证了一句话:在 Python 中所有东西都是对象。每一个对象都是有类型的,既然 PyTypeObject 是一个对象,那么它的类型是什么呢?换句话说,类型的类型是什么?答案是元类,即 PyType_Type,对应 Python 中的 type。 - -PyType_Type 的定义: - -```C -PyTypeObject PyType_Type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, /* ob_size */ - "type", /* tp_name */ - sizeof(PyHeapTypeObject), /* tp_basicsize */ - sizeof(PyMemberDef), /* tp_itemsize */ - (destructor)type_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - type_compare, /* tp_compare */ - (reprfunc)type_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)_Py_HashPointer, /* tp_hash */ - (ternaryfunc)type_call, /* tp_call */ - 0, /* tp_str */ - (getattrofunc)type_getattro, /* tp_getattro */ - (setattrofunc)type_setattro, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_BASETYPE, /* tp_flags */ - type_doc, /* tp_doc */ - (traverseproc)type_traverse, /* tp_traverse */ - (inquiry)type_clear, /* tp_clear */ - 0, /* tp_richcompare */ - offsetof(PyTypeObject, tp_weaklist), /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - type_methods, /* tp_methods */ - type_members, /* 
tp_members */ - type_getsets, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - offsetof(PyTypeObject, tp_dict), /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - type_new, /* tp_new */ - PyObject_GC_Del, /* tp_free */ - (inquiry)type_is_gc, /* tp_is_gc */ -}; -``` - -```python -In [259]: int.__class__ -Out[259]: type - -In [260]: object.__class__ -Out[260]: type - -In [261]: type.__class__ -Out[261]: type -``` - diff --git a/ch06.md b/ch06.md deleted file mode 100644 index c3ec19d..0000000 --- a/ch06.md +++ /dev/null @@ -1,4 +0,0 @@ -# 最简单的Python模拟——Small Python - -源码在[这里](assets/small_python)。 -> 来源于Google搜索。 \ No newline at end of file diff --git a/ch07.md b/ch07.md deleted file mode 100644 index acf3d89..0000000 --- a/ch07.md +++ /dev/null @@ -1,155 +0,0 @@ -# Python 的编译结果——Code 对象与 pyc 文件 - -## Python 脚本的执行过程 - -- 编译 - - Python 编译器将 py 文件编译成字节码(PyCodeObject)。 - -- 执行 - - Python 虚拟机执行字节码。 - - -## PyCodeObject - -PyCodeObject 定义: - -```C -// Include/code.h - -typedef struct { - PyObject_HEAD - int co_argcount; /* #arguments, except *args */ - int co_nlocals; /* #local variables */ - int co_stacksize; /* #entries needed for evaluation stack */ - int co_flags; /* CO_..., see below */ - PyObject *co_code; /* instruction opcodes */ - PyObject *co_consts; /* list (constants used) */ - PyObject *co_names; /* list of strings (names used) */ - PyObject *co_varnames; /* tuple of strings (local variable names) */ - PyObject *co_freevars; /* tuple of strings (free variable names) */ - PyObject *co_cellvars; /* tuple of strings (cell variable names) */ - /* The rest doesn't count for hash/cmp */ - PyObject *co_filename; /* string (where it was loaded from) */ - PyObject *co_name; /* string (name, for reference) */ - int co_firstlineno; /* first source line number */ - PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */ - void *co_zombieframe; /* for optimization only (see frameobject.c) */ -} 
PyCodeObject; -``` - -Python 会对源代码中的每个代码块(Code Block)生成一个 PyCodeObject 对象,一个作用域(名字空间)就算做一个代码块。 - -代码块对应的字节码就存放在 PyCodeObject 的 `co_code`字段。 - -## 生成 pyc 文件 - -pyc文件包含三部分信息: - -- magic number - - 用于标识 pyc 版本,不同Python 版本的 magic number 不一样。通过 magic number 可以避免 Python 加载错误版本的 pyc 文件。 - - Python2.5 的 magic number 为: - - ```C - #define MAGIC (62131 | ((long)'\r'<<16) | ((long)'\n'<<24)) - - /* Magic word as global; note that _PyImport_Init() can change the - value of this global to accommodate for alterations of how the - compiler works which are enabled by command line switches. */ - static long pyc_magic = MAGIC; - ``` - -- mtime - - 文件修改时间,用于和 py 文件的修改时间进行对比。Python 在导入模块时优先导入pyc 文件,但是有可能 pyc 文件对应的 py 文件已经被修改了,所以 Python 需要对比 pyc 文件和 py 文件的修改时间,如果 py 文件的修改时间更新,则需要重新编译 pyc 文件。 - -- 字节码,即 PyCodeObject。 - -生成 pyc 文件的代码如下: - -```C -// Python/import.c - -/* Write a compiled module to a file, placing the time of last - modification of its source into the header. - Errors are ignored, if a write error occurs an attempt is made to - remove the file. 
*/ - -static void -write_compiled_module(PyCodeObject *co, char *cpathname, time_t mtime) -{ - FILE *fp; - - fp = open_exclusive(cpathname); - if (fp == NULL) { - if (Py_VerboseFlag) - PySys_WriteStderr( - "# can't create %s\n", cpathname); - return; - } - PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); - /* First write a 0 for mtime */ - PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); - PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); - if (fflush(fp) != 0 || ferror(fp)) { - if (Py_VerboseFlag) - PySys_WriteStderr("# can't write %s\n", cpathname); - /* Don't keep partial file */ - fclose(fp); - (void) unlink(cpathname); - return; - } - /* Now write the true mtime */ - fseek(fp, 4L, 0); - assert(mtime < LONG_MAX); - PyMarshal_WriteLongToFile((long)mtime, fp, Py_MARSHAL_VERSION); - fflush(fp); - fclose(fp); - if (Py_VerboseFlag) - PySys_WriteStderr("# wrote %s\n", cpathname); -} -``` - -`PyMarshal_WriteObjectToFile`最终调用 `w_object`将 PyCodeObject 写入文件中,`w_object`的代码如下(有删减): - -```C -// Python/marchal.c - -// w_object 的代码很长,都是 if/else if 判断 object 的具体类型。 - -static void -w_object(PyObject *v, WFILE *p) -{ - ... - else if (PyCode_Check(v)) { - PyCodeObject *co = (PyCodeObject *)v; - w_byte(TYPE_CODE, p); - w_long(co->co_argcount, p); - w_long(co->co_nlocals, p); - w_long(co->co_stacksize, p); - w_long(co->co_flags, p); - w_object(co->co_code, p); - w_object(co->co_consts, p); - w_object(co->co_names, p); - w_object(co->co_varnames, p); - w_object(co->co_freevars, p); - w_object(co->co_cellvars, p); - w_object(co->co_filename, p); - w_object(co->co_name, p); - w_long(co->co_firstlineno, p); - w_object(co->co_lnotab, p); - } - ... 
-``` - -## 解析 pyc 文件 - -我自己写了一个 Python 脚本解析 pyc 文件,放在 [这里](codes/python_scripts/parse_pyc.py) - -## dis - -Python 标准库中的 dis 模块可以用来获取代码对应的字节码。 - diff --git a/ch08.md b/ch08.md deleted file mode 100644 index 9c03158..0000000 --- a/ch08.md +++ /dev/null @@ -1,127 +0,0 @@ -# Python 虚拟机框架 - -Python 虚拟机是一个栈机器,数据存放在栈中,虚拟机不断执行字节码中的指令,操作数据入栈、出栈。 - -虚拟机会创建栈帧 (Frame) 作为执行环境(即命名空间),当一个函数调用另外一个函数时,虚拟机会为被调用函数创建一个新的栈帧。 - - -``` - +-------------------------+ +-------------------------+ - | | | Block stack | - +-----> | Call frame(bar) +------> +-------------------------+ - | | | | Data stack | - | +-----------+-------------+ +-------------------------+ - | | - | +-----------v-------------+ +-------------------------+ - | | | | Block stack | -Call stack | Call frame(foo) +------> +-------------------------+ - | | | | Data stack | - | +-----------+-------------+ +-------------------------+ - | | - | +-----------v-------------+ +-------------------------+ - | | | | Block stack | - | | Call frame(main) +------> +-------------------------+ - +-----> | | | Data stack | - +-------------------------+ +-------------------------+ - -``` - -## PyFrameObject - -定义: - -```C -typedef struct _frame { - PyObject_VAR_HEAD - struct _frame *f_back; /* previous frame, or NULL */ - PyCodeObject *f_code; /* code segment */ - PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ - PyObject *f_globals; /* global symbol table (PyDictObject) */ - PyObject *f_locals; /* local symbol table (any mapping) */ - PyObject **f_valuestack; /* points after the last local */ - /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. - Frame evaluation usually NULLs it, but a frame that yields sets it - to the current stack top. */ - PyObject **f_stacktop; - PyObject *f_trace; /* Trace function */ - - /* If an exception is raised in this frame, the next three are used to - * record the exception info (if any) originally in the thread state. 
See - * comments before set_exc_info() -- it's not obvious. - * Invariant: if _type is NULL, then so are _value and _traceback. - * Desired invariant: all three are NULL, or all three are non-NULL. That - * one isn't currently true, but"should be". - */ - PyObject *f_exc_type, *f_exc_value, *f_exc_traceback; - - PyThreadState *f_tstate; - int f_lasti; /* Last instruction if called */ - /* As of 2.3 f_lineno is only valid when tracing is active (i.e. when - f_trace is set) -- at other times use PyCode_Addr2Line instead. */ - int f_lineno; /* Current line number */ - int f_iblock; /* index in f_blockstack */ - PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ - PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ -} PyFrameObject; -``` - -## 嵌套作用域 - -LGB 规则:Locals -> Globals -> Builtins - -```python -# main.py -a = 1 -b = 1 - -def f(): - a = 2 - print a # 输出 2 - print b # 输出 1 - -print a # 输出 1 -print b # 输出 1 -``` - -书中提到一个另许多初学者疑惑的例子: - -```python -a = 1 - -def f(): - print a - a = 2 - -f() -``` - -上述代码执行时会报错:`UnboundLocalError: local variable 'a' referenced before assignment`。 - -常见的误解是:在执行到函数 f 的 `print` 语句时,由于 `local` 作用域没有 `a`,所以会去外层作用域寻找 `a`,找到 `a=1`,因此打印 1。 - -实际上 Python 在解析(或者说编译)函数 f 的定义时,发现赋值语句`a = 2`,则会在函数 f 的 `local` 作用域创建 `a`,但是并没有对其初始化,直到执行`a = 2`时才会对 a 进行赋值。在执行`print a`时,在 `local` 作用域找到了`a`,但是由于没有初始化,所以才会出现`UnboundLocalError: local variable 'a' referenced before assignment`错误。 - - -## 闭包 - -LEGB 规则: Locals -> Enclosing -> Globals -> Builtins - -```python -# main.py -a = 1 - -def f(): - a = 2 - def g(): - print a - return g - -g = f() -g() -# 输出 2 - -``` - -## 其它有用的资源 - -- [a-python-interpreter-written-in-python](http://aosabook.org/en/500L/a-python-interpreter-written-in-python.html) \ No newline at end of file diff --git a/ch09.md b/ch09.md deleted file mode 100644 index d401431..0000000 --- a/ch09.md +++ /dev/null @@ -1,11 +0,0 @@ -# Python虚拟机中的一般表达式 - -这一章主要分析了 Python 虚拟机如何执行字节码表达式。想要了解具体信息可以阅读代码(src/Python/ceval.c 
的PyEval_EvalFrameEx函数)。 - -- LOAD_CONST -- STORN_NAME -- LOAD_NAME -- BUILD_LIST -- BUILD_NAME -- 等等 - diff --git a/ch10.md b/ch10.md deleted file mode 100644 index 29d8598..0000000 --- a/ch10.md +++ /dev/null @@ -1,183 +0,0 @@ -# Python虚拟机中的控制流 - - -## if 控制流 - -用到的代码: - -```python -# if_control.py -a = 1 -if a> 10: - prin "a> 10" -elif a <= -2: - print "a <= -2" -elif a != 1: - print "a != 1" -elif a == 1: - print "a == 1" -else: - print "Unknown a" - -``` - -if_control.py 对应的字节码: - -```python - 1 0 LOAD_CONST 0 (1) - 3 STORE_NAME 0 (a) - - 2 6 LOAD_NAME 0 (a) - 9 LOAD_CONST 1 (10) - 12 COMPARE_OP 4 (>) - 15 JUMP_IF_FALSE 9 (to 27) - 18 POP_TOP - - 3 19 LOAD_CONST 2 ('a> 10') - 22 PRINT_ITEM - 23 PRINT_NEWLINE - 24 JUMP_FORWARD 72 (to 99) ->> 27 POP_TOP - - 4 28 LOAD_NAME 0 (a) - 31 LOAD_CONST 3 (-2) - 34 COMPARE_OP 1 (<=) - 37 JUMP_IF_FALSE 9 (to 49) - 40 POP_TOP - - 5 41 LOAD_CONST 4 ('a <= -2') - 44 PRINT_ITEM - 45 PRINT_NEWLINE - 46 JUMP_FORWARD 50 (to 99) ->> 49 POP_TOP - - 6 50 LOAD_NAME 0 (a) - 53 LOAD_CONST 0 (1) - 56 COMPARE_OP 3 (!=) - 59 JUMP_IF_FALSE 9 (to 71) - 62 POP_TOP - - 7 63 LOAD_CONST 5 ('a != 1') - 66 PRINT_ITEM - 67 PRINT_NEWLINE - 68 JUMP_FORWARD 28 (to 99) ->> 71 POP_TOP - - 8 72 LOAD_NAME 0 (a) - 75 LOAD_CONST 0 (1) - 78 COMPARE_OP 2 (==) - 81 JUMP_IF_FALSE 9 (to 93) - 84 POP_TOP - - 9 85 LOAD_CONST 6 ('a == 1') - 88 PRINT_ITEM - 89 PRINT_NEWLINE - 90 JUMP_FORWARD 6 (to 99) ->> 93 POP_TOP - - 11 94 LOAD_CONST 7 ('Unknown a') - 97 PRINT_ITEM - 98 PRINT_NEWLINE ->> 99 LOAD_CONST 8 (None) - 102 RETURN_VALUE -None -``` - -## for 控制流 - -用到的代码: - -```C -// for_control.py -lst = [1, 2] -for i in lst: - print i -``` - -对应的字节码: - -```python - 1 0 LOAD_CONST 0 (1) - 3 LOAD_CONST 1 (2) - 6 BUILD_LIST 2 - 9 STORE_NAME 0 (lst) - - 2 12 SETUP_LOOP 19 (to 34) - 15 LOAD_NAME 0 (lst) - 18 GET_ITER ->> 19 FOR_ITER 11 (to 33) - 22 STORE_NAME 1 (i) - - 3 25 LOAD_NAME 1 (i) - 28 PRINT_ITEM - 29 PRINT_NEWLINE - 30 JUMP_ABSOLUTE 19 ->> 33 POP_BLOCK ->> 34 
LOAD_CONST 2 (None) - 37 RETURN_VALUE -None -``` - -## while 控制流 - -用到的代码: - -```C -// while_control.py -i = 0 -while i < 10: - i += 1 - if i>= 5: - continue - if i == 20: - break - print i -``` - -对应的字节码: - -```python - 1 0 LOAD_CONST 0 (0) - 3 STORE_NAME 0 (i) - - 2 6 SETUP_LOOP 71 (to 80) ->> 9 LOAD_NAME 0 (i) - 12 LOAD_CONST 1 (10) - 15 COMPARE_OP 0 (<) - 18 JUMP_IF_FALSE 57 (to 78) - 21 POP_TOP - - 3 22 LOAD_NAME 0 (i) - 25 LOAD_CONST 2 (1) - 28 INPLACE_ADD - 29 STORE_NAME 0 (i) - - 4 32 LOAD_NAME 0 (i) - 35 LOAD_CONST 3 (5) - 38 COMPARE_OP 5 (>=) - 41 JUMP_IF_FALSE 7 (to 51) - 44 POP_TOP - - 5 45 JUMP_ABSOLUTE 9 - 48 JUMP_FORWARD 1 (to 52) ->> 51 POP_TOP - - 6>> 52 LOAD_NAME 0 (i) - 55 LOAD_CONST 4 (20) - 58 COMPARE_OP 2 (==) - 61 JUMP_IF_FALSE 5 (to 69) - 64 POP_TOP - - 7 65 BREAK_LOOP - 66 JUMP_FORWARD 1 (to 70) ->> 69 POP_TOP - - 8>> 70 LOAD_NAME 0 (i) - 73 PRINT_ITEM - 74 PRINT_NEWLINE - 75 JUMP_ABSOLUTE 9 ->> 78 POP_TOP - 79 POP_BLOCK ->> 80 LOAD_CONST 5 (None) - 83 RETURN_VALUE -``` \ No newline at end of file diff --git a/ch11.md b/ch11.md deleted file mode 100644 index eb5defc..0000000 --- a/ch11.md +++ /dev/null @@ -1,302 +0,0 @@ -# Python 虚拟机中的函数机制 - -一个函数意味着一个新的作用域(命名空间),当 Python 虚拟机执行一个函数时,首先创建一个 PyFrameObject,然后执行函数的字节码(PyCodeObject)。 - - -PyFunctionObject 的定义: - -```C -typedef struct { - PyObject_HEAD - PyObject *func_code; /* A code object */ - PyObject *func_globals; /* A dictionary (other mappings won't do) */ - PyObject *func_defaults; /* NULL or a tuple */ - PyObject *func_closure; /* NULL or a tuple of cell objects */ - PyObject *func_doc; /* The __doc__ attribute, can be anything */ - PyObject *func_name; /* The __name__ attribute, a string object */ - PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */ - PyObject *func_weakreflist; /* List of weak references */ - PyObject *func_module; /* The __module__ attribute, can be anything */ - - /* Invariant: - * func_closure contains the bindings for func_code->co_freevars, so - * 
PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code) - * (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0). - */ -} PyFunctionObject; - -``` - -PyCodeObject 是在编译时产生的,包含函数的一些静态信息,如 co_consts, co_names, co_code 等,而 PyFunctionObject 则是执行 def 语句时动态产生的,包含 PyCodeObject 以及其它动态的信息,如 func_globals。 - -一个函数只有一个 PyCodeObject,但是却会产生多个 PyFunctionObject,每次调用函数时都会创建一个新的 PyFunctionObject,该 PyFunctionObject 关联至唯一的 PyCodeObject。 - -## 函数调用流程 - -```python -[func_0.py] -def f(): -0 LOAD_CONST 0 (code object f) -3 MAKE_FUNCTION 0 -6 STORE_NAME 0 (f) - print "Function" - 0 LOAD_CONST 1 ("Function") - 3 PRINT_ITEM - 4 PRINT_NEWLINE - 5 LOAD_CONST 0 (None) - 8 RETURN_VALUE - -f() -9 LOAD_NAME 0 (f) -12 CALL_FUNCTION 0 -15 POP_TOP -16 LOAD_CONST 1 (None) -19 RETURN_VALUE - -``` - -重点指令:MAKE_FUNCTION,CALL_FUNCTION。 - -```C -case MAKE_FUNCTION: - v = POP(); /* code object */ - x = PyFunction_New(v, f->f_globals); - Py_DECREF(v); - /* XXX Maybe this should be a separate opcode? */ - if (x != NULL && oparg> 0) { - // oparg 表示 "具有默认值的参数" 的个数 - v = PyTuple_New(oparg); - if (v == NULL) { - Py_DECREF(x); - x = NULL; - break; - } - while (--oparg>= 0) { - w = POP(); - PyTuple_SET_ITEM(v, oparg, w); - } - err = PyFunction_SetDefaults(x, v); - Py_DECREF(v); - } - PUSH(x); - break; -``` - -```C -case CALL_FUNCTION: - { - PyObject **sp; - PCALL(PCALL_ALL); - sp = stack_pointer; -#ifdef WITH_TSC - x = call_function(&sp, oparg, &intr0, &intr1); -#else - x = call_function(&sp, oparg); -#endif - stack_pointer = sp; - PUSH(x); // 函数返回值 - if (x != NULL) - continue; - break; - } - -static PyObject * -call_function(PyObject ***pp_stack, int oparg -#ifdef WITH_TSC - , uint64* pintr0, uint64* pintr1 -#endif - ) -{ - // oparg 是 short 类型,高字节表示 nk(实参中关键字参数个数),低字节表示 na(实参中位置参数个数) - int na = oparg & 0xff; - int nk = (oparg>>8) & 0xff; - int n = na + 2 * nk; - PyObject **pfunc = (*pp_stack) - n - 1; - PyObject *func = *pfunc; - PyObject *x, *w; - - /* Always dispatch PyCFunction first, because these 
are - presumed to be the most frequent callable object. - */ - if (PyCFunction_Check(func) && nk == 0) { - int flags = PyCFunction_GET_FLAGS(func); - PyThreadState *tstate = PyThreadState_GET(); - - PCALL(PCALL_CFUNCTION); - if (flags & (METH_NOARGS | METH_O)) { - PyCFunction meth = PyCFunction_GET_FUNCTION(func); - PyObject *self = PyCFunction_GET_SELF(func); - if (flags & METH_NOARGS && na == 0) { - C_TRACE(x, (*meth)(self,NULL)); - } - else if (flags & METH_O && na == 1) { - PyObject *arg = EXT_POP(*pp_stack); - C_TRACE(x, (*meth)(self,arg)); - Py_DECREF(arg); - } - else { - err_args(func, flags, na); - x = NULL; - } - } - else { - PyObject *callargs; - callargs = load_args(pp_stack, na); - READ_TIMESTAMP(*pintr0); - C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); - READ_TIMESTAMP(*pintr1); - Py_XDECREF(callargs); - } - } else { - if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) { - /* optimize access to bound methods */ - PyObject *self = PyMethod_GET_SELF(func); - PCALL(PCALL_METHOD); - PCALL(PCALL_BOUND_METHOD); - Py_INCREF(self); - func = PyMethod_GET_FUNCTION(func); - Py_INCREF(func); - Py_DECREF(*pfunc); - *pfunc = self; - na++; - n++; - } else - Py_INCREF(func); - READ_TIMESTAMP(*pintr0); - if (PyFunction_Check(func)) - x = fast_function(func, pp_stack, n, na, nk); - else - x = do_call(func, pp_stack, na, nk); - READ_TIMESTAMP(*pintr1); - Py_DECREF(func); - } - - /* Clear the stack of the function object. Also removes - the arguments in case they weren't consumed already - (fast_function() and err_args() leave them on the stack). 
- */ - while ((*pp_stack)> pfunc) { - w = EXT_POP(*pp_stack); - Py_DECREF(w); - PCALL(PCALL_POP); - } - return x; -} -``` - -`fast_function` 和 `PyEval_EvalCodeEx` 的代码太长,就不复制粘贴了。源代码(branch: ch11)有一些注释。 - - -几个和函数参数有关的变量: - -- PyCodeObject->co_argcount - - 形参中参数的个数,不包括 *args 和 **kwargs - -- PyCodeObject->co_nlocals - - 函数局部变量的个数,包括参数个数(co_argcount + *args + **kwargs) - -- na - - 实参中位置参数的个数 - -- nw - - 实参中关键字参数的个数 - - -f_localsplus 布局 - -``` -PyFrameObject: - -+------------+-------------------+---------------------------+ -| | | | -| frame_info | extras | valuestack | -| | | | -+--------------------------------+---------------------------+ - | | - <-------------+ f_localsplus +-----------------> - | | - -extras: - -+----------+---------+-------+----------+----------+----------+ -| | | | | | | -| pos args | kw args | *args | **kwargs | cellvars | freevars | -| | | | | | | -+----------+---------+-------+----------+----------+----------+ - - -``` - -cellvars 一般是外层函数使用,存放着被内层嵌套函数引用的变量 - -freevars 一般是内层嵌套函数使用,存放着对外层函数中变量的引用 - -## 常见的函数定义和调用 - -可以使用下面的例子去思考源码中 fast_function和 PyEval_EvalCodeEx的执行流程。 - -- 没有参数 - - ```py - def f(): - print 'hello world' - - f() - - # co_argcount = 0 - # na = 0, nw = 0 - ``` - - **符合 fast_function 条件** - -- 只有位置参数 - - ```py - def f(a, b): - print 'hello world' - - f(1, 2) - # co_argcount = 2 - # na = 2, nw = 0 - - f(a=1, b=2) - # co_argcount = 2 - # na = 0, nw = 2 - ``` - - **f(1, 2) 符合 fast_function 条件** - - **f(a=1, b=2) 不符合 fast_function 条件** - -- 位置参数和关键字参数 - - ```py - def f(a, b, c=1, d=2): - print 'hello world' - - f(1, 2) - - f(1, 2, 3) - - f(1, 2, 3, 4) - - f(1, 2, c=3) - - f(1, 2, d=3) - ``` - - -- 可变参数 - - ```py - def f(a, b, *args, **kwargs): - print 'hello world' - - f(1, 2) - - f(1, 2, 3, 4, c=1, d=2) - ``` \ No newline at end of file diff --git a/ch14.md b/import.md similarity index 100% rename from ch14.md rename to import.md diff --git a/opcache.md b/opcache.md deleted file mode 100644 index e69de29..0000000 diff --git 
a/threadlocal.md b/threadlocal.md new file mode 100644 index 0000000..9ea43d5 --- /dev/null +++ b/threadlocal.md @@ -0,0 +1,129 @@ +# Thread Local 的实现原理 + +Python 标准库中有一个模块[_threading_local.py](Lib/_threading_local.py), 使用纯 Python 实现了 thread local. 代码比较少, 复制了一份: + +```py +from weakref import ref +from contextlib import contextmanager + +__all__ = ["local"] + +# We need to use objects from the threading module, but the threading +# module may also want to use our `local` class, if support for locals +# isn't compiled in to the `thread` module. This creates potential problems +# with circular imports. For that reason, we don't import `threading` +# until the bottom of this file (a hack sufficient to worm around the +# potential problems). Note that all platforms on CPython do have support +# for locals in the `thread` module, and there is no circular import problem +# then, so problems introduced by fiddling the order of imports here won't +# manifest. + +class _localimpl: + """A class managing thread-local dicts""" + __slots__ = 'key', 'dicts', 'localargs', 'locallock', '__weakref__' + + def __init__(self): + # The key used in the Thread objects' attribute dicts. + # We keep it a string for speed but make it unlikely to clash with + # a "real" attribute. + self.key = '_threading_local._localimpl.' + str(id(self)) + # { id(Thread) -> (ref(Thread), thread-local dict) } + self.dicts = {} + + def get_dict(self): + """Return the dict for the current thread. Raises KeyError if none + defined.""" + thread = current_thread() + return self.dicts[id(thread)][1] + + def create_dict(self): + """Create a new dict for the current thread, and return it.""" + localdict = {} + key = self.key + thread = current_thread() + idt = id(thread) + def local_deleted(_, key=key): + # When the localimpl is deleted, remove the thread attribute.
+ thread = wrthread() + if thread is not None: + del thread.__dict__[key] + def thread_deleted(_, idt=idt): + # When the thread is deleted, remove the local dict. + # Note that this is suboptimal if the thread object gets + # caught in a reference loop. We would like to be called + # as soon as the OS-level thread ends instead. + local = wrlocal() + if local is not None: + dct = local.dicts.pop(idt) + # 这里使用弱引用是为了避免循环引用 + # 在 dicts 中记录每个线程是为了在 thread local 被销毁时, 方便删除线程中引用的 thread local + # 在线程中记录 thread local 是为了在线程被销毁时, 方便删除 thread local 中引用的线程. + # 而这些又都是通过弱引用的 callback 来实现的. + # 这个实现思路非常值得学习. + wrlocal = ref(self, local_deleted) + wrthread = ref(thread, thread_deleted) + thread.__dict__[key] = wrlocal + self.dicts[idt] = wrthread, localdict + return localdict + + +@contextmanager +def _patch(self): + impl = object.__getattribute__(self, '_local__impl') + try: + dct = impl.get_dict() + except KeyError: + dct = impl.create_dict() + args, kw = impl.localargs + self.__init__(*args, **kw) + with impl.locallock: + object.__setattr__(self, '__dict__', dct) + yield + + +class local: + __slots__ = '_local__impl', '__dict__' + + def __new__(cls, /, *args, **kw): + if (args or kw) and (cls.__init__ is object.__init__): + raise TypeError("Initialization arguments are not supported") + self = object.__new__(cls) + impl = _localimpl() + impl.localargs = (args, kw) + impl.locallock = RLock() + object.__setattr__(self, '_local__impl', impl) + # We need to create the thread dict in anticipation of + # __init__ being called, to make sure we don't call it + # again ourselves. 
+ impl.create_dict() + return self + + def __getattribute__(self, name): + with _patch(self): + return object.__getattribute__(self, name) + + def __setattr__(self, name, value): + if name == '__dict__': + raise AttributeError( + "%r object attribute '__dict__' is read-only" + % self.__class__.__name__) + with _patch(self): + return object.__setattr__(self, name, value) + + def __delattr__(self, name): + if name == '__dict__': + raise AttributeError( + "%r object attribute '__dict__' is read-only" + % self.__class__.__name__) + with _patch(self): + return object.__delattr__(self, name) + + +from threading import current_thread, RLock +``` + +上面这段代码的有些细节有点不太好理解, 不过基本实现原理还是比较容易理解的. [最早的实现](https://github.com/python/cpython/commit/d15dc06df062fdf0fe8badec2982c6c5e0e28eb0)非常简单, 有助于帮助理解. + +当然, thread local 也有 C 语言的实现, 具体的代码在[_threadmodule.c](https://github.com/ausaki/cpython/tree/v3.9.notes/Modules/_threadmodule.c), 里面有我写的一些注释. + +和使用纯 Python 实现的 thread local 不同的是, 使用 C 实现不需要使用锁, 因为这些操作已经被 GIL 保护了. diff --git a/ch16.md "b/345円206円205円345円255円230円347円256円241円347円220円206円.md" similarity index 100% rename from ch16.md rename to "345円206円205円345円255円230円347円256円241円347円220円206円.md" diff --git a/ch04.md "b/345円210円227円350円241円250円.md" similarity index 100% rename from ch04.md rename to "345円210円227円350円241円250円.md" diff --git a/ch13.md "b/345円210円235円345円247円213円345円214円226円350円277円220円350円241円214円347円216円257円345円242円203円.md" similarity index 100% rename from ch13.md rename to "345円210円235円345円247円213円345円214円226円350円277円220円350円241円214円347円216円257円345円242円203円.md" diff --git "a/345円215円217円347円250円213円.md" "b/345円215円217円347円250円213円.md" index b59d3c8..7f9cc09 100644 --- "a/345円215円217円347円250円213円.md" +++ "b/345円215円217円347円250円213円.md" @@ -222,11 +222,7 @@ case TARGET(GET_AWAITABLE): { 可以看出 await 的基本原理就是 `YIELD_FROM`, 这再一次说明协程和生成器的关系. `GET_AWAITABLE` 将协程放入到栈顶, `YIELD_FROM` 的 receiver 就是该协程. 
-## asyncio 模块 - - - -### eventloop +## eventloop eventloop 的各种方法分类: @@ -313,9 +309,523 @@ eventloop 的各种方法分类: [eventloop 的官方文档](https://docs.python.org/3/library/asyncio-eventloop.html) +### 调度机制 + +eventloop 支持定时任务功能, call_at 函数将任务设定在指定时间执行, call_later 函数将任务设定在距离当前若干时间后执行, call_soon 函数将任务设定在下个调度点执行. + +简单来看, eventloop 的核心就是一个循环, 部分代码如下: + +```py +class BaseEventLoop(events.AbstractEventLoop): + + def run_forever(self): + """Run until stop() is called.""" + self._check_closed() + self._check_running() + self._set_coroutine_origin_tracking(self._debug) + self._thread_id = threading.get_ident() + + old_agen_hooks = sys.get_asyncgen_hooks() + sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook, + finalizer=self._asyncgen_finalizer_hook) + try: + events._set_running_loop(self) + while True: + self._run_once() + if self._stopping: + break + finally: + self._stopping = False + self._thread_id = None + events._set_running_loop(None) + self._set_coroutine_origin_tracking(False) + sys.set_asyncgen_hooks(*old_agen_hooks) + + def _run_once(self): + """Run one full iteration of the event loop. + + This calls all currently ready callbacks, polls for I/O, + schedules the resulting callbacks, and finally schedules + 'call_later' callbacks. + """ + + # 从 _scheduled 队列中移除 "取消" 的任务 + sched_count = len(self._scheduled) + if (sched_count> _MIN_SCHEDULED_TIMER_HANDLES and + self._timer_cancelled_count / sched_count> + _MIN_CANCELLED_TIMER_HANDLES_FRACTION): + # Remove delayed calls that were cancelled if their number + # is too high + new_scheduled = [] + for handle in self._scheduled: + if handle._cancelled: + handle._scheduled = False + else: + new_scheduled.append(handle) + + heapq.heapify(new_scheduled) + self._scheduled = new_scheduled + self._timer_cancelled_count = 0 + else: + # Remove delayed calls that were cancelled from head of queue. 
+ while self._scheduled and self._scheduled[0]._cancelled: + self._timer_cancelled_count -= 1 + handle = heapq.heappop(self._scheduled) + handle._scheduled = False + + # 计算最小的 timeout, 一般情况下 timeout 等于 _scheduled 队列中最小的时间减去当前时间 + timeout = None + if self._ready or self._stopping: + timeout = 0 + elif self._scheduled: + # Compute the desired timeout. + when = self._scheduled[0]._when + timeout = min(max(0, when - self.time()), MAXIMUM_SELECT_TIMEOUT) + + # BaseEventLoop 没有实现 _selector 和 _process_events, 需要子类自己实现. + # 可以查看 selector_events.py 中的 BaseSelectorEventLoop 和 unix_events.py 中的 _UnixSelectorEventLoop. + event_list = self._selector.select(timeout) + self._process_events(event_list) + + # Handle 'later' callbacks that are ready. + # self._clock_resolution 等于 time.get_clock_info('monotonic').resolution, 我的系统 resolution 等于 1e-9(1 ns). + # self.time() 返回的时间等于 time.monotonic(), 单位是秒, "秒 + 纳秒" 几乎不影响结果. + # 下面的代码将 _scheduled 队列中小于当前时间(end_time) 的任务放入 _ready 队列. + end_time = self.time() + self._clock_resolution + while self._scheduled: + handle = self._scheduled[0] + if handle._when>= end_time: + break + handle = heapq.heappop(self._scheduled) + handle._scheduled = False + self._ready.append(handle) + + # This is the only place where callbacks are actually *called*. + # All other places just add them to ready. + # Note: We run all currently scheduled callbacks, but not any + # callbacks scheduled by callbacks run this time around -- + # they will be run the next time (after another I/O poll). + # Use an idiom that is thread-safe without using locks. 
+ # 执行 _ready 队列中的任务 + ntodo = len(self._ready) + for i in range(ntodo): + handle = self._ready.popleft() + if handle._cancelled: + continue + if self._debug: + try: + self._current_handle = handle + t0 = self.time() + handle._run() + dt = self.time() - t0 + if dt>= self.slow_callback_duration: + logger.warning('Executing %s took %.3f seconds', + _format_handle(handle), dt) + finally: + self._current_handle = None + else: + handle._run() + handle = None # Needed to break cycles when an exception occurs. +``` + +eventloop 的事件循环核心就是 run_forever 和 _run_once. + +eventloop 中有两个任务队列: + +- _scheduled + + _scheduled 是定时任务队列, 使用的数据结构是优先队列, 使用标准库中的 heapq 实现. 排序的 key 是任务的执行时间. 每次进行调度时, 将 _scheduled 队列中到达执行时间的任务取出来放入到 _ready 队列中, 因为 _scheduled 是优先队列, 所以取出任务这个操作非常高效. + +- _ready + + _ready 是准备被调度的任务队列, 当下一个调度点到来时会执行这些任务. _ready 使用的数据结构是标准库中的 collections.deque. + + +### call_at/call_later/call_soon + +call_at 的代码如下: + +```py + def call_at(self, when, callback, *args, context=None): + """Like call_later(), but uses an absolute time. + + Absolute time corresponds to the event loop's time() method. 
+ """ + self._check_closed() + if self._debug: + self._check_thread() + self._check_callback(callback, 'call_at') + timer = events.TimerHandle(when, callback, args, self, context) + if timer._source_traceback: + del timer._source_traceback[-1] + heapq.heappush(self._scheduled, timer) + timer._scheduled = True + return timer +``` + +TimerHandle 的代码如下: + +```py +class TimerHandle(Handle): + """Object returned by timed callback registration methods.""" + + __slots__ = ['_scheduled', '_when'] + + def __init__(self, when, callback, args, loop, context=None): + assert when is not None + super().__init__(callback, args, loop, context) + if self._source_traceback: + del self._source_traceback[-1] + self._when = when + self._scheduled = False + + def _repr_info(self): + info = super()._repr_info() + pos = 2 if self._cancelled else 1 + info.insert(pos, f'when={self._when}') + return info + + def __hash__(self): + return hash(self._when) + + def __lt__(self, other): + if isinstance(other, TimerHandle): + return self._when < other._when + return NotImplemented + + def __le__(self, other): + if isinstance(other, TimerHandle): + return self._when < other._when or self.__eq__(other) + return NotImplemented + + def __gt__(self, other): + if isinstance(other, TimerHandle): + return self._when> other._when + return NotImplemented + + def __ge__(self, other): + if isinstance(other, TimerHandle): + return self._when> other._when or self.__eq__(other) + return NotImplemented + + def __eq__(self, other): + if isinstance(other, TimerHandle): + return (self._when == other._when and + self._callback == other._callback and + self._args == other._args and + self._cancelled == other._cancelled) + return NotImplemented + + def cancel(self): + if not self._cancelled: + self._loop._timer_handle_cancelled(self) + super().cancel() + + def when(self): + """Return a scheduled callback time. + + The time is an absolute timestamp, using the same time + reference as loop.time(). 
+ """ + return self._when +``` + +上面的代码还是比较容易看懂的. + + +## 协程是如何执行的 ### Future -### network +```py +class Future: + + # Class variables serving as defaults for instance variables. + _state = _PENDING + _result = None + _exception = None + _loop = None + _source_traceback = None + _cancel_message = None + # A saved CancelledError for later chaining as an exception context. + _cancelled_exc = None + + __log_traceback = False + + def __init__(self, *, loop=None): + if loop is None: + self._loop = events.get_event_loop() + else: + self._loop = loop + self._callbacks = [] + + + def __schedule_callbacks(self): + """Internal: Ask the event loop to call all callbacks. + + The callbacks are scheduled to be called as soon as possible. Also + clears the callback list. + """ + callbacks = self._callbacks[:] + if not callbacks: + return + + self._callbacks[:] = [] + for callback, ctx in callbacks: + self._loop.call_soon(callback, self, context=ctx) + + def cancelled(self): + """Return True if the future was cancelled.""" + return self._state == _CANCELLED + + # Don't implement running(); see http://bugs.python.org/issue18699 + + def done(self): + """Return True if the future is done. + + Done means either that a result / exception are available, or that the + future was cancelled. + """ + return self._state != _PENDING + + def result(self): + """Return the result this future represents. + + If the future has been cancelled, raises CancelledError. If the + future's result isn't yet available, raises InvalidStateError. If + the future is done and has an exception set, this exception is raised. 
+ """ + if self._state == _CANCELLED: + exc = self._make_cancelled_error() + raise exc + if self._state != _FINISHED: + raise exceptions.InvalidStateError('Result is not ready.') + self.__log_traceback = False + if self._exception is not None: + raise self._exception + return self._result + + def add_done_callback(self, fn, *, context=None): + """Add a callback to be run when the future becomes done. + + The callback is called with a single argument - the future object. If + the future is already done when this is called, the callback is + scheduled with call_soon. + """ + if self._state != _PENDING: + self._loop.call_soon(fn, self, context=context) + else: + if context is None: + context = contextvars.copy_context() + self._callbacks.append((fn, context)) + + def set_result(self, result): + """Mark the future done and set its result. + + If the future is already done when this method is called, raises + InvalidStateError. + """ + if self._state != _PENDING: + raise exceptions.InvalidStateError(f'{self._state}: {self!r}') + self._result = result + self._state = _FINISHED + self.__schedule_callbacks() + + def __await__(self): + if not self.done(): + self._asyncio_future_blocking = True + yield self # This tells Task to wait for completion. + if not self.done(): + raise RuntimeError("await wasn't used with future") + return self.result() # May raise too. + + __iter__ = __await__ # make compatible with 'yield from'. +``` + +> 上面的代码经过大量删减. + +future 是协程, task, 和 eventloop 之间的媒介. 
+ +### Task + +```py +class Task(futures._PyFuture): + def __init__(self, coro, *, loop=None, name=None): + super().__init__(loop=loop) + self._loop.call_soon(self.__step, context=self._context) + + def __step(self, exc=None): + if self.done(): + raise exceptions.InvalidStateError( + f'_step(): already done: {self!r}, {exc!r}') + if self._must_cancel: + if not isinstance(exc, exceptions.CancelledError): + exc = self._make_cancelled_error() + self._must_cancel = False + coro = self._coro + self._fut_waiter = None + + _enter_task(self._loop, self) + # Call either coro.throw(exc) or coro.send(None). + try: + if exc is None: + # We use the `send` method directly, because coroutines + # don't have `__iter__` and `__next__` methods. + result = coro.send(None) + else: + result = coro.throw(exc) + except StopIteration as exc: + if self._must_cancel: + # Task is cancelled right before coro stops. + self._must_cancel = False + super().cancel(msg=self._cancel_message) + else: + super().set_result(exc.value) + except exceptions.CancelledError as exc: + # Save the original exception so we can chain it later. + self._cancelled_exc = exc + super().cancel() # I.e., Future.cancel(self). + except (KeyboardInterrupt, SystemExit) as exc: + super().set_exception(exc) + raise + except BaseException as exc: + super().set_exception(exc) + else: + blocking = getattr(result, '_asyncio_future_blocking', None) + if blocking is not None: + # Yielded Future must come from Future.__iter__(). 
+ if futures._get_loop(result) is not self._loop: + new_exc = RuntimeError( + f'Task {self!r} got Future ' + f'{result!r} attached to a different loop') + self._loop.call_soon( + self.__step, new_exc, context=self._context) + elif blocking: + if result is self: + new_exc = RuntimeError( + f'Task cannot await on itself: {self!r}') + self._loop.call_soon( + self.__step, new_exc, context=self._context) + else: + result._asyncio_future_blocking = False + result.add_done_callback( + self.__wakeup, context=self._context) + self._fut_waiter = result + if self._must_cancel: + if self._fut_waiter.cancel( + msg=self._cancel_message): + self._must_cancel = False + else: + new_exc = RuntimeError( + f'yield was used instead of yield from ' + f'in task {self!r} with {result!r}') + self._loop.call_soon( + self.__step, new_exc, context=self._context) + + elif result is None: + # Bare yield relinquishes control for one event loop iteration. + self._loop.call_soon(self.__step, context=self._context) + elif inspect.isgenerator(result): + # Yielding a generator is just wrong. + new_exc = RuntimeError( + f'yield was used instead of yield from for ' + f'generator in task {self!r} with {result!r}') + self._loop.call_soon( + self.__step, new_exc, context=self._context) + else: + # Yielding something else is an error. + new_exc = RuntimeError(f'Task got bad yield: {result!r}') + self._loop.call_soon( + self.__step, new_exc, context=self._context) + finally: + _leave_task(self._loop, self) + self = None # Needed to break cycles when an exception occurs. + + def __wakeup(self, future): + try: + future.result() + except BaseException as exc: + # This may also be a cancellation. + self.__step(exc) + else: + # Don't pass the value of `future.result()` explicitly, + # as `Future.__iter__` and `Future.__await__` don't need it. 
+ # If we call `_step(value, None)` instead of `_step()`, + # Python eval loop would use `.send(value)` method call, + # instead of `__next__()`, which is slower for futures + # that return non-generator iterators from their `__iter__`. + self.__step() + self = None # Needed to break cycles when an exception occurs. +``` + +> 上面的代码经过大量删减. + +Task 继承自 Future, 其中最关键的方法是 `__step`. + +Task 的 `__init__` 方法中的 `self._loop.call_soon(self.__step, context=self._context)` 将 `__step` 注册到 eventloop 的 _ready 任务队列. 在 eventloop 的第一个事件循环中, `__step` 就会被执行, 从而 Task 中的协程会被启动. + +协程执行到 `await` 一般会返回一个 Future, Task 会向 Future 中添加回调方法, 该方法就是 `__wakeup`, `__wakeup` 间接调用 `__step`. + +就这样通过 `__step` 不断执行协程, 直到协程结束. 如果协程正常结束(return), 那么会抛出 `StopIteration`, 此时 `__step` 会通过 `set_result` 将 `exc.value` 保存为 Task 的结果. + +### 以 asyncio.sleep 为例 + +```py +@types.coroutine +def __sleep0(): + """Skip one event loop run cycle. + + This is a private helper for 'asyncio.sleep()', used + when the 'delay' is set to 0. It uses a bare 'yield' + expression (which Task.__step knows how to handle) + instead of creating a Future object.
+ """ + yield + + +async def sleep(delay, result=None, *, loop=None): + """Coroutine that completes after a given time (in seconds).""" + if delay <= 0: + await __sleep0() + return result + + if loop is None: + loop = events.get_running_loop() + else: + warnings.warn("The loop argument is deprecated since Python 3.8, " + "and scheduled for removal in Python 3.10.", + DeprecationWarning, stacklevel=2) + + future = loop.create_future() + h = loop.call_later(delay, + futures._set_result_unless_cancelled, + future, result) + try: + return await future + finally: + h.cancel() +``` + +当睡眠的时间到期后, 通过 call_later 注册的定时任务 `futures._set_result_unless_cancelled` 会被 eventloop 执行, `_set_result_unless_cancelled` 的代码如下: + +```py +def _set_result_unless_cancelled(fut, result): + """Helper setting the result only if the future was not cancelled.""" + if fut.cancelled(): + return + fut.set_result(result) +``` + +`await future` 第一次被执行时, 返回了 future 本身, task 接收到 future 后, 调用 `future.add_done_callback` 向 future 添加了回调函数, 该回调函数负责再次执行 `await future`, 此时 `await future` 退出并返回 result. + +`future.set_result` 被执行时, 会执行 task 注册的回调函数(其实并没有直接执行, 而是将回调函数通过 `loop.call_soon` 放入到 eventloop 的任务队列中). + +## 总结 + +弄懂生成器的实现可以说就已经掌握了协程的一半, 另一半就是 eventloop 的实现原理以及 Task 和 Future. + +asyncio 中除了 eventloop, Task 和 Future 之外, 还有许多和网络相关的操作的协程实现, 这些代码我还没有阅读, 不过这些代码不会影响对于协程实现的理解. 
+ + diff --git a/ch15.md "b/345円244円232円347円272円277円347円250円213円.md" similarity index 100% rename from ch15.md rename to "345円244円232円347円272円277円347円250円213円.md" diff --git a/ch05.md "b/345円255円227円345円205円270円.md" similarity index 100% rename from ch05.md rename to "345円255円227円345円205円270円.md" diff --git a/ch05_dict_more_info.md "b/345円255円227円345円205円270円347円232円204円346円233円264円345円244円232円344円277円241円346円201円257円:346円235円245円350円207円252円344円273円243円347円240円201円344円270円255円347円232円204円346円263円250円351円207円212円.md" similarity index 100% rename from ch05_dict_more_info.md rename to "345円255円227円345円205円270円347円232円204円346円233円264円345円244円232円344円277円241円346円201円257円:346円235円245円350円207円252円344円273円243円347円240円201円344円270円255円347円232円204円346円263円250円351円207円212円.md" diff --git a/ch03.md "b/345円255円227円347円254円246円344円270円262円.md" similarity index 100% rename from ch03.md rename to "345円255円227円347円254円246円344円270円262円.md" diff --git a/ch02.md "b/346円225円264円346円225円260円.md" similarity index 100% rename from ch02.md rename to "346円225円264円346円225円260円.md" diff --git a/ch12.md "b/347円261円273円345円236円213円347円263円273円347円273円237円.md" similarity index 100% rename from ch12.md rename to "347円261円273円345円236円213円347円263円273円347円273円237円.md" diff --git "a/350円231円232円346円213円237円346円234円272円.md" "b/350円231円232円346円213円237円346円234円272円.md" index 0645f7a..db21cf1 100644 --- "a/350円231円232円346円213円237円346円234円272円.md" +++ "b/350円231円232円346円213円237円346円234円272円.md" @@ -542,3 +542,9 @@ sys 模块增加了很多设置 hook 的方法, 具体可以查看 [sys 文档]( ``` +## 总结 + + +一个用 Python 写的 Python 解释器: + +- [a-python-interpreter-written-in-python](http://aosabook.org/en/500L/a-python-interpreter-written-in-python.html) \ No newline at end of file From 1daac1816d9b73990ddc1ba145561bbd9a058a30 Mon Sep 17 00:00:00 2001 From: ausaki Date: Sat, 6 Mar 2021 15:53:33 +0800 Subject: [PATCH 3/4] update --- README.md | 6 +- trashcan.md | 105 ++ tuple.md | 302 ++++ "345円207円275円346円225円260円.md" | 62 +- 
"345円274円261円345円274円225円347円224円250円.md" | 0 ...17350円277円260円347円254円246円(descriptor).md" | 770 ++++++++++ "346円235円202円351円241円271円.md" | 76 + "346円250円241円345円235円227円.md" | 221 +++ ...73345円236円213円347円263円273円347円273円237円.md" | 754 +++++----- ...47263円273円347円273円237円344円271円213円 MRO.md" | 40 + ...7263円273円347円273円237円344円271円213円 slot.md" | 106 ++ ...263円273円347円273円237円344円271円213円 super.md" | 372 +++++ ...37344円271円213円345円205円203円347円261円273円.md" | 1240 +++++++++++++++++ ...36346円200円247円350円256円277円351円227円256円.md" | 932 +++++++++++++ 14 files changed, 4588 insertions(+), 398 deletions(-) create mode 100644 trashcan.md create mode 100644 tuple.md create mode 100644 "345円274円261円345円274円225円347円224円250円.md" create mode 100644 "346円217円217円350円277円260円347円254円246円(descriptor).md" create mode 100644 "346円235円202円351円241円271円.md" create mode 100644 "346円250円241円345円235円227円.md" create mode 100644 "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 MRO.md" create mode 100644 "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 slot.md" create mode 100644 "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 super.md" create mode 100644 "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円205円203円347円261円273円.md" create mode 100644 "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円261円236円346円200円247円350円256円277円351円227円256円.md" diff --git a/README.md b/README.md index 6fc12f6..34c57b9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Python 3.9 源代码阅读笔记 -之前看了 "Python 源码剖析", 这本书是基于 Python 2.5 的, 现在的 Python 已经发生了很大的改变. 因此, 在这里记录下阅读 Python 3.9 的源代码的笔记, 当然阅读内容主要是 Python3 某些新加的特性. +之前看了 "Python 源码剖析", 这本书是基于 Python 2.5 的, 现在的 Python 已经发生了很大的改变, 因此, 我决定重新阅读 Python 的源码. + +在这里记录下阅读 [Python 3.9](https://github.com/ausaki/cpython/tree/v3.9.notes) 的源代码的笔记. 
- [Python 虚拟机](ceval.md) @@ -8,5 +10,5 @@ -[源码注释分支](https://github.com/ausaki/python) +[源码注释分支](https://github.com/ausaki/cpython/tree/v3.9.notes) diff --git a/trashcan.md b/trashcan.md new file mode 100644 index 0000000..ad66979 --- /dev/null +++ b/trashcan.md @@ -0,0 +1,105 @@ +# Trashcan + +注释已经描述得非常清楚, 无需多言. + +```c +//Include/cpython/object.h + +/* Trashcan mechanism, thanks to Christian Tismer. + +When deallocating a container object, it's possible to trigger an unbounded +chain of deallocations, as each Py_DECREF in turn drops the refcount on "the +next" object in the chain to 0. This can easily lead to stack overflows, +especially in threads (which typically have less stack space to work with). + +A container object can avoid this by bracketing the body of its tp_dealloc +function with a pair of macros: + +static void +mytype_dealloc(mytype *p) +{ + ... declarations go here ... + + PyObject_GC_UnTrack(p); // must untrack first + Py_TRASHCAN_BEGIN(p, mytype_dealloc) + ... The body of the deallocator goes here, including all calls ... + ... to Py_DECREF on contained objects. ... + Py_TRASHCAN_END // there should be no code after this +} + +CAUTION: Never return from the middle of the body! If the body needs to +"get out early", put a label immediately before the Py_TRASHCAN_END +call, and goto it. Else the call-depth counter (see below) will stay +above 0 forever, and the trashcan will never get emptied. + +How it works: The BEGIN macro increments a call-depth counter. So long +as this counter is small, the body of the deallocator is run directly without +further ado. But if the counter gets large, it instead adds p to a list of +objects to be deallocated later, skips the body of the deallocator, and +resumes execution after the END macro. The tp_dealloc routine then returns +without deallocating anything (and so unbounded call-stack depth is avoided). 
+ +When the call stack finishes unwinding again, code generated by the END macro +notices this, and calls another routine to deallocate all the objects that +may have been added to the list of deferred deallocations. In effect, a +chain of N deallocations is broken into (N-1)/(PyTrash_UNWIND_LEVEL-1) pieces, +with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. + +Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base +class, we need to ensure that the trashcan is only triggered on the tp_dealloc +of the actual class being deallocated. Otherwise we might end up with a +partially-deallocated object. To check this, the tp_dealloc function must be +passed as second argument to Py_TRASHCAN_BEGIN(). +*/ + +/* This is the old private API, invoked by the macros before 3.2.4. + Kept for binary compatibility of extensions using the stable ABI. */ +PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_destroy_chain(void); + +/* This is the old private API, invoked by the macros before 3.9. + Kept for binary compatibility of extensions using the stable ABI. */ +PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void); + +/* Forward declarations for PyThreadState */ +struct _ts; + +/* Python 3.9 private API, invoked by the macros below. */ +PyAPI_FUNC(int) _PyTrash_begin(struct _ts *tstate, PyObject *op); +PyAPI_FUNC(void) _PyTrash_end(struct _ts *tstate); + +#define PyTrash_UNWIND_LEVEL 50 + +// 注意仔细看 Py_TRASHCAN_BEGIN_CONDITION 这个宏的定义, 它不是一个完整的语句块, do 语句的 body 没有结束. +#define Py_TRASHCAN_BEGIN_CONDITION(op, cond) \ + do { \ + PyThreadState *_tstate = NULL; \ + /* If "cond" is false, then _tstate remains NULL and the deallocator \ + * is run normally without involving the trashcan */ \ + if (cond) { \ + _tstate = PyThreadState_GET(); \ + if (_PyTrash_begin(_tstate, _PyObject_CAST(op))) { \ + break; \ + } \ + } + /* The body of the deallocator is here. 
*/ +#define Py_TRASHCAN_END \ + if (_tstate) { \ + _PyTrash_end(_tstate); \ + } \ + } while (0); + +#define Py_TRASHCAN_BEGIN(op, dealloc) \ + Py_TRASHCAN_BEGIN_CONDITION(op, \ + Py_TYPE(op)->tp_dealloc == (destructor)(dealloc)) + +/* For backwards compatibility, these macros enable the trashcan + * unconditionally */ +#define Py_TRASHCAN_SAFE_BEGIN(op) Py_TRASHCAN_BEGIN_CONDITION(op, 1) +#define Py_TRASHCAN_SAFE_END(op) Py_TRASHCAN_END +``` + +我最喜欢这样的注释, 对新加的某个特性进行详细的描述. 遗憾的是 CPython 的代码中有许多没有详细注释的代码, 对于刚开始阅读 CPython 代码的人来说非常难以理解. + +trashcan 的用法可以查看 tuple 的 tp_dealloc 函数 (即 tupledealloc). \ No newline at end of file diff --git a/tuple.md b/tuple.md new file mode 100644 index 0000000..3483762 --- /dev/null +++ b/tuple.md @@ -0,0 +1,302 @@ +# Tuple + +PyTupleObject: + +```c +typedef struct { + PyObject_VAR_HEAD + /* ob_item contains space for 'ob_size' elements. + Items must normally not be NULL, except during construction when + the tuple is not yet visible outside the function that builds it. */ + PyObject *ob_item[1]; +} PyTupleObject; + +PyTypeObject PyTuple_Type = { + /* PyVarObject + { + { + 1, // ob_refcnt + &PyType_Type, // ob_type + }, // ob_base(PyObject) + 0, // ob_size + } + */ + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "tuple", /* tp_name */ + sizeof(PyTupleObject) - sizeof(PyObject *), /* tp_basicsize */ + sizeof(PyObject *), /* tp_itemsize */ + (destructor)tupledealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)tuplerepr, /* tp_repr */ + 0, /* tp_as_number */ + &tuple_as_sequence, /* tp_as_sequence */ + &tuple_as_mapping, /* tp_as_mapping */ + (hashfunc)tuplehash, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TUPLE_SUBCLASS, /* tp_flags */ + tuple_new__doc__, /* tp_doc */ + (traverseproc)tupletraverse, /* 
tp_traverse */ + 0, /* tp_clear */ + tuplerichcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + tuple_iter, /* tp_iter */ + 0, /* tp_iternext */ + tuple_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + tuple_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ + .tp_vectorcall = tuple_vectorcall, +}; +``` + + +注意, PyTupleObject 中的 ob_item 实际上不占据 PyTupleObject 对象的空间, ob_item 是为了方便访问变长元素空间而设置的. 这一点可以从 PyTuple_Type 的 tp_basicsize 看出来, `tp_basicsize = sizeof(PyTupleObject) - sizeof(PyObject *)`. + +PyTupleObject 的内存布局: + +``` ++---------------+-------------------------------+ +| PyVarObject | ... ob_item ... | ++---------------+-------------------------------+ + tp_basicsize tp_itemsize * ob_size +``` + +## 对象缓存池 + +tuple 的对象缓存池仅保存长度小于 PyTuple_MAXSAVESIZE 的 tuple, 而且对缓存池的大小也有限制. + +```c +/* Speed optimization to avoid frequent malloc/free of small tuples */ +// 优化 small tuple 的分配效率, 如果 tuple 的长度小于 PyTuple_MAXSAVESIZE, 那么将其缓存在 freelist. +#ifndef PyTuple_MAXSAVESIZE +#define PyTuple_MAXSAVESIZE 20 /* Largest tuple to save on free list */ +#endif +#ifndef PyTuple_MAXFREELIST +#define PyTuple_MAXFREELIST 2000 /* Maximum number of tuples of each size to save */ +#endif + +#if PyTuple_MAXSAVESIZE > 0 +/* Entries 1 up to PyTuple_MAXSAVESIZE are free lists, entry 0 is the empty + tuple () of which at most one instance will be allocated. +*/ +static PyTupleObject *free_list[PyTuple_MAXSAVESIZE]; +static int numfree[PyTuple_MAXSAVESIZE]; +#endif +``` + +使用对象缓存池的代码可以查看 PyTuple_New, tuple_alloc, tupledealloc. + +## tuple 的创建过程 + +tuple 的创建过程: `PyTuple_New -> tuple_alloc -> PyObject_GC_NewVar -> _PyObject_GC_NewVar` + +### PyTuple_New + +```c +PyObject * +PyTuple_New(Py_ssize_t size) +{ + PyTupleObject *op; + // 如果 size == 0, 那么直接返回 free_list[0]. + // 因为 tuple 是不可变对象, 所以可以这样优化空 tuple 的创建流程. 
+ // 所有的空 tuple 都是同一个对象, 就好像空字符串一样. +#if PyTuple_MAXSAVESIZE> 0 + if (size == 0 && free_list[0]) { + op = free_list[0]; + Py_INCREF(op); + return (PyObject *) op; + } +#endif + op = tuple_alloc(size); + if (op == NULL) { + return NULL; + } + for (Py_ssize_t i = 0; i < size; i++) { + op->ob_item[i] = NULL; + } +#if PyTuple_MAXSAVESIZE> 0 + if (size == 0) { + free_list[0] = op; + ++numfree[0]; + Py_INCREF(op); /* extra INCREF so that this is never freed */ + } +#endif + tuple_gc_track(op); + return (PyObject *) op; +} +``` + +```c +static PyTupleObject * +tuple_alloc(Py_ssize_t size) +{ + PyTupleObject *op; + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } +#if PyTuple_MAXSAVESIZE> 0 + if (size < PyTuple_MAXSAVESIZE && (op = free_list[size]) != NULL) { + assert(size != 0); + // 下面这行代码类似于 free_list[size] = free_list[size].next + free_list[size] = (PyTupleObject *) op->ob_item[0]; + numfree[size]--; + /* Inline PyObject_InitVar */ +#ifdef Py_TRACE_REFS + Py_SIZE(op) = size; + Py_TYPE(op) = &PyTuple_Type; +#endif + _Py_NewReference((PyObject *)op); + } + else +#endif + { + /* Check for overflow */ + if ((size_t)size> ((size_t)PY_SSIZE_T_MAX - (sizeof(PyTupleObject) - + sizeof(PyObject *))) / sizeof(PyObject *)) { + return (PyTupleObject *)PyErr_NoMemory(); + } + op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size); + if (op == NULL) + return NULL; + } + return op; +} +``` + +tuple_alloc 会先检查对象缓存池, 如果对象缓存池没有符合的数据, 那么调用 PyObject_GC_NewVar 分配内存. + +### _PyObject_GC_NewVar + +```c +// Modules/gcmodule.c +PyVarObject * +_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + size_t size; + PyVarObject *op; + + if (nitems < 0) { + PyErr_BadInternalCall(); + return NULL; + } + size = _PyObject_VAR_SIZE(tp, nitems); + op = (PyVarObject *) _PyObject_GC_Malloc(size); + if (op != NULL) + // PyObject_INIT_VAR 负责设置 op->ob_size 和 op->ob_type + op = PyObject_INIT_VAR(op, tp, nitems); + return op; +} +``` + +_PyObject_GC_NewVar 负责分配 PyVarObject 的内存. 
_PyObject_VAR_SIZE 计算该对象占用的内存, 代码如下: + +```c +#define _PyObject_VAR_SIZE(typeobj, nitems) \ + _Py_SIZE_ROUND_UP((typeobj)->tp_basicsize + \ + (nitems)*(typeobj)->tp_itemsize, \ + SIZEOF_VOID_P) +``` + +可见 _PyObject_VAR_SIZE 对计算的值进行了向上取整, 向上取整是为了内存对齐. 下面是 Python 中定义的一些取整的宏: + +```c +/* Below "a" is a power of 2. */ +/* Round down size "n" to be a multiple of "a". */ +#define _Py_SIZE_ROUND_DOWN(n, a) ((size_t)(n) & ~(size_t)((a) - 1)) +/* Round up size "n" to be a multiple of "a". */ +#define _Py_SIZE_ROUND_UP(n, a) (((size_t)(n) + \ + (size_t)((a) - 1)) & ~(size_t)((a) - 1)) +/* Round pointer "p" down to the closest "a"-aligned address <= "p". */ +#define _Py_ALIGN_DOWN(p, a) ((void *)((uintptr_t)(p) & ~(uintptr_t)((a) - 1))) +/* Round pointer "p" up to the closest "a"-aligned address>= "p". */ +#define _Py_ALIGN_UP(p, a) ((void *)(((uintptr_t)(p) + \ + (uintptr_t)((a) - 1)) & ~(uintptr_t)((a) - 1))) +/* Check if pointer "p" is aligned to "a"-bytes boundary. */ +#define _Py_IS_ALIGNED(p, a) (!((uintptr_t)(p) & (uintptr_t)((a) - 1))) +``` + +这些宏算是业界非常常见的写法. + +分配好对象的内存后, 通过 PyObject_INIT_VAR 设置 op->ob_size 和 op->ob_type. 
+ +### _PyObject_GC_Malloc + +```c +// Modules/gcmodule.c + +static PyObject * +_PyObject_GC_Alloc(int use_calloc, size_t basicsize) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + if (basicsize> PY_SSIZE_T_MAX - sizeof(PyGC_Head)) { + return _PyErr_NoMemory(tstate); + } + size_t size = sizeof(PyGC_Head) + basicsize; + + PyGC_Head *g; + if (use_calloc) { + g = (PyGC_Head *)PyObject_Calloc(1, size); + } + else { + g = (PyGC_Head *)PyObject_Malloc(size); + } + if (g == NULL) { + return _PyErr_NoMemory(tstate); + } + assert(((uintptr_t)g & 3) == 0); // g must be aligned 4bytes boundary + + g->_gc_next = 0; + g->_gc_prev = 0; + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count> gcstate->generations[0].threshold && + gcstate->enabled && + gcstate->generations[0].threshold && + !gcstate->collecting && + !_PyErr_Occurred(tstate)) + { + gcstate->collecting = 1; + collect_generations(tstate); + gcstate->collecting = 0; + } + PyObject *op = FROM_GC(g); + return op; +} + +PyObject * +_PyObject_GC_Malloc(size_t basicsize) +{ + return _PyObject_GC_Alloc(0, basicsize); +} +``` + +_PyObject_GC_Malloc 为对象分配内存的同时向对象添加 PyGC_Head. _PyObject_GC_Malloc 最终又是通过调用 PyObject_Malloc 分配内存, 而 PyObject_Malloc 最终在内存池中分配内存. + + +## 总结 + +通过阅读 tuple 的相关实现的代码, 可以了解 CPython 在内部是如何表示对象的(PyTupleObject, PyTuple_Type), 以及如何为对象分配内存. + +## 其它 + +在看 tuple 的代码时, 发现了一个 tuple 所使用的哈希算法, xxhash. + +xxhash 的 [GitHub 地址](https://github.com/Cyan4973/xxHash). + diff --git "a/345円207円275円346円225円260円.md" "b/345円207円275円346円225円260円.md" index 6d4397f..db76363 100644 --- "a/345円207円275円346円225円260円.md" +++ "b/345円207円275円346円225円260円.md" @@ -244,6 +244,12 @@ case TARGET(CALL_FUNCTION_EX): { `CALL_FUNCTION` 和 `CALL_FUNCTION_KW` 会调用 `call_function` 进行函数调用, 而 `CALL_FUNCTION_EX` 会调用 `do_call_core` 进行函数. +`CALL_FUNCTION` 用于实参只有位置参数的情况, 例如 `f(1, 2, 3)`. 
+ +`CALL_FUNCTION_KW` 用于实参包括位置参数和关键字参数的情况, 例如 `f(1, 2, a=3, b=4)`, `f(a=3, b=4)`. + +`CALL_FUNCTION_EX` 用于实参是可变参数的情况, 例如 `f(*[1, 2], **{'a': 3, 'b': 4})` + ### call_function ```c @@ -488,21 +494,51 @@ def f(): a = 1 def g(): b = a + 1 + g() ``` 字节码如下: ``` +Disassembly of : 2 0 LOAD_CONST 1 (1) 2 STORE_DEREF 0 (a) 3 4 LOAD_CLOSURE 0 (a) 6 BUILD_TUPLE 1 - 8 LOAD_CONST 2 () + 8 LOAD_CONST 2 () 10 LOAD_CONST 3 ('f..g') 12 MAKE_FUNCTION 8 (closure) 14 STORE_FAST 0 (g) -``` + + 5 16 LOAD_FAST 0 (g) + 18 CALL_FUNCTION 0 + 20 POP_TOP + 22 LOAD_CONST 0 (None) + 24 RETURN_VALUE + +Disassembly of : + 4 0 LOAD_DEREF 0 (a) + 2 LOAD_CONST 1 (1) + 4 BINARY_ADD + 6 STORE_FAST 0 (b) + 8 LOAD_CONST 0 (None) + 10 RETURN_VALUE +``` + +`STORE_DEREF` 将值保存到当前 frame 的 cellvars 中, cellvars 中的元素类型是 PyCellObject, 该值被包裹在 PyCellObject 中. + +```c +case TARGET(STORE_DEREF): { + PyObject *v = POP(); + PyObject *cell = freevars[oparg]; + PyObject *oldobj = PyCell_GET(cell); + PyCell_SET(cell, v); + Py_XDECREF(oldobj); + DISPATCH(); +} +``` + `LOAD_CLOSURE` 和 `MAKE_FUNCTION` 对应的处理器如下: ```c @@ -532,11 +568,11 @@ case TARGET(MAKE_FUNCTION): { } ``` -可见 `LOAD_CLOSURE` 是直接从当前 frame 的 freevars 数组读取值, 速度相较于 `LOAD_NAME` 更快. +可见 `LOAD_CLOSURE` 是直接从当前 frame 的 freevars 数组读取值, 速度相较于 `LOAD_NAME` 更快. 注意: `LOAD_CLOSURE` 是从当前 frame 的 cellvars 中读取, 获取的值是 PyCellObject 类型. -`func ->func_closure` 保存了内层函数引用外层函数的变量. +`func->func_closure` 保存了内层函数引用外层函数的变量, 类型是 tuple. -在调用函数时, `func ->func_closure` 保存到 `f->f_localplus` 中, 代码如下: +在调用函数时, `func->func_closure` 保存到 `f->f_localplus` 中, 代码如下: ```c PyObject * @@ -558,6 +594,22 @@ _PyEval_EvalCode(PyThreadState *tstate, } ``` +在内层函数 g 中, 使用 `LOAD_DEREF` 读取引用自外层函数作用域的变量. 
+ +```c +case TARGET(LOAD_DEREF): { + PyObject *cell = freevars[oparg]; + PyObject *value = PyCell_GET(cell); + if (value == NULL) { + format_exc_unbound(tstate, co, oparg); + goto error; + } + Py_INCREF(value); + PUSH(value); + DISPATCH(); +} +``` + ## 总结 函数调用在 CPython 内部的实现非常复杂, 难以用简单的语言来概括, 想要理清内部的各种调用链还是要仔细看代码. diff --git "a/345円274円261円345円274円225円347円224円250円.md" "b/345円274円261円345円274円225円347円224円250円.md" new file mode 100644 index 0000000..e69de29 diff --git "a/346円217円217円350円277円260円347円254円246円(descriptor).md" "b/346円217円217円350円277円260円347円254円246円(descriptor).md" new file mode 100644 index 0000000..a3be18a --- /dev/null +++ "b/346円217円217円350円277円260円347円254円246円(descriptor).md" @@ -0,0 +1,770 @@ +# 描述符 + +描述符在 Python 的类型系统中扮演着一个非常重要的角色, property, `__slots__`, staticmethod, classmethod 等等都是通过描述符实现的. + +本文主要介绍描述符的底层原理, 关于如何使用描述符可以参考 Python 的官方文档或者下面的[参考资料](#参考资料) + +## 基本数据结构和方法 + +PyDescrObject 是最基本的结构, 所有的 descriptor 都会包含它. + +```c +// Include/descrobject.h + +typedef struct { + PyObject_HEAD + PyTypeObject *d_type; // descriptor 所在的类, 创建 descriptor 时被设置. + PyObject *d_name; // descriptor 的名字, 例如 `a = SomeDesc()`, 那么 d_name 就等于 a. 又例如 `__slots__ = ['a', 'b']`, d_name 就等于 a(或者 b). + PyObject *d_qualname; +} PyDescrObject; + +#define PyDescr_COMMON PyDescrObject d_common +``` + +descr_new 是基本的创建 descriptor 的方法. + +```c +static PyDescrObject * +descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) +{ + PyDescrObject *descr; + + descr = (PyDescrObject *)PyType_GenericAlloc(descrtype, 0); + if (descr != NULL) { + Py_XINCREF(type); + descr->d_type = type; + descr->d_name = PyUnicode_InternFromString(name); + if (descr->d_name == NULL) { + Py_DECREF(descr); + descr = NULL; + } + else { + descr->d_qualname = NULL; + } + } + return descr; +} +``` + +## PyMethodDescrObject + +在 PyType_Ready 方法中, PyTypeObject 中的 tp_methods 成员会被转换为 PyMethodDescrObject, 然后保存在 tp_dict 中. + +从结构上来看, PyMethodDescrObject 就是对 PyMethodDef 的简单包装. 
+ +通过将 PyMethodDef 包装为一个 descriptor, 当想要调用对应的方法时都需要通过 descriptor 的 "中介" 方法, 即 tp_call. + +```c +typedef struct { + PyDescr_COMMON; + PyMethodDef *d_method; + vectorcallfunc vectorcall; +} PyMethodDescrObject; +``` + +```c +PyTypeObject PyMethodDescr_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "method_descriptor", + sizeof(PyMethodDescrObject), + 0, + (destructor)descr_dealloc, /* tp_dealloc */ + offsetof(PyMethodDescrObject, vectorcall), /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)method_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + PyVectorcall_Call, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_HAVE_VECTORCALL | + Py_TPFLAGS_METHOD_DESCRIPTOR, /* tp_flags */ + 0, /* tp_doc */ + descr_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + descr_methods, /* tp_methods */ + descr_members, /* tp_members */ + method_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)method_get, /* tp_descr_get */ + 0, /* tp_descr_set */ +}; +/* This is for METH_CLASS in C, not for "f = classmethod(f)" in Python! 
*/ +PyTypeObject PyClassMethodDescr_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "classmethod_descriptor", + sizeof(PyMethodDescrObject), + 0, + (destructor)descr_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)method_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + (ternaryfunc)classmethoddescr_call, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + descr_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + descr_methods, /* tp_methods */ + descr_members, /* tp_members */ + method_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)classmethod_get, /* tp_descr_get */ + 0, /* tp_descr_set */ +}; +``` + +注意: PyMethodDescr_Type 和 PyClassMethodDescr_Type 是类似的. PyClassMethodDescr_Type 是为了 CPython 内部的类方法准备的, 不是 Python 层面的 `@classmethod`. + +### 创建 PyMethodDescrObject + +不同类型的方法会设置不同的 `descr->vectorcall`. 
+ +```c +PyObject * +PyDescr_NewMethod(PyTypeObject *type, PyMethodDef *method) +{ + /* Figure out correct vectorcall function to use */ + vectorcallfunc vectorcall; + switch (method->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | + METH_O | METH_KEYWORDS | METH_METHOD)) + { + case METH_VARARGS: + vectorcall = method_vectorcall_VARARGS; + break; + case METH_VARARGS | METH_KEYWORDS: + vectorcall = method_vectorcall_VARARGS_KEYWORDS; + break; + case METH_FASTCALL: + vectorcall = method_vectorcall_FASTCALL; + break; + case METH_FASTCALL | METH_KEYWORDS: + vectorcall = method_vectorcall_FASTCALL_KEYWORDS; + break; + case METH_NOARGS: + vectorcall = method_vectorcall_NOARGS; + break; + case METH_O: + vectorcall = method_vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + vectorcall = method_vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + default: + PyErr_Format(PyExc_SystemError, + "%s() method: bad call flags", method->ml_name); + return NULL; + } + + PyMethodDescrObject *descr; + + descr = (PyMethodDescrObject *)descr_new(&PyMethodDescr_Type, + type, method->ml_name); + if (descr != NULL) { + descr->d_method = method; + descr->vectorcall = vectorcall; + } + return (PyObject *)descr; +} + +PyObject * +PyDescr_NewClassMethod(PyTypeObject *type, PyMethodDef *method) +{ + PyMethodDescrObject *descr; + + descr = (PyMethodDescrObject *)descr_new(&PyClassMethodDescr_Type, + type, method->ml_name); + if (descr != NULL) + descr->d_method = method; + return (PyObject *)descr; +} +``` + +### tp_call/vectorcall + +PyMethodDescrObject 支持 vectorcall, 最终调用的是 `descr->vectorcall` + +以 method_vectorcall_VARARGS 为例: + +```c +static inline funcptr +method_enter_call(PyThreadState *tstate, PyObject *func) +{ + if (_Py_EnterRecursiveCall(tstate, " while calling a Python object")) { + return NULL; + } + return (funcptr)((PyMethodDescrObject *)func)->d_method->ml_meth; +} + +static PyObject * +method_vectorcall_VARARGS( + PyObject *func, PyObject *const 
*args, size_t nargsf, PyObject *kwnames) +{ + PyThreadState *tstate = _PyThreadState_GET(); + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + if (method_check_args(func, args, nargs, kwnames)) { + return NULL; + } + PyObject *argstuple = _PyTuple_FromArray(args+1, nargs-1); + if (argstuple == NULL) { + return NULL; + } + PyCFunction meth = (PyCFunction)method_enter_call(tstate, func); + if (meth == NULL) { + Py_DECREF(argstuple); + return NULL; + } + PyObject *result = meth(args[0], argstuple); + Py_DECREF(argstuple); + _Py_LeaveRecursiveCall(tstate); + return result; +} +``` + +可以看到, 最终调用的是 `descr->d_method->ml_meth`. + +PyClassMethodDescr_Type 的 tp_call 如下: + +```c +/* Instances of classmethod_descriptor are unlikely to be called directly. + For one, the analogous class "classmethod" (for Python classes) is not + callable. Second, users are not likely to access a classmethod_descriptor + directly, since it means pulling it from the class __dict__. + + This is just an excuse to say that this doesn't need to be optimized: + we implement this simply by calling __get__ and then calling the result. + + 一个例子: dict.fromkeys('abc') 会直接调用 descr 的 classmethod_get, 而不会调用 classmethoddescr_call. + 但是, dict.__dict__['fromkeys'](dict, 'abc') 就会调用 classmethoddescr_call. 
+*/ +static PyObject * +classmethoddescr_call(PyMethodDescrObject *descr, PyObject *args, + PyObject *kwds) +{ + Py_ssize_t argc = PyTuple_GET_SIZE(args); + if (argc < 1) { + PyErr_Format(PyExc_TypeError, + "descriptor '%V' of '%.100s' " + "object needs an argument", + descr_name((PyDescrObject *)descr), "?", + PyDescr_TYPE(descr)->tp_name); + return NULL; + } + PyObject *self = PyTuple_GET_ITEM(args, 0); + // bound 是一个 PyCMethodObject + PyObject *bound = classmethod_get(descr, NULL, self); + if (bound == NULL) { + return NULL; + } + PyObject *res = PyObject_VectorcallDict(bound, _PyTuple_ITEMS(args)+1, + argc-1, kwds); + Py_DECREF(bound); + return res; +} +``` + +通过上述代码可以知道 CPython 内部是如何调用 C 编写的类方法的, 注意这和 Python 层面的 classmethod 不一样. + +## PyMemberDescrObject + +在 PyType_Ready 方法中, PyTypeObject 中的 tp_members 成员会被转换为 PyMemberDescrObject, 然后保存在 tp_dict 中. + +从结构上来看, PyMemberDescrObject 就是对 PyMemberDef 的简单包装. + +通过将 PyMemberDef 包装为一个 descriptor, 想要访问属性时都需要通过 descriptor 的 "中介" 方法, 即 tp_descr_get 和 tp_descr_set. + +Python 层面的 `__slots__` 和 descriptor 成员都是通过 PyMemberDescrObject 实现的. 
+ +PyMemberDescrObject: + +```c +// Include/descrobject.h + +typedef struct { + PyDescr_COMMON; + struct PyMemberDef *d_member; +} PyMemberDescrObject; +``` + +PyMemberDescr_Type: + +```c +PyTypeObject PyMemberDescr_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "member_descriptor", + sizeof(PyMemberDescrObject), + 0, + (destructor)descr_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)member_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + descr_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + descr_methods, /* tp_methods */ + descr_members, /* tp_members */ + member_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)member_get, /* tp_descr_get */ + (descrsetfunc)member_set, /* tp_descr_set */ +}; +``` + +### 创建 PyMemberDescrObject + +PyDescr_NewMember 方法: + +```c +// Objects/descrobject.c + +PyObject * +PyDescr_NewMember(PyTypeObject *type, PyMemberDef *member) +{ + PyMemberDescrObject *descr; + + descr = (PyMemberDescrObject *)descr_new(&PyMemberDescr_Type, + type, member->name); + if (descr != NULL) + descr->d_member = member; + return (PyObject *)descr; +} +``` + +### PyMemberDescrObject 的 get/set + +member_get: + +```c +// Objects/descrobject.c + +static PyObject * +member_get(PyMemberDescrObject *descr, PyObject *obj, PyObject *type) +{ + PyObject *res; + + if (descr_check((PyDescrObject *)descr, obj, &res)) + return res; + + if (descr->d_member->flags & READ_RESTRICTED) { + if (PySys_Audit("object.__getattr__", "Os", + obj ? 
obj : Py_None, descr->d_member->name) < 0) { + return NULL; + } + } + + return PyMember_GetOne((char *)obj, descr->d_member); +} +``` + +```c +// Python/structmember.c +// 有删减 + +PyObject * +PyMember_GetOne(const char *addr, PyMemberDef *l) +{ + PyObject *v; + + addr += l->offset; + switch (l->type) { + case T_BOOL: + v = PyBool_FromLong(*(char*)addr); + break; + case T_BYTE: + v = PyLong_FromLong(*(char*)addr); + break; + case T_INT: + v = PyLong_FromLong(*(int*)addr); + break; + case T_OBJECT: + v = *(PyObject **)addr; + if (v == NULL) + v = Py_None; + Py_INCREF(v); + break; + case T_OBJECT_EX: + v = *(PyObject **)addr; + if (v == NULL) + PyErr_SetString(PyExc_AttributeError, l->name); + Py_XINCREF(v); + break; + default: + PyErr_SetString(PyExc_SystemError, "bad memberdescr type"); + v = NULL; + } + return v; +} +``` + +member_set: + +```c +// Objects/descrobject.c + +static int +member_set(PyMemberDescrObject *descr, PyObject *obj, PyObject *value) +{ + int res; + + if (descr_setcheck((PyDescrObject *)descr, obj, value, &res)) + return res; + return PyMember_SetOne((char *)obj, descr->d_member, value); +} +``` + +```c +// Python/structmember.c +// 有删减 + +int +PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) +{ + PyObject *oldv; + + addr += l->offset; + + if ((l->flags & READONLY)) + { + PyErr_SetString(PyExc_AttributeError, "readonly attribute"); + return -1; + } + if (v == NULL) { + if (l->type == T_OBJECT_EX) { + /* Check if the attribute is set. 
*/ + if (*(PyObject **)addr == NULL) { + PyErr_SetString(PyExc_AttributeError, l->name); + return -1; + } + } + else if (l->type != T_OBJECT) { + PyErr_SetString(PyExc_TypeError, + "can't delete numeric/char attribute"); + return -1; + } + } + switch (l->type) { + case T_OBJECT: + case T_OBJECT_EX: + Py_XINCREF(v); + oldv = *(PyObject **)addr; + *(PyObject **)addr = v; + Py_XDECREF(oldv); + break; + default: + PyErr_Format(PyExc_SystemError, + "bad memberdescr type for %s", l->name); + return -1; + } + return 0; +} +``` + +## PyGetSetDescrObject + +PyGetSetDescrObject 是对 PyGetSetDef 的简单包装, 感觉和 PyMemberDescrObject 类似, 都是用于属性访问的. + +在 PyType_Ready 方法中会调用 add_getset 方法将 PyGetSetDef 转换为 PyGetSetDescrObject, 然后添加到 tp_dict 中. + +```c +typedef struct { + PyDescr_COMMON; + PyGetSetDef *d_getset; +} PyGetSetDescrObject; +``` + +```c +PyTypeObject PyGetSetDescr_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "getset_descriptor", + sizeof(PyGetSetDescrObject), + 0, + (destructor)descr_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)getset_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + descr_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + descr_members, /* tp_members */ + getset_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)getset_get, /* tp_descr_get */ + (descrsetfunc)getset_set, /* tp_descr_set */ +}; +``` + +### 一个常见的例子 + +```py +class C: + pass + +c = C() +c.__dict__ +``` + +在上面的代码中, 元类在创建类 C 时, type_new 方法会设置类 C 的 tp_getset, 接着在 PyType_Ready 方法中会将 tp_getset 转换为 
PyGetSetDescrObject. + +部分代码如下: + +```c +// type_new +if (type->tp_weaklistoffset && type->tp_dictoffset) + type->tp_getset = subtype_getsets_full; +else if (type->tp_weaklistoffset && !type->tp_dictoffset) + type->tp_getset = subtype_getsets_weakref_only; +else if (!type->tp_weaklistoffset && type->tp_dictoffset) + type->tp_getset = subtype_getsets_dict_only; +else + type->tp_getset = NULL; + +// PyType_Ready +if (type->tp_getset != NULL) { + if (add_getset(type, type->tp_getset) < 0) + goto error; +} +``` + +以 subtype_getsets_full 为例: + +```c +static PyGetSetDef subtype_getsets_full[] = { + {"__dict__", subtype_dict, subtype_setdict, + PyDoc_STR("dictionary for instance variables (if defined)")}, + {"__weakref__", subtype_getweakref, NULL, + PyDoc_STR("list of weak references to the object (if defined)")}, + {0} +}; +``` + +subtype_getsets_full 中定义了 `__dict__` 和 `__weakref__`. 当实例 c 访问 `__dict__` 时, 就会调用 PyGetSetDescrObject 的 tp_descr_get(即 getset_get), 接着 tp_descr_get 又会调用 subtype_dict. + +## PyWrapperDescrObject + +PyWrapperDescrObject 和 slot 的实现有关. + +从结构来看, PyWrapperDescrObject 其实就是对 slotdef 的简单包装. + +在 PyType_Ready 方法中会调用 add_operators 方法将 slotdef 转换为 PyWrapperDescrObject, 然后添加到 tp_dict 中. + +通过将 slotdef 包装为一个 descriptor, 当想要调用对应的方法时都需要通过 descriptor 的 "中介" 方法, 即 tp_call. 
+ +```c +// Include/descrobject.h + +/* 在 typeobject.c 中有 `typedef struct wrapperbase slotdef;` */ +struct wrapperbase { + const char *name; + int offset; + void *function; + wrapperfunc wrapper; + const char *doc; + int flags; + PyObject *name_strobj; +}; + +typedef struct { + PyDescr_COMMON; + struct wrapperbase *d_base; // 一般指向 slotdef + void *d_wrapped; /* This can be any function pointer, 对于 slotdef 来说, 就是 slotptr(type, slotdef.offset) */ +} PyWrapperDescrObject; +``` + +```c +// Objects/descrobject.c + +PyTypeObject PyWrapperDescr_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "wrapper_descriptor", + sizeof(PyWrapperDescrObject), + 0, + (destructor)descr_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)wrapperdescr_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + (ternaryfunc)wrapperdescr_call, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_METHOD_DESCRIPTOR, /* tp_flags */ + 0, /* tp_doc */ + descr_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + descr_methods, /* tp_methods */ + descr_members, /* tp_members */ + wrapperdescr_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + (descrgetfunc)wrapperdescr_get, /* tp_descr_get */ + 0, /* tp_descr_set */ +}; +``` + +### 创建 PyWrapperDescrObject + +```c +// Objects/descrobject.c + +/* 这里的 base 一般就是 slotdef, 而 wrapped 就是 slotdef 中的 offset 指向类中的方法 + 可以看看 typeobject.c 中的 add_operators 方法是如何调用本方法的. 
+*/ +PyObject * +PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *base, void *wrapped) +{ + PyWrapperDescrObject *descr; + + descr = (PyWrapperDescrObject *)descr_new(&PyWrapperDescr_Type, + type, base->name); + if (descr != NULL) { + descr->d_base = base; + descr->d_wrapped = wrapped; + } + return (PyObject *)descr; +} +``` + +### 调用 PyWrapperDescrObject(__call__) + +PyWrapperDescr_Type 的 tp_call 设置为 wrapperdescr_call. wrapperdescr_call 接下来的调用链为 `descr->d_base->wrapper` -> `descr->d_wrapped`. + +```c +// Objects/descrobject.c + +Py_LOCAL_INLINE(PyObject *) +wrapperdescr_raw_call(PyWrapperDescrObject *descr, PyObject *self, + PyObject *args, PyObject *kwds) +{ + wrapperfunc wrapper = descr->d_base->wrapper; + + if (descr->d_base->flags & PyWrapperFlag_KEYWORDS) { + wrapperfunc_kwds wk = (wrapperfunc_kwds)(void(*)(void))wrapper; + return (*wk)(self, args, descr->d_wrapped, kwds); + } + + if (kwds != NULL && (!PyDict_Check(kwds) || PyDict_GET_SIZE(kwds) != 0)) { + PyErr_Format(PyExc_TypeError, + "wrapper %s() takes no keyword arguments", + descr->d_base->name); + return NULL; + } + return (*wrapper)(self, args, descr->d_wrapped); +} + +static PyObject * +wrapperdescr_call(PyWrapperDescrObject *descr, PyObject *args, PyObject *kwds) +{ + Py_ssize_t argc; + PyObject *self, *result; + + /* Make sure that the first argument is acceptable as 'self' */ + assert(PyTuple_Check(args)); + argc = PyTuple_GET_SIZE(args); + if (argc < 1) { + PyErr_Format(PyExc_TypeError, + "descriptor '%V' of '%.100s' " + "object needs an argument", + descr_name((PyDescrObject *)descr), "?", + PyDescr_TYPE(descr)->tp_name); + return NULL; + } + self = PyTuple_GET_ITEM(args, 0); + if (!_PyObject_RealIsSubclass((PyObject *)Py_TYPE(self), + (PyObject *)PyDescr_TYPE(descr))) { + PyErr_Format(PyExc_TypeError, + "descriptor '%V' " + "requires a '%.100s' object " + "but received a '%.100s'", + descr_name((PyDescrObject *)descr), "?", + PyDescr_TYPE(descr)->tp_name, + Py_TYPE(self)->tp_name); + 
return NULL; + } + + args = PyTuple_GetSlice(args, 1, argc); + if (args == NULL) { + return NULL; + } + result = wrapperdescr_raw_call(descr, self, args, kwds); + Py_DECREF(args); + return result; +} +``` + +## 参考资料 + +- [https://docs.python.org/3/reference/datamodel.html#implementing-descriptors](https://docs.python.org/3/reference/datamodel.html#implementing-descriptors) + +- [https://docs.python.org/3/howto/descriptor.html](https://docs.python.org/3/howto/descriptor.html) \ No newline at end of file diff --git "a/346円235円202円351円241円271円.md" "b/346円235円202円351円241円271円.md" new file mode 100644 index 0000000..e9540b1 --- /dev/null +++ "b/346円235円202円351円241円271円.md" @@ -0,0 +1,76 @@ +# 杂项 + +### 私有变量重命名 + +Python 会对形如 `__varname` 的类属性进行重命名, 重命名后的名字为 `_classname__varname`. 重命名的代码如下: + +```c +// Python/compile.c +PyObject * +_Py_Mangle(PyObject *privateobj, PyObject *ident) +{ + /* Name mangling: __private becomes _classname__private. + This is independent from how the name is used. */ + PyObject *result; + size_t nlen, plen, ipriv; + Py_UCS4 maxchar; + if (privateobj == NULL || !PyUnicode_Check(privateobj) || + PyUnicode_READ_CHAR(ident, 0) != '_' || + PyUnicode_READ_CHAR(ident, 1) != '_') { + Py_INCREF(ident); + return ident; + } + nlen = PyUnicode_GET_LENGTH(ident); + plen = PyUnicode_GET_LENGTH(privateobj); + /* Don't mangle __id__ or names with dots. + + The only time a name with a dot can occur is when + we are compiling an import statement that has a + package name. + + TODO(jhylton): Decide whether we want to support + mangling of the module name, e.g. __M.X. 
+ */ + if ((PyUnicode_READ_CHAR(ident, nlen-1) == '_' && + PyUnicode_READ_CHAR(ident, nlen-2) == '_') || + PyUnicode_FindChar(ident, '.', 0, nlen, 1) != -1) { + Py_INCREF(ident); + return ident; /* Don't mangle __whatever__ */ + } + /* Strip leading underscores from class name */ + ipriv = 0; + while (PyUnicode_READ_CHAR(privateobj, ipriv) == '_') + ipriv++; + if (ipriv == plen) { + Py_INCREF(ident); + return ident; /* Don't mangle if class is just underscores */ + } + plen -= ipriv; + + if (plen + nlen>= PY_SSIZE_T_MAX - 1) { + PyErr_SetString(PyExc_OverflowError, + "private identifier too large to be mangled"); + return NULL; + } + + maxchar = PyUnicode_MAX_CHAR_VALUE(ident); + if (PyUnicode_MAX_CHAR_VALUE(privateobj)> maxchar) + maxchar = PyUnicode_MAX_CHAR_VALUE(privateobj); + + result = PyUnicode_New(1 + nlen + plen, maxchar); + if (!result) + return 0; + /* ident = "_" + priv[ipriv:] + ident # i.e. 1+plen+nlen bytes */ + PyUnicode_WRITE(PyUnicode_KIND(result), PyUnicode_DATA(result), 0, '_'); + if (PyUnicode_CopyCharacters(result, 1, privateobj, ipriv, plen) < 0) { + Py_DECREF(result); + return NULL; + } + if (PyUnicode_CopyCharacters(result, plen+1, ident, 0, nlen) < 0) { + Py_DECREF(result); + return NULL; + } + assert(_PyUnicode_CheckConsistency(result, 1)); + return result; +} +``` \ No newline at end of file diff --git "a/346円250円241円345円235円227円.md" "b/346円250円241円345円235円227円.md" new file mode 100644 index 0000000..dc26430 --- /dev/null +++ "b/346円250円241円345円235円227円.md" @@ -0,0 +1,221 @@ +# 模块 + + +### 模块的定义 + +在编写模块时, 必须定义一个 PyModuleDef 结构体, PyModuleDef 包含了关于模块的所有信息, 例如模块名字, 函数等等. 
+ +```c +// Include/moduleobject.h + +typedef struct PyModuleDef_Base { + PyObject_HEAD + PyObject* (*m_init)(void); + Py_ssize_t m_index; + PyObject* m_copy; +} PyModuleDef_Base; + +typedef struct PyModuleDef{ + PyModuleDef_Base m_base; + const char* m_name; + const char* m_doc; + Py_ssize_t m_size; + PyMethodDef *m_methods; + struct PyModuleDef_Slot* m_slots; + traverseproc m_traverse; + inquiry m_clear; + freefunc m_free; +} PyModuleDef; +``` + +### 模块初始化 + +`_PyModule_CreateInitialized` 可以创建并初始化一个模块. + +```c +// Objects/moduleobject.c + +PyObject * +_PyModule_CreateInitialized(struct PyModuleDef* module, int module_api_version) +{ + const char* name; + PyModuleObject *m; + + if (!PyModuleDef_Init(module)) + return NULL; + name = module->m_name; + if (!check_api_version(name, module_api_version)) { + return NULL; + } + if (module->m_slots) { + PyErr_Format( + PyExc_SystemError, + "module %s: PyModule_Create is incompatible with m_slots", name); + return NULL; + } + /* Make sure name is fully qualified. + + This is a bit of a hack: when the shared library is loaded, + the module name is "package.module", but the module calls + PyModule_Create*() with just "module" for the name. The shared + library loader squirrels away the true name of the module in + _Py_PackageContext, and PyModule_Create*() will substitute this + (if the name actually matches). + */ + if (_Py_PackageContext != NULL) { + const char *p = strrchr(_Py_PackageContext, '.'); + if (p != NULL && strcmp(module->m_name, p+1) == 0) { + name = _Py_PackageContext; + _Py_PackageContext = NULL; + } + } + if ((m = (PyModuleObject*)PyModule_New(name)) == NULL) + return NULL; + + if (module->m_size> 0) { + m->md_state = PyMem_MALLOC(module->m_size); + if (!m->md_state) { + PyErr_NoMemory(); + Py_DECREF(m); + return NULL; + } + memset(m->md_state, 0, module->m_size); + } + + // 转换 module->m_methods 为 PyCFunctionObject, 并添加到 m 中. 
+ if (module->m_methods != NULL) { + if (PyModule_AddFunctions((PyObject *) m, module->m_methods) != 0) { + Py_DECREF(m); + return NULL; + } + } + if (module->m_doc != NULL) { + if (PyModule_SetDocString((PyObject *) m, module->m_doc) != 0) { + Py_DECREF(m); + return NULL; + } + } + m->md_def = module; + return (PyObject*)m; +} +``` + +### 模块方法转换为 PyCFunctionObject + +PyMethodDef 只是一个普通结构体, 因此不能被直接调用, 必须将其转换为 PyCFunctionObject. + +```c +// Include/methodobject.h +struct PyMethodDef { + const char *ml_name; /* The name of the built-in function/method */ + PyCFunction ml_meth; /* The C function that implements it */ + int ml_flags; /* Combination of METH_xxx flags, which mostly + describe the args expected by the C func */ + const char *ml_doc; /* The __doc__ attribute, or NULL */ +}; +``` + +PyModule_AddFunctions -> _add_methods_to_object -> PyCFunction_NewEx -> PyCMethod_New. + +```c +//Objects/methodobject.c + +PyObject * +PyCMethod_New(PyMethodDef *ml, PyObject *self, PyObject *module, PyTypeObject *cls) +{ + /* Figure out correct vectorcall function to use */ + vectorcallfunc vectorcall; + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | + METH_O | METH_KEYWORDS | METH_METHOD)) + { + case METH_VARARGS: + case METH_VARARGS | METH_KEYWORDS: + /* For METH_VARARGS functions, it's more efficient to use tp_call + * instead of vectorcall. 
*/ + vectorcall = NULL; + break; + case METH_FASTCALL: + vectorcall = cfunction_vectorcall_FASTCALL; + break; + case METH_FASTCALL | METH_KEYWORDS: + vectorcall = cfunction_vectorcall_FASTCALL_KEYWORDS; + break; + case METH_NOARGS: + vectorcall = cfunction_vectorcall_NOARGS; + break; + case METH_O: + vectorcall = cfunction_vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + vectorcall = cfunction_vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + default: + PyErr_Format(PyExc_SystemError, + "%s() method: bad call flags", ml->ml_name); + return NULL; + } + + PyCFunctionObject *op = NULL; + + if (ml->ml_flags & METH_METHOD) { + if (!cls) { + PyErr_SetString(PyExc_SystemError, + "attempting to create PyCMethod with a METH_METHOD " + "flag but no class"); + return NULL; + } + PyCMethodObject *om = PyObject_GC_New(PyCMethodObject, &PyCMethod_Type); + if (om == NULL) { + return NULL; + } + Py_INCREF(cls); + om->mm_class = cls; + op = (PyCFunctionObject *)om; + } else { + if (cls) { + PyErr_SetString(PyExc_SystemError, + "attempting to create PyCFunction with class " + "but no METH_METHOD flag"); + return NULL; + } + op = PyObject_GC_New(PyCFunctionObject, &PyCFunction_Type); + if (op == NULL) { + return NULL; + } + } + + op->m_weakreflist = NULL; + op->m_ml = ml; + Py_XINCREF(self); + op->m_self = self; + Py_XINCREF(module); + op->m_module = module; + op->vectorcall = vectorcall; + _PyObject_GC_TRACK(op); + return (PyObject *)op; +} +``` + +从代码可以看出, PyCMethod_New 负责创建 PyCFunctionObject 和 PyCMethodObject. PyCFunctionObject(PyCMethodObject) 对 PyMethodDef 进行了一层简单的包装, 多了 m_self, m_module 等信息. 
下面是 PyCFunctionObject 和 PyCMethodObject 的定义: + +```c +// Include/cpython/methodobject.h + +typedef struct { + PyObject_HEAD + PyMethodDef *m_ml; /* Description of the C function to call */ + PyObject *m_self; /* Passed as 'self' arg to the C func, can be NULL */ + PyObject *m_module; /* The __module__ attribute, can be anything */ + PyObject *m_weakreflist; /* List of weak references */ + vectorcallfunc vectorcall; +} PyCFunctionObject; + +typedef struct { + PyCFunctionObject func; + PyTypeObject *mm_class; /* Class that defines this method */ +} PyCMethodObject; +``` + +对于不同类型的模块方法, 会选择不同的 vectorcall, 这些 vectorcall 最终会调用 `PyCFunctionObject.m_ml.ml_meth`, 也就是最初在模块中定义的函数. + + diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円.md" index ca4a88e..3c52f0c 100644 --- "a/347円261円273円345円236円213円347円263円273円347円273円237円.md" +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円.md" @@ -1,468 +1,440 @@ -# Python 虚拟机中的类机制 +# 类型系统 -Python 中的对象模型 +Python 中的类型系统: ``` is instance of +-----+ +---------------------+ +------------------------+ | | | is instance of | | is instance of | | | | | | | - | +-+--v------+ +--+-v-----+ +-----+------+ - +---> | | | | | - | type | | Class A | | instance | - | | | | | | - +---+----^--+ +-----+----+ +------------+ + | +-+--v------+ +--+-v-----+ +-----+-------+ + +---> | | | | | + | type | | Class A | | instance a | + | | | | | | + +---+----^--+ +-----+----+ +-------------+ | | | is sub-class of | |is instance of | | | | | | | +---v----+--+ | | | is sub-class of| - | object <-----------------+ - | | - +-----------+ + +----->| object <-----------------+ + | | | + | +-----------+ + | | + +-------------+ + is sub-class of ``` 上图中有两个比较特殊的对象:object 和 type。 -object 是所有 Class 的基类,所有的 Class 都直接或间接继承至 object,所以 class 是 object 的子类。 +object 是所有类的基类. 为了类型系统的自洽, object 是自己的基类, type 的基类是 object . 
```python>>> issubclass(int, object) True >>> issubclass(str, object) True ->>> -``` - -type 是 metaclass,class 是通过 metaclass 创建的,所以 class 是 type 的实例。 - -```python ->>> isinstance(int, type) +>>> issubclass(object, object) +True +>>> issubclass(type, object) True ``` -type 是特殊的对象,它的父类是 object,由于 metaclass 可以继承自父类,所以 type 的 metaclass 是它本身。 +type 是 metaclass,class 是通过 metaclass 创建的,所以 class 是 type 的实例. 为了类型系统的自洽, type 的类型属于自己, object 的类型属于 type. ```python ->>> issubclass(type, object) +>>> isinstance(int, type) True >>> isinstance(object, type) True >>> isinstance(type, type) True ->>> ``` -## 内部类和用户自定义类 - -内部类(例如 int,str,list,dict)使用 PyTypeObject 表示。 - -然而对于用户自定义的类,Python 并不知道用户会在类中定义多少变量和属性,所以必须使用另外一种结构来表示用户自定义的类,那就是 PyHeapTypeObject。 - -```C -/* The *real* layout of a type object when allocated on the heap */ -typedef struct _heaptypeobject { - /* Note: there's a dependency on the order of these members - in slotptr() in typeobject.c . */ - PyTypeObject ht_type; - PyNumberMethods as_number; - PyMappingMethods as_mapping; - PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, - so that the mapping wins when both - the mapping and the sequence define - a given operator (e.g. __getitem__). - see add_operators() in typeobject.c . */ - PyBufferProcs as_buffer; - PyObject *ht_name, *ht_slots; - /* here are optional user slots, followed by the members. */ -} PyHeapTypeObject; +## 数据结构 + +来自 `Include/object.h` 的注释: + +```c +/* Object and type object interface */ + +/* +Objects are structures allocated on the heap. Special rules apply to +the use of objects to ensure they are properly garbage-collected. +Objects are never allocated statically or on the stack; they must be +accessed through special macros and functions only. 
(Type objects are +exceptions to the first rule; the standard types are represented by +statically initialized type objects, although work on type/class unification +for Python 2.2 made it possible to have heap-allocated type objects too). + +An object has a 'reference count' that is increased or decreased when a +pointer to the object is copied or deleted; when the reference count +reaches zero there are no references to the object left and it can be +removed from the heap. + +An object has a 'type' that determines what it represents and what kind +of data it contains. An object's type is fixed when it is created. +Types themselves are represented as objects; an object contains a +pointer to the corresponding type object. The type itself has a type +pointer pointing to the object representing the type 'type', which +contains a pointer to itself!. + +Objects do not float around in memory; once allocated an object keeps +the same size and address. Objects that must hold variable-size data +can contain pointers to variable-size parts of the object. Not all +objects of the same type have the same size; but the size cannot change +after allocation. (These restrictions are made so a reference to an +object can be simply a pointer -- moving an object would require +updating all the pointers, and changing an object's size would require +moving it if there was another object right next to it.) + +Objects are always accessed through pointers of the type 'PyObject *'. +The type 'PyObject' is a structure that only contains the reference count +and the type pointer. The actual memory allocated for an object +contains other data that can only be accessed after casting the pointer +to a pointer to a longer structure type. This longer type must start +with the reference count and type fields; the macro PyObject_HEAD should be +used for this (to accommodate for future changes). 
The implementation +of a particular object type can cast the object pointer to the proper +type and back. + +A standard interface exists for objects that contain an array of items +whose size is determined when the object is allocated. +*/ ``` -## 类型初始化 - -PyType_Ready 函数负责对 PyTypeObject 进行初始化,主要工作有: - -- 设置 tp_base(默认为 PyBaseObject_Type,即 Python 层面的 object) - -- 设置 ob_type(一般继承自基类的 ob_type) - -- 设置 tp_bases - -- 初始化 tp_dict(重点) - - - add_operators - - - add_methods +### PyObject - - add_members +最最基础的 PyObject: - - add_getset +```c +// Include/object.h +typedef struct _object { + Py_ssize_t ob_refcnt; + PyTypeObject *ob_type; +} PyObject; +``` -- 计算 MRO(method resolution order) +Python 中所有对象结构体的开头都是 PyObject. -- 将本 type 添加到所有父类的 subclass 中 +PyVarObject 表示变长对象, 其中的 ob_size 一般表示该对象包含多少个元素, 并不一定是字节长度. -### slotdef +```c +// Include/object.h +typedef struct { + PyObject ob_base; + Py_ssize_t ob_size; /* Number of items in variable part */ +} PyVarObject; +``` -定义: +### PyTypeObject + +```c +// Include/cpython/object.h +struct _typeobject { + PyObject_VAR_HEAD + const char *tp_name; /* For printing, in format "<module>.<name>" 
*/ + Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + Py_ssize_t tp_vectorcall_offset; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2) + or tp_reserved (Python 3) */ + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + unsigned long tp_flags; + + const char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + Py_ssize_t tp_weaklistoffset; + + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + Py_ssize_t tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; + PyObject *tp_subclasses; + PyObject *tp_weaklist; + destructor tp_del; + + /* Type attribute cache 
version tag. Added in version 2.6 */ + unsigned int tp_version_tag; + + destructor tp_finalize; + vectorcallfunc tp_vectorcall; +}; -```C -/* Table mapping __foo__ names to tp_foo offsets and slot_tp_foo wrapper - functions. The offsets here are relative to the 'PyHeapTypeObject' - structure, which incorporates the additional structures used for numbers, - sequences and mappings. - Note that multiple names may map to the same slot (e.g. __eq__, - __ne__ etc. all map to tp_richcompare) and one name may map to multiple - slots (e.g. __str__ affects tp_str as well as tp_repr). The table is - terminated with an all-zero entry. (This table is further initialized and - sorted in init_slotdefs() below.) */ +/* The *real* layout of a type object when allocated on the heap */ +typedef struct _heaptypeobject { + /* Note: there's a dependency on the order of these members + in slotptr() in typeobject.c . */ + PyTypeObject ht_type; + PyAsyncMethods as_async; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + see add_operators() in typeobject.c . */ + PyBufferProcs as_buffer; + PyObject *ht_name, *ht_slots, *ht_qualname; + struct _dictkeysobject *ht_cached_keys; + PyObject *ht_module; + /* here are optional user slots, followed by the members. 
*/ +} PyHeapTypeObject; +``` -struct wrapperbase { - char *name; - int offset; - void *function; - wrapperfunc wrapper; - char *doc; - int flags; - PyObject *name_strobj; +### PyBaseObject_Type(object) + +PyBaseObject_Type 的定义如下: + +```c +// Objects/typeobject.c + +PyTypeObject PyBaseObject_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "object", /* tp_name */ + sizeof(PyObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + object_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + object_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + (hashfunc)_Py_HashPointer, /* tp_hash */ + 0, /* tp_call */ + object_str, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + PyObject_GenericSetAttr, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + object_doc, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + object_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + object_methods, /* tp_methods */ + 0, /* tp_members */ + object_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + object_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + object_new, /* tp_new */ + PyObject_Del, /* tp_free */ }; ``` -slotdef 表示 PyTypeObject 中的一个个操作,例如 `__add__`,`__str__`。 - -### descriptor - -在 slot 中,包含了很多关于一个操作的信息,但是很可惜,在 tp_dict 中,与 "**getitem**" 关联在一起的,一定不会是一个 slot。原因很简单,slot 不是一个 PyObject,它不能存放在 dict 对象中。当然,如果我们再深入地思考一下,会发现,slot 也不会被调用。既然 slot 不是一个 PyObject,那么它就没有 type,也就无从谈起什么 tp_call 了,所以 slot 是无论如何也不能满足前面描述的 Python 中的 "可调用" 这个概念。 - -前面我们说过,Python 虚拟机在 tp_dict 找到 "**getitem**" 对应的 "操作" 后,会调用该 "操作",所以在 tp_dict 中与 "**getitem**" 对应的只能是另一个包装了 slot 的 PyObject,在 Python 中,这是一个我们称之为 descriptor 的东西。 +## 内部类 -在 Python 内部,存在多种 descriptor,与 PyTypeObject 中的操作对应的是 
PyWrapper- DescrObject。在此后的描述中,我们将用术语 descriptor 来专门表示 PyWrapperDescr- Object。一个 descriptor 包含一个 slot,其创建是通过 PyDescr_NewWrapper 完成的。 +内部类的实现可以参考 tuple, list 等对象的代码. -定义: +基本原理: -```C -#define PyDescr_COMMON \ - PyObject_HEAD \ - PyTypeObject *d_type; \ - PyObject *d_name +- 定义对象结构体 + + ```c + typedef struct { + PyObject_VAR_HEAD + int foo; + } PyFooObject; + ``` -typedef struct { - PyDescr_COMMON; - struct wrapperbase *d_base; - void *d_wrapped; /* This can be any function pointer */ -} PyWrapperDescrObject; +- 定义类型结构体 + + ```c + PyTypeObject PyFoo_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "foo", /* tp_name */ + // 定义剩下的 tp_xxx + } + ``` +- 实现 tp_new, tp_alloc, tp_dealloc 等关键方法. -static PyDescrObject * -descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) -{ - PyDescrObject *descr; - - descr = (PyDescrObject *)PyType_GenericAlloc(descrtype, 0); - if (descr != NULL) { - Py_XINCREF(type); - descr->d_type = type; - descr->d_name = PyString_InternFromString(name); - if (descr->d_name == NULL) { - Py_DECREF(descr); - descr = NULL; - } - } - return descr; -} +- 实现其它可选方法和属性(PyMethodDef, PyMemberDef). -PyObject * -PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *base, void *wrapped) -{ - PyWrapperDescrObject *descr; - - descr = (PyWrapperDescrObject *)descr_new(&PyWrapperDescr_Type, - type, base->name); - if (descr != NULL) { - descr->d_base = base; - descr->d_wrapped = wrapped; - } - return (PyObject *)descr; -} -``` +## 用户类 -#### 创建 slot 和 descriptor 的联系 - -```C -/* This function is called by PyType_Ready() to populate the type's - dictionary with method descriptors for function slots. For each - function slot (like tp_repr) that's defined in the type, one or more - corresponding descriptors are added in the type's tp_dict dictionary - under the appropriate name (like __repr__). 
Some function slots - cause more than one descriptor to be added (for example, the nb_add - slot adds both __add__ and __radd__ descriptors) and some function - slots compete for the same descriptor (for example both sq_item and - mp_subscript generate a __getitem__ descriptor). - - In the latter case, the first slotdef entry encoutered wins. Since - slotdef entries are sorted by the offset of the slot in the - PyHeapTypeObject, this gives us some control over disambiguating - between competing slots: the members of PyHeapTypeObject are listed - from most general to least general, so the most general slot is - preferred. In particular, because as_mapping comes before as_sequence, - for a type that defines both mp_subscript and sq_item, mp_subscript - wins. - - This only adds new descriptors and doesn't overwrite entries in - tp_dict that were previously defined. The descriptors contain a - reference to the C function they must call, so that it's safe if they - are copied into a subtype's __dict__ and the subtype has a different - C function in its slot -- calling the method defined by the - descriptor will call the C function that was used to create it, - rather than the C function present in the slot when it is called. - (This is important because a subtype may have a C function in the - slot that calls the method from the dictionary, and we want to avoid - infinite recursion here.) 
*/ +对于用户自定义的类,Python 并不知道用户会在类中定义多少变量和属性,所以必须使用另外一种结构来表示用户自定义的类,那就是 PyHeapTypeObject。 -static int -add_operators(PyTypeObject *type) -{ - PyObject *dict = type->tp_dict; - slotdef *p; - PyObject *descr; - void **ptr; - - init_slotdefs(); - for (p = slotdefs; p->name; p++) { - if (p->wrapper == NULL) - continue; - ptr = slotptr(type, p->offset); - if (!ptr || !*ptr) - continue; - if (PyDict_GetItem(dict, p->name_strobj)) - continue; - descr = PyDescr_NewWrapper(type, p, *ptr); - if (descr == NULL) - return -1; - if (PyDict_SetItem(dict, p->name_strobj, descr) < 0) - return -1; - Py_DECREF(descr); - } - if (type->tp_new != NULL) { - if (add_tp_new_wrapper(type) < 0) - return -1; - } - return 0; -} -``` +### 类的创建 -## 用户自定义 class +类的创建流程见[类型系统之元类](类型系统之元类.md). -```python -class A(object): - name = 'Python' - def __init__(self): - print 'A::__init__' +### 实例的创建 - def f(self): - print 'A::f' +用于测试的 Python 代码如下: - def g(self, aValue): - self.value = aValue - print self.value +```py +class C: + pass -a = A() -a.f() -a.g(10) +C() ``` -模块的字节码: +对应的字节码如下: -```python - 1 0 LOAD_CONST 0 ('A') - 3 LOAD_NAME 0 (object) - 6 BUILD_TUPLE 1 - 9 LOAD_CONST 1 () - 12 MAKE_FUNCTION 0 - 15 CALL_FUNCTION 0 - 18 BUILD_CLASS - 19 STORE_NAME 1 (A) - - 13 22 LOAD_NAME 1 (A) - 25 CALL_FUNCTION 0 - 28 STORE_NAME 2 (a) - - 14 31 LOAD_NAME 2 (a) - 34 LOAD_ATTR 3 (f) - 37 CALL_FUNCTION 0 - 40 POP_TOP - - 15 41 LOAD_NAME 2 (a) - 44 LOAD_ATTR 4 (g) - 47 LOAD_CONST 2 (10) - 50 CALL_FUNCTION 1 - 53 POP_TOP - 54 LOAD_CONST 3 (None) - 57 RETURN_VALUE +``` +28 LOAD_NAME 2 (C) +30 CALL_FUNCTION 0 +32 POP_TOP ``` -class A 的字节码: +CALL_FUNCTION 的调用路径:call_function -> PyObject_Vectorcall -> _PyObject_VectorcallTstate -> _PyObject_MakeTpCall -> tp_call(`type(C).__call__`) -> tp_new(`C.__new__`) -> tp_init(`C.__init__`). 
-```python - 1 0 LOAD_NAME 0 (__name__) - 3 STORE_NAME 1 (__module__) - - 2 6 LOAD_CONST 0 ('Python') - 9 STORE_NAME 2 (name) - - 3 12 LOAD_CONST 1 () - 15 MAKE_FUNCTION 0 - 18 STORE_NAME 3 (__init__) - - 6 21 LOAD_CONST 2 () - 24 MAKE_FUNCTION 0 - 27 STORE_NAME 4 (f) - - 9 30 LOAD_CONST 3 () - 33 MAKE_FUNCTION 0 - 36 STORE_NAME 5 (g) - 39 LOAD_LOCALS # f_locals 中包含 class 的动态元信息:类属性和方法 - # BUILD_CLASS 指令会用到 f_locals - 40 RETURN_VALUE -``` +如果没有自定义 `__new__` 方法, 那么 tp_new 继承自 object. -```C -case BUILD_CLASS: - u = TOP(); // class 的动态信息 - v = SECOND(); // 基类 tuple - w = THIRD(); // 类名 - STACKADJ(-2); - x = build_class(u, v, w); - SET_TOP(x); - Py_DECREF(u); - Py_DECREF(v); - Py_DECREF(w); - break; -``` +`PyBaseObject_Type.tp_new` 的代码如下: -```C +``` static PyObject * -build_class(PyObject *methods, PyObject *bases, PyObject *name) +object_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - // 1. find metaclass - // 2. build class by metaclass: PyObject_CallFunctionObjArgs - - PyObject *metaclass = NULL, *result, *base; - - if (PyDict_Check(methods)) - metaclass = PyDict_GetItemString(methods, "__metaclass__"); - if (metaclass != NULL) - Py_INCREF(metaclass); - else if (PyTuple_Check(bases) && PyTuple_GET_SIZE(bases)> 0) { - base = PyTuple_GET_ITEM(bases, 0); - metaclass = PyObject_GetAttrString(base, "__class__"); - if (metaclass == NULL) { - PyErr_Clear(); - metaclass = (PyObject *)base->ob_type; - Py_INCREF(metaclass); - } - } - else { - PyObject *g = PyEval_GetGlobals(); - if (g != NULL && PyDict_Check(g)) - metaclass = PyDict_GetItemString(g, "__metaclass__"); - if (metaclass == NULL) - metaclass = (PyObject *) &PyClass_Type; - Py_INCREF(metaclass); - } - result = PyObject_CallFunctionObjArgs(metaclass, name, bases, methods, NULL); - Py_DECREF(metaclass); - if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) { - /* A type error here likely means that the user passed - in a base that was not a class (such the random module - instead of the 
random.random type). Help them out with - by augmenting the error message with more information.*/ - - PyObject *ptype, *pvalue, *ptraceback; - - PyErr_Fetch(&ptype, &pvalue, &ptraceback); - if (PyString_Check(pvalue)) { - PyObject *newmsg; - newmsg = PyString_FromFormat( - "Error when calling the metaclass bases\n %s", - PyString_AS_STRING(pvalue)); - if (newmsg != NULL) { - Py_DECREF(pvalue); - pvalue = newmsg; - } - } - PyErr_Restore(ptype, pvalue, ptraceback); - } - return result; + if (excess_args(args, kwds)) { + if (type->tp_new != object_new) { + PyErr_SetString(PyExc_TypeError, + "object.__new__() takes exactly one argument (the type to instantiate)"); + return NULL; + } + if (type->tp_init == object_init) { + PyErr_Format(PyExc_TypeError, "%.200s() takes no arguments", + type->tp_name); + return NULL; + } + } + + // 如果 type 是抽象类, 那么抛出异常. + if (type->tp_flags & Py_TPFLAGS_IS_ABSTRACT) { + PyObject *abstract_methods; + PyObject *sorted_methods; + PyObject *joined; + PyObject *comma; + _Py_static_string(comma_id, ", "); + Py_ssize_t method_count; + + /* Compute ", ".join(sorted(type.__abstractmethods__)) + into joined. */ + abstract_methods = type_abstractmethods(type, NULL); + if (abstract_methods == NULL) + return NULL; + sorted_methods = PySequence_List(abstract_methods); + Py_DECREF(abstract_methods); + if (sorted_methods == NULL) + return NULL; + if (PyList_Sort(sorted_methods)) { + Py_DECREF(sorted_methods); + return NULL; + } + comma = _PyUnicode_FromId(&comma_id); + if (comma == NULL) { + Py_DECREF(sorted_methods); + return NULL; + } + joined = PyUnicode_Join(comma, sorted_methods); + method_count = PyObject_Length(sorted_methods); + Py_DECREF(sorted_methods); + if (joined == NULL) + return NULL; + if (method_count == -1) + return NULL; + + PyErr_Format(PyExc_TypeError, + "Can't instantiate abstract class %s " + "with abstract method%s %U", + type->tp_name, + method_count> 1 ? 
"s" : "", + joined); + Py_DECREF(joined); + return NULL; + } + return type->tp_alloc(type, 0); } ``` -调用流程: - -PyObject_CallFunctionObjArgs -> PyObject_Call -> metaclass.ob_type.tp_call -> type.type_call -> metaclass.tp_new - -metaclass 创建 class - -`metaclass.__new__(meta_class, name, bases, attrs)` - - -**疑问** - -- 没有看懂 type_new 中对 `__slot__` 的处理 - - -## 从 class 创建 instance - -```python -22 LOAD_NAME 1 (A) -25 CALL_FUNCTION 0 -28 STORE_NAME 2 (a) -``` - -CALL_FUNCTION 的调用路径:call_function -> do_call -> PyObject_Call -> tp_call -> tp_new(object_new),最终调用 class 的 tp_new 方法创建 instance - - -class 创建 instance - - - `meta_class.__call__` - - - `class.__new__` - - - `class.__init__` - -### 读取属性 - -1. 尝试从 mro 列表查找属性 (_PyType_Lookup),假设查找结果为 descr - - 1.1 如果 descr 是 data descriptor,则调用 descr.ob_type.tp_descr_get 获取真正的属性。读取属性结束。 - -2. 尝试从实例的__dict__中查找属性,如果查找成功,读取属性结束。 - -3. 如果 descr.ob_type.tp_descr_get不等于 NULL,则调用 descr.ob_type.tp_descr_get 获取真正的属性。读取属性结束。 - -4. 如果 descr不等于 NULL,则返回 descr。读取属性结束。 - -5. 返回 NULL。 - - -## 测试 - -- 类 class - - 创建流程 - - - 观察各个 tp_xxx 变量 - - - 用户自定义__init__, __new__等方法,观察 fixup_slot_dispatchers 如何修改默认方法。 - -- 实例instace - - - 创建流程:__new__, __init__ - - - 如何读取属性 - - - descriptor - - - bound method, unbound method +如果用户没有自定义 `__init__` 方法, 那么 tp_init 继承自 object. 
+`PyBaseObject_Type.tp_init` 的代码如下: +```c +static int +object_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyTypeObject *type = Py_TYPE(self); + if (excess_args(args, kwds)) { + if (type->tp_init != object_init) { + PyErr_SetString(PyExc_TypeError, + "object.__init__() takes exactly one argument (the instance to initialize)"); + return -1; + } + if (type->tp_new == object_new) { + PyErr_Format(PyExc_TypeError, + "%.200s.__init__() takes exactly one argument (the instance to initialize)", + type->tp_name); + return -1; + } + } + return 0; +} +``` \ No newline at end of file diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 MRO.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 MRO.md" new file mode 100644 index 0000000..f2e7665 --- /dev/null +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 MRO.md" @@ -0,0 +1,40 @@ +# 类型系统之 MRO + +```py +class A: + pass + +class B: + pass + +class C(A, B): + pass + +class D(B, A): + pass + +class E(C, D): + pass +``` + +执行上面的代码会报错: + +``` +Traceback (most recent call last): + File "/home/vvv/Workspace/cpython/mydemo/class_demo.py", line 16, in <module> + class E(C, D): +TypeError: Cannot create a consistent method resolution +order (MRO) for bases A, B +``` + +## C3 算法 + +算法实现 + +在 CPython 中对应的算法实现是 `Objects/typeobject.c` 中的 `mro_implementation` 函数. 先看 [参考](#参考)然后再看算法实现更容易理解. 
+ +## 参考 + +- [https://www.python.org/download/releases/2.3/mro/](https://www.python.org/download/releases/2.3/mro/) + +- [https://en.wikipedia.org/wiki/C3_linearization](https://en.wikipedia.org/wiki/C3_linearization) \ No newline at end of file diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 slot.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 slot.md" new file mode 100644 index 0000000..bc469f0 --- /dev/null +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 slot.md" @@ -0,0 +1,106 @@ +# 类型系统之 slot + +slot 指的是 PyTypeObject(PyHeapTypeObject) 中的: + +- tp_new +- tp_init +- tp_call +- tp_hash +- tp_repr +- tp_str +- tp_richcompare +- as_async +- as_number +- as_mapping +- as_sequence +- 等等 + +这些方法可以在 Python 层面进行重载(overload), 例如 tp_new 对应的 `__new__`, tp_init 对应的 `__init__`, as_number.nb_add 对应的 `__add__` 等等. + +CPython 内部需要将 `__xxx__` 和 slot 关联起来, 这样当进行某些操作时可以找到用户定义的方法. + +### slot table + +slot table 其实是一个数组, 保存了 `__xxx__` 和 tp_xxx 之间的关系, 下面的注释说它们之间既可以是一对多也可以是多对一的关系. + +```c +// Objects/typeobject.c +// 完整内容可以查看源码 +/* +Table mapping __foo__ names to tp_foo offsets and slot_tp_foo wrapper functions. + +The table is ordered by offsets relative to the 'PyHeapTypeObject' structure, +which incorporates the additional structures used for numbers, sequences and +mappings. Note that multiple names may map to the same slot (e.g. __eq__, +__ne__ etc. all map to tp_richcompare) and one name may map to multiple slots +(e.g. __str__ affects tp_str as well as tp_repr). The table is terminated with +an all-zero entry. (This table is further initialized in +_PyTypes_InitSlotDefs().) 
+*/ +typedef struct wrapperbase slotdef; + +#define TPSLOT(NAME, SLOT, FUNCTION, WRAPPER, DOC) \ + {NAME, offsetof(PyTypeObject, SLOT), (void *)(FUNCTION), WRAPPER, \ + PyDoc_STR(DOC)} +#define FLSLOT(NAME, SLOT, FUNCTION, WRAPPER, DOC, FLAGS) \ + {NAME, offsetof(PyTypeObject, SLOT), (void *)(FUNCTION), WRAPPER, \ + PyDoc_STR(DOC), FLAGS} +#define ETSLOT(NAME, SLOT, FUNCTION, WRAPPER, DOC) \ + {NAME, offsetof(PyHeapTypeObject, SLOT), (void *)(FUNCTION), WRAPPER, \ + PyDoc_STR(DOC)} + +static slotdef slotdefs[] = { + TPSLOT("__getattribute__", tp_getattr, NULL, NULL, ""), + TPSLOT("__getattr__", tp_getattr, NULL, NULL, ""), + TPSLOT("__setattr__", tp_setattr, NULL, NULL, ""), + TPSLOT("__delattr__", tp_setattr, NULL, NULL, ""), + TPSLOT("__repr__", tp_repr, slot_tp_repr, wrap_unaryfunc, + "__repr__($self, /)\n--\n\nReturn repr(self)."), + TPSLOT("__hash__", tp_hash, slot_tp_hash, wrap_hashfunc, + "__hash__($self, /)\n--\n\nReturn hash(self)."), + FLSLOT("__call__", tp_call, slot_tp_call, (wrapperfunc)(void(*)(void))wrap_call, + "__call__($self, /, *args, **kwargs)\n--\n\nCall self as a function.", + PyWrapperFlag_KEYWORDS), + TPSLOT("__str__", tp_str, slot_tp_str, wrap_unaryfunc, + "__str__($self, /)\n--\n\nReturn str(self)."), + {NULL} +}; +``` + +和 slotdefs 相关的两个最重要的操作是: add_operators 和 fixup_slot_dispatchers. + +### 初始化 slotdefs 数组 + +主要任务就是设置 slotdef.name_strobj. + +```c +static int slotdefs_initialized = 0; +/* Initialize the slotdefs table by adding interned string objects for the + names. */ +PyStatus +_PyTypes_InitSlotDefs(void) +{ + if (slotdefs_initialized) { + return _PyStatus_OK(); + } + + for (slotdef *p = slotdefs; p->name; p++) { + /* Slots must be ordered by their offset in the PyHeapTypeObject. 
*/ + assert(!p[1].name || p->offset <= p[1].offset); +#ifdef INTERN_NAME_STRINGS + p->name_strobj = PyUnicode_InternFromString(p->name); + if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) { + return _PyStatus_NO_MEMORY(); + } +#else + p->name_strobj = PyUnicode_FromString(p->name); + if (!p->name_strobj) { + return _PyStatus_NO_MEMORY(); + } +#endif + } + slotdefs_initialized = 1; + return _PyStatus_OK(); +} +``` + diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 super.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 super.md" new file mode 100644 index 0000000..64c8d90 --- /dev/null +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円 super.md" @@ -0,0 +1,372 @@ +# 类型系统之 super + +super 在 CPython 内部其实是一个类, 并不是函数. + +```c +// Objects/typeobject.c + +typedef struct { + PyObject_HEAD + PyTypeObject *type; + PyObject *obj; + PyTypeObject *obj_type; +} superobject; +``` + +```c +// Objects/typeobject.c + +PyTypeObject PySuper_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "super", /* tp_name */ + sizeof(superobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + super_dealloc, /* tp_dealloc */ + 0, /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + super_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + super_getattro, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + super_doc, /* tp_doc */ + super_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + super_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + super_descr_get, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* 
tp_dictoffset */ + super_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; +``` + +从 superobject 的结构可以看出, superobject 是一个包裹了类型信息(type 和 object) 的一个对象, 和描述符非常类似. + +当通过 `super().xxx` 访问父类的属性时, 实际上负责获取属性的是 PySuper_Type 的 tp_getattro, 即 super_getattro 方法. + +### super_init + +```c +// Objects/typeobject.c + +static int +super_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + superobject *su = (superobject *)self; + PyTypeObject *type = NULL; + PyObject *obj = NULL; + PyTypeObject *obj_type = NULL; + + if (!_PyArg_NoKeywords("super", kwds)) + return -1; + if (!PyArg_ParseTuple(args, "|O!O:super", &PyType_Type, &type, &obj)) + return -1; + + if (type == NULL) { + /* Call super(), without args -- fill in from __class__ + and first local variable on the stack. */ + PyThreadState *tstate = _PyThreadState_GET(); + PyFrameObject *frame = PyThreadState_GetFrame(tstate); + if (frame == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "super(): no current frame"); + return -1; + } + + PyCodeObject *code = PyFrame_GetCode(frame); + int res = super_init_without_args(frame, code, &type, &obj); + Py_DECREF(frame); + Py_DECREF(code); + + if (res < 0) { + return -1; + } + } + + if (obj == Py_None) + obj = NULL; + if (obj != NULL) { + obj_type = supercheck(type, obj); + if (obj_type == NULL) + return -1; + Py_INCREF(obj); + } + Py_INCREF(type); + Py_XSETREF(su->type, type); + Py_XSETREF(su->obj, obj); + Py_XSETREF(su->obj_type, obj_type); + return 0; +} +``` + +### "super() 无参数调用" 的实现原理 + +以下面的 Python 代码为例: + +```py +class A: + def __init__(self): + super().__init__() + +A() +``` + +对应的字节码如下: + +``` + 4 14 LOAD_BUILD_CLASS + 16 LOAD_CONST 3 () + 18 LOAD_CONST 4 ('A') + 20 MAKE_FUNCTION 0 + 22 LOAD_CONST 4 ('A') + 24 CALL_FUNCTION 2 + 26 STORE_NAME 2 (A) + + 8 28 LOAD_NAME 2 (A) + 30 CALL_FUNCTION 0 + 32 POP_TOP + 34 LOAD_CONST 1 (None) + 36 RETURN_VALUE + +Disassembly of : + 4 0 LOAD_NAME 0 
(__name__) + 2 STORE_NAME 1 (__module__) + 4 LOAD_CONST 0 ('A') + 6 STORE_NAME 2 (__qualname__) + + 5 8 LOAD_CLOSURE 0 (__class__) + 10 BUILD_TUPLE 1 + 12 LOAD_CONST 1 () + 14 LOAD_CONST 2 ('A.__init__') + 16 MAKE_FUNCTION 8 (closure) + 18 STORE_NAME 3 (__init__) + 20 LOAD_CLOSURE 0 (__class__) + 22 DUP_TOP + 24 STORE_NAME 4 (__classcell__) + 26 RETURN_VALUE + +Disassembly of : + 6 0 LOAD_GLOBAL 0 (super) + 2 CALL_FUNCTION 0 + 4 LOAD_METHOD 1 (__init__) + 6 CALL_METHOD 0 + 8 POP_TOP + 10 LOAD_CONST 0 (None) + 12 RETURN_VALUE +``` + +Python 为了支持 super() 无参数调用, 在编译字节码对象时做了一些额外的工作, 如下: + +- class A 的字节码对象的 co_cellvars 的长度等于 1, 保存的就是 `__class__`. + +- 在创建 `__init__` 函数时, 通过 LOAD_CLOSURE 加载 `__class__` 到栈上. 可以看到 `__init__` 实际上是一个闭包函数. + +当调用 `__init__` 时, `frame->f_fastlocals` 的布局如下: + +``` +__init__'s args | cellvars | freevars | local vars | stack +``` + +接下来来看对应的 CPython 代码: + +```c +/* 调用形式为 super(), 既然用户没有提供参数, 则尝试从 frame stack 上获取参数. + 这个功能依赖于字节码的协助, 在出现 super() 的代码, Python 会生成和 __class__ 相关的字节码. + 详情参考笔记 "类型系统之元类" 和 "类型系统之super". + + frame->f_fastlocals 的布局: __init__'s args | cellvars | freevars | local vars | stack +*/ +static int +super_init_without_args(PyFrameObject *f, PyCodeObject *co, + PyTypeObject **type_p, PyObject **obj_p) +{ + if (co->co_argcount == 0) { + PyErr_SetString(PyExc_RuntimeError, + "super(): no arguments"); + return -1; + } + + // f->f_localsplus[0] 是 __init__(self, *args, **kwargs) 中的 self + PyObject *obj = f->f_localsplus[0]; + Py_ssize_t i, n; + if (obj == NULL && co->co_cell2arg) { + /* The first argument might be a cell. 
*/ + n = PyTuple_GET_SIZE(co->co_cellvars); + for (i = 0; i < n; i++) { + if (co->co_cell2arg[i] == 0) { + PyObject *cell = f->f_localsplus[co->co_nlocals + i]; + assert(PyCell_Check(cell)); + obj = PyCell_GET(cell); + break; + } + } + } + if (obj == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "super(): arg[0] deleted"); + return -1; + } + + if (co->co_freevars == NULL) { + n = 0; + } + else { + assert(PyTuple_Check(co->co_freevars)); + n = PyTuple_GET_SIZE(co->co_freevars); + } + + PyTypeObject *type = NULL; + for (i = 0; i < n; i++) { + PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i); + assert(PyUnicode_Check(name)); + if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) { + Py_ssize_t index = co->co_nlocals + + PyTuple_GET_SIZE(co->co_cellvars) + i; + PyObject *cell = f->f_localsplus[index]; + if (cell == NULL || !PyCell_Check(cell)) { + PyErr_SetString(PyExc_RuntimeError, + "super(): bad __class__ cell"); + return -1; + } + type = (PyTypeObject *) PyCell_GET(cell); + if (type == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "super(): empty __class__ cell"); + return -1; + } + if (!PyType_Check(type)) { + PyErr_Format(PyExc_RuntimeError, + "super(): __class__ is not a type (%s)", + Py_TYPE(type)->tp_name); + return -1; + } + break; + } + } + if (type == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "super(): __class__ cell not found"); + return -1; + } + + *type_p = type; + *obj_p = obj; + return 0; +} +``` + +简单来说, 从 `f->f_localsplus[0]` 对应的参数是 `__init__` 的 self, self 对应 super 的第二个参数. 再从 freevars 中获取 super 的第一个参数(`__class__`). + +super() 只在特定的地方调用才合法, 例如实例方法, 类方法. + +### 属性访问 + +```c +static PyObject * +super_getattro(PyObject *self, PyObject *name) +{ + superobject *su = (superobject *)self; + PyTypeObject *starttype; + PyObject *mro; + Py_ssize_t i, n; + + starttype = su->obj_type; + if (starttype == NULL) + goto skip; + + /* We want __class__ to return the class of the super object + (i.e. super, or a subclass), not the class of su->obj. 
*/ + if (PyUnicode_Check(name) && + PyUnicode_GET_LENGTH(name) == 9 && + _PyUnicode_EqualToASCIIId(name, &PyId___class__)) + goto skip; + + mro = starttype->tp_mro; + if (mro == NULL) + goto skip; + + assert(PyTuple_Check(mro)); + n = PyTuple_GET_SIZE(mro); + + /* No need to check the last one: it's gonna be skipped anyway. mro 的最后一个元素是 object + 获取 starttype->tp_mro 中位于 su->type 后面的第一个父类. + 对于 super(MyClass, self) 来说, 目标指向 MyClass.__mro__ 的第二个元素. + */ + for (i = 0; i+1 < n; i++) { + if ((PyObject *)(su->type) == PyTuple_GET_ITEM(mro, i)) + break; + } + i++; /* skip su->type (if any) */ + if (i>= n) + goto skip; + + /* keep a strong reference to mro because starttype->tp_mro can be + replaced during PyDict_GetItemWithError(dict, name) */ + Py_INCREF(mro); + do { + PyObject *res, *tmp, *dict; + descrgetfunc f; + + tmp = PyTuple_GET_ITEM(mro, i); + assert(PyType_Check(tmp)); + + dict = ((PyTypeObject *)tmp)->tp_dict; + assert(dict != NULL && PyDict_Check(dict)); + + res = PyDict_GetItemWithError(dict, name); + if (res != NULL) { + Py_INCREF(res); + + f = Py_TYPE(res)->tp_descr_get; + if (f != NULL) { + tmp = f(res, + /* Only pass 'obj' param if this is instance-mode super + (See SF ID #743627) */ + (su->obj == (PyObject *)starttype) ? 
NULL : su->obj, + (PyObject *)starttype); + Py_DECREF(res); + res = tmp; + } + + Py_DECREF(mro); + return res; + } + else if (PyErr_Occurred()) { + Py_DECREF(mro); + return NULL; + } + + i++; + } while (i < n); + Py_DECREF(mro); + + skip: + return PyObject_GenericGetAttr(self, name); +} +``` + +## 参考 + +- [Python 关于 super 的文档](https://docs.python.org/3/library/functions.html#super) +- \ No newline at end of file diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円205円203円347円261円273円.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円205円203円347円261円273円.md" new file mode 100644 index 0000000..610725c --- /dev/null +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円205円203円347円261円273円.md" @@ -0,0 +1,1240 @@ +# 元类 + +PyType_Type 的定义: + +```c +// Objects/typeobject.c + +PyTypeObject PyType_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "type", /* tp_name */ + sizeof(PyHeapTypeObject), /* tp_basicsize */ + sizeof(PyMemberDef), /* tp_itemsize */ + (destructor)type_dealloc, /* tp_dealloc */ + offsetof(PyTypeObject, tp_vectorcall), /* tp_vectorcall_offset */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)type_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + (ternaryfunc)type_call, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)type_getattro, /* tp_getattro */ + (setattrofunc)type_setattro, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TYPE_SUBCLASS | + Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */ + type_doc, /* tp_doc */ + (traverseproc)type_traverse, /* tp_traverse */ + (inquiry)type_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyTypeObject, tp_weaklist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + type_methods, /* tp_methods */ + type_members, /* tp_members */ + type_getsets, /* tp_getset */ 
+ 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + offsetof(PyTypeObject, tp_dict), /* tp_dictoffset */ + type_init, /* tp_init */ + 0, /* tp_alloc */ + type_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ + (inquiry)type_is_gc, /* tp_is_gc */ +}; +``` + +PyType_Type 的 ob_type 是它自己 (通过 `PyVarObject_HEAD_INIT(&PyType_Type, 0)` 设置). + +注意, 虽然 PyType_Type 设置了 tp_vectorcall_offset, 但是其实 PyType_Type 的 tp_vectorcall 等于 NULL. + +### PyType_Type 的 tp_basicsize 和 tp_itemsize + +tp_basicsize 等于 sizeof(PyHeapTypeObject), 而 tp_itemsize 等于 sizeof(PyMemberDef). + +对于普通的类来说, tp_basicsize 表示该类的实例的大小, tp_itemsize 表示该类的实例变长元素列表中元素的大小. 例如 PyTuple_Type 的 tp_itemsize 等于 sizeof(PyObject *). + +我们知道 PyType_Type 是元类, 普通类相当于是 PyType_Type 的实例, 因此, tp_basicsize 表示要创建的类的大小, tp_itemsize 表示用户定义字段(类成员)的大小, 这里就是 PyMemberDef 的大小. 当使用 PyType_Type 创建一个类时, 通过 tp_basicsize 和 tp_itemsize 便可以分配正确大小的内存. + +## PyType_Type 创建类的流程 + +调用流程: + +``` +PyType_Type.tp_call -> PyType_Type.tp_new -> PyType_Ready + | + +-------> PyType_Type.tp_init +``` + +### type_call + +```c +// Objects/typeobject.c + +/* args: [name, bases, attrs] + kwds: {}, 关键字参数, 来自于类的定义, 例如: `class C(a=1, b=2)`, kwds 会传递给父类的 __init_subclass__ 方法. 
+*/ +static PyObject * +type_call(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyObject *obj; + PyThreadState *tstate = _PyThreadState_GET(); + +#ifdef Py_DEBUG + /* type_call() must not be called with an exception set, + because it can clear it (directly or indirectly) and so the + caller loses its exception */ + assert(!_PyErr_Occurred(tstate)); +#endif + + /* Special case: type(x) should return Py_TYPE(x) */ + /* We only want type itself to accept the one-argument form (#27157) */ + if (type == &PyType_Type) { + assert(args != NULL && PyTuple_Check(args)); + assert(kwds == NULL || PyDict_Check(kwds)); + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + + if (nargs == 1 && (kwds == NULL || !PyDict_GET_SIZE(kwds))) { + obj = (PyObject *) Py_TYPE(PyTuple_GET_ITEM(args, 0)); + Py_INCREF(obj); + return obj; + } + + /* SF bug 475327 -- if that didn't trigger, we need 3 + arguments. But PyArg_ParseTuple in type_new may give + a msg saying type() needs exactly 3. */ + if (nargs != 3) { + PyErr_SetString(PyExc_TypeError, + "type() takes 1 or 3 arguments"); + return NULL; + } + } + + if (type->tp_new == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "cannot create '%.100s' instances", + type->tp_name); + return NULL; + } + + obj = type->tp_new(type, args, kwds); + obj = _Py_CheckFunctionResult(tstate, (PyObject*)type, obj, NULL); + if (obj == NULL) + return NULL; + + /* If the returned object is not an instance of type, + it won't be initialized. */ + if (!PyType_IsSubtype(Py_TYPE(obj), type)) + return obj; + + type = Py_TYPE(obj); + if (type->tp_init != NULL) { + int res = type->tp_init(obj, args, kwds); + if (res < 0) { + assert(_PyErr_Occurred(tstate)); + Py_DECREF(obj); + obj = NULL; + } + else { + assert(!_PyErr_Occurred(tstate)); + } + } + return obj; +} +``` + +### type_new + +type_new 的代码非常长, 这里就不复制粘贴了. type_new 的关键要点: + +- 检查基类列表. + + 如果用户在定义类时没有指定基类, 那么使用 PyBaseObject_Type. 如果用户指定了基类, 那么通过 best_base 计算出最合适的作为类的父类(tp_base). 
+ + best_base 的基本原理: 如果基类列表都是 Python 层面定义的类, 那么 best_base 返回 object. 如果基类列表中包含 CPython 层面的内部类, 那么情况比较复杂, 大体原则是返回最年轻的类. + + 一个不常见的异常: + + ```py + class C(collections.OrderedDict, collections.defaultdict): + pass + ``` + 异常信息为: TypeError: multiple bases have instance lay-out conflict. + + OrderedDict 和 defaultdict 都是用 C 实现的, 都继承自 dict. best_base 发现它们两者互不兼容, 所以报错. + + 详细信息可以阅读源码. + +- 处理 `__slots__`. + + - 检查是否允许定义 `__slots__`. 如果 `base->tp_itemsize != 0`, 那么就不允许定义非空的 `__slots__`. 例如: + + ```py + class C(tuple): + __slots__ = ['a', 'b'] + ``` + + tuple 不允许子类定义非空的 `__slots__`. + + - 检查 `__slots__` 中的字符串是否符合标识符命名规则. + + - 检查 `__slots__` 中的字符串是否出现在类成员中, 如果是的话报错. + +- 调用元类的 tp_alloc 为类分配内存. + + ```c + type = (PyTypeObject *)metatype->tp_alloc(metatype, nslots); + ``` + +- 设置 `ht_slots = slots`. + +- 设置 tp_flags. + + ```c + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE | + Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC; + ``` + +- 设置 tp_as_xxx(魔术方法) + + ```c + type->tp_as_async = &et->as_async; + type->tp_as_number = &et->as_number; + type->tp_as_sequence = &et->as_sequence; + type->tp_as_mapping = &et->as_mapping; + type->tp_as_buffer = &et->as_buffer; + ``` + +- 设置基类 + + ```c + type->tp_bases = bases; + bases = NULL; + Py_INCREF(base); + type->tp_base = base; + ``` + +- 设置 tp_dict + + ```c + Py_INCREF(dict); + type->tp_dict = dict; + ``` + +- 将 `__new__` 转换为 staticmethod. 这个我之前倒是不知道. 例如: + + ```py + class C: + def __new__(cls): + pass + + print(C.__dict__['__new__']) # <staticmethod object at 0x...> + ``` + + `__new__` 用于创建实例, 详情参考 [类型系统](./类型系统.md). + +- 将 `__init_subclass__` 和 `__class_getitem__` 转换为 classmethod. 
+ +- 初始化 PyMemberDef 列表: + + ```c + /* Add descriptors for custom slots from __slots__, or for __dict__ */ + mp = PyHeapType_GET_MEMBERS(et); + slotoffset = base->tp_basicsize; + if (et->ht_slots != NULL) { + for (i = 0; i < nslots; i++, mp++) { + mp->name = PyUnicode_AsUTF8( + PyTuple_GET_ITEM(et->ht_slots, i)); + if (mp->name == NULL) + goto error; + mp->type = T_OBJECT_EX; + mp->offset = slotoffset; + + /* __dict__ and __weakref__ are already filtered out */ + assert(strcmp(mp->name, "__dict__") != 0); + assert(strcmp(mp->name, "__weakref__") != 0); + + slotoffset += sizeof(PyObject *); + } + } + ``` + + slotoffset 指向实例(PyObject)内存的偏移量 + + 简单示意图: + + ``` + +------------------+-------------+-------------+-------------+ + | PyHeapTypeObject | PyMemberDef | PyMemberDef | PyMemberDef | + +------------------+-------------+-------------+-------------+ + ^ + | + mp + + slotoffset + | + v + +----------+--------+--------+--------+ + | PyObject | slot 1 | slot 2 | slot 3 | + +----------+--------+--------+--------+ + ``` + + 关于 PyMemberDef 的信息可以看[这里](https://docs.python.org/3/c-api/structures.html#c.PyMemberDef) + +- 设置 tp_dictoffset + + 关于 tp_dictoffset 可以看 [Python 的官方文档](https://docs.python.org/3/c-api/typeobj.html?highlight=slot#c.PyTypeObject.tp_dictoffset). + + tp_dictoffset 和 tp_dict 没有关系, tp_dict 是类自己的 `__dict__`, 而 tp_dictoffset 指的是该类的实例的 `__dict__` 在 PyObject 的偏移量. + + 通过 tp_dictoffset 便可以知道类的实例是否包含 `__dict__`. 
+ + ```c + if (add_dict) { + if (base->tp_itemsize) + type->tp_dictoffset = -(long)sizeof(PyObject *); + else + type->tp_dictoffset = slotoffset; + slotoffset += sizeof(PyObject *); + } + ``` + +- 设置 tp_weaklistoffset + + ```c + if (add_weak) { + assert(!base->tp_itemsize); + type->tp_weaklistoffset = slotoffset; + slotoffset += sizeof(PyObject *); + } + ``` + +- 设置 tp_basicsize, tp_itemsize + + ```c + type->tp_basicsize = slotoffset; + type->tp_itemsize = base->tp_itemsize; + ``` + +- 设置 tp_members(类成员) + + ```c + type->tp_members = PyHeapType_GET_MEMBERS(et); + ``` + + 详情见[这里](https://docs.python.org/3/c-api/typeobj.html?#c.PyTypeObject.tp_members) + +- 设置 tp_getset + + ```c + if (type->tp_weaklistoffset && type->tp_dictoffset) + type->tp_getset = subtype_getsets_full; + else if (type->tp_weaklistoffset && !type->tp_dictoffset) + type->tp_getset = subtype_getsets_weakref_only; + else if (!type->tp_weaklistoffset && type->tp_dictoffset) + type->tp_getset = subtype_getsets_dict_only; + else + type->tp_getset = NULL; + ``` + +- 设置 tp_getattro 和 tp_setattro + + ```c + if (type->tp_dictoffset != 0 || nslots> 0) { + if (base->tp_getattr == NULL && base->tp_getattro == NULL) + type->tp_getattro = PyObject_GenericGetAttr; + if (base->tp_setattr == NULL && base->tp_setattro == NULL) + type->tp_setattro = PyObject_GenericSetAttr; + } + ``` + +- 设置 GC 相关的操作方法 + + ```c + type->tp_dealloc = subtype_dealloc; + + /* Always override allocation strategy to use regular heap */ + type->tp_alloc = PyType_GenericAlloc; + type->tp_free = PyObject_GC_Del; + type->tp_traverse = subtype_traverse; + type->tp_clear = subtype_clear; + ``` + +- 设置 `__classcell__` + + ```c + cell = _PyDict_GetItemIdWithError(dict, &PyId___classcell__); + if (cell != NULL) { + /* At least one method requires a reference to its defining class */ + if (!PyCell_Check(cell)) { + PyErr_Format(PyExc_TypeError, + "__classcell__ must be a nonlocal cell, not %.200R", + Py_TYPE(cell)); + goto error; + } + 
PyCell_Set(cell, (PyObject *) type); + if (_PyDict_DelItemId(dict, &PyId___classcell__) < 0) { + goto error; + } + } + else if (PyErr_Occurred()) { + goto error; + } + ``` + + 详情见 [__classcell__](#__classcell__) + +- PyType_Ready + + ```c + /* Initialize the rest */ + if (PyType_Ready(type) < 0) + goto error; + ``` + + 详情见 [PyType_Ready](#PyType_Ready) + +- fixup_slot_dispatchers + + 详情见 [fixup_slot_dispatchers](#fixup_slot_dispatchers) + +- set_names + + 调用所有 descriptor 的 `__set_name__` 方法. + +- init_subclass + + 调用父类的 `__init_subclass__` 方法, 详情见 [__init_subclass__](https://docs.python.org/3/reference/datamodel.html#object.__init_subclass__). + + +### `__classcell__` + +经过我的测试, 发现如下代码会出现 `__classcell__`: + +``` +class C: + def __init__(self): + print(__class__) + return super().__init__() +C() +``` + +对应的字节码如下: + +``` + 4 0 LOAD_BUILD_CLASS + 2 LOAD_CONST 0 () + 4 LOAD_CONST 1 ('C') + 6 MAKE_FUNCTION 0 + 8 LOAD_CONST 1 ('C') + 10 CALL_FUNCTION 2 + 12 STORE_NAME 0 (C) + + 8 14 LOAD_NAME 0 (C) + 16 CALL_FUNCTION 0 + 18 POP_TOP + 20 LOAD_CONST 2 (None) + 22 RETURN_VALUE + +Disassembly of : + 4 0 LOAD_NAME 0 (__name__) + 2 STORE_NAME 1 (__module__) + 4 LOAD_CONST 0 ('C') + 6 STORE_NAME 2 (__qualname__) + + 5 8 LOAD_CLOSURE 0 (__class__) + 10 BUILD_TUPLE 1 + 12 LOAD_CONST 1 () + 14 LOAD_CONST 2 ('C.__init__') + 16 MAKE_FUNCTION 8 (closure) + 18 STORE_NAME 3 (__init__) + 20 LOAD_CLOSURE 0 (__class__) + 22 DUP_TOP + 24 STORE_NAME 4 (__classcell__) + 26 RETURN_VALUE + +Disassembly of : + 6 0 LOAD_GLOBAL 0 (print) + 2 LOAD_DEREF 0 (__class__) + 4 CALL_FUNCTION 1 + 6 POP_TOP + + 7 8 LOAD_GLOBAL 1 (super) + 10 CALL_FUNCTION 0 + 12 LOAD_METHOD 2 (__init__) + 14 CALL_METHOD 0 + 16 RETURN_VALUE +``` + +当代码中出现 super() 时, 会产生 `__classcell__` 和 `__class__`. + +注意, 类 C 对应的字节码对象的 `co_cellvars` 的长度为 1, 其中保存的就是 `__class__`. + +在上面的字节码中可以看出, `__init__` 方法是一个闭包函数, 在 `__init__` 的代码中可以通过 `__class__` 变量访问当前所在的类. 
+ +`__classcell__` 保存的就是 `__class__`, 当元类在创建类时, 如果发现 dict 包含 `__classcell__`, 那么会设置 `__classcell__` 为当前想要创建的类. + +`__classcell__` 和 `__class__` 都是 PyCellObject, PyCellObject 其实就是一个非常简单的对象, 其中的内部字段 ob_ref 指向实际的值. PyCellObject 起到一个类似 "惰性求值" 的作用. +当执行类 C 的字节码时 `__classcell__` 和 `__class__` 中的 ob_ref 都是 NULL, 直到元类创建类对象时, 发现 `__classcell__` 然后设置 ob_ref 指向当前创建的类. + +一些链接: + +- [https://stackoverflow.com/questions/41343263/provide-classcell-example-for-python-3-6-metaclass](https://stackoverflow.com/questions/41343263/provide-classcell-example-for-python-3-6-metaclass) + +### PyType_GenericAlloc + +```c +// Objects/typeobject.c + +/* 为类分配内存 + type 一般是 PyType_Type + nitems 一般等于 nslots(排除了 __dict__ 和 __weakref__) +*/ +PyObject * +PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) +{ + PyObject *obj; + // 可以简单认为 type 就是 PyType_Type, PyType_Type 的 tp_basicsize 等于 sizeof(PyHeapTypeObject), tp_itemsize 等于 sizeof(PyMemberDef) + // 那么 size = sizeof(PyHeapTypeObject) + (nitems + 1) * sizeof(PyMemberDef) + const size_t size = _PyObject_VAR_SIZE(type, nitems+1); + /* note that we need to add one, for the sentinel */ + + if (_PyType_IS_GC(type)) { + obj = _PyObject_GC_Malloc(size); + } + else { + obj = (PyObject *)PyObject_MALLOC(size); + } + + if (obj == NULL) { + return PyErr_NoMemory(); + } + + // '\0' 就等于 0 啊, 这里为什么要写 '\0' 呢? 这行代码看起来是非常久之前写的, 难道那时候编译器或者 memset 对 '\0' 和 0 的处理不一样. + memset(obj, '\0', size); + + // 设置 obj->ob_type 和 obj->ob_size + if (type->tp_itemsize == 0) { + (void)PyObject_INIT(obj, type); + } + else { + (void) PyObject_INIT_VAR((PyVarObject *)obj, type, nitems); + } + + if (_PyType_IS_GC(type)) { + _PyObject_GC_TRACK(obj); + } + return obj; +} +``` + +PyType_GenericAlloc 的核心功能就是分配内存. + +如果 `type->tp_itemsize != 0`, 那么分配的内存除了 PyHeapTypeObject, 还会在尾部添加额外的内存区域, 用于保存 `__slots__`. 同时在 ob_size 保存 `__slots__` 的数量. 
+ +简单的内存结构示意图: + +``` ++------------------+-------------+-------------+-------------+ +| PyHeapTypeObject | PyMemberDef | PyMemberDef | PyMemberDef | ++------------------+-------------+-------------+-------------+ + | <------ __slots__ ------> +``` + +通过 ob_size 便知道尾部是否保存有 `__slots__`. + + +### PyType_Ready + +工作流程: + +- 如果 type 已经初始化, 则马上退出(return 0). + + ```c + if (type->tp_flags & Py_TPFLAGS_READY) { + assert(_PyType_CheckConsistency(type)); + return 0; + } + ``` + +- 其它一些检查工作, 具体查看源码. + +- 设置 tp_base 和 tp_bases. 如果 tp_base 等于 NULL, 那么将其设置为 PyBaseObject_Type. 如果 tp_bases 等于 NULL, 那么将其设置为 `(tp_base, )`. + +- 设置 tp_dict. 如果 tp_dict 等于 NULL, 就设置为一个全新的空的 dict. + +- add_operators + + 往 tp_dict 中添加 `__xxx__`. 详情见 [add_operators](#add_operators) + +- add_methods + + add_methods 负责将 tp_methods 中的方法添加到 tp_dict 中. 详情见 [add_methods](#add_methods) + +- add_members + + 对于内部类, tp_members 指向一个静态的 PyMemberDef 数组. + + 对于用户定义的类, 如果定义了 `__slots__` 的话, tp_members 指向 PyHeapTypeObject 的结尾. 用户. + + add_members 将 PyMemberDef 转换为 PyMemberDescrObject, 然后保存在 tp_dict 中. + + 详情见 [https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_members](https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_members) + +- add_getset + +- mro_internal + + ```c + /* Calculate method resolution order, 忽略细节的话, 主要功能可以看 mro_implementation 和 pmerge 函数 */ + if (mro_internal(type, NULL) < 0) + goto error; + ``` + + 详情见 [类型系统之 MRO](./类型系统之%20MRO.md). + +- inherit_special + + 视情况继承 base 的: + + - tp_traverse + - tp_clear + - tp_new + - tp_basicsize + - tp_itemsize + - tp_weaklistoffset + - tp_dictoffset + + 以及设置 tp_flags + + ```c + /* Inherit special flags from dominant base */ + if (type->tp_base != NULL) + inherit_special(type, type->tp_base); + ``` + +- inherit_slots + + 简单来说, 如果 type 没有定义 tp_xxx, 但是 base 定义了, 那么将其复制到 type. 
+ + ``` + #define SLOTDEFINED(SLOT) \ + (base->SLOT != 0 && \ + (basebase == NULL || base->SLOT != basebase->SLOT)) + + #define COPYSLOT(SLOT) \ + if (!type->SLOT && SLOTDEFINED(SLOT)) type->SLOT = base->SLOT + ``` + + 继承的内容非常多, 具体看 inherit_slots 的代码. + + ```c + /* Initialize tp_dict properly */ + bases = type->tp_mro; + assert(bases != NULL); + assert(PyTuple_Check(bases)); + n = PyTuple_GET_SIZE(bases); + for (i = 1; i < n; i++) { + PyObject *b = PyTuple_GET_ITEM(bases, i); + if (PyType_Check(b)) + inherit_slots(type, (PyTypeObject *)b); + } + ``` + +- 继承基类的 tp_as_xxx. + + ```c + /* Some more special stuff */ + base = type->tp_base; + if (base != NULL) { + if (type->tp_as_async == NULL) + type->tp_as_async = base->tp_as_async; + if (type->tp_as_number == NULL) + type->tp_as_number = base->tp_as_number; + if (type->tp_as_sequence == NULL) + type->tp_as_sequence = base->tp_as_sequence; + if (type->tp_as_mapping == NULL) + type->tp_as_mapping = base->tp_as_mapping; + if (type->tp_as_buffer == NULL) + type->tp_as_buffer = base->tp_as_buffer; + } + ``` + +- 更新基类的 tp_subclasses + + ```c + /* Link into each base class's list of subclasses */ + bases = type->tp_bases; + n = PyTuple_GET_SIZE(bases); + for (i = 0; i < n; i++) { + PyObject *b = PyTuple_GET_ITEM(bases, i); + if (PyType_Check(b) && + add_subclass((PyTypeObject *)b, type) < 0) + goto error; + } + ``` + +- 更新 tp_flags + + ```c + /* All done -- set the ready flag */ + type->tp_flags = + (type->tp_flags & ~Py_TPFLAGS_READYING) | Py_TPFLAGS_READY; + ``` + +### fixup_slot_dispatchers + +来自代码的一段注释: + +```c +/* Common code for update_slots_callback() and fixup_slot_dispatchers(). + * + * This is meant to set a "slot" like type->tp_repr or + * type->tp_as_sequence->sq_concat by looking up special methods like + * __repr__ or __add__. The opposite (adding special methods from slots) is + * done by add_operators(), called from PyType_Ready(). 
Since update_one_slot() + * calls PyType_Ready() if needed, the special methods are already in place. + * + * The special methods corresponding to each slot are defined in the "slotdef" + * array. Note that one slot may correspond to multiple special methods and vice + * versa. For example, tp_richcompare uses 6 methods __lt__, ..., __ge__ and + * tp_as_number->nb_add uses __add__ and __radd__. In the other direction, + * __add__ is used by the number and sequence protocols and __getitem__ by the + * sequence and mapping protocols. This causes a lot of complications. + * + * In detail, update_one_slot() does the following: + * + * First of all, if the slot in question does not exist, return immediately. + * This can happen for example if it's tp_as_number->nb_add but tp_as_number + * is NULL. + * + * For the given slot, we loop over all the special methods with a name + * corresponding to that slot (for example, for tp_descr_set, this would be + * __set__ and __delete__) and we look up these names in the MRO of the type. + * If we don't find any special method, the slot is set to NULL (regardless of + * what was in the slot before). + * + * Suppose that we find exactly one special method. If it's a wrapper_descriptor + * (i.e. a special method calling a slot, for example str.__repr__ which calls + * the tp_repr for the 'str' class) with the correct name ("__repr__" for + * tp_repr), for the right class, calling the right wrapper C function (like + * wrap_unaryfunc for tp_repr), then the slot is set to the slot that the + * wrapper_descriptor originally wrapped. For example, a class inheriting + * from 'str' and not redefining __repr__ will have tp_repr set to the tp_repr + * of 'str'. + * In all other cases where the special method exists, the slot is set to a + * wrapper calling the special method. 
There is one exception: if the special + * method is a wrapper_descriptor with the correct name but the type has + * precisely one slot set for that name and that slot is not the one that we + * are updating, then NULL is put in the slot (this exception is the only place + * in update_one_slot() where the *existing* slots matter). + * + * When there are multiple special methods for the same slot, the above is + * applied for each special method. As long as the results agree, the common + * resulting slot is applied. If the results disagree, then a wrapper for + * the special methods is installed. This is always safe, but less efficient + * because it uses method lookup instead of direct C calls. + * + * There are some further special cases for specific slots, like supporting + * __hash__ = None for tp_hash and special code for tp_new. + * + * When done, return a pointer to the next slotdef with a different offset, + * because that's convenient for fixup_slot_dispatchers(). This function never + * sets an exception: if an internal error happens (unlikely), it's ignored. */ + ``` + +fixup_slot_dispatchers 和 add_operators 可以看作是两个相反的操作, 彼此之间联系紧密, 两者一起阅读有助于理解. + +fixup_slot_dispatchers 根据 slotdefs 设置 tp_xxx. add_operators 根据 slotdefs 往 tp_dict 中添加 `__xxx__` 方法. + + +```c +static slotdef * +update_one_slot(PyTypeObject *type, slotdef *p) +{ + PyObject *descr; + PyWrapperDescrObject *d; + void *generic = NULL, *specific = NULL; + int use_generic = 0; + int offset = p->offset; + int error; + void **ptr = slotptr(type, offset); + // int debug = PySys_GetObject("_debug") != NULL ? 1 : 0; + + // if(debug){ + // printf("update_one_slot: %s\n", p->name); + // if(ptr == NULL){ + // printf("update_one_slot: ptr == NULL\n"); + // } + // } + if (ptr == NULL) { + // 跳过相邻的 offset 相同的 slot + do { + ++p; + } while (p->offset == offset); + return p; + } + /* We may end up clearing live exceptions below, so make sure it's ours. 
*/ + assert(!PyErr_Occurred()); + do { + /* Use faster uncached lookup as we won't get any cache hits during type setup. */ + descr = find_name_in_mro(type, p->name_strobj, &error); + // if(debug){ + // printf("update_one_slot: descr = %p\n", descr); + // } + if (descr == NULL) { + if (error == -1) { + /* It is unlikely but not impossible that there has been an exception + during lookup. Since this function originally expected no errors, + we ignore them here in order to keep up the interface. */ + PyErr_Clear(); + } + // 针对 tp_iternext(__next__) 做特殊处理 + if (ptr == (void**)&type->tp_iternext) { + specific = (void *)_PyObject_NextNotImplemented; + } + // 继续下一条 slotdef, 如果 offset 不相等的话退出循环. + continue; + } + + if (Py_IS_TYPE(descr, &PyWrapperDescr_Type) && + ((PyWrapperDescrObject *)descr)->d_base->name_strobj == p->name_strobj) { + // 如果 descr 是 PyWrapperDescr_Type 而且名字和 slotdef 相同 + void **tptr = resolve_slotdups(type, p->name_strobj); + // tptr == NULL 说明 type 中定义了多个和 p->name_strobj 对应的方法 + if (tptr == NULL || tptr == ptr) + generic = p->function; + d = (PyWrapperDescrObject *)descr; + // if(debug){ + // printf("update_one_slot: tptr = %p, descr = %s %p %p %d\n", tptr, d->d_base->name, d->d_base->wrapper, p->wrapper, PyType_IsSubtype(type, PyDescr_TYPE(d))); + // } + if ((specific == NULL || specific == d->d_wrapped) && + d->d_base->wrapper == p->wrapper && + PyType_IsSubtype(type, PyDescr_TYPE(d))) + { + specific = d->d_wrapped; + // if(debug){ + // printf("update_one_slot: specific 1\n"); + // } + } + else { + /* We cannot use the specific slot function because either + - it is not unique: there are multiple methods for this + slot and they conflict + - the signature is wrong (as checked by the ->wrapper + comparison above) + - it's wrapping the wrong class + */ + use_generic = 1; + } + } + else if (Py_IS_TYPE(descr, &PyCFunction_Type) && + PyCFunction_GET_FUNCTION(descr) == + (PyCFunction)(void(*)(void))tp_new_wrapper && + ptr == (void**)&type->tp_new) + { + /* 
The __new__ wrapper is not a wrapper descriptor, + so must be special-cased differently. + If we don't do this, creating an instance will + always use slot_tp_new which will look up + __new__ in the MRO which will call tp_new_wrapper + which will look through the base classes looking + for a static base and call its tp_new (usually + PyType_GenericNew), after performing various + sanity checks and constructing a new argument + list. Cut all that nonsense short -- this speeds + up instance creation tremendously. */ + specific = (void *)type->tp_new; + /* XXX I'm not 100% sure that there isn't a hole + in this reasoning that requires additional + sanity checks. I'll buy the first person to + point out a bug in this reasoning a beer. */ + // if(debug){ + // printf("update_one_slot: specific 2 \n"); + // } + } + else if (descr == Py_None && + ptr == (void**)&type->tp_hash) { + /* We specifically allow __hash__ to be set to None + to prevent inheritance of the default + implementation from object.__hash__ */ + specific = (void *)PyObject_HashNotImplemented; + // if(debug){ + // printf("update_one_slot: specific 3 \n"); + // } + } + else { + use_generic = 1; + generic = p->function; + // if(debug){ + // printf("update_one_slot: generic\n"); + // } + } + } while ((++p)->offset == offset); + // if(debug){ + // printf("update_one_slot: specific = %p, generic = %p, use_generic = %d\n", specific, generic, use_generic); + // } + if (specific && !use_generic) + *ptr = specific; + else + *ptr = generic; + return p; +} + +static void +fixup_slot_dispatchers(PyTypeObject *type) +{ + slotdef *p; + + assert(slotdefs_initialized); + for (p = slotdefs; p->name; ) + p = update_one_slot(type, p); +} +``` + +工作流程: + +- 如果 slotdef 的 offset 对应的 ptr 等于 NULL, 说明 slotdef 的 offset 指向的是 tp_as_xxx 内部, 而 tp_as_xxx 等于 NULL. 在这种情况下, 跳过接下来相邻的 offset 相同的 slotdef. + +- 调用 find_name_in_mro, 从 tp_mro 的各个基类的 tp_dict 中查找 `slotdef->name_strobj` 方法. 如果没有找到则继续下一个 slotdef. 
+ + 如果找到了, 则判断该方法的类型, 根据情况设置 specific 或者 generic. + +- 最终将 specific 或者 generic 设置给 `*ptr`. + +specific 可能等于 `descr->d_wrapped`, generic 可能等于 `slotdef->function`. + +#### 我的一些观察 + +对于 CPython 内部定义的类来说, 在 PyType_Ready 中调用 add_operators 函数, add_operators 函数根据 slotdefs 添加对应的 `__xxx__` 方法到 tp_dict 中. 之后不会再调用 fixup_slot_dispatchers. + +对于用户定义的类来说, 在 type_new 中先调用 PyType_Ready, 接着调用 fixup_slot_dispatchers. add_operators 设置了 tp_dict, inherit_slots 继承了基类的 tp_xxx 方法, 接着 +fixup_slot_dispatchers 又反过来设置 tp_xxx 方法. + +为什么要调用 fixup_slot_dispatchers 呢? + +如果用户没有在类中自定义 `__xxx__` 方法, 我觉得是没必要调用 fixup_slot_dispatchers 的. + +如果用户定义了 `__xxx__` 方法, 那么该方法的优先级肯定是最高的, fixup_slot_dispatchers 此时会将 `*ptr` 设置为 `slotdef->function`(generic). `slotdef->function` 会调用自定义的 `__xxx__` 方法. + +以 `__init__` 为例, 如果用户自定义了 `__init__` 方法, 那么 tp_init 会被设置为 slot_tp_init. + +slotdef: + +```c + FLSLOT("__init__", tp_init, slot_tp_init, (wrapperfunc)(void(*)(void))wrap_init, + "__init__($self, /, *args, **kwargs)\n--\n\n" + "Initialize self. See help(type(self)) for accurate signature.", + PyWrapperFlag_KEYWORDS), +``` + +slot_tp_init: + +```c +static int +slot_tp_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyThreadState *tstate = _PyThreadState_GET(); + + _Py_IDENTIFIER(__init__); + int unbound; + PyObject *meth = lookup_method(self, &PyId___init__, &unbound); + if (meth == NULL) { + return -1; + } + + PyObject *res; + if (unbound) { + res = _PyObject_Call_Prepend(tstate, meth, self, args, kwds); + } + else { + res = _PyObject_Call(tstate, meth, args, kwds); + } + Py_DECREF(meth); + if (res == NULL) + return -1; + if (res != Py_None) { + PyErr_Format(PyExc_TypeError, + "__init__() should return None, not '%.200s'", + Py_TYPE(res)->tp_name); + Py_DECREF(res); + return -1; + } + Py_DECREF(res); + return 0; +} +``` + +### add_operators + +add_operators 的主要功能是往 tp_dict 中添加 `__xxx__` 方法. 
+ +遍历 slotdefs 数组, 如果 `slotdef->offset` 在 type 对应的 slot 存在, 而且`slotdef->name_strobj` 不存在于 tp_dict, 那么向 tp_dict 中添加一个 PyWrapperDescrObject. + +如果用户自定义了 `__xxx__` 方法, 那么 add_operators 不会往 tp_dict 中添加东西. 其实 add_operators 主要针对 CPython 内部的类. + +```c +//Objects/typeobject.c + +/* This function is called by PyType_Ready() to populate the type's + dictionary with method descriptors for function slots. For each + function slot (like tp_repr) that's defined in the type, one or more + corresponding descriptors are added in the type's tp_dict dictionary + under the appropriate name (like __repr__). Some function slots + cause more than one descriptor to be added (for example, the nb_add + slot adds both __add__ and __radd__ descriptors) and some function + slots compete for the same descriptor (for example both sq_item and + mp_subscript generate a __getitem__ descriptor). + + In the latter case, the first slotdef entry encountered wins. Since + slotdef entries are sorted by the offset of the slot in the + PyHeapTypeObject, this gives us some control over disambiguating + between competing slots: the members of PyHeapTypeObject are listed + from most general to least general, so the most general slot is + preferred. In particular, because as_mapping comes before as_sequence, + for a type that defines both mp_subscript and sq_item, mp_subscript + wins. + + This only adds new descriptors and doesn't overwrite entries in + tp_dict that were previously defined. The descriptors contain a + reference to the C function they must call, so that it's safe if they + are copied into a subtype's __dict__ and the subtype has a different + C function in its slot -- calling the method defined by the + descriptor will call the C function that was used to create it, + rather than the C function present in the slot when it is called. 
+ (This is important because a subtype may have a C function in the + slot that calls the method from the dictionary, and we want to avoid + infinite recursion here.) */ + +static int +add_operators(PyTypeObject *type) +{ + PyObject *dict = type->tp_dict; + slotdef *p; + PyObject *descr; + void **ptr; + + assert(slotdefs_initialized); + for (p = slotdefs; p->name; p++) { + if (p->wrapper == NULL) + continue; + ptr = slotptr(type, p->offset); + if (!ptr || !*ptr) + continue; + // 如果在 tp_dict 中已经存在, 那么继续下一个 slotdef + if (PyDict_GetItemWithError(dict, p->name_strobj)) + continue; + if (PyErr_Occurred()) { + return -1; + } + if (*ptr == (void *)PyObject_HashNotImplemented) { + /* Classes may prevent the inheritance of the tp_hash + slot by storing PyObject_HashNotImplemented in it. Make it + visible as a None value for the __hash__ attribute. */ + if (PyDict_SetItem(dict, p->name_strobj, Py_None) < 0) + return -1; + } + else { + descr = PyDescr_NewWrapper(type, p, *ptr); + if (descr == NULL) + return -1; + if (PyDict_SetItem(dict, p->name_strobj, descr) < 0) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + } + } + if (type->tp_new != NULL) { + if (add_tp_new_wrapper(type) < 0) + return -1; + } + return 0; +} +``` + +添加完成后 tp_dict 和 PyWrapperDescrObject 的示意图: + +``` + +---------+ + +--->| getattr |<-------------------------------------------+ + | +---------+ | + | | ++--------------------+ | +-------------+ +----------------------+ | +| | | +--->| dict | +--->| PyWrapperDescrObject | | +| PyHeapTypeObject | | | +-------------+ | +----------------------+ | +---------------+ +| | | | | | | | d_common |----+--->| PyDescrObject | ++--------------------+ | | +-------------+ | +----------------------+ | +---------------+ +| tp_call |-+ | | __call__ |----+ | d_base | | | d_type +--+ ++--------------------+ | +-------------+ +----------------------| | +---------------+ | +| tp_dict |----+ | | | d_wrapped +----+ | d_name | | ++--------------------+ +-------------+ 
+----------------------+ +---------------+ | +| ... | | | | ++--------------------+ +-------------+ | + ^ | + | | + +-------------------------------------------------------------------------------------------------+ +``` + +以 tuple 为例, add_operators 会添加以下方法: + +``` +add_operators: __repr__ +add_operators: __hash__ +add_operators: __getattribute__ +add_operators: __lt__ +add_operators: __le__ +add_operators: __eq__ +add_operators: __ne__ +add_operators: __gt__ +add_operators: __ge__ +add_operators: __iter__ +add_operators: __len__ +add_operators: __getitem__ +add_operators: __add__ +add_operators: __mul__ +add_operators: __rmul__ +add_operators: __contains__ +``` + +以 tuple 的 `__hash__` 为例, 当调用 `__hash__` 方法时, Python 首先找到的是 PyWrapperDescrObject, 接着的调用链为: `Py_TYPE(descr)->tp_call` -> `wrapperdescr_call` -> `descr->d_base->wrapper` -> `descr->d_wrapped`. + +关于 PyDescr_NewWrapper 的信息可以查看[这里](./描述符(descriptor).md). + +### add_methods + +add_methods 负责将 tp_methods 中的方法添加到 tp_dict 中, add_methods 只对 CPython 内部定义的类有效(当然前提是 tp_methods 不等于 NULL), 因为用户定义的类的 tp_methods 等于 NULL. 
+ +从 ml_flags 可以知道该方法属于什么类型, 常见的类型有: + +- METH_VARARGS +- METH_KEYWORDS +- METH_NOARGS +- METH_O +- METH_CLASS +- METH_STATIC + +add_methods 的代码如下: + +```c +//Objects/typeobject.c + +/* Add the methods from tp_methods to the __dict__ in a type object */ +static int add_methods(PyTypeObject *type, PyMethodDef *meth) { + PyObject *dict = type->tp_dict; + PyObject *name; + + for (; meth->ml_name != NULL; meth++) { + PyObject *descr; + int err; + int isdescr = 1; + if (meth->ml_flags & METH_CLASS) { + if (meth->ml_flags & METH_STATIC) { + PyErr_SetString(PyExc_ValueError, + "method cannot be both class and static"); + return -1; + } + descr = PyDescr_NewClassMethod(type, meth); + } else if (meth->ml_flags & METH_STATIC) { + PyObject *cfunc = PyCFunction_NewEx(meth, (PyObject *)type, NULL); + if (cfunc == NULL) return -1; + descr = PyStaticMethod_New(cfunc); + isdescr = 0; // PyStaticMethod is not PyDescrObject + Py_DECREF(cfunc); + } else { + descr = PyDescr_NewMethod(type, meth); + } + if (descr == NULL) return -1; + + if (isdescr) { + name = PyDescr_NAME(descr); + } else { + name = PyUnicode_FromString(meth->ml_name); + if (name == NULL) { + Py_DECREF(descr); + return -1; + } + } + + if (!(meth->ml_flags & METH_COEXIST)) { + if (PyDict_GetItemWithError(dict, name)) { + if (!isdescr) { + Py_DECREF(name); + } + Py_DECREF(descr); + continue; + } else if (PyErr_Occurred()) { + if (!isdescr) { + Py_DECREF(name); + } + return -1; + } + } + err = PyDict_SetItem(dict, name, descr); + if (!isdescr) { + Py_DECREF(name); + } + Py_DECREF(descr); + if (err < 0) return -1; + } + return 0; +} +``` + +## 实例 + +### 一个普通的类 + +```py +class C: + pass +``` + +- `C.__basicsize__ = 32` + +- `C.__itemsize__ = 0` + +- `C.__dictoffset__ = 16` + +- `C.__weakrefoffset__ = 24` + +C 的实例的内存布局: + +``` ++-----------+---------+----------+-------------+ +| ob_refcnt | ob_type | __dict__ | __weakref__ | ++-----------+---------+----------+-------------+ + 8 8 8 8 +``` + +### 定义了 __slots__ 的类 + +```py 
+class C: + __slots__ = ('a', 'b', '__dict__') +``` + +- `C.__basicsize__ = 40` + +- `C.__itemsize__ = 0` + +- `C.__dictoffset__ = 32` + +- `C.__weakrefoffset__ = 0` (`__slots__` 中没有 `'__weakref__'`, 因此实例不包含 `__weakref__`) + +C 的实例的内存布局: + +``` ++-----------+---------+---+---+----------+ +| ob_refcnt | ob_type | a | b | __dict__ | ++-----------+---------+---+---+----------+ + 8 8 8 8 8 +``` + diff --git "a/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円261円236円346円200円247円350円256円277円351円227円256円.md" "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円261円236円346円200円247円350円256円277円351円227円256円.md" new file mode 100644 index 0000000..cadc7d0 --- /dev/null +++ "b/347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円345円261円236円346円200円247円350円256円277円351円227円256円.md" @@ -0,0 +1,932 @@ +# 属性访问 + +这里的属性泛指实例属性, 实例方法, 类属性, 类方法. + +属性访问相关的字节码有: LOAD_ATTR, LOAD_METHOD + +### 读取属性 (LOAD_ATTR) + +无论是使用类访问属性, 还是使用实例访问属性, 都是使用 LOAD_ATTR. + +```c +case TARGET(LOAD_ATTR): { + PyObject *name = GETITEM(names, oparg); + PyObject *owner = TOP(); + PyObject *res = PyObject_GetAttr(owner, name); + Py_DECREF(owner); + SET_TOP(res); + if (res == NULL) + goto error; + DISPATCH(); +} +``` + +```c +PyObject * +PyObject_GetAttr(PyObject *v, PyObject *name) +{ + PyTypeObject *tp = Py_TYPE(v); + + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not'%.200s'", + Py_TYPE(name)->tp_name); + return NULL; + } + if (tp->tp_getattro != NULL) + return (*tp->tp_getattro)(v, name); + // tp_getattr 已经废弃了 + if (tp->tp_getattr != NULL) { + const char *name_str = PyUnicode_AsUTF8(name); + if (name_str == NULL) + return NULL; + return (*tp->tp_getattr)(v, (char *)name_str); + } + PyErr_Format(PyExc_AttributeError, + "'%.50s' object has no attribute '%U'", + tp->tp_name, name); + return NULL; +} +``` + +### 通过实例访问属性 + +如果用户没有自定义 `__getattribute__` 或者 `__getattr__`, 那么 tp_getattro 继承自 object, object.tp_getattro 等于 
PyObject_GenericGetAttr. + +```c +PyObject * +_PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, + PyObject *dict, int suppress) +{ + /* Make sure the logic of _PyObject_GetMethod is in sync with + this method. + + When suppress=1, this function suppress AttributeError. + */ + + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr = NULL; + PyObject *res = NULL; + descrgetfunc f; + Py_ssize_t dictoffset; + PyObject **dictptr; + + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not'%.200s'", + Py_TYPE(name)->tp_name); + return NULL; + } + Py_INCREF(name); + + if (tp->tp_dict == NULL) { + if (PyType_Ready(tp) < 0) + goto done; + } + // 第一步, 尝试从 tp_mro 中查找描述符属性, 如果描述符存在而且是数据描述符 (PyDescr_IsData), + // 那么调用该描述符的 tp_descr_get 方法, 接着返回 tp_descr_get 的返回值. + + // 从类的 tp_mro 中查找 name + descr = _PyType_Lookup(tp, name); + + f = NULL; + if (descr != NULL) { + Py_INCREF(descr); + f = Py_TYPE(descr)->tp_descr_get; + if (f != NULL && PyDescr_IsData(descr)) { + res = f(descr, obj, (PyObject *)Py_TYPE(obj)); + if (res == NULL && suppress && + PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } + goto done; + } + } + + // 第二步, 尝试从 obj 的 __dict__ 中读取属性. + // 如果参数中的 dict 等于 NULL, 那么尝试根据 tp->tp_dictoffset 找到 obj 中的 __dict__. + // 如果从 dict 中找到对应的属性, 那么返回它. + if (dict == NULL) { + /* Inline _PyObject_GetDictPtr */ + dictoffset = tp->tp_dictoffset; + if (dictoffset != 0) { + if (dictoffset < 0) { + // 如果 dictoffset <0, 意味着 tp->tp_itemsize != 0. + // dictoffset 在 typeobject.c 的 type_new 函数被设置. 代码如下: + // if (add_dict) { + // if (base->tp_itemsize) + // type->tp_dictoffset = -(long)sizeof(PyObject *); + // else + // type->tp_dictoffset = slotoffset; + // slotoffset += sizeof(PyObject *); + // } + // tp->tp_itemsize != 0, 说明 obj 是 PyVarObject, 从而具有 ob_size 属性. 
+ Py_ssize_t tsize = Py_SIZE(obj); + if (tsize < 0) { + tsize = -tsize; + } + size_t size = _PyObject_VAR_SIZE(tp, tsize); + _PyObject_ASSERT(obj, size <= PY_SSIZE_T_MAX); + + dictoffset += (Py_ssize_t)size; + _PyObject_ASSERT(obj, dictoffset> 0); + _PyObject_ASSERT(obj, dictoffset % SIZEOF_VOID_P == 0); + } + dictptr = (PyObject **) ((char *)obj + dictoffset); + dict = *dictptr; + } + } + if (dict != NULL) { + Py_INCREF(dict); + res = PyDict_GetItemWithError(dict, name); + if (res != NULL) { + Py_INCREF(res); + Py_DECREF(dict); + goto done; + } + else { + Py_DECREF(dict); + if (PyErr_Occurred()) { + if (suppress && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } + else { + goto done; + } + } + } + } + + // 第三步, 到了这一步, 说明 dict 中不存在对应的属性. + // 尝试前面第一步获取的描述符, 调用 tp_descr_get 方法, 接着返回 tp_descr_get 的返回值. + if (f != NULL) { + res = f(descr, obj, (PyObject *)Py_TYPE(obj)); + if (res == NULL && suppress && + PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } + goto done; + } + + // 第四步, 到了这一步, 说明描述符既没设置 tp_descr_get, 也没设置 tp_descr_set. 只好直接返回该描述符. 
+ if (descr != NULL) { + res = descr; + descr = NULL; + goto done; + } + + if (!suppress) { + PyErr_Format(PyExc_AttributeError, + "'%.50s' object has no attribute '%U'", + tp->tp_name, name); + } + done: + Py_XDECREF(descr); + Py_DECREF(name); + return res; +} + +PyObject * +PyObject_GenericGetAttr(PyObject *obj, PyObject *name) +{ + return _PyObject_GenericGetAttrWithDict(obj, name, NULL, 0); +} +``` + +```c +PyObject * +_PyType_Lookup(PyTypeObject *type, PyObject *name) +{ + PyObject *res; + int error; + +#ifdef MCACHE + if (MCACHE_CACHEABLE_NAME(name) && + _PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { + /* fast path */ + unsigned int h = MCACHE_HASH_METHOD(type, name); + if (method_cache[h].version == type->tp_version_tag && + method_cache[h].name == name) { +#if MCACHE_STATS + method_cache_hits++; +#endif + return method_cache[h].value; + } + } +#endif + + /* We may end up clearing live exceptions below, so make sure it's ours. */ + assert(!PyErr_Occurred()); + + res = find_name_in_mro(type, name, &error); + /* Only put NULL results into cache if there was no error. */ + if (error) { + /* It's not ideal to clear the error condition, + but this function is documented as not setting + an exception, and I don't want to change that. + E.g., when PyType_Ready() can't proceed, it won't + set the "ready" flag, so future attempts to ready + the same type will call it again -- hopefully + in a context that propagates the exception out. 
+ */ + if (error == -1) { + PyErr_Clear(); + } + return NULL; + } + +#ifdef MCACHE + if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(type)) { + unsigned int h = MCACHE_HASH_METHOD(type, name); + method_cache[h].version = type->tp_version_tag; + method_cache[h].value = res; /* borrowed */ + Py_INCREF(name); + assert(((PyASCIIObject *)(name))->hash != -1); +#if MCACHE_STATS + if (method_cache[h].name != Py_None && method_cache[h].name != name) + method_cache_collisions++; + else + method_cache_misses++; +#endif + Py_SETREF(method_cache[h].name, name); + } +#endif + return res; +} +``` + +1. 尝试从 mro 列表查找属性 (_PyType_Lookup),假设查找结果为 descr + + - 如果 descr 是 data descriptor,则调用 descr.ob_type.tp_descr_get 获取真正的属性。读取属性结束。 + +2. 尝试从实例的 `__dict__` 中查找属性,如果查找成功,读取属性结束。 + +3. 如果 descr.ob_type.tp_descr_get 不等于 NULL,则调用 descr.ob_type.tp_descr_get 获取真正的属性。读取属性结束。 + +4. 如果 descr 不等于 NULL,则返回 descr。读取属性结束。 + +5. 返回 NULL。 + +### 属性缓存 + +```c +#ifdef MCACHE +/* Support type attribute cache */ + +/* The cache can keep references to the names alive for longer than + they normally would. This is why the maximum size is limited to + MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large + strings are used as attribute names. 
*/ +#define MCACHE_MAX_ATTR_SIZE 100 +#define MCACHE_SIZE_EXP 12 +#define MCACHE_HASH(version, name_hash) \ + (((unsigned int)(version) ^ (unsigned int)(name_hash)) \ + & ((1 << MCACHE_SIZE_EXP) - 1)) + +#define MCACHE_HASH_METHOD(type, name) \ + MCACHE_HASH((type)->tp_version_tag, \ + ((PyASCIIObject *)(name))->hash) +#define MCACHE_CACHEABLE_NAME(name) \ + PyUnicode_CheckExact(name) && \ + PyUnicode_IS_READY(name) && \ + PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE + +struct method_cache_entry { + unsigned int version; + PyObject *name; /* reference to exactly a str or None */ + PyObject *value; /* borrowed */ +}; + +static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP]; +static unsigned int next_version_tag = 0; +#endif +``` + +### 用户自定义 __getattribute__ + +如果用户自定义了 `__getattribute__` 或者 `__getattr__`, 那么 tp_getattro 会被设置为 slot_tp_getattr_hook. 详情见 fixup_slot_dispatchers. + +```c +/* There are two slot dispatch functions for tp_getattro. + + - slot_tp_getattro() is used when __getattribute__ is overridden + but no __getattr__ hook is present; + + - slot_tp_getattr_hook() is used when a __getattr__ hook is present. + + The code in update_one_slot() always installs slot_tp_getattr_hook(); this + detects the absence of __getattr__ and then installs the simpler slot if + necessary. 
*/ + +static PyObject * +slot_tp_getattro(PyObject *self, PyObject *name) +{ + PyObject *stack[2] = {self, name}; + return vectorcall_method(&PyId___getattribute__, stack, 2); +} + +static PyObject * +call_attribute(PyObject *self, PyObject *attr, PyObject *name) +{ + PyObject *res, *descr = NULL; + descrgetfunc f = Py_TYPE(attr)->tp_descr_get; + + if (f != NULL) { + descr = f(attr, self, (PyObject *)(Py_TYPE(self))); + if (descr == NULL) + return NULL; + else + attr = descr; + } + res = PyObject_CallOneArg(attr, name); + Py_XDECREF(descr); + return res; +} + +static PyObject * +slot_tp_getattr_hook(PyObject *self, PyObject *name) +{ + PyTypeObject *tp = Py_TYPE(self); + PyObject *getattr, *getattribute, *res; + _Py_IDENTIFIER(__getattr__); + + /* speed hack: we could use lookup_maybe, but that would resolve the + method fully for each attribute lookup for classes with + __getattr__, even when the attribute is present. So we use + _PyType_Lookup and create the method only when needed, with + call_attribute. */ + getattr = _PyType_LookupId(tp, &PyId___getattr__); + if (getattr == NULL) { + /* No __getattr__ hook: use a simpler dispatcher */ + tp->tp_getattro = slot_tp_getattro; + return slot_tp_getattro(self, name); + } + Py_INCREF(getattr); + /* speed hack: we could use lookup_maybe, but that would resolve the + method fully for each attribute lookup for classes with + __getattr__, even when self has the default __getattribute__ + method. So we use _PyType_Lookup and create the method only when + needed, with call_attribute. */ + // 如果用户自定义了 __getattribute__ 方法, 那么 getattribute 不可能是 PyWrapperDescr_Type 类型. + // 如果用户没有自定义 __getattribute__ 方法, 那么 getattribute 是父类中定义的 __getattribute__ 方法. + getattribute = _PyType_LookupId(tp, &PyId___getattribute__); + // 先尝试使用 getattribute 获取属性, 如果失败再使用 getattr. + // getattribute 应该不可能等于 NULL, 因为所有的类都继承自 object, 而 object 的 tp_dict 中包含 __getattribute__. 
+ if (getattribute == NULL || + (Py_IS_TYPE(getattribute, &PyWrapperDescr_Type) && + ((PyWrapperDescrObject *)getattribute)->d_wrapped == + (void *)PyObject_GenericGetAttr)) + // 进入这里说明用户没有自定义 __getattribute__ 方法, 很有可能直接继承 object. + res = PyObject_GenericGetAttr(self, name); + else { + Py_INCREF(getattribute); + // 调用用户自定义的 __getattribute__ 方法 + res = call_attribute(self, getattribute, name); + Py_DECREF(getattribute); + } + if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + // __getattribute__ 失败, 尝试使用 getattr + res = call_attribute(self, getattr, name); + } + Py_DECREF(getattr); + return res; +} +``` + +### 通过类访问属性 + +通过类访问属性时, 此时应该调用元类的 tp_getattro 访问属性. 以 PyType_Type 为例, PyType_Type.tp_getattro 等于 type_getattro. + +```c +/* This is similar to PyObject_GenericGetAttr(), + but uses _PyType_Lookup() instead of just looking in type->tp_dict. */ +static PyObject * +type_getattro(PyTypeObject *type, PyObject *name) +{ + PyTypeObject *metatype = Py_TYPE(type); + PyObject *meta_attribute, *attribute; + descrgetfunc meta_get; + PyObject* res; + + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not'%.200s'", + Py_TYPE(name)->tp_name); + return NULL; + } + + /* Initialize this type (we'll assume the metatype is initialized) */ + if (type->tp_dict == NULL) { + if (PyType_Ready(type) < 0) + return NULL; + } + + /* No readable descriptor found yet */ + meta_get = NULL; + + /* Look for the attribute in the metatype */ + meta_attribute = _PyType_Lookup(metatype, name); + + if (meta_attribute != NULL) { + Py_INCREF(meta_attribute); + meta_get = Py_TYPE(meta_attribute)->tp_descr_get; + + if (meta_get != NULL && PyDescr_IsData(meta_attribute)) { + /* Data descriptors implement tp_descr_set to intercept + * writes. Assume the attribute is not overridden in + * type's tp_dict (and bases): call the descriptor now. 
+ */ + res = meta_get(meta_attribute, (PyObject *)type, + (PyObject *)metatype); + Py_DECREF(meta_attribute); + return res; + } + } + + /* No data descriptor found on metatype. Look in tp_dict of this + * type and its bases */ + attribute = _PyType_Lookup(type, name); + if (attribute != NULL) { + /* Implement descriptor functionality, if any */ + Py_INCREF(attribute); + descrgetfunc local_get = Py_TYPE(attribute)->tp_descr_get; + + Py_XDECREF(meta_attribute); + + if (local_get != NULL) { + /* NULL 2nd argument indicates the descriptor was + * found on the target object itself (or a base) */ + res = local_get(attribute, (PyObject *)NULL, + (PyObject *)type); + Py_DECREF(attribute); + return res; + } + + return attribute; + } + + /* No attribute found in local __dict__ (or bases): use the + * descriptor from the metatype, if any */ + if (meta_get != NULL) { + PyObject *res; + res = meta_get(meta_attribute, (PyObject *)type, + (PyObject *)metatype); + Py_DECREF(meta_attribute); + return res; + } + + /* If an ordinary attribute was found on the metatype, return it now */ + if (meta_attribute != NULL) { + return meta_attribute; + } + + /* Give up */ + PyErr_Format(PyExc_AttributeError, + "type object'%.50s'has no attribute'%U'", + type->tp_name, name); + return NULL; +} +``` + +看完上面的代码, 可以发现和 PyObject_GenericGetAttr(_PyObject_GenericGetAttrWithDict) 非常类似. 获取实例属性涉及实例和实例所属的类, 获取类属性涉及类和类所属的元类. 
+ +### 设置属性 (STORE_ATTR) + +STORE_ATTR 的处理器代码: + +```c + case TARGET(STORE_ATTR): { + PyObject *name = GETITEM(names, oparg); + PyObject *owner = TOP(); + PyObject *v = SECOND(); + int err; + STACK_SHRINK(2); + err = PyObject_SetAttr(owner, name, v); + Py_DECREF(v); + Py_DECREF(owner); + if (err != 0) + goto error; + DISPATCH(); + } +``` + +PyObject_SetAttr: + +```c +int +PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) +{ + PyTypeObject *tp = Py_TYPE(v); + int err; + + if (!PyUnicode_Check(name)) { + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + Py_TYPE(name)->tp_name); + return -1; + } + Py_INCREF(name); + + PyUnicode_InternInPlace(&name); + if (tp->tp_setattro != NULL) { + err = (*tp->tp_setattro)(v, name, value); + Py_DECREF(name); + return err; + } + if (tp->tp_setattr != NULL) { + const char *name_str = PyUnicode_AsUTF8(name); + if (name_str == NULL) { + Py_DECREF(name); + return -1; + } + err = (*tp->tp_setattr)(v, (char *)name_str, value); + Py_DECREF(name); + return err; + } + Py_DECREF(name); + _PyObject_ASSERT(name, Py_REFCNT(name)>= 1); + if (tp->tp_getattr == NULL && tp->tp_getattro == NULL) + PyErr_Format(PyExc_TypeError, + "'%.100s' object has no attributes " + "(%s .%U)", + tp->tp_name, + value==NULL ? "del" : "assign to", + name); + else + PyErr_Format(PyExc_TypeError, + "'%.100s' object has only read-only attributes " + "(%s .%U)", + tp->tp_name, + value==NULL ? 
"del" : "assign to", + name); + return -1; +} +``` + +### 对实例设置属性 + +对于普通的类来说, tp_setattro 继承自 object 的 tp_setattro, object 的 tp_setattro 等于 PyObject_GenericSetAttr, 代码如下: + +```c +int +_PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, + PyObject *value, PyObject *dict) +{ + PyTypeObject *tp = Py_TYPE(obj); + PyObject *descr; + descrsetfunc f; + PyObject **dictptr; + int res = -1; + + if (!PyUnicode_Check(name)){ + PyErr_Format(PyExc_TypeError, + "attribute name must be string, not '%.200s'", + Py_TYPE(name)->tp_name); + return -1; + } + + if (tp->tp_dict == NULL && PyType_Ready(tp) < 0) + return -1; + + Py_INCREF(name); + + descr = _PyType_Lookup(tp, name); + + if (descr != NULL) { + Py_INCREF(descr); + f = Py_TYPE(descr)->tp_descr_set; + if (f != NULL) { + res = f(descr, obj, value); + goto done; + } + } + + /* XXX [Steve Dower] These are really noisy - worth it? */ + /*if (PyType_Check(obj) || PyModule_Check(obj)) { + if (value && PySys_Audit("object.__setattr__", "OOO", obj, name, value) < 0) + return -1; + if (!value && PySys_Audit("object.__delattr__", "OO", obj, name) < 0) + return -1; + }*/ + + if (dict == NULL) { + dictptr = _PyObject_GetDictPtr(obj); + if (dictptr == NULL) { + if (descr == NULL) { + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute '%U'", + tp->tp_name, name); + } + else { + PyErr_Format(PyExc_AttributeError, + "'%.50s' object attribute '%U' is read-only", + tp->tp_name, name); + } + goto done; + } + res = _PyObjectDict_SetItem(tp, dictptr, name, value); + } + else { + Py_INCREF(dict); + if (value == NULL) + res = PyDict_DelItem(dict, name); + else + res = PyDict_SetItem(dict, name, value); + Py_DECREF(dict); + } + if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetObject(PyExc_AttributeError, name); + + done: + Py_XDECREF(descr); + Py_DECREF(name); + return res; +} + +int +PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) +{ + return 
_PyObject_GenericSetAttrWithDict(obj, name, value, NULL); +} +``` + +流程: + +- 查询基类中是否存在和 name 对应的描述符. 如果存在而且描述符定义了 tp_descr_set 方法, 那么调用 tp_descr_set 设置属性. + +- 如果实例包含 `__dict__`(即 type(obj).tp_dictoffset != 0), 那么将属性保存到 `__dict__`. + +- 如果上述两个条件都不满足, 则报错. + +### 用户自定义 `__setattr__` + +如果用户自定义了 `__setattr__` 方法, 那么类的 tp_setattro 会被设置为 slot_tp_setattro. slotdef 如下: + +```c +TPSLOT("__setattr__", tp_setattro, slot_tp_setattro, wrap_setattr, + "__setattr__($self, name, value, /)\n--\n\nImplement setattr(self, name, value)."), +``` + +slot_tp_setattro: + +```c +static int +slot_tp_setattro(PyObject *self, PyObject *name, PyObject *value) +{ + PyObject *stack[3]; + PyObject *res; + _Py_IDENTIFIER(__delattr__); + _Py_IDENTIFIER(__setattr__); + + stack[0] = self; + stack[1] = name; + if (value == NULL) { + res = vectorcall_method(&PyId___delattr__, stack, 2); + } + else { + stack[2] = value; + res = vectorcall_method(&PyId___setattr__, stack, 3); + } + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; +} +``` + +接下来 vectorcall_method 的功能主要是查找 `__setattr__` 方法, 然后调用它设置属性. 
+ +### 对类设置属性 + +对类设置属性的话需要调用元类的 tp_setattro, 以 PyType_Type 为例, tp_setattro 等于 type_setattro, 代码如下: + +```c +static int +type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) +{ + int res; + if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + PyErr_Format( + PyExc_TypeError, + "can't set attributes of built-in/extension type '%s'", + type->tp_name); + return -1; + } + if (PyUnicode_Check(name)) { + if (PyUnicode_CheckExact(name)) { + if (PyUnicode_READY(name) == -1) + return -1; + Py_INCREF(name); + } + else { + name = _PyUnicode_Copy(name); + if (name == NULL) + return -1; + } +#ifdef INTERN_NAME_STRINGS + if (!PyUnicode_CHECK_INTERNED(name)) { + PyUnicode_InternInPlace(&name); + if (!PyUnicode_CHECK_INTERNED(name)) { + PyErr_SetString(PyExc_MemoryError, + "Out of memory interning an attribute name"); + Py_DECREF(name); + return -1; + } + } +#endif + } + else { + /* Will fail in _PyObject_GenericSetAttrWithDict. */ + Py_INCREF(name); + } + res = _PyObject_GenericSetAttrWithDict((PyObject *)type, name, value, NULL); + if (res == 0) { + /* Clear the VALID_VERSION flag of 'type' and all its + subclasses. This could possibly be unified with the + update_subclasses() recursion in update_slot(), but carefully: + they each have their own conditions on which to stop + recursing into subclasses. */ + PyType_Modified(type); + + if (is_dunder_name(name)) { + res = update_slot(type, name); + } + assert(_PyType_CheckConsistency(type)); + } + Py_DECREF(name); + return res; +} +``` + +type_setattro 最终调用 _PyObject_GenericSetAttrWithDict 设置属性. 值得注意的是, 当修改了类的属性时, 需要调用 PyType_Modified, 修改类的 tp_flag 中的版本标志符. + +```c +void +PyType_Modified(PyTypeObject *type) +{ + /* Invalidate any cached data for the specified type and all + subclasses. This function is called after the base + classes, mro, or attributes of the type are altered. 
+ + Invariants: + + - Py_TPFLAGS_VALID_VERSION_TAG is never set if + Py_TPFLAGS_HAVE_VERSION_TAG is not set (in case of a + bizarre MRO, see type_mro_modified()). + + - before Py_TPFLAGS_VALID_VERSION_TAG can be set on a type, + it must first be set on all super types. + + This function clears the Py_TPFLAGS_VALID_VERSION_TAG of a + type (so it must first clear it on all subclasses). The + tp_version_tag value is meaningless unless this flag is set. + We don't assign new version tags eagerly, but only as + needed. + */ + PyObject *raw, *ref; + Py_ssize_t i; + + if (!_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) + return; + + raw = type->tp_subclasses; + if (raw != NULL) { + assert(PyDict_CheckExact(raw)); + i = 0; + while (PyDict_Next(raw, &i, NULL, &ref)) { + assert(PyWeakref_CheckRef(ref)); + ref = PyWeakref_GET_OBJECT(ref); + if (ref != Py_None) { + PyType_Modified((PyTypeObject *)ref); + } + } + } + type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; +} +``` + +### 调用方法 + +调用方法使用的字节码是 LOAD_METHOD 和 CALL_METHOD. 具体细节和读取属性类似. + +```c + case TARGET(LOAD_METHOD): { + /* Designed to work in tandem with CALL_METHOD. */ + PyObject *name = GETITEM(names, oparg); + PyObject *obj = TOP(); + PyObject *meth = NULL; + + int meth_found = _PyObject_GetMethod(obj, name, &meth); + + if (meth == NULL) { + /* Most likely attribute wasn't found. */ + goto error; + } + + if (meth_found) { + /* We can bypass temporary bound method object. + meth is unbound method and obj is self. + + meth | self | arg1 | ... | argN + */ + SET_TOP(meth); + PUSH(obj); // self + } + else { + /* meth is not an unbound method (but a regular attr, or + something was returned by a descriptor protocol). Set + the second element of the stack to NULL, to signal + CALL_METHOD that it's not a method call. + + NULL | meth | arg1 | ... | argN + */ + SET_TOP(NULL); + Py_DECREF(obj); + PUSH(meth); + } + DISPATCH(); + } + case TARGET(CALL_METHOD): { + /* Designed to work in tamdem with LOAD_METHOD. 
*/ + PyObject **sp, *res, *meth; + + sp = stack_pointer; + + meth = PEEK(oparg + 2); + if (meth == NULL) { + /* `meth` is NULL when LOAD_METHOD thinks that it's not + a method call. + + Stack layout: + + ... | NULL | callable | arg1 | ... | argN + ^- TOP() + ^- (-oparg) + ^- (-oparg-1) + ^- (-oparg-2) + + `callable` will be POPed by call_function. + NULL will will be POPed manually later. + */ + res = call_function(tstate, &sp, oparg, NULL); + stack_pointer = sp; + (void)POP(); /* POP the NULL. */ + } + else { + /* This is a method call. Stack layout: + + ... | method | self | arg1 | ... | argN + ^- TOP() + ^- (-oparg) + ^- (-oparg-1) + ^- (-oparg-2) + + `self` and `method` will be POPed by call_function. + We'll be passing `oparg + 1` to call_function, to + make it accept the `self` as a first argument. + */ + res = call_function(tstate, &sp, oparg + 1, NULL); + stack_pointer = sp; + } + + PUSH(res); + if (res == NULL) + goto error; + DISPATCH(); + } +``` + +这段代码还是挺 tricky 的, 不过注释说得很明白. + +以下面的代码为例: + +```py +class C: + def meth(self): + print('call meth') + +c = C() +print(c.meth) +c.meth() +``` + +`print(c.meth)` 会输出 `<bound method C.meth of <__main__.C object at 0x...>>`, 那么 meth 是怎么转换成 bound method 的呢? + +首先, meth 是类 C 的 tp_dict 中的一个 PyFunctionObject. 根据前面对 "通过实例访问属性" 的分析, 我们知道 PyType_Lookup 返回的 descr 是 meth(PyFunctionObject), PyFunctionObject 对应的类型 PyFunction_Type 的 tp_descr_get 不为 NULL, 因此最终获取的属性是 tp_descr_get 返回的结果. + +PyFunction_Type 的 tp_descr_get 等于 func_descr_get, 代码如下: + +```c +static PyObject * +func_descr_get(PyObject *func, PyObject *obj, PyObject *type) +{ + if (obj == Py_None || obj == NULL) { + Py_INCREF(func); + return func; + } + return PyMethod_New(func, obj); +} +``` + +func_descr_get 就是关键所在, 它返回了一个 PyMethodObject, PyMethodObject 保存了 PyFunctionObject 以及 obj. 因此, `c.meth` 最终得到的是一个 PyMethodObject. 
\ No newline at end of file From 726cd895df7be379a97ea46e3e364955676678fc Mon Sep 17 00:00:00 2001 From: ausaki Date: Tue, 6 Apr 2021 18:41:11 +0800 Subject: [PATCH 4/4] update --- trashcan.md | 2 +- ...05345円255円230円347円256円241円347円220円206円.md" | 315 +----------- ...03345円234円276円345円233円236円346円224円266円.md" | 475 ++++++++++++++++++ "345円274円261円345円274円225円347円224円250円.md" | 312 ++++++++++++ ...17350円277円260円347円254円246円(descriptor).md" | 0 5 files changed, 789 insertions(+), 315 deletions(-) create mode 100644 "345円236円203円345円234円276円345円233円236円346円224円266円.md" rename "346円217円217円350円277円260円347円254円246円(descriptor).md" => "347円261円273円345円236円213円347円263円273円347円273円237円344円271円213円346円217円217円350円277円260円347円254円246円(descriptor).md" (100%) diff --git a/trashcan.md b/trashcan.md index ad66979..6a28b4e 100644 --- a/trashcan.md +++ b/trashcan.md @@ -102,4 +102,4 @@ PyAPI_FUNC(void) _PyTrash_end(struct _ts *tstate); 我最喜欢这样的注释, 对新加的某个特性进行详细的描述. 遗憾的是 CPython 的代码中有许多没有详细注释的代码, 对于刚开始阅读 CPython 代码的人来说非常难以理解. -trashcan 的用法可以查看 tuple 的 tpdealloc 函数. \ No newline at end of file +trashcan 的用法可以查看 tuple 的 tp_dealloc 函数. 
\ No newline at end of file diff --git "a/345円206円205円345円255円230円347円256円241円347円220円206円.md" "b/345円206円205円345円255円230円347円256円241円347円220円206円.md" index 92310f2..8eb8cf1 100644 --- "a/345円206円205円345円255円230円347円256円241円347円220円206円.md" +++ "b/345円206円205円345円255円230円347円256円241円347円220円206円.md" @@ -439,317 +439,4 @@ arena_object 是内存池中最大的单位,当内存池没有可用的 pool 这一层的内存管理主要和具体的对象有关,例如内部的整数、列表、字典和字符串对象。 -这些内部对象的使用频率非常高,为了避免频繁的分配对象的内存,Python 的做法是为这些对象分配独立的内存池。当需要分配对象时,就从内存池取出一个对象的内存,当释放一个对象时,其内存归还给内存池。其实内存池就是一个简单的数组,数组元素指向对象的内存。指的注意的是,内存池并不保存对象内部指向的数据结构,而是只保存对象本身的结构体。以列表为例,内存池保存列表对象的结构体 `listobj`,但是不保存列表指向的数组,因为这些数组非常占据内存,而且也不好重复利用。 - - -## 垃圾回收 - -Python 中主要的垃圾回收机制是引用计数,辅以标记清除和分代回收。 - -### 引用计数 - -引用计数是非常直观的一种垃圾回收算法,每个对象内部维护着一个引用计数,如果引用计数变为 0,说明该对象生命周期结束,因此可以马上释放它的内存。 - -优点: - -- 原理简单,容易实现。 - -- 垃圾回收的实时性高,一旦对象的引用技术变为 0,马上可以回收其占用的内存。 - -缺点: - -- 循环引用。 - -- 维护引用计数带来许多额外的操作,和对象数量成正比。 - -循环引用只会发生在 container 对象中,例如 list,dict 等。 - -循环引用例子: - -```python -In [505]: l1 = [] - -In [506]: l2 = [] - -In [507]: l1.append(l2) - -In [508]: l2.append(l1) - -In [509]: print l1 -[[[...]]] - -In [510]: print l2 -[[[...]]] - -In [511]: -``` - -``` - +----------+ +----------+ -+--> list 1 | +-------> list 1 | -| +----------+ | +----------+ -| | +---+ +--+ | -| +----------+ | +----------+ -| | | | | | -| +----------+ | +----------+ -| | | | | | -| +----------+ | +----------+ -| | -+----------------------+ -``` - -为了解决循环引用问题,引入了标记 - 清除垃圾回收机制和分代垃圾回收机制。 - -### 标记清除(mark-sweep) - -标记清除的基本原理是: - -- 寻找根对象(root object)的集合,所谓的 root object 即是一些全局引用和函数栈中的引用。这些引用所用的对象是不可被删除的。而这个 root object 集合也是垃圾检测动作的起点。 - -- 从 root object 集合出发,沿着 root object 集合中的每一个引用,如果能到达某个对象 A,则 A 称为可达的(reachable),可达的对象也不可被删除。这个阶段就是垃圾检测阶段。 - -- 当垃圾检测阶段结束后,所有的对象分为了可达的和不可达的(unreachable)两部分,所有的可达对象都必须予以保留,而所有的不可达对象所占用的内存将被回收,这就是垃圾回收阶段。 - -### container 对象链表 - -由于只有 contaier 对象才会存在循环引用的问题,所以 Python 为这些特定对象增加了一个额外的头信息,即 PyGG_Head。 - -```C -typedef union _gc_head { - struct { - union _gc_head *gc_next; - union _gc_head *gc_prev; - 
Py_ssize_t gc_refs; - } gc; - double dummy; /* force worst-case alignment */ -} PyGC_Head; - -PyObject * -_PyObject_GC_Malloc(size_t basicsize) -{ - PyObject *op; - PyGC_Head *g = (PyGC_Head *)PyObject_MALLOC( - sizeof(PyGC_Head) + basicsize); - if (g == NULL) - return PyErr_NoMemory(); - g->gc.gc_refs = GC_UNTRACKED; - generations[0].count++; /* number of allocated GC objects */ - if (generations[0].count> generations[0].threshold && - enabled && - generations[0].threshold && - !collecting && - !PyErr_Occurred()) { - collecting = 1; - collect_generations(); - collecting = 0; - } - op = FROM_GC(g); - return op; -} -``` - -通过 PyGC_Head,所有的 container 对象形成一个循环双向链表。 - -gc_refs 有四种状态: - -- GC_UNTRACKED - - 这是通过 `PyObject_GC_Malloc` 分配对象后的初始状态。实际值等于 - 2。 - -- GC_REACHABLE - - 这个状态表示对象在 GC 链表上。实际值等于 - 3 - -- GC_TENTATIVELY_UNREACHABLE - - 当进行 GC 时,gc_refs 可能会被设置为这个状态,表示对象可能不可达。实际值等于 - 4。 - -- `> 0` - - 当进行 GC 时,gc_refs 表示对象 refcnt。 - -gc_refs 其实使用最低位表示 tp_finalized 是否被调用,其余高位用来表示状态。具体看下面的代码。 - -```C -/* Bit 0 is set when tp_finalize is called */ -#define _PyGC_REFS_MASK_FINALIZED (1 << 0) -/* The (N-1) most significant bits contain the gc state / refcount */ -#define _PyGC_REFS_SHIFT (1) -#define _PyGC_REFS_MASK (((size_t) -1) << _PyGC_REFS_SHIFT) - -#define _PyGCHead_REFS(g) ((g)->gc.gc_refs>> _PyGC_REFS_SHIFT) -#define _PyGCHead_SET_REFS(g, v) do { \ - (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK) \ - | (((size_t)(v)) << _PyGC_REFS_SHIFT); \ - } while (0) -#define _PyGCHead_DECREF(g) ((g)->gc.gc_refs -= 1 << _PyGC_REFS_SHIFT) - -#define _PyGCHead_FINALIZED(g) (((g)->gc.gc_refs & _PyGC_REFS_MASK_FINALIZED) != 0) -#define _PyGCHead_SET_FINALIZED(g, v) do { \ - (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK_FINALIZED) \ - | (v != 0); \ - } while (0) - -#define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) -#define _PyGC_SET_FINALIZED(o, v) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o), v) - -#define _PyGC_REFS(o) _PyGCHead_REFS(_Py_AS_GC(o)) - -#define 
_PyGC_REFS_UNTRACKED (-2) -#define _PyGC_REFS_REACHABLE (-3) -#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) -``` - -### 分代回收 - -分代回收的基本原理是:将所有的对象按照其存活时间划分成不同的代,对象的存活时间越长,说明该对象越不可能是垃圾,被回收的概率越低。存活时间指的是经历的垃圾回收的次数,经历次数越多,说明代越老。 - -Python 的 GC 有三个代,每个代其实就是一个双向循环链表,该链表通过前面的 PyGC_Head 连接起来。 - -```C -struct gc_generation { - PyGC_Head head; - int threshold; /* collection threshold */ - int count; /* count of allocations or collections of younger - generations */ -}; - -#define NUM_GENERATIONS 3 -#define GEN_HEAD(n) (&generations[n].head) - -/* linked lists of container objects */ -static struct gc_generation generations[NUM_GENERATIONS] = { - /* PyGC_Head, threshold, count */ - {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, - {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, - {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, -}; -``` - -第 0 代是最年轻的代,新分配的对象被添加到第 0 代的链表上(当然并不是所有 GC 对象都被添加到链表上)。 - -### 回收流程 - -主要流程: - -- 当 PyObject_GC_Malloc 分配对象内存时,第 0 代的 count 加 1。如果 count 超过阈值(700)那么就会触发 GC。 - -- 调用 collect_generations 函数。该函数找到 count 超过阈值的最老的一代,假设为 generation。然后在开始收集垃圾对象之前先调用 start 用户回调函数,收集完成后在调用 stop 用户回调函数。 - - 这里有一个优化后面再讲。 - -- 调用 collect 函数。collect 函数负责主要的垃圾回收工作。 - -collect 函数的流程: - -- 将 generation 和比 generation 更年轻的代的 count 设置为 0。假设比 young 老一辈的代为 old(odl = young + 1),如果 old 存在,那么将 old 的 count 加 1。 - -- 将比 generation 更年轻的代合并到 generation 链表的后面,形成一条链表。假设这条链表名字为 young。 - -- udpate_refs。遍历 young 链表,将每个 container 对象的 refcnt 复制到 PyGC_Head 的 gc_refs 字段。 - -- subtract_refs。遍历 young 链表,访问每个 container 对象时,调用对象的 tp_traverse 方法,该方法支持回调函数以便访问对象内部引用的其它对象,然后将每个引用的其它对象的 gc_refs 减 1。 - - 这一步的目的是断开所有的引用链。对象之间的循环引用被断开了。 - -- move_unreachable。 - - 遍历 young 链表,如果 contaienr 对象的 gc_refs 大于 0,说明该对象是从外部可以直接访问到的,这称为可达的(reachable)。将可达对象的 gc_refs 设为 GC_REACHABLE,然后通过 tp_traverse 访问对象内部引用的其它对象。 - - - 如果引用对象的的 gc_refs 等于 0,那么将其设置为 1,因为这个引用对象可以从 container 对象访问到,这称为间接可达。 - - - 如果引用对象的 gc_refs 等于 GC_TENTATIVELY_UNREACHABLE,说明这个引用对象在此之前已经被 move_unreachable 访问过,而且原来的 gc_refs 等于 0,GC_TENTATIVELY_UNREACHABLE 
表示对象可能不可达。现在发现 container 对象可以访问到该引用对象,因此将该引用对象的 gc_refs 设置为 1,并将其从 unreachable 链表移回到 young 链表。 - - 如果 container 对象的 gc_refs 等于 0(除了大于 0 只可能等于 0),那么暂时认为其不可达,将 gc_refs 设置为 GC_TENTATIVELY_UNREACHABLE,并将其移到 unreachable 链表。 - -- 到这一步标记阶段已经完成了,young 链表上的对象都是可达的,unreachable 链表上的对象都是不可达的,也就是垃圾。 - -- 如果 old 存在,那么将 young 链表合并到 old 链表,如果 old 不存在,那么说明 young 是最老的一代,说明这次垃圾回收是完整地对所有代进行的。这个过程就是分代回收,每经历一次垃圾回收,留下来的对象就 "老了一代"。 - - -collect 函数的其它工作: - -- move_legacy_finalizers。将 unreachable 链表中包含 tp->tp_del(tp_del != NULL,tp_del 对应 Python 中的__del__)方法的对象移到到 finalizers 链表,同时将这些对象的 gc_refs 设置为 GC_REACHABLE。 - -- move_legacy_finalizer_reachable。遍历 finalizers 链表,通过对象的 tp->traverse 方法,将对象引用的其它对象也移到动 finalizers 链表,前提是其它对象在 unreachable 链表上。 - -- handle_weakrefs。处理弱引用,这个暂时还没搞懂。 - -- finalize_garbage。遍历 unreachable 链表,调用对象的 tp_finalizer 方法。 - -- check_garbage。再次检查 unreachable 链表是否真的全部是不可达对象。检查方法是 update_refs 和 subtract_refs。如果发现某个对象的 gc_refs 不等于 0,那么将 unreachable 链表合并到 old 链表。 - -- delete_garbage。遍历 unreachable 链表,调用对象的 tp_clear 方法。这一步就是垃圾回收。tp_clear 会触发链式反应。 - - 以列表的 tp_clear 为例: - - ```C - static int - list_clear(PyListObject *a) - { - Py_ssize_t i; - PyObject **item = a->ob_item; - if (item != NULL) { - /* Because XDECREF can recursively invoke operations on - this list, we make it empty first. */ - i = Py_SIZE(a); - Py_SIZE(a) = 0; - a->ob_item = NULL; - a->allocated = 0; - while (--i>= 0) { - Py_XDECREF(item[i]); - } - PyMem_FREE(item); - } - /* Never fails; the return value can be ignored. - Note that there is no guarantee that the list is actually empty - at this point, because XDECREF may have populated it again! 
*/ - return 0; - } - ``` - -- handle_legacy_finalizers。将 finalizers 链表合并到 old 链表。 - -- clear_freelists。释放对象特有的内存池,例如列表的 free_list。这一步只有在 generation 等于最老的一代才会进行。 - - -### 回收优化 - -为了减少垃圾收集的次数,Python 引入了两个变量:long_lived_pending 和 long_lived_total。 - -先定义两个概念: - -- 完全垃圾收集:在 collect_generations 函数,找到的 generatioon 是最老的一代,也就是第 2 代,因此垃圾收集需要从最老的一代开始,然后到最年轻的一代结束。完全垃圾收集指的就是对所有代进行垃圾收集。 - -- 部分垃圾收集:与完全垃圾收集的概念相反,部分垃圾收集指的是只对中间一代(第 1 代)到最年轻一代(第 0 代)进行垃圾收集。 - -long_lived_total 表示上次进行完全垃圾收集后第 2 代链表的长度。 - -long_lived_pending 表示自上次进行完全垃圾收集以来,每次进行部分垃圾收集后第 1 代链表长度的累积值。前面提到第 1 代链表会合并到第 2 代,所以名字中有 pending。 - - -优化:在 collect_generations 函数中,如果找到的 generation 等于 2,那么只有 long_lived_pending> long_lived_total / 4 才会进行完全垃圾收集,否则寻找一下个代。 - -因为第2代保存了非常多的对象,几乎是所有一直存活的对象,对第2代进行垃圾回收是非常耗时的,另外,对第2代频繁进行垃圾回收也是毫无意义的。 - -### 相关文章和讨论 - -- 早期的一篇关于 Python GC 的文章:[http://www.arctrix.com/nas/python/gc/](http://www.arctrix.com/nas/python/gc/) - -- Python Dev Mail List 上关于 Python GC 实现的讨论: - - - [http://mail.python.org/pipermail/python-dev/2000-March/002385.html](http://mail.python.org/pipermail/python-dev/2000-March/002385.html) - - - [http://mail.python.org/pipermail/python-dev/2000-March/002434.html](http://mail.python.org/pipermail/python-dev/2000-March/002434.html) - - - [http://mail.python.org/pipermail/python-dev/2000-March/002497.html](http://mail.python.org/pipermail/python-dev/2000-March/002497.html) - - - 减少 GC 运行的次数。[https://mail.python.org/pipermail/python-dev/2008-June/080579.html](https://mail.python.org/pipermail/python-dev/2008-June/080579.html) - -- 关于优化 Python GC 的 issue: - - - 减少 GC 追踪对象的数量,例如只包含不可变对象的元祖可以不用被 GC 追踪。 - - [https://bugs.python.org/issue4688](https://bugs.python.org/issue4688) - - [https://bugs.python.org/issue14775](https://bugs.python.org/issue14775) \ No newline at end of file +这些内部对象的使用频率非常高,为了避免频繁的分配对象的内存,Python 的做法是为这些对象分配独立的内存池。当需要分配对象时,就从内存池取出一个对象的内存,当释放一个对象时,其内存归还给内存池。其实内存池就是一个简单的数组,数组元素指向对象的内存。指的注意的是,内存池并不保存对象内部指向的数据结构,而是只保存对象本身的结构体。以列表为例,内存池保存列表对象的结构体 
`listobj`,但是不保存列表指向的数组,因为这些数组非常占据内存,而且也不好重复利用。 \ No newline at end of file diff --git "a/345円236円203円345円234円276円345円233円236円346円224円266円.md" "b/345円236円203円345円234円276円345円233円236円346円224円266円.md" new file mode 100644 index 0000000..7459d6d --- /dev/null +++ "b/345円236円203円345円234円276円345円233円236円346円224円266円.md" @@ -0,0 +1,475 @@ +# 垃圾回收 + +Python 中主要的垃圾回收机制是引用计数,辅以标记清除和分代回收。 + +## 引用计数 + +引用计数是非常直观的一种垃圾回收算法,每个对象内部维护着一个引用计数,如果引用计数变为 0,说明该对象生命周期结束,因此可以马上释放它的内存。 + +优点: + +- 原理简单,容易实现。 + +- 垃圾回收的实时性高,一旦对象的引用技术变为 0,马上可以回收其占用的内存。 + +缺点: + +- 循环引用。 + +- 维护引用计数带来许多额外的操作,和对象数量成正比。 + +循环引用只会发生在 container 对象中,例如 list,dict 等。 + +循环引用例子: + +```python +In [505]: l1 = [] + +In [506]: l2 = [] + +In [507]: l1.append(l2) + +In [508]: l2.append(l1) + +In [509]: print l1 +[[[...]]] + +In [510]: print l2 +[[[...]]] + +In [511]: +``` + +``` + +----------+ +----------+ ++--> list 1 | +-------> list 1 | +| +----------+ | +----------+ +| | +---+ +--+ | +| +----------+ | +----------+ +| | | | | | +| +----------+ | +----------+ +| | | | | | +| +----------+ | +----------+ +| | ++----------------------+ +``` + +为了解决循环引用问题,引入了标记 - 清除垃圾回收机制和分代垃圾回收机制。 + +## 和 GC 相关的几个 tp_xxx + +### tp_clear + +摘自 [官方文档](https://docs.python.org/3/c-api/typeobj.html?highlight=slot#c.PyTypeObject.tp_clear): + +An optional pointer to a clear function for the garbage collector. This is only used if the Py_TPFLAGS_HAVE_GC flag bit is set. The signature is: + +```c +int tp_clear(PyObject *); +``` + +The tp_clear member function is used to break reference cycles in cyclic garbage detected by the garbage collector. Taken together, all tp_clear functions in the system must combine to break all reference cycles. This is subtle, and if in any doubt supply a tp_clear function. For example, the tuple type does not implement a tp_clear function, because it’s possible to prove that no reference cycle can be composed entirely of tuples. 
Therefore the tp_clear functions of other types must be sufficient to break any cycle containing a tuple. This isn’t immediately obvious, and there’s rarely a good reason to avoid implementing tp_clear. + +Implementations of tp_clear should drop the instance’s references to those of its members that may be Python objects, and set its pointers to those members to NULL, as in the following example: + +```c +static int +local_clear(localobject *self) +{ + Py_CLEAR(self->key); + Py_CLEAR(self->args); + Py_CLEAR(self->kw); + Py_CLEAR(self->dict); + return 0; +} +``` + +The Py_CLEAR() macro should be used, because clearing references is delicate: the reference to the contained object must not be decremented until after the pointer to the contained object is set to NULL. This is because decrementing the reference count may cause the contained object to become trash, triggering a chain of reclamation activity that may include invoking arbitrary Python code (due to finalizers, or weakref callbacks, associated with the contained object). If it’s possible for such code to reference self again, it’s important that the pointer to the contained object be NULL at that time, so that self knows the contained object can no longer be used. The Py_CLEAR() macro performs the operations in a safe order. + +Because the goal of tp_clear functions is to break reference cycles, it’s not necessary to clear contained objects like Python strings or Python integers, which can’t participate in reference cycles. On the other hand, it may be convenient to clear all contained Python objects, and write the type’s tp_dealloc function to invoke tp_clear. + +More information about Python’s garbage collection scheme can be found in section Supporting Cyclic Garbage Collection. + +在 typeobject.c 的 type_new 函数中, 可以看到用户定义的类的 tp_clear 被设置为 subtype_clear. 
+ +```c +type->tp_clear = subtype_clear; +``` + +### tp_dealloc + +摘自 [官方文档](https://docs.python.org/3/c-api/typeobj.html?highlight=slot#c.PyTypeObject.tp_dealloc): + +A pointer to the instance destructor function. This function must be defined unless the type guarantees that its instances will never be deallocated (as is the case for the singletons None and Ellipsis). The function signature is: + +```c +void tp_dealloc(PyObject *self); +``` + +The destructor function is called by the Py_DECREF() and Py_XDECREF() macros when the new reference count is zero. At this point, the instance is still in existence, but there are no references to it. The destructor function should free all references which the instance owns, free all memory buffers owned by the instance (using the freeing function corresponding to the allocation function used to allocate the buffer), and call the type’s tp_free function. If the type is not subtypable (doesn’t have the Py_TPFLAGS_BASETYPE flag bit set), it is permissible to call the object deallocator directly instead of via tp_free. The object deallocator should be the one used to allocate the instance; this is normally PyObject_Del() if the instance was allocated using PyObject_New() or PyObject_VarNew(), or PyObject_GC_Del() if the instance was allocated using PyObject_GC_New() or PyObject_GC_NewVar(). + +Finally, if the type is heap allocated (Py_TPFLAGS_HEAPTYPE), the deallocator should decrement the reference count for its type object after calling the type deallocator. In order to avoid dangling pointers, the recommended way to achieve this is: + +```c +static void foo_dealloc(foo_object *self) { + PyTypeObject *tp = Py_TYPE(self); + // free references and buffers here + tp->tp_free(self); + Py_DECREF(tp); +} +``` + +当减少引用计数时发现引用计数等于 0, 便会调用 tp_dealloc. tp_dealloc 一般调用 tp_free, 释放对象的成员, 以及本身的内存. + +在 typeobject.c 的 type_new 函数中, 可以看到用户定义的类的 tp_dealloc 被设置为 subtype_dealloc. 
+ +```c +type->tp_dealloc = subtype_dealloc; +``` + +### tp_free + +摘自 [官方文档](https://docs.python.org/3/c-api/typeobj.html?highlight=slot#c.PyTypeObject.tp_free) + +An optional pointer to an instance deallocation function. Its signature is: + +```c +void tp_free(void *self); +``` + +An initializer that is compatible with this signature is PyObject_Free(). + +Inheritance: + +This field is inherited by static subtypes, but not by dynamic subtypes (subtypes created by a class statement) + +Default: + +In dynamic subtypes, this field is set to a deallocator suitable to match PyType_GenericAlloc() and the value of the Py_TPFLAGS_HAVE_GC flag bit. + +For static subtypes, PyBaseObject_Type uses PyObject_Del. + +在 typeobject.c 的 type_new 函数中, 可以看到用户定义的类的 tp_free 被设置为 PyObject_GC_Del. + +```c +type->tp_free = PyObject_GC_Del; +``` + +### tp_del + +tp_del 已经不推荐使用. + +在 Python 中 tp_del 对应于 `__del__`. + +### tp_finalize + +[官方文档](https://docs.python.org/3/c-api/typeobj.html?highlight=slot#c.PyTypeObject.tp_finalize) + +在 Python 中 tp_finalize 对应于 `__del__`. + +### 总结 + +tp_clear 用于打破对象之间的循环引用. + +tp_dealloc 用于释放对象内的成员的内存. + +tp_free 用于释放对象本身的内存. 
+ +它们之间的调用关系为: + +``` +Py_DECREF + | + v +tp_dealloc -> tp_clear + | + +-------> tp_free +``` + +参考: + +- [PEP 442 -- Safe object finalization](https://www.python.org/dev/peps/pep-0442/) + +## 标记清除(mark-sweep) + +标记清除的基本原理是: + +- 寻找根对象(root object)的集合,所谓的 root object 即是一些全局引用和函数栈中的引用。这些引用所用的对象是不可被删除的。而这个 root object 集合也是垃圾检测动作的起点。 + +- 从 root object 集合出发,沿着 root object 集合中的每一个引用,如果能到达某个对象 A,则 A 称为可达的(reachable),可达的对象也不可被删除。这个阶段就是垃圾检测阶段。 + +- 当垃圾检测阶段结束后,所有的对象分为了可达的和不可达的(unreachable)两部分,所有的可达对象都必须予以保留,而所有的不可达对象所占用的内存将被回收,这就是垃圾回收阶段。 + +## container 对象链表 + +由于只有 contaier 对象才会存在循环引用的问题,所以 Python 为这些特定对象增加了一个额外的头信息,即 PyGG_Head。 + +```C +typedef union _gc_head { + struct { + union _gc_head *gc_next; + union _gc_head *gc_prev; + Py_ssize_t gc_refs; + } gc; + double dummy; /* force worst-case alignment */ +} PyGC_Head; + +PyObject * +_PyObject_GC_Malloc(size_t basicsize) +{ + PyObject *op; + PyGC_Head *g = (PyGC_Head *)PyObject_MALLOC( + sizeof(PyGC_Head) + basicsize); + if (g == NULL) + return PyErr_NoMemory(); + g->gc.gc_refs = GC_UNTRACKED; + generations[0].count++; /* number of allocated GC objects */ + if (generations[0].count> generations[0].threshold && + enabled && + generations[0].threshold && + !collecting && + !PyErr_Occurred()) { + collecting = 1; + collect_generations(); + collecting = 0; + } + op = FROM_GC(g); + return op; +} +``` + +通过 PyGC_Head,所有的 container 对象形成一个循环双向链表。 + +gc_refs 有四种状态: + +- GC_UNTRACKED + + 这是通过 `PyObject_GC_Malloc` 分配对象后的初始状态。实际值等于 -2。 + +- GC_REACHABLE + + 这个状态表示对象在 GC 链表上。实际值等于 -3 + +- GC_TENTATIVELY_UNREACHABLE + + 当进行 GC 时,gc_refs 可能会被设置为这个状态,表示对象可能不可达。实际值等于 -4。 + +- `> 0` + + 当进行 GC 时,gc_refs 表示对象 refcnt。 + +gc_refs 其实使用最低位表示 tp_finalized 是否被调用,其余高位用来表示状态。具体看下面的代码。 + +```C +/* Bit 0 is set when tp_finalize is called */ +#define _PyGC_REFS_MASK_FINALIZED (1 << 0) +/* The (N-1) most significant bits contain the gc state / refcount */ +#define _PyGC_REFS_SHIFT (1) +#define _PyGC_REFS_MASK (((size_t) -1) << _PyGC_REFS_SHIFT) + +#define 
_PyGCHead_REFS(g) ((g)->gc.gc_refs>> _PyGC_REFS_SHIFT) +#define _PyGCHead_SET_REFS(g, v) do { \ + (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK) \ + | (((size_t)(v)) << _PyGC_REFS_SHIFT); \ + } while (0) +#define _PyGCHead_DECREF(g) ((g)->gc.gc_refs -= 1 << _PyGC_REFS_SHIFT) + +#define _PyGCHead_FINALIZED(g) (((g)->gc.gc_refs & _PyGC_REFS_MASK_FINALIZED) != 0) +#define _PyGCHead_SET_FINALIZED(g, v) do { \ + (g)->gc.gc_refs = ((g)->gc.gc_refs & ~_PyGC_REFS_MASK_FINALIZED) \ + | (v != 0); \ + } while (0) + +#define _PyGC_FINALIZED(o) _PyGCHead_FINALIZED(_Py_AS_GC(o)) +#define _PyGC_SET_FINALIZED(o, v) _PyGCHead_SET_FINALIZED(_Py_AS_GC(o), v) + +#define _PyGC_REFS(o) _PyGCHead_REFS(_Py_AS_GC(o)) + +#define _PyGC_REFS_UNTRACKED (-2) +#define _PyGC_REFS_REACHABLE (-3) +#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) +``` + +## 分代回收 + +分代回收的基本原理是:将所有的对象按照其存活时间划分成不同的代,对象的存活时间越长,说明该对象越不可能是垃圾,被回收的概率越低。存活时间指的是经历的垃圾回收的次数,经历次数越多,说明代越老。 + +Python 的 GC 有三个代,每个代其实就是一个双向循环链表,该链表通过前面的 PyGC_Head 连接起来。 + +```C +struct gc_generation { + PyGC_Head head; + int threshold; /* collection threshold */ + int count; /* count of allocations or collections of younger + generations */ +}; + +#define NUM_GENERATIONS 3 +#define GEN_HEAD(n) (&generations[n].head) + +/* linked lists of container objects */ +static struct gc_generation generations[NUM_GENERATIONS] = { + /* PyGC_Head, threshold, count */ + {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, + {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, + {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, +}; +``` + +第 0 代是最年轻的代,新分配的对象被添加到第 0 代的链表上(当然并不是所有 GC 对象都被添加到链表上)。 + +## 回收流程 + +主要流程: + +- 当 PyObject_GC_Malloc 分配对象内存时,第 0 代的 count 加 1。如果 count 超过阈值(700)那么就会触发 GC。 + +- 调用 collect_generations 函数。该函数找到 count 超过阈值的最老的一代,假设为 generation。然后在开始收集垃圾对象之前先调用 start 用户回调函数,收集完成后在调用 stop 用户回调函数。 + + 这里有一个优化后面再讲。 + +- 调用 collect 函数。collect 函数负责主要的垃圾回收工作。 + +collect 函数的流程: + +- 将 generation 和比 generation 更年轻的代的 count 设置为 0。假设比 young 老一辈的代为 old(odl = young + 1),如果 old 
存在,那么将 old 的 count 加 1。 + +- 将比 generation 更年轻的代合并到 generation 链表的后面,形成一条链表。假设这条链表名字为 young。 + +- update_refs。遍历 young 链表,将每个 container 对象的 refcnt 复制到 PyGC_Head 的 gc_refs 字段。 + +- subtract_refs。遍历 young 链表,访问每个 container 对象时,调用对象的 tp_traverse 方法,该方法支持回调函数以便访问对象内部引用的其它对象,然后将每个引用的其它对象的 gc_refs 减 1。 + + 这一步的目的是断开所有的引用链。对象之间的循环引用被断开了。 + +- move_unreachable。 + + 遍历 young 链表,如果 container 对象的 gc_refs 大于 0,说明该对象是从外部可以直接访问到的,这称为可达的(reachable)。将可达对象的 gc_refs 设为 GC_REACHABLE,然后通过 tp_traverse 访问对象内部引用的其它对象。 + + - 如果引用对象的 gc_refs 等于 0,那么将其设置为 1,因为这个引用对象可以从 container 对象访问到,这称为间接可达。 + + - 如果引用对象的 gc_refs 等于 GC_TENTATIVELY_UNREACHABLE,说明这个引用对象在此之前已经被 move_unreachable 访问过,而且原来的 gc_refs 等于 0,GC_TENTATIVELY_UNREACHABLE 表示对象可能不可达。现在发现 container 对象可以访问到该引用对象,因此将该引用对象的 gc_refs 设置为 1,并将其从 unreachable 链表移回到 young 链表。 + + 如果 container 对象的 gc_refs 等于 0(除了大于 0 只可能等于 0),那么暂时认为其不可达,将 gc_refs 设置为 GC_TENTATIVELY_UNREACHABLE,并将其移到 unreachable 链表。 + +- 到这一步标记阶段已经完成了,young 链表上的对象都是可达的,unreachable 链表上的对象都是不可达的,也就是垃圾。 + +- 如果 old 存在,那么将 young 链表合并到 old 链表,如果 old 不存在,那么说明 young 是最老的一代,说明这次垃圾回收是完整地对所有代进行的。这个过程就是分代回收,每经历一次垃圾回收,留下来的对象就 "老了一代"。 + + +collect 函数的其它工作: + +- move_legacy_finalizers。将 unreachable 链表中包含 tp->tp_del(tp_del != NULL,tp_del 对应 Python 中的__del__)方法的对象移到 finalizers 链表,同时将这些对象的 gc_refs 设置为 GC_REACHABLE。 + + 如果定义 tp_del, GC 认为这些对象是无法回收的, 因此将它们从 unreachable 链表中移出来. + +- move_legacy_finalizer_reachable。遍历 finalizers 链表,通过对象的 tp_traverse 方法,将对象引用的其它对象也移动到 finalizers 链表,前提是其它对象在 unreachable 链表上。 + +- handle_weakrefs。处理弱引用。 + + 简单来讲就是遍历 unreachable 链表, 调用对象的弱引用的回调函数. 具体的细节我还没有完全想明白. + +- finalize_garbage。遍历 unreachable 链表,调用对象的 tp_finalize 方法。 + +- check_garbage。再次检查 unreachable 链表是否真的全部是不可达对象。检查方法是 update_refs 和 subtract_refs。如果发现某个对象的 gc_refs 不等于 0,那么将 unreachable 链表合并到 old 链表。 + +- delete_garbage。遍历 unreachable 链表,调用对象的 tp_clear 方法。 + + tp_clear 负责打破循环引用, 简单来说就是减少内部引用的成员的引用计数. tp_clear 导致一系列的链式反应. 
最终所有循环引用中的对象的引用计数都会变为 0, 此时会调用 tp_dealloc, tp_dealloc 一般会调用 tp_clear 和 tp_free, 因此在 tp_clear 要注意无限循环调用的问题. tp_free 负责回收对象本身的内存. + + 以列表的 tp_clear 为例: + + ```C + static int + list_clear(PyListObject *a) + { + Py_ssize_t i; + PyObject **item = a->ob_item; + if (item != NULL) { + /* Because XDECREF can recursively invoke operations on + this list, we make it empty first. */ + i = Py_SIZE(a); + Py_SIZE(a) = 0; + a->ob_item = NULL; + a->allocated = 0; + while (--i>= 0) { + Py_XDECREF(item[i]); + } + PyMem_FREE(item); + } + /* Never fails; the return value can be ignored. + Note that there is no guarantee that the list is actually empty + at this point, because XDECREF may have populated it again! */ + return 0; + } + ``` + +- handle_legacy_finalizers。将 finalizers 链表合并到 old 链表。 + +- clear_freelists。释放对象特有的内存池,例如列表的 free_list。这一步只有在 generation 等于最老的一代才会进行。 + + +## 回收优化 + +为了减少垃圾收集的次数,Python 引入了两个变量:long_lived_pending 和 long_lived_total。 + +先定义两个概念: + +- 完全垃圾收集:在 collect_generations 函数,找到的 generatioon 是最老的一代,也就是第 2 代,因此垃圾收集需要从最老的一代开始,然后到最年轻的一代结束。完全垃圾收集指的就是对所有代进行垃圾收集。 + +- 部分垃圾收集:与完全垃圾收集的概念相反,部分垃圾收集指的是只对中间一代(第 1 代)到最年轻一代(第 0 代)进行垃圾收集。 + +long_lived_total 表示上次进行完全垃圾收集后第 2 代链表的长度。 + +long_lived_pending 表示自上次进行完全垃圾收集以来,每次进行部分垃圾收集后第 1 代链表长度的累积值。前面提到第 1 代链表会合并到第 2 代,所以名字中有 pending。 + + +优化:在 collect_generations 函数中,如果找到的 generation 等于 2,那么只有 long_lived_pending> long_lived_total / 4 才会进行完全垃圾收集,否则寻找一下个代。 + +因为第 2 代保存了非常多的对象,几乎是所有一直存活的对象,对第 2 代进行垃圾回收是非常耗时的,另外,对第 2 代频繁进行垃圾回收也是毫无意义的。 + +## 总结 + +GC 中最复杂的就是如何正确地处理弱引用的回调函数以及 tp_finalize. + +弱引用的回调函数和 tp_finalize 都有可能是 Python 代码, 而执行 Python 代码会回到 ceval, 从而有可能当前执行 GC 的线程放弃 GIL, 其它线程开始执行. + +一些容易导致的问题如下: + +- 重入问题. Python 层面的回调函数可以调用任意函数. + +- Python 层面的代码有可能复活 unreachable 对象. + +- Python 层面的代码有可能访问某些已经执行了 tp_clear 的对象, 这些对象的状态可能已经不合法了, 访问这些对象可能导致 segmentfault. + +解决办法是 handle_weakrefs 函数, handle_weakrefs 负责清除 unreachable 对象的弱引用链表. 这样以来在 delete_garbage 时就不会调用弱引用的回调函数. 
+ +可以看一下源码中的 [gc_weakref.txt](https://github.com/python/cpython/blob/master/Modules/gc_weakref.txt). + +为了避免一些 GC 导致的奇怪问题, 在 `__del__` 和弱引用的回调函数中不要做大多复杂的事情, 最好不要访问可能在循环引用链中的对象. + +``` +People simply shouldn't try to use `__del__` or weakref callbacks to do fancy stuff. +``` + +### 相关文章和讨论 + +- 早期的一篇关于 Python GC 的文章:[http://www.arctrix.com/nas/python/gc/](http://www.arctrix.com/nas/python/gc/) + +- Python Dev Mail List 上关于 Python GC 实现的讨论: + + - [http://mail.python.org/pipermail/python-dev/2000-March/002385.html](http://mail.python.org/pipermail/python-dev/2000-March/002385.html) + + - [http://mail.python.org/pipermail/python-dev/2000-March/002434.html](http://mail.python.org/pipermail/python-dev/2000-March/002434.html) + + - [http://mail.python.org/pipermail/python-dev/2000-March/002497.html](http://mail.python.org/pipermail/python-dev/2000-March/002497.html) + + - 减少 GC 运行的次数。[https://mail.python.org/pipermail/python-dev/2008-June/080579.html](https://mail.python.org/pipermail/python-dev/2008-June/080579.html) + +- 关于优化 Python GC 的 issue: + + - 减少 GC 追踪对象的数量,例如只包含不可变对象的元祖可以不用被 GC 追踪。 + + [https://bugs.python.org/issue4688](https://bugs.python.org/issue4688) + + [https://bugs.python.org/issue14775](https://bugs.python.org/issue14775) \ No newline at end of file diff --git "a/345円274円261円345円274円225円347円224円250円.md" "b/345円274円261円345円274円225円347円224円250円.md" index e69de29..9254b20 100644 --- "a/345円274円261円345円274円225円347円224円250円.md" +++ "b/345円274円261円345円274円225円347円224円250円.md" @@ -0,0 +1,312 @@ +# 弱引用 + + +## 数据结构 + +```c +// Include/weakrefobject.h + +typedef struct _PyWeakReference PyWeakReference; + +struct _PyWeakReference { + PyObject_HEAD + + /* The object to which this is a weak reference, or Py_None if none. + * Note that this is a stealth reference: wr_object's refcount is + * not incremented to reflect this pointer. + */ + PyObject *wr_object; + + /* A callable to invoke when wr_object dies, or NULL if none. 
*/ + PyObject *wr_callback; + + /* A cache for wr_object's hash code. As usual for hashes, this is -1 + * if the hash code isn't known yet. + */ + Py_hash_t hash; + + /* If wr_object is weakly referenced, wr_object has a doubly-linked NULL- + * terminated list of weak references to it. These are the list pointers. + * If wr_object goes away, wr_object is set to Py_None, and these pointers + * have no meaning then. + */ + PyWeakReference *wr_prev; + PyWeakReference *wr_next; +}; +``` + +PyWeakReference 对象很简单, 其中包含了引用的目标对象, 回调函数. 另外 PyWeakReference 对象通过 wr_prev 和 wr_next 构成一个双向链表. + +弱引用对象的关键就是没有增加目标对象的引用计数. + +```c +PyTypeObject +_PyWeakref_RefType = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "weakref", + sizeof(PyWeakReference), + 0, + weakref_dealloc, /*tp_dealloc*/ + 0, /*tp_vectorcall_offset*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_as_async*/ + (reprfunc)weakref_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + (hashfunc)weakref_hash, /*tp_hash*/ + (ternaryfunc)weakref_call, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + 0, /*tp_doc*/ + (traverseproc)gc_traverse, /*tp_traverse*/ + (inquiry)gc_clear, /*tp_clear*/ + (richcmpfunc)weakref_richcompare, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + weakref_methods, /*tp_methods*/ + weakref_members, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + weakref___init__, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + weakref___new__, /*tp_new*/ + PyObject_GC_Del, /*tp_free*/ +}; +``` + +## 弱引用的一些操作 + +### 初始化一个弱引用对象 + +```c +static PyObject * +weakref___new__(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyWeakReference *self = NULL; + PyObject *ob, *callback = NULL; + + if 
(parse_weakref_init_args("__new__", args, kwargs, &ob, &callback)) { + PyWeakReference *ref, *proxy; + PyWeakReference **list; + + if (!PyType_SUPPORTS_WEAKREFS(Py_TYPE(ob))) { + PyErr_Format(PyExc_TypeError, + "cannot create weak reference to '%s' object", + Py_TYPE(ob)->tp_name); + return NULL; + } + if (callback == Py_None) + callback = NULL; + list = GET_WEAKREFS_LISTPTR(ob); + get_basic_refs(*list, &ref, &proxy); + if (callback == NULL && type == &_PyWeakref_RefType) { + if (ref != NULL) { + /* We can re-use an existing reference. */ + Py_INCREF(ref); + return (PyObject *)ref; + } + } + /* We have to create a new reference. */ + /* Note: the tp_alloc() can trigger cyclic GC, so the weakref + list on ob can be mutated. This means that the ref and + proxy pointers we got back earlier may have been collected, + so we need to compute these values again before we use + them. */ + self = (PyWeakReference *) (type->tp_alloc(type, 0)); + if (self != NULL) { + init_weakref(self, ob, callback); + if (callback == NULL && type == &_PyWeakref_RefType) { + insert_head(self, list); + } + else { + PyWeakReference *prev; + + get_basic_refs(*list, &ref, &proxy); + prev = (proxy == NULL) ? ref : proxy; + if (prev == NULL) + insert_head(self, list); + else + insert_after(self, prev); + } + } + } + return (PyObject *)self; +} +``` + +流程: + +- 检查目标对象是否支持弱引用. 判断标准是: + + ```c + /* Test if a type supports weak references */ + #define PyType_SUPPORTS_WEAKREFS(t) ((t)->tp_weaklistoffset> 0) + ``` + +- 获取目标对象的弱引用链表 list. + +- 调用 `get_basic_refs(*list, &ref, &proxy)` 获取 ref 和 proxy. + + ref 表示一个 callback 等于 NULL 的弱引用对象(PyWeakReference), proxy 表示一个 callback 等于 NULL 的 weakproxy 对象或者 weakcallableproxy 对象. + + 目标对象的弱引用链表最前面的两个节点一般是 ref 和 proxy. + +- 如果 callback 等于 NULL, 而且 ref 不为 NULL, 那么可以复用 ref, 此时直接返回 ref. + +- 否则新生成一个弱引用对象, 然后将其添加到目标对象的弱引用链表. 
+ +目标对象的弱引用链表的示意图: + +``` ++-----------------+ +| PyObject | ++-----------------+ +| PyObject_HEAD | ++-----------------+ +| __dict__ | ++-----------------+ +| __weakref__ | -----> +-------------------+ +-------------------+ ++-----------------+ | PyWeakReference | -----> | PyWeakReference | ... + +-------------------+ +-------------------+ +``` + +### 获取目标对象(weakref_call) + +我们通过调用弱引用对象来获取目标对象, `target_obj = wr_obj()`. + +调用对象的底层实现自然是 `__call__`, 即 tp_call. + +```c +static PyObject * +weakref_call(PyWeakReference *self, PyObject *args, PyObject *kw) +{ + static char *kwlist[] = {NULL}; + + if (PyArg_ParseTupleAndKeywords(args, kw, ":__call__", kwlist)) { + PyObject *object = PyWeakref_GET_OBJECT(self); + Py_INCREF(object); + return (object); + } + return NULL; +} +``` + +PyWeakref_GET_OBJECT 是一个宏, 作用是返回目标对象. 如果目标对象的引用计数大于 0, 那么返回目标对象, 否则返回 None. + +### 目标对象被回收时发生了什么 + +目标对象被回收时, 首先会调用目标对象的 tp_dealloc, tp_dealloc 会调用 PyObject_ClearWeakRefs. + +```c +/* Note that there's an inlined copy-paste of handle_callback() in gcmodule.c's + * handle_weakrefs(). + */ +static void +handle_callback(PyWeakReference *ref, PyObject *callback) +{ + PyObject *cbresult = PyObject_CallOneArg(callback, (PyObject *)ref); + + if (cbresult == NULL) + PyErr_WriteUnraisable(callback); + else + Py_DECREF(cbresult); +} + +/* This function is called by the tp_dealloc handler to clear weak references. + * + * This iterates through the weak references for 'object' and calls callbacks + * for those references which have one. It returns when all callbacks have + * been attempted.
+ */ +void +PyObject_ClearWeakRefs(PyObject *object) +{ + PyWeakReference **list; + + if (object == NULL + || !PyType_SUPPORTS_WEAKREFS(Py_TYPE(object)) + || Py_REFCNT(object) != 0) + { + PyErr_BadInternalCall(); + return; + } + list = GET_WEAKREFS_LISTPTR(object); + /* Remove the callback-less basic and proxy references */ + if (*list != NULL && (*list)->wr_callback == NULL) { + clear_weakref(*list); + if (*list != NULL && (*list)->wr_callback == NULL) + clear_weakref(*list); + } + if (*list != NULL) { + PyWeakReference *current = *list; + Py_ssize_t count = _PyWeakref_GetWeakrefCount(current); + PyObject *err_type, *err_value, *err_tb; + + PyErr_Fetch(&err_type, &err_value, &err_tb); + if (count == 1) { + PyObject *callback = current->wr_callback; + + current->wr_callback = NULL; + clear_weakref(current); + if (callback != NULL) { + if (Py_REFCNT((PyObject *)current)> 0) { + handle_callback(current, callback); + } + Py_DECREF(callback); + } + } + else { + PyObject *tuple; + Py_ssize_t i = 0; + + tuple = PyTuple_New(count * 2); + if (tuple == NULL) { + _PyErr_ChainExceptions(err_type, err_value, err_tb); + return; + } + + for (i = 0; i < count; ++i) { + PyWeakReference *next = current->wr_next; + + if (Py_REFCNT((PyObject *)current)> 0) { + Py_INCREF(current); + PyTuple_SET_ITEM(tuple, i * 2, (PyObject *) current); + PyTuple_SET_ITEM(tuple, i * 2 + 1, current->wr_callback); + } + else { + Py_DECREF(current->wr_callback); + } + current->wr_callback = NULL; + clear_weakref(current); + current = next; + } + for (i = 0; i < count; ++i) { + PyObject *callback = PyTuple_GET_ITEM(tuple, i * 2 + 1); + + /* The tuple may have slots left to NULL */ + if (callback != NULL) { + PyObject *item = PyTuple_GET_ITEM(tuple, i * 2); + handle_callback((PyWeakReference *)item, callback); + } + } + Py_DECREF(tuple); + } + assert(!PyErr_Occurred()); + PyErr_Restore(err_type, err_value, err_tb); + } +} +``` + +PyObject_ClearWeakRefs 负责清理目标对象的弱引用链表, 同时调用弱引用的 callback. 
+ diff --git "a/\346\217\217\350\277\260\347\254\246(descriptor).md" "b/\347\261\273\345\236\213\347\263\273\347\273\237\344\271\213\346\217\217\350\277\260\347\254\246(descriptor).md" similarity index 100% rename from "\346\217\217\350\277\260\347\254\246(descriptor).md" rename to "\347\261\273\345\236\213\347\263\273\347\273\237\344\271\213\346\217\217\350\277\260\347\254\246(descriptor).md"

    AltStyle によって変換されたページ (->オリジナル) /