list
Python内存管理中的基石
Python中所有类型创建对象时,底层都是与PyObject和PyVarObject结构体实现,一般情况下由单个元素组成对象内部会使用PyObject结构体(float)、由多个元素组成的对象内部会使用PyVarObject结构体
2个结构体
- PyObject,此结构体中包含3个元素。
- _PyObject_HEAD_EXTRA,用于构造双向链表。
- ob_refcnt,引用计数器。
- *ob_type,数据类型。
- PyVarObject,次结构体中包含4个元素(ob_base中包含3个元素)
- ob_base,PyObject结构体对象,即:包含PyObject结构体中的三个元素。
- ob_size,内部元素个数。
3个宏定义
- PyObject_HEAD,代指PyObject结构体。
- PyVarObject_HEAD,代指PyVarObject对象。
- _PyObject_HEAD_EXTRA,代指前后指针,用于构造双向队列。
空list占多大内存
print(list().__sizeof__()) # 40
list结构体
// PyVarObject
typedef struct {
// PyObject
PyObject ob_base;
// 列表长度,int类型,8字节
Py_ssize_t ob_size;
} PyVarObject;
#define PyObject_VAR_HEAD PyVarObject ob_base;
typedef _W64 int Py_ssize_t;
typedef struct _object {
_PyObject_HEAD_EXTRA
// 引用计数器int类型,8字节
Py_ssize_t ob_refcnt;
// 指针,8字节
PyTypeObject *ob_type;
} PyObject;
typedef struct {
PyObject_VAR_HEAD
// 指向列表元素的指针向量。 列表[0] 是ob_item[0],等等。
// 指针的指针,存放列表元素
// 为什么要用指针的指针呢?
// python的list并不直接保存元素,而是保存元素的指针,这也是同一个list中可以同时存放多种类型元素的根本原因。
// 指针8字节
PyObject **ob_item;
// 列表容量,int类型,8字节
Py_ssize_t allocated;
} PyListObject;
创建列表
PyObject *
PyList_New(Py_ssize_t size)
{
// 处理size < 0的情况
if (size < 0) {
// 打印错误日志
PyErr_BadInternalCall();
return NULL;
}
struct _Py_list_state *state = get_list_state();
PyListObject *op;
#ifdef Py_DEBUG
// PyList_New() must not be called after _PyList_Fini()
assert(state->numfree != -1);
#endif
if (state->numfree) {
state->numfree--;
op = state->free_list[state->numfree];
_Py_NewReference((PyObject *)op);
}
else {
op = PyObject_GC_New(PyListObject, &PyList_Type);
if (op == NULL) {
return NULL;
}
}
if (size <= 0) {
op->ob_item = NULL;
}
else {
// 给item分配内存
op->ob_item = (PyObject **) PyMem_Calloc(size, sizeof(PyObject *));
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
}
// 设置len=0
Py_SET_SIZE(op, size);
// 设置cap=0
op->allocated = size;
_PyObject_GC_TRACK(op);
// 返回列表指针
return (PyObject *) op;
}
添加元素
listType = list()
ele = "德玛西亚"
// 实际上是将内存地址放到了列表中,即存储了一个指针,固定大小为8字节
listType.append(ele)
static int
app1(PyListObject *self, PyObject *v)
{
// 获取列表len
Py_ssize_t n = PyList_GET_SIZE(self);
assert (v != NULL);
assert((size_t)n + 1 < PY_SSIZE_T_MAX);
/* list_resize
Add padding to make the allocated size multiple of 4.
The growth pattern is: 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
*/
// 判断cap是否够,n+1因为操作是增加元素.
if (list_resize(self, n+1) < 0)
return -1;
// 追加元素
Py_INCREF(v);
PyList_SET_ITEM(self, n, v);
return 0;
}
int
PyList_Append(PyObject *op, PyObject *newitem)
{
if (PyList_Check(op) && (newitem != NULL))
return app1((PyListObject *)op, newitem);
PyErr_BadInternalCall();
return -1;
}
插入元素
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
// 计算len
Py_ssize_t i, n = Py_SIZE(self);
PyObject **items;
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
assert((size_t)n + 1 < PY_SSIZE_T_MAX);
// 扩容或者缩容
if (list_resize(self, n+1) < 0)
return -1;
if (where < 0) {
where += n;
if (where < 0)
where = 0;
}
if (where > n)
where = n;
// 移位
items = self->ob_item;
for (i = n; --i >= where; )
items[i+1] = items[i];
Py_INCREF(v);
items[where] = v;
return 0;
}
int
PyList_Insert(PyObject *op, Py_ssize_t where, PyObject *newitem)
{
if (!PyList_Check(op)) {
PyErr_BadInternalCall();
return -1;
}
return ins1((PyListObject *)op, where, newitem);
}
删除元素
只将len
减少1,如果下次append操作,会将第三个覆盖。
static PyObject *
list_pop_impl(PyListObject *self, Py_ssize_t index)
/*[clinic end generated code: output=6bd69dcb3f17eca8 input=b83675976f329e6f]*/
{
PyObject *v;
int status;
// if len(list) == 0 触发panic
if (Py_SIZE(self) == 0) {
/* Special-case most common failure cause */
PyErr_SetString(PyExc_IndexError, "pop from empty list");
return NULL;
}
// 如果索引小于0,index=len(list)+index
if (index < 0)
index += Py_SIZE(self);
// 索引越界
if (!valid_index(index, Py_SIZE(self))) {
PyErr_SetString(PyExc_IndexError, "pop index out of range");
return NULL;
}
// 找到val
v = self->ob_item[index];
if (index == Py_SIZE(self) - 1) {
// 计算容量扩缩容
status = list_resize(self, Py_SIZE(self) - 1);
// 直接返回
if (status >= 0)
return v; /* and v now owns the reference the list had */
else
return NULL;
}
Py_INCREF(v);
// 移动数据
status = list_ass_slice(self, index, index+1, (PyObject *)NULL);
if (status < 0) {
Py_DECREF(v);
return NULL;
}
return v;
}
清空元素
直接将结构体元素的指针置为 NULL即可,GC将没有引用的内存地址回收。
static int
_list_clear(PyListObject *a)
{
Py_ssize_t i;
PyObject **item = a->ob_item;
if (item != NULL) {
/* Because XDECREF can recursively invoke operations on
this list, we make it empty first. */
i = Py_SIZE(a);
Py_SET_SIZE(a, 0);
// 直接将item改为NULL
a->ob_item = NULL;
a->allocated = 0;
while (--i >= 0) {
Py_XDECREF(item[i]);
}
// 释放无用内存
PyMem_Free(item);
}
/* Never fails; the return value can be ignored.
Note that there is no guarantee that the list is actually empty
at this point, because XDECREF may have populated it again! */
return 0;
}
销毁列表
- 引用计数器>0,不操作
- 否则:将每一个元素的引用计数减一,再将结构体参数改为对应的空值,将PyListObject放入freelist(容量为80)中等待销毁。
创建列表会先去freelist中找一个拿出来,就不用再次申请内存销毁内存了。
扩缩容
static int
// Py_ssize_t : len长度,要新增就传入len+1,要删除就传入len-1
// PyListObject : list指针
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated, num_allocated_bytes;
// allocated是cap
Py_ssize_t allocated = self->allocated;
// 如果cap大于len,并且 len >= (cap/2),表示cap足够,直接修改len,然后进行操作即可,返回状态码status=0
if (allocated >= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
// 设置结构体的len为newsize
Py_SET_SIZE(self, newsize);
return 0;
}
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* Add padding to make the allocated size multiple of 4.
* The growth pattern is: 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, ...
* Note: new_allocated won't overflow because the largest possible value
* is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
*/
// 扩容: cap < newsize
// 缩容: newsize < (cap/2)
// 扩缩容机制算法:((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;
new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3;
if (newsize - Py_SIZE(self) > (Py_ssize_t)(new_allocated - newsize))
new_allocated = ((size_t)newsize + 3) & ~(size_t)3;
if (newsize == 0)
new_allocated = 0;
if (new_allocated <= (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
num_allocated_bytes = new_allocated * sizeof(PyObject *);
// 基于realloc进行扩缩容,可能涉及原来元素的迁移。
items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
}
else {
// integer overflow
items = NULL;
}
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
// 重新设置list结构体
self->ob_item = items;
Py_SET_SIZE(self, newsize);
self->allocated = new_allocated;
return 0;
}