static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated, num_allocated_bytes;
Py_ssize_t allocated = self->allocated;
/* Bypass realloc() when a previous overallocation is large enough
to accommodate the newsize. If the newsize falls lower than half
the allocated size, then proceed with the realloc() to shrink the list.
*/
if (allocated >= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
Py_SIZE(self) = newsize;
return 0;
}
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
* Note: new_allocated won't overflow because the largest possible value
* is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
*/
new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
if (new_allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
PyErr_NoMemory();
return -1;
}
if (newsize == 0)
new_allocated = 0;
num_allocated_bytes = new_allocated * sizeof(PyObject *);
items = (PyObject **)PyMem_Realloc(self->ob_item, num_allocated_bytes);
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
Py_SIZE(self) = newsize;
self->allocated = new_allocated;
return 0;
}
查看CPython方法的源代码append(上面列出),我发现append只要数组中有空闲空间,复杂度就是O(1),否则Python以指数方式增加新的大小(+3/+6固定量)列表的内存)并将现有元素复制到新数组中。事实证明,由于分配的数组大小呈指数增长,随着列表的增长,重新分配的次数会减少。因此,例如:
lst = []
for i in range(10):
lst.append(i)
第一次重新分配时,复制 1 个元素,第二次复制 4 个元素,第三次复制 10 个元素,依此类推,理论上给出了以下形式的几何级数:1+4+10+⋯≤O(n)。据我了解,随着每次后续迭代,重新分配之间的操作数量线性增长,并且 n 加法的重新分配总数以 O(logn) 的形式减少。那么,在这种情况下,为什么对于 n 个附加序列,总复杂度是 O(n),而不是 O(n logn)?
我们来算一下。为简单起见,指数增加 - 2 倍(如您所知,这并不重要)。那些。 1、重新分配 - 2、另一个重新分配 - 4 等等。
N = 2 n(请注意这里和下面大 N 和小 n 之间的区别)重新分配元素的总数- n 件。 O(logN)。还有 N 次插入,时间复杂度为 O(1) - 总时间复杂度为 O(N)。现在关于复制...第一个是 1 个元素,第二个是两个元素,然后是 4 个元素,最后一个是 2 n-1。
但是 1+2+4+...+2 n-1 = 2 n -1 = O(N)。
插入 N 个元素的总时间为 O(log N)+O(N)+O(N) = O(N)
怎么了?你从哪里得到乘法?并非每个元素都需要重新分配内存:)