Python Tuple實現原理 - C - CPython | Waim Chiu = 神的直觉

# 首先簡單瞭解一下 tuple 是什麽東西

元組（tuple）是 Python 存儲容器之一，最大特點就是，它是 immutable，像其他 list、dictionary、set 這些，都是 mutable

當然，tuple 雖然是 immutable 類型，但在底層存儲上其實跟 mutable 容器沒有本質區別，只是 CPython 在 tuple 的實現上規定了不允許修改：當你嘗試修改 tuple 的元素時，就會抛出異常（TypeError）

# Tuple 是怎麽發音？

通過網上瞭解到，Python 的創始人 Guido van Rossum 於 2011 年在 Twitter 上鄭重説明了 tuple 是怎麽發音的

tuple怎麼讀.jpg

龜叔說，他是這樣念 tuple 的🤣

周一、三、五這三天，唸 too-pull

周二、四、六這三天，唸 tub-pull

星期日他不會談到 tuple，所以周日沒有 tuple 怎麼唸的問題

好了，回歸正題

# tuple 與 list 差別在哪

tuple 可以用字面量 () 創建，也可以調用 tuple() 創建；但調用 tuple() 的效率並不高，因爲它需要額外執行 CALL_FUNCTION 指令
tuple 屬於 immutable，意味著你不可以修改、增減 tuple 的值；因爲 tuple 的不可變特性，所以沒有 append、remove、pop 等可以變更值的操作
當要建立只有一個 item 的 tuple 時，一定要加上逗號（例如 (1,)），否則不會建立 tuple

tuple 比 list 占用空間要更小

	import sys

	# Print the size reported by sys.getsizeof for a tuple and for a list
	# that hold the same three integers (tuple first, then list).
	tuple_obj = (1, 2, 3)
	list_obj = [1, 2, 3]

	for container in (tuple_obj, list_obj):
		print(sys.getsizeof(container))

	>>> 64
	>>> 120

具體是怎麽計算大小的，以後有空再聊🐱‍👤

# tuple 有以下優點

因爲是 immutable，運行效率比 list 還要快
占用空間更小
可以作爲 dictionary 的 key 使用，因爲是不可變元素

# CPython 中 tuple 的結構

	/* Type object for the built-in `tuple` type.
	 * Slots are filled positionally; the trailing comment on each line names
	 * the PyTypeObject field it initialises. A 0 means the slot is left empty. */
	PyTypeObject PyTuple_Type = {
	    PyVarObject_HEAD_INIT(&PyType_Type, 0)
	    "tuple",                                    /* type name */
	    /* Basic size excludes one pointer because PyTupleObject declares
	       ob_item[1]; the items are accounted for by the per-item size. */
	    sizeof(PyTupleObject) - sizeof(PyObject *),
	    sizeof(PyObject *),                         /* size of one item slot */
	    (destructor)tupledealloc,                   /* tp_dealloc */
	    0,                                          /* tp_vectorcall_offset */
	    0,                                          /* tp_getattr */
	    0,                                          /* tp_setattr */
	    0,                                          /* tp_as_async */
	    (reprfunc)tuplerepr,                        /* tp_repr */
	    0,                                          /* tp_as_number */
	    &tuple_as_sequence,                         /* tp_as_sequence */
	    &tuple_as_mapping,                          /* tp_as_mapping */
	    (hashfunc)tuplehash,                        /* tp_hash */
	    0,                                          /* tp_call */
	    0,                                          /* tp_str */
	    PyObject_GenericGetAttr,                    /* tp_getattro */
	    0,                                          /* tp_setattro */
	    0,                                          /* tp_as_buffer */
	    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
	        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TUPLE_SUBCLASS, /* tp_flags */
	    tuple_new__doc__,                           /* tp_doc */
	    (traverseproc)tupletraverse,                /* tp_traverse */
	    0,                                          /* tp_clear */
	    tuplerichcompare,                           /* tp_richcompare */
	    0,                                          /* tp_weaklistoffset */
	    tuple_iter,                                 /* tp_iter */
	    0,                                          /* tp_iternext */
	    tuple_methods,                              /* tp_methods */
	    0,                                          /* tp_members */
	    0,                                          /* tp_getset */
	    0,                                          /* tp_base */
	    0,                                          /* tp_dict */
	    0,                                          /* tp_descr_get */
	    0,                                          /* tp_descr_set */
	    0,                                          /* tp_dictoffset */
	    0,                                          /* tp_init */
	    0,                                          /* tp_alloc */
	    tuple_new,                                  /* tp_new */
	    PyObject_GC_Del,                            /* tp_free */
	    .tp_vectorcall = tuple_vectorcall,
	};

看看 tuple 支持的 method 操作

	/* Method table referenced by PyTuple_Type's tp_methods slot.
	 * The *_METHODDEF macros each expand to one complete `{...},` entry,
	 * which is why no separating commas appear after them. */
	static PyMethodDef tuple_methods[] = {
	    TUPLE___GETNEWARGS___METHODDEF
	    TUPLE_INDEX_METHODDEF   /* tuple.index() as seen from Python */
	    TUPLE_COUNT_METHODDEF   /* tuple.count() as seen from Python */
	    {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
	    {NULL, NULL}            /* sentinel */
	};

	// Method-table entry for "index": binds tuple_index with the METH_FASTCALL
	// flag and the tuple_index__doc__ docstring. The trailing comma lets the
	// macro be dropped straight into a PyMethodDef initialiser list.
	#define TUPLE_INDEX_METHODDEF \
	{"index", (PyCFunction)(void(*)(void))tuple_index, METH_FASTCALL, tuple_index__doc__},

	// Method-table entry for "count": binds tuple_count with the METH_O flag
	// and the tuple_count__doc__ docstring. The trailing comma lets the macro
	// be dropped straight into a PyMethodDef initialiser list.
	#define TUPLE_COUNT_METHODDEF \
	{"count", (PyCFunction)tuple_count, METH_O, tuple_count__doc__},

tuple 的核心結構為

	// Expands to a PyVarObject member named ob_base (including the terminating
	// semicolon); PyTupleObject uses it as its first member.
	#define PyObject_VAR_HEAD PyVarObject ob_base;

	/* Simplified, flattened view of PyVarObject for explanation purposes:
	 * the nested `PyObject ob_base;` member is written out field by field. */
	typedef struct {
	    /* Next/previous links of a doubly-linked list of objects.
	       NOTE(review): in upstream CPython these two fields exist only in
	       Py_TRACE_REFS debug builds; regular builds start at ob_refcnt. */
	    struct _object *_ob_next;
	    struct _object *_ob_prev;
	    Py_ssize_t ob_refcnt;   /* reference count */
	    PyTypeObject *ob_type;  /* the object's type */
	    Py_ssize_t ob_size;     /* number of items */
	} PyVarObject;

	// In-memory layout of a tuple: the variable-size object header followed by
	// the array of item pointers.
	typedef struct {
	PyObject_VAR_HEAD
	/* ob_item contains space for 'ob_size' elements.
	Items must normally not be NULL, except during construction when
	the tuple is not yet visible outside the function that builds it. */
	PyObject *ob_item[1]; // pointers to the tuple's items -> [pointer, ...]; declared with one slot only
	} PyTupleObject;

創建一個 tuple 時的執行過程

	/* tp_new slot of PyTuple_Type, i.e. what Python-level tuple.__new__ runs.
	 *
	 * type:   the type being instantiated (tuple itself or a subclass).
	 * args:   tuple of positional arguments; at most one (the iterable).
	 * kwargs: keyword arguments; rejected when type is exactly tuple.
	 *
	 * Returns whatever tuple_new_impl() returns, or NULL if argument
	 * validation fails. */
	static PyObject *
	tuple_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
	{
	    PyObject *return_value = NULL;
	    PyObject *iterable = NULL;

	    /* Keywords are only checked for the exact tuple type; for subclasses
	       this check is skipped. */
	    if ((type == &PyTuple_Type) &&
	        !_PyArg_NoKeywords("tuple", kwargs)) {
	        goto exit;
	    }
	    /* Accept between 0 and 1 positional arguments. */
	    if (!_PyArg_CheckPositional("tuple", PyTuple_GET_SIZE(args), 0, 1)) {
	        goto exit;
	    }
	    /* No argument given: leave iterable as NULL. */
	    if (PyTuple_GET_SIZE(args) < 1) {
	        goto skip_optional;
	    }
	    iterable = PyTuple_GET_ITEM(args, 0);
	skip_optional:
	    /* The real work happens in tuple_new_impl(). */
	    return_value = tuple_new_impl(type, iterable);

	exit:
	    return return_value;
	}

	/* Implementation behind tuple_new().
	 *
	 * type:     the type being instantiated.
	 * iterable: the single optional argument, or NULL when none was passed.
	 *
	 * Dispatches to one of three constructors depending on the arguments. */
	static PyObject *
	tuple_new_impl(PyTypeObject *type, PyObject *iterable)
	/*[clinic end generated code: output=4546d9f0d469bce7 input=86963bcde633b5a2]*/
	{
	    /* Not the exact tuple type: delegate to the subclass constructor. */
	    if (type != &PyTuple_Type)
	        return tuple_subtype_new(type, iterable);

	    /* No argument, e.g. a = tuple(): produce an empty tuple. */
	    if (iterable == NULL)
	        return PyTuple_New(0);
	    /* One iterable argument, e.g. a = tuple([1, 2, 3]) or
	       a = tuple((1, 2, 3)): build the tuple from its items. */
	    else
	        return PySequence_Tuple(iterable);
	}

在創建非空元組時會調用 PySequence_Tuple 方法，代碼比較長，慢慢分析一下😷

	/* Build a tuple from the iterable `v`.
	 *
	 * Exact tuples are returned as-is (with a new reference), exact lists are
	 * converted via PyList_AsTuple(), and anything else is consumed through
	 * its iterator into a tuple that is grown or shrunk as needed.
	 *
	 * Returns the resulting tuple, or NULL on failure. */
	PyObject *
	PySequence_Tuple(PyObject *v)
	{
	    PyObject *it;  /* iter(v) */
	    Py_ssize_t n;  /* guess for result tuple size */
	    PyObject *result = NULL;
	    Py_ssize_t j;

	    if (v == NULL) {
	        /* NULL argument: the error is reported by null_error(), which is
	           defined elsewhere; its return value is passed straight back. */
	        return null_error();
	    }

	    /* Special-case the common tuple and list cases, for efficiency. */
	    if (PyTuple_CheckExact(v)) {
	        /* Note that we can't know whether it's safe to return
	           a tuple subclass instance as-is, hence the restriction
	           to exact tuples here.  In contrast, lists always make
	           a copy, so there's no need for exactness below. */
	        Py_INCREF(v);   /* the caller receives its own reference */
	        return v;       /* same tuple object, no copy */
	    }
	    if (PyList_CheckExact(v))
	        /* Exact list: PyList_AsTuple() (defined elsewhere) produces the
	           tuple from the list's items. */
	        return PyList_AsTuple(v);

	    /* Get iterator. */
	    /* The concrete iterator type depends on v, e.g. tuple(range(10))
	       iterates over whatever iter(range(10)) returns. */
	    it = PyObject_GetIter(v);
	    if (it == NULL)
	        return NULL;

	    /* Guess result size and allocate space. */
	    /* 10 is the fallback guess passed to PyObject_LengthHint();
	       a result of -1 signals an error. */
	    n = PyObject_LengthHint(v, 10);
	    if (n == -1)
	        goto Fail;
	    result = PyTuple_New(n);
	    if (result == NULL)
	        goto Fail;

	    /* Fill the tuple. */
	    for (j = 0; ; ++j) {
	        /* Fetch the next item from the iterator obtained above. */
	        PyObject *item = PyIter_Next(it);
	        if (item == NULL) {
	            /* NULL means either an error occurred or the iterator is
	               exhausted; PyErr_Occurred() tells the two apart. */
	            if (PyErr_Occurred())
	                goto Fail;
	            break;
	        }
	        if (j >= n) {
	            /* More items than guessed: the tuple must grow. */
	            size_t newn = (size_t)n;
	            /* The over-allocation strategy can grow a bit faster
	               than for lists because unlike lists the
	               over-allocation isn't permanent -- we reclaim
	               the excess before the end of this routine.
	               So, grow by ten and then add 25%.
	            */
	            newn += 10u;        /* old size + 10 */
	            newn += newn >> 2;  /* plus 25%, i.e. (old size + 10) * 1.25 */
	            if (newn > PY_SSIZE_T_MAX) {
	                /* Check for overflow */
	                PyErr_NoMemory();
	                Py_DECREF(item);    /* item was never stored; release it */
	                goto Fail;
	            }
	            n = (Py_ssize_t)newn;
	            /* Resize the tuple to the new capacity; on failure the
	               pending item is released before bailing out. */
	            if (_PyTuple_Resize(&result, n) != 0) {
	                Py_DECREF(item);
	                goto Fail;
	            }
	        }
	        /* Store the item at index j. */
	        PyTuple_SET_ITEM(result, j, item);
	    }

	    /* Cut tuple back if guess was too large. */
	    if (j < n &&
	        _PyTuple_Resize(&result, j) != 0)
	        goto Fail;

	    Py_DECREF(it);  /* done with the iterator */
	    return result;

	Fail:
	    /* result may still be NULL here, hence Py_XDECREF. */
	    Py_XDECREF(result);
	    Py_DECREF(it);
	    return NULL;
	}

PyTuple_New 方法實現

	// Create a tuple with `size` item slots, all initialised to NULL.
	// Returns the new tuple cast to PyObject *, or NULL if tuple_alloc() fails.
	// When PyTuple_MAXSAVESIZE > 0, the empty tuple is cached in free_list[0]
	// and reused on later size-0 requests.
	PyObject *
	PyTuple_New(Py_ssize_t size)
	{
	PyTupleObject *op;
	// Cache lookup: an empty tuple is requested and one is already cached in
	// free_list[0], so hand out that object with one more reference instead
	// of allocating.
	#if PyTuple_MAXSAVESIZE > 0
	if (size == 0 && free_list[0]) {
	op = free_list[0];
	Py_INCREF(op);
	return (PyObject *) op;
	}
	#endif
	// Reached for every request not served from the cache above (any size,
	// including the first empty tuple): allocate the tuple object.
	op = tuple_alloc(size);
	if (op == NULL) {
	return NULL;
	}
	// Clear every item slot; the loop body does not run when size is 0.
	for (Py_ssize_t i = 0; i < size; i++) {
	op->ob_item[i] = NULL;
	}
	// A newly allocated empty tuple is stored in free_list[0] so that later
	// size-0 requests can reuse it.
	#if PyTuple_MAXSAVESIZE > 0
	if (size == 0) {
	free_list[0] = op;
	++numfree[0];
	Py_INCREF(op); /* extra INCREF so that this is never freed */
	}
	#endif
	tuple_gc_track(op);
	return (PyObject *) op;
	}

在創建 tuple 過程中，有一個步驟是取出迭代器進行填充元素，那我們可以創建一個實現了__iter__和__next__的類來測試一下

	class A(object):
		"""Minimal iterator used to show how tuple() consumes an iterable.

		Each call to __next__ pops the last element of the internal list, so
		the items come out in reverse order of how they were stored.
		"""

		def __init__(self):
			self.array = [1, 2, 3]

		def __iter__(self):
			# The object is its own iterator.
			return self

		def __next__(self):
			try:
				# pop() removes and returns the last element.
				return self.array.pop()
			except IndexError:
				# The list is empty: signal the end of iteration.
				raise StopIteration

	a = tuple(A())
	print(a)
	>>> (3, 2, 1) # 結果是相反的，因爲我是用最簡單的 pop 彈出尾元素

占個坑… 有時間再補充

CPython C

# 首先簡單瞭解一下 tuple 是什麽東西

# Tuple 是怎麽發音？

# tuple 與 list 差別在哪

# tuple 有以下優點

# CPython 中 tuple 的結構

getattr實現原理

Python range的實現原理