Python map对象探究 - C - CPython | Waim Chiu = 神的直觉

# Python map

map 在 Python 中是一个内置类。初始化时，第一个参数传入 function 对象，后面可以跟一个或多个可迭代对象（iterable）作为参数

	class map(object):
	    """
	    map(func, *iterables) --> map object

	    Make an iterator that computes the function using arguments from
	    each of the iterables. Stops when the shortest iterable is exhausted.
	    """
	    # NOTE: this is an IDE-generated stub of the builtin; the bodies are
	    # placeholders and most signatures are marked as unknown.

	    def __getattribute__(self, *args, **kwargs): # real signature unknown
	        """ Return getattr(self, name). """
	        pass

	    def __init__(self, func, *iterables): # real signature unknown; restored from __doc__
	        # A function object must be passed first, followed by one or more
	        # iterables.
	        pass

	    def __iter__(self, *args, **kwargs): # real signature unknown
	        """ Implement iter(self). """
	        # Makes map objects usable in iteration contexts.
	        pass

	    @staticmethod # known case of __new__
	    def __new__(*args, **kwargs): # real signature unknown
	        """ Create and return a new object. See help(type) for accurate signature. """
	        pass

	    def __next__(self, *args, **kwargs): # real signature unknown
	        """ Implement next(self). """
	        pass

	    def __reduce__(self, *args, **kwargs): # real signature unknown
	        """ Return state information for pickling. """
	        pass

文档字符串说明：map 会创建一个迭代器，用每个可迭代对象中的元素作为参数来调用函数；当最短的可迭代对象耗尽时停止。可以测试一下：

	def f(x, y):
	    """Return the sum of the paired elements taken from each list."""
	    return x + y

	array1 = [1, 2, 3]
	array2 = [1, 2, 3, 4, 5]

	# map pairs the elements positionally and stops once the shorter input
	# (array1) is exhausted, so 4 and 5 from array2 are never used.
	r = map(f, array1, array2)

	for i in r:
	    print(i)

	# ---------------------- printed output ----------------------
	# 2
	# 4
	# 6

先来看看 map 在 CPython 中的结构，位于源代码中 Python/bltinmodule.c

	/* Type object for map: a GC-tracked, subclassable iterator type whose
	   behaviour comes from the map_new / map_next / map_dealloc slots. */
	PyTypeObject PyMap_Type = {
	    PyVarObject_HEAD_INIT(&PyType_Type, 0)
	    "map", /* tp_name */
	    sizeof(mapobject), /* tp_basicsize */
	    0, /* tp_itemsize */
	    /* methods */
	    (destructor)map_dealloc, /* tp_dealloc */
	    0, /* tp_vectorcall_offset */
	    0, /* tp_getattr */
	    0, /* tp_setattr */
	    0, /* tp_as_async */
	    0, /* tp_repr */
	    0, /* tp_as_number */
	    0, /* tp_as_sequence */
	    0, /* tp_as_mapping */
	    0, /* tp_hash */
	    0, /* tp_call */
	    0, /* tp_str */
	    PyObject_GenericGetAttr, /* tp_getattro */
	    0, /* tp_setattro */
	    0, /* tp_as_buffer */
	    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
	        Py_TPFLAGS_BASETYPE, /* tp_flags */
	    map_doc, /* tp_doc */
	    (traverseproc)map_traverse, /* tp_traverse */
	    0, /* tp_clear */
	    0, /* tp_richcompare */
	    0, /* tp_weaklistoffset */
	    PyObject_SelfIter, /* tp_iter: iter(m) returns m itself */
	    (iternextfunc)map_next, /* tp_iternext: implements next(m) */
	    map_methods, /* tp_methods */
	    0, /* tp_members */
	    0, /* tp_getset */
	    0, /* tp_base */
	    0, /* tp_dict */
	    0, /* tp_descr_get */
	    0, /* tp_descr_set */
	    0, /* tp_dictoffset */
	    0, /* tp_init */
	    PyType_GenericAlloc, /* tp_alloc */
	    map_new, /* tp_new: all construction work happens here */
	    PyObject_GC_Del, /* tp_free */
	};

例如，当我们调用 map(func, array1, array2) 时，首先会调用 map_new 方法

	/* tp_new of map: build a mapobject from args = (func, iterable, ...).
	   Returns a new reference, or NULL with an exception set on failure. */
	static PyObject *
	map_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
	{
	    PyObject *it, *iters, *func;
	    mapobject *lz;
	    Py_ssize_t numargs, i;

	    // For the exact map type, reject keyword arguments: map() does not
	    // accept any. _PyArg_NoKeywords sets the exception when kwds is not empty.
	    if (type == &PyMap_Type && !_PyArg_NoKeywords("map", kwds))
	        return NULL;

	    numargs = PyTuple_Size(args); // number of positional arguments
	    if (numargs < 2) {
	        // map needs one func plus at least one iterable
	        PyErr_SetString(PyExc_TypeError,
	                        "map() must have at least two arguments.");
	        return NULL;
	    }

	    // Tuple of size numargs - 1 that will hold one iterator per iterable
	    iters = PyTuple_New(numargs-1);
	    if (iters == NULL)
	        // allocation failed
	        return NULL;

	    // One pass per iterable argument; index 0 is func, so start at 1
	    for (i=1 ; i<numargs ; i++) {
	        /* Get iterator. */
	        // PyTuple_GET_ITEM fetches the iterable stored at position i;
	        // PyObject_GetIter obtains an iterator from it
	        it = PyObject_GetIter(PyTuple_GET_ITEM(args, i));
	        if (it == NULL) {
	            // The argument is not iterable (or its __iter__ failed); the
	            // exception is already set, so just release the tuple
	            Py_DECREF(iters);
	            return NULL;
	        }
	        // Store the iterator at index i-1. Unlike PyTuple_SetItem,
	        // PyTuple_SET_ITEM does no error checking and is only meant for
	        // filling a brand-new tuple
	        PyTuple_SET_ITEM(iters, i-1, it);
	    }

	    /* create mapobject structure */
	    lz = (mapobject *)type->tp_alloc(type, 0); // allocate the instance through the type's tp_alloc
	    if (lz == NULL) {
	        // allocation failed
	        Py_DECREF(iters); // drop our reference to the tuple
	        return NULL;
	    }
	    lz->iters = iters; // the mapobject now owns the iterator tuple
	    func = PyTuple_GET_ITEM(args, 0); // first argument: the function object
	    Py_INCREF(func); // the mapobject keeps its own reference to func
	    lz->func = func;

	    return (PyObject *)lz; // return the new mapobject
	}

当调用 map (func, array1, array2) 时，CPython 返回了一个 mapobject 对象

	>>> map(lambda x: print(x), [1,2,3])
	<map object at 0x000002771D500F70>

当对 mapobject 进行迭代时，会先调用 tp_iter（即 PyObject_SelfIter）返回对象本身，然后不断调用 __next__ 逐步迭代，对应的是 tp_iternext，即 map_next 方法

	/* Iterator-protocol helper used as tp_iter: returns obj itself as a new
	   reference, since an iterator is its own iterator. */
	PyObject *
	PyObject_SelfIter(PyObject *obj)
	{
	/* The caller receives a new reference, so bump the refcount first. */
	Py_INCREF(obj);
	return obj;
	}

	/* tp_iternext of map: fetch one value from every stored iterator and call
	   lz->func with them. Returns the call result, or NULL when any iterator
	   yields nothing (exhausted or failed) or when the call itself fails. */
	static PyObject *
	map_next(mapobject *lz)
	{
	    /*
	    _PY_FASTCALL_SMALL_STACK is a macro, 5 by default: the suggested size
	    (number of positional arguments) of a PyObject* array allocated on the
	    C stack, used to avoid a heap allocation.
	    When there are at most that many iterators the argument array lives in
	    small_stack; the function is then called with positional arguments
	    through the vectorcall helper at the end of the loop.
	    */
	    PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
	    PyObject **stack;
	    PyObject *result = NULL; // value returned by the call
	    PyThreadState *tstate = _PyThreadState_GET(); // current thread state

	    const Py_ssize_t niters = PyTuple_GET_SIZE(lz->iters); // number of iterators stored in the iters tuple
	    if (niters <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
	        // Few enough iterators: their values fit in the C-stack array
	        stack = small_stack;
	    }
	    else {
	        // Too many for small_stack: allocate the array on the heap instead
	        stack = PyMem_Malloc(niters * sizeof(stack[0]));
	        if (stack == NULL) {
	            // allocation failed: set MemoryError on this thread state
	            _PyErr_NoMemory(tstate);
	            return NULL;
	        }
	    }

	    Py_ssize_t nargs = 0;
	    // Pull one value from each stored iterator
	    for (Py_ssize_t i=0; i < niters; i++) {
	        PyObject *it = PyTuple_GET_ITEM(lz->iters, i); // iterator at index i
	        PyObject *val = Py_TYPE(it)->tp_iternext(it); // call that iterator type's own __next__ implementation
	        if (val == NULL) {
	            /*
	            The iterator produced nothing, so iteration is over.
	            This is the "stops when the shortest iterable is exhausted"
	            rule: for map(func, [1,2,3], ["a","b","c","d","e"]) the first
	            iterator returns NULL on the 4th call, so "d" and "e" from
	            the second one are never produced.
	            */
	            goto exit;
	        }
	        // Store val at index i, then go on to the next iterator
	        stack[i] = val;
	        // nargs counts the values stored so far. With e.g. 3 iterators the
	        // array is small_stack (length 5) and its last two slots are
	        // unused, so the call must be told how many entries are valid.
	        // The cleanup below also uses it to release exactly these values.
	        nargs++;
	    }
	    // Make the call. This helper is new in Python 3.9; Python 3.8 used
	    // _PyObject_FastCall here
	    result = _PyObject_VectorcallTstate(tstate, lz->func, stack, nargs, NULL);

	exit:
	    for (Py_ssize_t i=0; i < nargs; i++) {
	        // release the reference held for each fetched value
	        Py_DECREF(stack[i]);
	    }
	    if (stack != small_stack) {
	        // stack was heap-allocated above and must be freed
	        PyMem_Free(stack);
	    }
	    return result; // the call result (NULL on exhaustion or error)
	}

	/* Call callable with the positional arguments in args (count encoded in
	   nargsf) and optional keyword names, using the vectorcall protocol when
	   the callable supports it and tp_call otherwise. */
	static inline PyObject *
	_PyObject_VectorcallTstate(PyThreadState *tstate, PyObject *callable,
	                           PyObject *const *args, size_t nargsf,
	                           PyObject *kwnames)
	{
	    vectorcallfunc func;
	    PyObject *res;

	    // kwnames is either NULL (as passed by map_next) or a tuple
	    // (PyTuple_Check accepts tuple subclasses too)
	    assert(kwnames == NULL || PyTuple_Check(kwnames));
	    // args may only be NULL when there are no positional arguments;
	    // PyVectorcall_NARGS extracts the actual argument count from nargsf
	    assert(args != NULL || PyVectorcall_NARGS(nargsf) == 0);

	    // Vectorcall function pointer of callable, or NULL when the vectorcall
	    // protocol is not supported (by the type or by this instance)
	    func = PyVectorcall_Function(callable);
	    if (func == NULL) {
	        Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); // actual argument count
	        // No vectorcall support: fall back to calling through tp_call,
	        // since callable may be some other kind of object. That path still
	        // ends by running _Py_CheckFunctionResult
	        return _PyObject_MakeTpCall(tstate, callable, args, nargs, kwnames);
	    }
	    res = func(callable, args, nargsf, kwnames); // the actual call
	    return _Py_CheckFunctionResult(tstate, callable, res, NULL); // check the result, then return it
	}

map 的内容还是比较少的，基本比较核心的都探究完了😴

CPython C

# Python map

CPython VM的工作原理

Python zip内置类的背后机制