先来谈谈 PHP 5 中数组元素的添加和修改,由于 PHP 5 中数组元素的插入顺序以及 hash 碰撞都是通过双向链表的方式来维护,所以虽然实现起来有些复杂,但理解起来相对容易一些。
// hash 碰撞双向链表的维护 #define CONNECT_TO_BUCKET_DLLIST(element, list_head) \ (element)->pNext = (list_head); \ (element)->pLast = NULL; \ if ((element)->pNext) { \ (element)->pNext->pLast = (element); \ } #define CONNECT_TO_GLOBAL_DLLIST_EX(element, ht, last, next)\ (element)->pListLast = (last); \ (element)->pListNext = (next); \ if ((last) != NULL) { \ (last)->pListNext = (element); \ } else { \ (ht)->pListHead = (element); \ } \ if ((next) != NULL) { \ (next)->pListLast = (element); \ } else { \ (ht)->pListTail = (element); \ } \ // 数组元素插入顺序双向链表的维护 #define CONNECT_TO_GLOBAL_DLLIST(element, ht) \ CONNECT_TO_GLOBAL_DLLIST_EX(element, ht, (ht)->pListTail, (Bucket *) NULL); \ if ((ht)->pInternalPointer == NULL) { \ (ht)->pInternalPointer = (element); \ } // 数组元素的更新 #define UPDATE_DATA(ht, p, pData, nDataSize) \ if (nDataSize == sizeof(void*)) { \ // 值为指针类型的元素的更新 \ if ((p)->pData != &(p)->pDataPtr) { \ pefree_rel((p)->pData, (ht)->persistent); \ } \ // pDataPtr 存储元素值的地址,pData 存储 pDataPtr 的地址 \ memcpy(&(p)->pDataPtr, pData, sizeof(void *)); \ (p)->pData = &(p)->pDataPtr; \ } else { \ // 如果数组元素为值类型,则存入 pData,此时 pDataPtr 为 Null \ if ((p)->pData == &(p)->pDataPtr) { \ (p)->pData = (void *) pemalloc_rel(nDataSize, (ht)->persistent); \ (p)->pDataPtr=NULL; \ } else { \ (p)->pData = (void *) perealloc_rel((p)->pData, nDataSize, (ht)->persistent); \ /* (p)->pDataPtr is already NULL so no need to initialize it */ \ } \ memcpy((p)->pData, pData, nDataSize); \ } // 数组元素的初始化 #define INIT_DATA(ht, p, _pData, nDataSize); \ if (nDataSize == sizeof(void*)) { \ // 指针类型元素的初始化 \ memcpy(&(p)->pDataPtr, (_pData), sizeof(void *)); \ (p)->pData = &(p)->pDataPtr; \ } else { \ // 值类型元素的初始化 \ (p)->pData = (void *) pemalloc_rel(nDataSize, (ht)->persistent);\ memcpy((p)->pData, (_pData), nDataSize); \ (p)->pDataPtr=NULL; \ } // hashtable 初始化校验,如果没有初始化,则初始化 hashtable #define CHECK_INIT(ht) do { \ if (UNEXPECTED((ht)->nTableMask == 0)) { \ (ht)->arBuckets = (Bucket **) pecalloc((ht)->nTableSize, sizeof(Bucket *), (ht)->persistent); \ (ht)->nTableMask = (ht)->nTableSize - 1; \ } \ } while (0) // 数组元素的新增或更新(精简掉了一些宏调用和代码片段) ZEND_API int _zend_hash_add_or_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData, uint nDataSize, void **pDest, int flag ZEND_FILE_LINE_DC) { ulong h; uint nIndex; Bucket *p; CHECK_INIT(ht); h = zend_inline_hash_func(arKey, nKeyLength); nIndex = h & ht->nTableMask; p = ht->arBuckets[nIndex]; while (p != NULL) { if (p->arKey == arKey || ((p->h == h) && (p->nKeyLength == nKeyLength) && !memcmp(p->arKey, arKey, nKeyLength))) { // 数组元素更新逻辑 if (flag & HASH_ADD) { return FAILURE; } ZEND_ASSERT(p->pData != pData); if (ht->pDestructor) { ht->pDestructor(p->pData); } UPDATE_DATA(ht, p, pData, nDataSize); if (pDest) { *pDest = p->pData; } return SUCCESS; } p = p->pNext; } // 数组元素新增逻辑 if (IS_INTERNED(arKey)) { p = (Bucket *) pemalloc(sizeof(Bucket), ht->persistent); p->arKey = arKey; } else { p = (Bucket *) pemalloc(sizeof(Bucket) + nKeyLength, ht->persistent); p->arKey = (const char*)(p + 1); memcpy((char*)p->arKey, arKey, nKeyLength); } p->nKeyLength = nKeyLength; INIT_DATA(ht, p, pData, nDataSize); p->h = h; // hash 碰撞链表维护 CONNECT_TO_BUCKET_DLLIST(p, ht->arBuckets[nIndex]); if (pDest) { *pDest = p->pData; } // 数组元素写入顺序维护 CONNECT_TO_GLOBAL_DLLIST(p, ht); ht->arBuckets[nIndex] = p; ht->nNumOfElements++; ZEND_HASH_IF_FULL_DO_RESIZE(ht); /* If the Hash table is full, resize it */ return SUCCESS; }
PHP 5 中的数组在新增或修改元素时,首先会根据给定的 key 计算得到相应的 hash 值,然后据此得到 arBuckets 的索引 nIndex,最终得到链表中第一个 Bucket( hash 碰撞链表的表头),即p。
如果是更新数组中已有的项,那么会从 p 开始遍历 hash 碰撞链表,直到找到 arkey 与给定的 key 相同的 Bucket,然后更新 pData。