Lua5.4.3源码剖析02-Table的实现

本文剖析Table的实现(ltable.c),对Lua中Table的创建做了详细的注释;首先说明Table的定义以及创建过程;然后在Table创建一个新的键值;重点阐述了table的数组部分和哈希部分的扩容。这里重点分析table表的新键值的插入以及table的空间扩展.这里的空间扩展势必导致重新哈希;这里与前一个版本相比,针对不同的类型键值哈希的分类更加详细;参见mainposition函数的分析;以此引发了设置键值和获得键值对的修改。

1. Table的定义

表结构定义注释

typedef struct Table {
  CommonHeader;
  lu_byte flags;        // metatable的元方法是否存在的标记
  lu_byte lsizenode;    // 哈希表的对数底长度(node成员)
  unsigned int alimit;  // 下面array数组的长度  
  TValue *array;        // 这里是Table的数组部分
  Node *node;           // 哈希部分的首元素
  Node *lastfree;       // 哈希部分的最后一个元素
  struct Table *metatable;  // 表的元方法
  GCObject *gclist;
} Table;

// Node结构说明
typedef union Node {
  // 这个结构主要是给哈希表部分使用的
  struct NodeKey {
    TValuefields;  /* fields for value */
    lu_byte key_tt;  /* key type */
    int next;       //前一个元素到当前节点的距离
    Value key_val;  /* key value */
  } u;
  // 给数组部分使用的
  TValue i_val;  
} Node;

* Tabel的创建

Table *luaH_new (lua_State *L) {
  // #define LUA_VTABLE	makevariant(LUA_TTABLE, 0)
  // 就是Table的01基础类型LUA_TTABLE
  GCObject *o = luaC_newobj(L, LUA_VTABLE, sizeof(Table));
  Table *t = gco2t(o);
  t->metatable = NULL;
  t->flags = cast_byte(maskflags);  /* table has no metamethod fields */
  t->array = NULL;
  t->alimit = 0;
  // setnodevector函数将哈希部分的初始化
  // t->node = cast(Node *, dummynode); node初始化为dummynode
  // t->lsizenode = 0; 哈希部分长度为0
  setnodevector(L, t, 0);
  return t;
}

* 创建新KEY

void luaH_newkey (lua_State *L, Table *t, const TValue *key, TValue *value) {
  Node *mp;
  TValue aux;
  // ... 
  // 省略一些代码
  // ...

  // 找到这个key的哈希位置
  mp = mainpositionTV(t, key);
  if (!isempty(gval(mp)) || isdummy(t)) {  /* main position is taken? */
    Node *othern;
    // 找到一个空闲位置
    // 参见下一节(新KEY的位置)
    Node *f = getfreepos(t);  /* get a free place */
    if (f == NULL) {  /* cannot find a free place? */
      // 没有空闲位置时则扩展Table表的空间
      rehash(L, t, key);  /* grow table */
      /* whatever called 'newkey' takes care of TM cache */
      luaH_set(L, t, key, value);  /* insert key into grown table */
      return;
    }
    // 空闲位置不为空的情况
    lua_assert(!isdummy(t));
    // 找到这个mp节点实际的哈希位置(mp节点实际是借用这个位置)
    // 参见下一节(新KEY的位置)
    othern = mainposition(t, keytt(mp), &keyval(mp));
    // 两个节点不相同时那么mp是借用了othern节点的位置
    // 所以mp必须要搬到空闲位置f上去
    // mp节点就是需要设置的最终节点
    if (othern != mp) {  /* is colliding node out of its main position? */
      /* yes; move colliding node into free position */
      // 找到mp的前一个节点
      while (othern + gnext(othern) != mp)  /* find previous */
        othern += gnext(othern);
      // mp的前一个节点现在是othern了
      // 重新将f设置为othern的下一个节点
      gnext(othern) = cast_int(f - othern);  /* rechain to point to 'f' */
      // 将原来mp的内容赋予f节点,让出mp节点
      *f = *mp;  /* copy colliding node into free pos. (mp->next also goes) */
      // mp节点的下一个节点链接到f上
      // cast_int(mp - f)
      // 意思是f的next在上面已经赋值为mp的next(*f = *mp)
      // 但是mp地址和f地址还有一些差额要增加到f的next上
      if (gnext(mp) != 0) {
        gnext(f) += cast_int(mp - f);  /* correct 'next' */
        gnext(mp) = 0;  /* now 'mp' is free */
      }
      setempty(gval(mp));
    }
    else {  /* colliding node is in its own main position */
      /* new node will go into free position */
      // othern和mp是同一个节点
      // 所以只要把mp链接到f节点上
      // 将mp设置为f节点
      if (gnext(mp) != 0)
        gnext(f) = cast_int((mp + gnext(mp)) - f);  /* chain new position */
      else lua_assert(gnext(f) == 0);
      // 补上f节点到mp节点的差额
      gnext(mp) = cast_int(f - mp);
      mp = f;
    }
  }
  setnodekey(L, mp, key);
  luaC_barrierback(L, obj2gco(t), key);
  lua_assert(isempty(gval(mp)));
  setobj2t(L, gval(mp), value);
}

* 新KEY的位置

// 获得一个空间位置
static Node *getfreepos (Table *t) {
  if (!isdummy(t)) {
    // t->node是一个数组
    while (t->lastfree > t->node) {
      t->lastfree--;
      if (keyisnil(t->lastfree))
        return t->lastfree;
    }
  }
  return NULL;  /* could not find a free place */
}

// 计算key的哈希值
// ktt是键的类型
// kvl是键的值
static Node *mainposition (const Table *t, int ktt, const Value *kvl) {
  switch (withvariant(ktt)) {
    case LUA_VNUMINT: {
      lua_Integer key = ivalueraw(*kvl);
      // int类型的hash算法
      // key % lsizenode
      return hashint(t, key);
    }
    case LUA_VNUMFLT: {
      lua_Number n = fltvalueraw(*kvl);
      // 参见l_hashfloat实现
      return hashmod(t, l_hashfloat(n));
    }
    case LUA_VSHRSTR: {
      TString *ts = tsvalueraw(*kvl);
      // 利用ts->hash作为key
      // ts->hash % lszienode
      return hashstr(t, ts);
    }
    case LUA_VLNGSTR: {
      TString *ts = tsvalueraw(*kvl);
      // ts->hash % lszienode
      return hashpow2(t, luaS_hashlongstr(ts));
    }
    case LUA_VFALSE:
      return hashboolean(t, 0);
    case LUA_VTRUE:
      return hashboolean(t, 1);
    case LUA_VLIGHTUSERDATA: {
      void *p = pvalueraw(*kvl);
      // #define hashpointer(t,p)	hashmod(t, point2uint(p))
      // p地址转换成int值
      return hashpointer(t, p);
    }
    case LUA_VLCF: {
      lua_CFunction f = fvalueraw(*kvl);
      return hashpointer(t, f);
    }
    default: {
      GCObject *o = gcvalueraw(*kvl);
      return hashpointer(t, o);
    }
  }
}

2. Table的空间扩展

* 重新哈希

在luaH_newkey函数中,当没有可用节点时将会调用rehash函数;rehash函数将重新分配数组部分和哈希部分的长度

static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int asize;  /* optimal size for array part */
  unsigned int na;  /* number of keys in the array part */
  // MAXABITS这里的值设定为系统的int值宽度*8 -1
  // #define MAXABITS	cast_int(sizeof(int) * CHAR_BIT - 1)
  unsigned int nums[MAXABITS + 1];
  int i;
  int totaluse;
  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
  setlimittosize(t);
  // totaluse 是数组已用节点数量和哈希列表已用节点数量之和
  // na是数组的节点数量统计;在computesizes函数中会重新计算
  // nums是分片统计的节点数量
  // 计算数组部分的key数量
  na = numusearray(t, nums);  /* count keys in array part */
  totaluse = na;  /* all those keys are integer keys */
  // 计算哈希部分的key数量
  totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
  /* count extra key */
  // 当前未加入的节点ek也要计算进去
  if (ttisinteger(ek))
    na += countint(ivalue(ek), nums);
  totaluse++;
  /* compute new size for array part */
  // asize是重新计算的数组节点数量(需要的节点长度,大于na)
  // na是重新计算后的实际节点数量
  // 重新计算依赖分片统计值(nums)以及原先的数组已用节点数量(na);算法参见computesizes
  asize = computesizes(nums, &na);
  /* resize the table to new computed sizes */
  // 根据新计算的数组长度asize和哈希列表长度(totoaluse -na)重新调整表的节点分布
  // 具体算法参见后续luaH_resize注释
  luaH_resize(L, t, asize, totaluse - na);
}

* 重新哈希核心算法

/*
** Compute the optimal size for the array part of table 't'. 'nums' is a
** "count array" where 'nums[i]' is the number of integers in the table
** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of
** integer keys in the table and leaves with the number of keys that
** will go to the array part; return the optimal size.  (The condition
** 'twotoi > 0' in the for loop stops the loop if 'twotoi' overflows.)
*/
static unsigned int computesizes (unsigned int nums[], unsigned int *pna) {
  int i;
  unsigned int twotoi;  /* 2^i (candidate for optimal size) */
  unsigned int a = 0;  /* number of elements smaller than 2^i */
  unsigned int na = 0;  /* number of elements to go to array part */
  unsigned int optimal = 0;  /* optimal size for array part */
  /* loop while keys can fill more than half of total size */
  // 我带上了代码的英文注释,需要注意的点
  // twotoi > 0 的判断是为了防止twotoi的溢出
  // i从0开始;0表示区间[2^0 ~ 2^1]
  for (i = 0, twotoi = 1;twotoi > 0 && *pna > twotoi / 2;i++, twotoi *= 2) {
    a += nums[i];
    // a > twotoi/2 表示的是节点数量只要大于总空间的一半
    // 也就是利用率能达到50%就记录一下
    if (a > twotoi/2) {  /* more than half elements present? */
      optimal = twotoi;  /* optimal size (till now) */
      na = a;  /* all elements up to 'optimal' will go to array part */
    }
  }
  lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
  // pna重新调整为实际统计的数组节点数量
  // 数组长度为optimal时数组节点数量
  *pna = na;
  return optimal;
}

* 扩展Table核心算法

/*
** Resize table 't' for the new given sizes. Both allocations (for
** the hash part and for the array part) can fail, which creates some
** subtleties. If the first allocation, for the hash part, fails, an
** error is raised and that is it. Otherwise, it copies the elements from
** the shrinking part of the array (if it is shrinking) into the new
** hash. Then it reallocates the array part.  If that fails, the table
** is in its original state; the function frees the new hash part and then
** raises the allocation error. Otherwise, it sets the new hash part
** into the table, initializes the new part of the array (if any) with
** nils and reinserts the elements of the old hash back into the new
** parts of the table.
*/
// newasize数组部分的长度
// nhsize哈希部分的长度
void luaH_resize (lua_State *L, Table *t, unsigned int newasize,
                                          unsigned int nhsize) {
  unsigned int i;
  Table newt;  /* to keep the new hash part */
  unsigned int oldasize = setlimittosize(t);
  TValue *newarray;
  /* create new hash part with appropriate size into 'newt' */
  // 重新创建哈希部分的长度
  // 参见下一节(扩展哈希表核心算法)
  setnodevector(L, &newt, nhsize);
  // 数组新长度小于原有长度(缩小)
  if (newasize < oldasize) {  /* will array shrink? */
    t->alimit = newasize;  /* pretend array has new size... */
    // 把t的hash信息保存到newt中
    exchangehashpart(t, &newt);  /* and new hash */
    /* re-insert into the new hash the elements from vanishing slice */
    // 如果是缩小的话;把原有的数组多余节点添加到t中
    for (i = newasize; i < oldasize; i++) {
      if (!isempty(&t->array[i]))
        luaH_setint(L, t, i + 1, &t->array[i]);
    }
    // 然后再恢复newt中的哈希列表信息到t中
    t->alimit = oldasize;  /* restore current size... */
    exchangehashpart(t, &newt);  /* and hash (in case of errors) */
  }
  /* allocate new array */
  // 创建新的数组部分(newarray)
  newarray = luaM_reallocvector(L, t->array, oldasize, newasize, TValue);
  if (l_unlikely(newarray == NULL && newasize > 0)) {  /* allocation failed? */
    freehash(L, &newt);  /* release new hash part */
    luaM_error(L);  /* raise error (with array unchanged) */
  }
  /* allocation ok; initialize new part of the array */
  // 把哈希列表部分保存到newt中
  exchangehashpart(t, &newt);  /* 't' has the new hash ('newt' has the old) */
  // t设置新的数组部分
  t->array = newarray;  /* set new array part */
  t->alimit = newasize;
  // 数组新扩展的部分县设置为空
  for (i = oldasize; i < newasize; i++)  /* clear new slice of the array */
     setempty(&t->array[i]);
  /* re-insert elements from old hash part into new parts */
  // 重新把newt的哈希列表部分的节点插入到t中
  reinsert(L, &newt, t);  /* 'newt' now has the old hash */
  // 释放newt
  freehash(L, &newt);  /* free old hash part */
}

* 扩展哈希表核心算法

// size是哈希表的长度
static void setnodevector (lua_State *L, Table *t, unsigned int size) {
  // 为0时时初始化Table的哈希列表部分
  if (size == 0) {  /* no elements to hash part? */
    // 初始节点设置为dummynode
    // lsizenode 为 0
    t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
    t->lsizenode = 0;
    t->lastfree = NULL;  /* signal that it is using dummy node */
  }
  else {
    int i;
    // 先计算size的以2为底的对数值
    int lsize = luaO_ceillog2(size);
    if (lsize > MAXHBITS || (1u << lsize) > MAXHSIZE)
      luaG_runerror(L, "table overflow");
    // 再计算实际的长度 2^lsize
    size = twoto(lsize);
    // node 扩展到size长度(哈希数组)
    t->node = luaM_newvector(L, size, Node);
    // 初始化所有节点 next = 0
    for (i = 0; i < (int)size; i++) {
      Node *n = gnode(t, i);
      gnext(n) = 0;
      setnilkey(n);
      setempty(gval(n));
    }
    // lsizenode 值为哈希列表长度的对数值
    t->lsizenode = cast_byte(lsize);
    // lastfree为最后一个节点
    t->lastfree = gnode(t, size);  /* all positions are free */
  }
}

* 用到的辅助函数

上述扩展空间的函数用到两个关键的函数,一个是备份还原哈希列表信息函数(exchangehashpart);一个是重新将哈希节点信息插入另外一张Table中(reinsert)

/*
** Exchange the hash part of 't1' and 't2'.
*/
static void exchangehashpart (Table *t1, Table *t2) {
  // t1的哈希列表信息临时保存
  lu_byte lsizenode = t1->lsizenode;
  Node *node = t1->node;
  Node *lastfree = t1->lastfree;
  // t2的哈希列表信息设置给t1
  t1->lsizenode = t2->lsizenode;
  t1->node = t2->node;
  t1->lastfree = t2->lastfree;
  // t1的哈希信息设置给t2
  t2->lsizenode = lsizenode;
  t2->node = node;
  t2->lastfree = lastfree;
}

static void reinsert (lua_State *L, Table *ot, Table *t) {
  int j;
  // 重新将ot表中的哈希节点插入到t表中
  // 这里需要注意一个细节就是哈希列表也是一个数组
  // 平坦的数组设计;这是一个优化的地方,而不是用链表进行链接
  int size = sizenode(ot);
  for (j = 0; j < size; j++) {
    Node *old = gnode(ot, j);
    if (!isempty(gval(old))) {
      /* doesn't need barrier/invalidate cache, as entry was
         already present in the table */
      TValue k;
      getnodekey(L, &k, old);
      luaH_set(L, t, &k, gval(old));
    }
  }
}

3. Table的其他

* Table的销毁

创建也难销毁更难

void luaH_free (lua_State *L, Table *t) {
  // freehash 释放掉哈希列表的数组
  freehash(L, t);
  // 释放数组部分
  luaM_freearray(L, t->array, luaH_realasize(t));
  // 释放t
  luaM_free(L, t);
}

* 其他函数说明

luaH_getn函数是计算table的大小;luaH_set设置一个键值对;luaH_get获得一个键值信息