全面梳理字符串在Lua虚拟机中的实现,以便以后查阅,也分享给我的朋友们。本笔记的内容是解析源码(lstring.c);说明字符串在虚拟机的具体实现,首先说明字符串管理器的结构以及初始化,这个管理器管理虚拟机中创建的所有常规字符串(相对长字符串类型),然后讲述调用接口(luaS_newlstr)创建字符串; 创建字符串时要为这个字符串在管理器哈希表中确定一个位置;创建字符串时有可能引发字符串管理器的扩容和缩小,最后因为引入了长字符串,那么在创建字符串时要考虑长字符串类型的管理。
1. 字符串管理器 1.1 字符串管理结构 1 2 3 4 5 typedef struct stringtable { TString **hash; int nuse; int size; } stringtable;
1.2 初始化函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 void luaS_init (lua_State *L) { global_State *g = G (L); int i, j; stringtable *tb = &G (L)->strt; tb->hash = luaM_newvector (L, MINSTRTABSIZE, TString*); tablerehash (tb->hash, 0 , MINSTRTABSIZE); tb->size = MINSTRTABSIZE; g->memerrmsg = luaS_newliteral (L, MEMERRMSG); luaC_fix (L, obj2gco (g->memerrmsg)); for (i = 0 ; i < STRCACHE_N; i++) for (j = 0 ; j < STRCACHE_M; j++) g->strcache[i][j] = g->memerrmsg; }
2. 字符串的创建 2.1 核心哈希算法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 unsigned int luaS_hash (const char *str, size_t l, unsigned int seed) { unsigned int h = seed ^ cast_uint (l); for (; l > 0 ; l--) h ^= ((h<<5 ) + (h>>2 ) + cast_byte (str[l - 1 ])); return h; } unsigned int luaS_hashlongstr (TString *ts) { lua_assert (ts->tt == LUA_VLNGSTR); if (ts->extra == 0 ) { size_t len = ts->u.lnglen; ts->hash = luaS_hash (getstr (ts), len, ts->hash); ts->extra = 1 ; } return ts->hash; }
2.2 创建字符串接口 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 TString *luaS_newlstr (lua_State *L, const char *str, size_t l) { if (l <= LUAI_MAXSHORTLEN) return internshrstr (L, str, l); else { TString *ts; if (l_unlikely (l >= (MAX_SIZE - sizeof (TString))/sizeof (char ))) luaM_toobig (L); ts = luaS_createlngstrobj (L, l); memcpy (getstr (ts), str, l * sizeof (char )); return ts; } }
2.3 创建的核心函数 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 static TString *internshrstr (lua_State *L, const char *str, size_t l) { TString *ts; global_State *g = G (L); stringtable *tb = &g->strt; unsigned int h = luaS_hash (str, l, g->seed); TString **list = &tb->hash[lmod (h, tb->size)]; lua_assert (str != NULL ); for (ts = *list; ts != NULL ; ts = ts->u.hnext) { if (l == ts->shrlen && (memcmp (str, getstr (ts), l * sizeof (char )) == 0 )) { if (isdead (g, ts)) changewhite (ts); return ts; } } if (tb->nuse >= tb->size) { growstrtab (L, tb); list = &tb->hash[lmod (h, tb->size)]; } ts = createstrobj (L, l, LUA_VSHRSTR, h); memcpy (getstr (ts), str, l * sizeof (char )); ts->shrlen = cast_byte (l); ts->u.hnext = *list; *list = ts; tb->nuse++; return ts; }
2.4 长字符串创建 1 2 3 4 5 6 7 TString *luaS_createlngstrobj (lua_State *L, size_t l) { TString *ts = createstrobj (L, l, LUA_VLNGSTR, G (L)->seed); ts->u.lnglen = l; return ts; }
3. 字符串表扩展 触发哈希字符串表空间扩展是在哈希表中元素大于表的长度时,调用growstrtab函数
3.1 扩展接口 1 2 3 4 5 6 7 8 9 10 11 12 13 static void growstrtab (lua_State *L, stringtable *tb) { if (l_unlikely (tb->nuse == MAX_INT)) { luaC_fullgc (L, 1 ); if (tb->nuse == MAX_INT) luaM_error (L); } if (tb->size <= MAXSTRTB / 2 ) luaS_resize (L, tb->size * 2 ); }
3.2 扩展实现 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 void luaS_resize (lua_State *L, int nsize) { stringtable *tb = &G (L)->strt; int osize = tb->size; TString **newvect; if (nsize < osize) tablerehash (tb->hash, osize, nsize); newvect = luaM_reallocvector (L, tb->hash, osize, nsize, TString*); if (l_unlikely (newvect == NULL )) { if (nsize < osize) tablerehash (tb->hash, nsize, osize); } else { tb->hash = newvect; tb->size = nsize; if (nsize > osize) tablerehash (newvect, osize, nsize); } }
3.3 重新哈希位置 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 static void tablerehash (TString **vect, int osize, int nsize) { int i; for (i = osize; i < nsize; i++) vect[i] = NULL ; for (i = 0 ; i < osize; i++) { TString *p = vect[i]; vect[i] = NULL ; while (p) { TString *hnext = p->u.hnext; unsigned int h = lmod (p->hash, nsize); p->u.hnext = vect[h]; vect[h] = p; p = hnext; } } }
总结与Lua5.1.4的区别
重新resize的策略发生变化,hash表默认长度为MINSTRTABSIZE(128),当元素数量大于128时size称指数扩展,当元素数量等于MAX_INT时做一次gc回收多余的字符串空间,空间小于最大值的一半时允许扩展空间;缩小空间时只是置空交由GC去负责回收。
luaS_new增加了字符串的指针的cache,主要提供给虚拟机api使用,内部仍然使用luaS_newlstr函数.
增加了一个大字符串类型,小于LUAI_MAXSHORTLEN(40)长度默认是LUA_VSHRSTR类型,长字符串类型为LUA_VLNGSTR