Lua解析脚本过程中的关键数据结构介绍(2)

1 /*
 2 ** nodes for block list (list of active blocks)
 3 */
 4 typedef struct BlockCnt {
 5  struct BlockCnt *previous;  /* chain */
 6  int breaklist;  /* list of jumps out of this loop */
 7  lu_byte nactvar;  /* # active locals outside the breakable structure */
 8  lu_byte upval;  /* true if some variable in the block is an upvalue */
 9  lu_byte isbreakable;  /* true if `block' is a loop */
10 } BlockCnt;

Lua使用BlockCnt来保存一个block的数据。与FuncState的分析方法类似,BlockCnt使用一个previous变量保存外围block的引用,形成一个栈结构。

下面介绍一些在lobject.h文件里面的数据结构

1 /*
 2 ** Function Prototypes
 3 */
 4 typedef struct Proto {
 5  CommonHeader;
 6  TValue *k;  /* constants used by the function */
 7  Instruction *code;
 8  struct Proto **p;  /* functions defined inside the function */
 9  int *lineinfo;  /* map from opcodes to source lines */
10  struct LocVar *locvars;  /* information about local variables */
11  TString **upvalues;  /* upvalue names */
12  TString  *source;
13  int sizeupvalues;
14  int sizek;  /* size of `k' */
15  int sizecode;
16  int sizelineinfo;
17  int sizep;  /* size of `p' */
18  int sizelocvars;
19  int linedefined;
20  int lastlinedefined;
21  GCObject *gclist;
22  lu_byte nups;  /* number of upvalues */
23  lu_byte numparams;
24  lu_byte is_vararg;
25  lu_byte maxstacksize;
26 } Proto;

结构体Proto是lua函数原型(function prototype)的描述。lua解析脚本时,首先会将main chunk代码包裹为一个函数,用main proto描述,接着将其中定义的内部函数逐一用Proto结构体描述,并将这些Proto之间的关系组织成一棵树。例如有lua源码文件如下

1 a = 1
2 function f1()
3 -- ...
4 end
5 function f2()
6    function f3()
7    -- ...
8    end
9 end

则parse完成后会有如下关系:main proto的p数组中包含f1和f2的Proto,而f2的Proto的p数组中又包含f3的Proto,从而形成一棵Proto树。

在Proto结构体中,k指向一个常量数组,存放着函数要用到的常量;code指向lua parse过程中生成的本函数的instruction集合;p指向本函数内部定义的各个函数的Proto;locvars指向本函数局部变量信息数组;upvalues指向本函数upvalue名字数组;nups为upvalue的数量;numparams为函数参数的数量;is_vararg表示函数是否接收可变参数;maxstacksize为函数stack的最大大小。

在编译期间lua使用Proto来描述函数;当lua vm开始运行时,需要根据Proto生成相应的Closure来执行vm instructions。

1 typedef union Closure {
2  CClosure c;
3  LClosure l;
4 } Closure;

Closure要么代表了c函数,要么为lua函数,在这里我们只看lua函数的LClosure

1 #define ClosureHeader \
2    CommonHeader; lu_byte isC; lu_byte nupvalues; GCObject *gclist; \
3    struct Table *env
4 //... ...
5 typedef struct LClosure {
6  ClosureHeader;
7  struct Proto *p;
8  UpVal *upvals[1];
9 } LClosure;

在LClosure中,p指向对应函数的Proto结构体,upvals顾名思义就是此closure的upvalue数组。在ClosureHeader宏中,isC表示此closure是否是c函数,nupvalues为upvalue数目,env指向此closure运行时的函数环境。在lua中可以用setfenv来改变当前函数的环境,实际上就是改变env变量的指向。

最后,在文件lopcodes.h中定义了lua vm的指令结构

下面是vm指令的一些定义与描述,我在相应vm指令的上方添加了一些注释

1 typedef enum {
 2 /*----------------------------------------------------------------------
 3 name        args    description
 4 ------------------------------------------------------------------------*/
 5 OP_MOVE,/*    A B    R(A) := R(B)                    */
 6 //Constants are usually numbers or strings. Each function has its own constant list, or pool.
 7 OP_LOADK,/*    A Bx    R(A) := Kst(Bx)                    */
 8 OP_LOADBOOL,/*    A B C    R(A) := (Bool)B; if (C) pc++            */
 9 //The optimization rule is  a simple one: If no other instructions have been generated,
10 //then a LOADNIL as the first instruction can be optimized away.
11 OP_LOADNIL,/*    A B    R(A) := ... := R(B) := nil            */
12
13 OP_GETUPVAL,/*    A B    R(A) := UpValue[B]                */
14 OP_GETGLOBAL,/*    A Bx    R(A) := Gbl[Kst(Bx)]                */
15 OP_GETTABLE,/*    A B C    R(A) := R(B)[RK(C)]                */
16
17 OP_SETGLOBAL,/*    A Bx    Gbl[Kst(Bx)] := R(A)                */
18 OP_SETUPVAL,/*    A B    UpValue[B] := R(A)                */
19 OP_SETTABLE,/*    A B C    R(A)[RK(B)] := RK(C)                */
20
21 OP_NEWTABLE,/*    A B C    R(A) := {} (size = B,C)                */
22
23 //This instruction is used for object-oriented programming. It is only generated for method calls that use the colon syntax.
24 //R(B) is the register holding the reference to the table with the method.
25 OP_SELF,/*    A B C    R(A+1) := R(B); R(A) := R(B)[RK(C)]        */
26
27 //The optimization rule is simple: If both terms of a subexpression are numbers,
28 //the subexpression will be evaluated at compile time.
29 OP_ADD,/*    A B C    R(A) := RK(B) + RK(C)                */
30 OP_SUB,/*    A B C    R(A) := RK(B) - RK(C)                */
31 OP_MUL,/*    A B C    R(A) := RK(B) * RK(C)                */
32 OP_DIV,/*    A B C    R(A) := RK(B) / RK(C)                */
33 OP_MOD,/*    A B C    R(A) := RK(B) % RK(C)                */
34 OP_POW,/*    A B C    R(A) := RK(B) ^ RK(C)                */
35 OP_UNM,/*    A B    R(A) := -R(B)                    */
36 OP_NOT,/*    A B    R(A) := not R(B)                */
37 //Returns the length of the object in R(B)
38 OP_LEN,/*    A B    R(A) := length of R(B)                */
39
40 //Performs concatenation of two or more strings.
41 //The source registers must be consecutive, and C must always be greater than B.
42 OP_CONCAT,/*    A B C    R(A) := R(B).. ... ..R(C)            */
43
44 //if sBx is 0, the VM will proceed to the next instruction
45 OP_JMP,/*    sBx    pc+=sBx                    */
46
47 /*If the boolean result is not A, then skip the next instruction.
48 Conversely, if the boolean result equals A, continue with the next instruction.*/
49 OP_EQ,/*    A B C    if ((RK(B) == RK(C)) ~= A) then pc++        */
50 OP_LT,/*    A B C    if ((RK(B) <  RK(C)) ~= A) then pc++          */
51 OP_LE,/*    A B C    if ((RK(B) <= RK(C)) ~= A) then pc++          */
52
53 OP_TEST,/*    A C    if not (R(A) <=> C) then pc++            */
54 //register R(B) is coerced into a boolean.
55 OP_TESTSET,/*    A B C    if (R(B) <=> C) then R(A) := R(B) else pc++    */
56
57 //If B is 0, parameters range from R(A+1) to the top of the stack.If B is 1, the function has no parameters.
58 //If C is 1, no return results are saved. If C is 0, then multiple return results are saved, depending on the called function
59 //CALL always updates the top of stack value.
60 OP_CALL,/*    A B C    R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1)) */
61 OP_TAILCALL,/*    A B C    return R(A)(R(A+1), ... ,R(A+B-1))        */
62 //If B is 1, there are no return values. If B is 0, the set of values from R(A) to the top of the stack is returned.
63 OP_RETURN,/*    A B    return R(A), ... ,R(A+B-2)    (see note)    */
64
65 //FORPREP initializes a numeric for loop, while FORLOOP performs an iteration of a numeric for loop.
66 OP_FORLOOP,/*    A sBx    R(A)+=R(A+2);
67            if R(A) <?= R(A+1) then { pc+=sBx; R(A+3)=R(A) }*/
68 OP_FORPREP,/*    A sBx    R(A)-=R(A+2); pc+=sBx                */
69
70 //Performs an iteration of a generic for loop.
71 OP_TFORLOOP,/*    A C    R(A+3), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2));
72                        if R(A+3) ~= nil then R(A+2)=R(A+3) else pc++    */
73 //This instruction is used to initialize array elements in a table.
74 //If B is 0, the table is set with a variable number of array elements, from register R(A+1) up to the top of the stack.
75 //If C is 0, the next instruction is cast as an integer, and used as the C value.
76 OP_SETLIST,/*    A B C    R(A)[(C-1)*FPF+i] := R(A+i), 1 <= i <= B    */
77
78 /*If a local is used as an upvalue, then the local variable needs to be placed somewhere,
79 otherwise it will go out of scope and disappear when a lexical block enclosing the local variable ends.
80 CLOSE performs this operation for all affected local variables for do end blocks or loop blocks.
81 RETURN also does an implicit CLOSE when a function returns.*/
82 OP_CLOSE,/*    A    close all variables in the stack up to (>=) R(A)*/
83 /*Each upvalue corresponds to either a MOVE or a GETUPVAL pseudo-instruction.
84 Only the B field on either of these pseudo-instructions are significant.*/
85 //MOVE pseudo-instructions correspond to local variable R(B) in the current lexical block.
86 //GETUPVAL pseudo-instructions correspond to upvalue number B in the current lexical block.
87 OP_CLOSURE,/*    A Bx    R(A) := closure(KPROTO[Bx], R(A), ... ,R(A+n))    */
88
89 //If B is 0, VARARG copies as many values as it can based on the number of parameters passed.
90 //If a fixed number of values is required, B is a value greater than 1.
91 OP_VARARG/*    A B    R(A), R(A+1), ..., R(A+B-1) = vararg        */
92 } OpCode;

Lua 语言 15 分钟快速入门

Lua程序设计(第2版)中文 PDF

Lua程序设计(第二版)阅读笔记

NetBSD 将支持用 Lua 脚本开发内核组件

CentOS 编译安装 Lua LuaSocket

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/8de998e8b74a4c5412650de37939cbe4.html