通过分析luajit字节码文件格式可知,luajit文件由文件头和原型数组组成,而原型又包括原型头和原型体,文件头中包含了字节码文件的一些关键信息,目前的反编译工具根据标准的luajit2.0文件格式解析文件,如果对字节码文件的信息自定义,将直接导致反编译过程中出现异常
下面修改luajit字节码的四点并测试:signature标志、STRIP与BE、opcode顺序、原型头部
1.修改signature
标准的luajit字节码的signature为0x1B4C4A,定义在lj_bcdump.h中,定义如下:
/* Stock LuaJIT bytecode signature bytes: 0x1b 0x4c 0x4a (ESC 'L' 'J'). */
#define BCDUMP_HEAD1 0x1b
#define BCDUMP_HEAD2 0x4c
#define BCDUMP_HEAD3 0x4a
直接将其修改成:
/*
** Customised signature bytes: 0x21 0x3c 0x5a ('!' '<' 'Z').
** BCDUMP_HEAD1 has to equal LUA_SIGNATURE[0], because the loader tests the
** first byte of the chunk against LUA_SIGNATURE[0] before reading the header.
*/
#define BCDUMP_HEAD1 0x21
#define BCDUMP_HEAD2 0x3c
#define BCDUMP_HEAD3 0x5a
这样在使用luajit -b命令生成字节码时,signature字段将变成0x213C5A。但是只修改这个地方,在文件加载时会报错,因为luajit使用同一个API加载lua脚本文件和luajit字节码文件,在lj_load.c中定义如下:
/* -- Load Lua source code and bytecode ----- */
/*
** Protected loader body: turns the chunk described by ud (a LexState) into a
** function and pushes it on the Lua stack.
** lj_lex_setup() decides whether the chunk is bytecode (non-zero) or source
** (zero); the chunk is rejected with LUA_ERRSYNTAX if ls->mode is set and
** does not contain 'b' (for bytecode) or 't' (for source).
** Always returns NULL; dummy is unused.
*/
static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
{
LexState *ls = (LexState *)ud;
GCproto *pt;
GCfunc *fn;
int bc;  /* Non-zero if the chunk was detected as a bytecode dump. */
UNUSED(dummy);
cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
bc = lj_lex_setup(L, ls);
/* Enforce the caller-supplied mode string, if any. */
if (ls->mode && !strchr(ls->mode, bc ? 'b' : 't')) {
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XMODE));
lj_err_throw(L, LUA_ERRSYNTAX);
}
/* Bytecode goes through the dump reader, source through the parser. */
pt = bc ? lj_bcread(ls) : lj_parse(ls);
fn = lj_func_newL_empty(L, pt, tabref(L->env));
/* Don't combine above/below into one statement. */
setfuncV(L, L->top++, fn);
return NULL;
}
当bc值为1时,调用lj_bcread读取字节码文件信息,否则调用lj_parse进行源码转换,跟踪进lj_lex_setup函数如下:
/*
** Set up the lexer state and inspect the start of the input.
** Returns 1 when the current byte equals LUA_SIGNATURE[0] (bytecode dump),
** otherwise 0 (also when EOF is hit while skipping a '#' line).
** Throws LUA_ERRSYNTAX (message LJ_ERR_BCBAD) if that byte appears after a
** skipped UTF-8 BOM or '#' line.
*/
int lj_lex_setup(lua_State *L, LexState *ls)
{
int header = 0;  /* Becomes 1 once a BOM or '#' line has been skipped. */
ls->L = L;
ls->fs = NULL;
ls->pe = ls->p = NULL;
ls->vstack = NULL;
ls->sizevstack = 0;
ls->vtop = 0;
ls->bcstack = NULL;
ls->sizebcstack = 0;
ls->tok = 0;
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
lex_next(ls); /* Read-ahead first char. */
if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
(uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
ls->p += 2;
lex_next(ls);
header = 1;
}
if (ls->c == '#') { /* Skip POSIX #! header line. */
do {
lex_next(ls);
if (ls->c == LEX_EOF) return 0;
} while (!lex_iseol(ls));
lex_newline(ls);
header = 1;
}
/* Only the first signature byte is tested here. */
if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
if (header) {
/*
** Loading bytecode with an extra header is disabled for security
** reasons. This may circumvent the usual check for bytecode vs.
** Lua code by looking at the first char. Since this is a potential
** security violation no attempt is made to echo the chunkname either.
*/
setstrV(L, L->top++, lj_err_str(L,LJ_ERR_BCBAD));
lj_err_throw(L, LUA_ERRSYNTAX);
}
return 1;
}
return 0;
}
它先通过lex_next函数读取第一个字符,当ls->c == LUA_SIGNATURE[0]时,返回1,也就是会读取字节码文件,否则返回0,转换源码文件。这里的ls->c表示的是当前字节,初始状态经过lex_next处理后,ls->c即为文件的第一个字节。LUA_SIGNATURE在lua.h中定义如下:
#define LUA_SIGNATURE "\033Lua"
可知 LUA_SIGNATURE[0] 为 0x1B,与字节码文件的第一个字节对应,同时可以看到在lj_bcread.c文件的bcread_header函数中是这样判断的:
if (bcread_byte(ls) != BCDUMP_HEAD2 ||
bcread_byte(ls) != BCDUMP_HEAD3 ||
bcread_byte(ls) != BCDUMP_VERSION) return 0;
由于第一个字节在文件加载时判断过了,因此在读取signature时从第二个字节开始判断,因此需要重新定义LUA_SIGNATURE为:
#define LUA_SIGNATURE "\041Lua"
这里只用保证 LUA_SIGNATURE[0] = BCDUMP_HEAD1即可。
1.1反编译修改后字节码文件
Luajit-decomp:报错如下,比较隐蔽,因为luajit -bl命令出错没有生成汇编文件:
Ljd:报错如下,提示magic字段错误,比较明显,不是luajit文件格式:
总结:luajit使用luajit filename命令加载文件时,通过文件第一个字节判断是lua源文件还是luajit字节码文件。源文件进行词法转换;字节码文件调用lj_bcread读取字节码文件内容,并从第二个字节开始校验字节码文件的正确性
2.修改STRIP与BE
STRIP与BE位于luajit字节码文件头的flags标志中。BE在最低位(bit 0,0x01),0表示小端,1表示大端;STRIP在次低位(bit 1,0x02),1表示去除调试信息,0表示包含调试信息。定义在lj_bcdump.h中如下:
/* Header flag bits, stock layout. */
#define BCDUMP_F_BE 0x01     /* Set: big-endian dump; clear: little-endian. */
#define BCDUMP_F_STRIP 0x02  /* Set: debug info stripped; clear: debug info present. */
#define BCDUMP_F_FFI 0x04
#define BCDUMP_F_FR2 0x08
将STRIP与BE的位置互换,重定义如下:
/* Header flag bits, customised layout: the BE and STRIP bits are swapped. */
#define BCDUMP_F_BE 0x02     /* Was 0x01 in the stock layout. */
#define BCDUMP_F_STRIP 0x01  /* Was 0x02 in the stock layout. */
#define BCDUMP_F_FFI 0x04
#define BCDUMP_F_FR2 0x08
2.1反编译修改后字节码文件
Luajit-decomp:同样的错误,反汇编没有内容
Ljd:报错如下,从错误提示可以看出是读取头部的时候出错了
总结:正常编译(默认去除调试信息)时STRIP = 1,互换后该位落在标准格式的BE位上,反编译工具认为BE = 1,会按大端规则解析,而实际是小端存储,因此出错;同时工具认为STRIP = 0(包含调试信息),实际上没有调试信息,也会导致出错。使用-bg编译时STRIP = 0,小端平台上BE也为0,两个标志位均为0,互换前后flags的值相同,因此反编译正常。
3.修改opcode顺序
Luajit字节码文件的字段中存放了该原型的字节码指令,位置为原型体的开始处,luajit每个字节码指令占4字节,在lj_bc.h中定义,共97个:
/*
** X-macro listing every bytecode instruction in the stock order.
** Each entry is _(name, ma, mb, mc, mt). The position of an entry in this
** list becomes its BC_<name> value in the BCOp enum, so reordering entries
** changes the opcode numbers stored in bytecode.
*/
#define BCDEF(_) \
/* Comparison ops. ORDER OPR. */ \
_(ISLT, var, ___, var, lt)\
_(ISGE, var, ___, var, lt)\
_(ISLE, var, ___, var, le)\
_(ISGT, var, ___, var, le)\
\
_(ISEQV, var, ___, var, eq)\
_(ISNEV, var, ___, var, eq)\
_(ISEQS, var, ___, str, eq)\
_(ISNES, var, ___, str, eq)\
_(ISEQN, var, ___, num, eq)\
_(ISNEN, var, ___, num, eq)\
_(ISEQP, var, ___, pri, eq)\
_(ISNEP, var, ___, pri, eq)\
\
/* Unary test and copy ops. */ \
_(ISTC, dst, ___, var, ___)\
_(ISFC, dst, ___, var, ___)\
_(IST, ___, ___, var, ___)\
_(ISF, ___, ___, var, ___)\
_(ISTYPE, var, ___, lit, ___)\
_(ISNUM, var, ___, lit, ___)\
\
/* Unary ops. */ \
_(MOV, dst, ___, var, ___)\
_(NOT, dst, ___, var, ___)\
_(UNM, dst, ___, var, unm)\
_(LEN, dst, ___, var, len)\
\
/* Binary ops. ORDER OPR. VV last, POW must be next. */ \
_(ADDVN, dst, var, num, add)\
_(SUBVN, dst, var, num, sub)\
_(MULVN, dst, var, num, mul)\
_(DIVVN, dst, var, num, div)\
_(MODVN, dst, var, num, mod)\
\
_(ADDNV, dst, var, num, add)\
_(SUBNV, dst, var, num, sub)\
_(MULNV, dst, var, num, mul)\
_(DIVNV, dst, var, num, div)\
_(MODNV, dst, var, num, mod)\
\
_(ADDVV, dst, var, var, add)\
_(SUBVV, dst, var, var, sub)\
_(MULVV, dst, var, var, mul)\
_(DIVVV, dst, var, var, div)\
_(MODVV, dst, var, var, mod)\
\
_(POW, dst, var, var, pow)\
_(CAT, dst, rbase, rbase, concat)\
\
/* Constant ops. */ \
_(KSTR, dst, ___, str, ___)\
_(KCDATA, dst, ___, cdata, ___)\
_(KSHORT, dst, ___, lits, ___)\
_(KNUM, dst, ___, num, ___)\
_(KPRI, dst, ___, pri, ___)\
_(KNIL, base, ___, base, ___)\
\
/* Upvalue and function ops. */ \
_(UGET, dst, ___, uv, ___)\
_(USETV, uv, ___, var, ___)\
_(USETS, uv, ___, str, ___)\
_(USETN, uv, ___, num, ___)\
_(USETP, uv, ___, pri, ___)\
_(UCLO, rbase, ___, jump, ___)\
_(FNEW, dst, ___, func, gc)\
\
/* Table ops. */ \
_(TNEW, dst, ___, lit, gc)\
_(TDUP, dst, ___, tab, gc)\
_(GGET, dst, ___, str, index)\
_(GSET, var, ___, str, newindex)\
_(TGETV, dst, var, var, index)\
_(TGETS, dst, var, str, index)\
_(TGETB, dst, var, lit, index)\
_(TGETR, dst, var, var, index)\
_(TSETV, var, var, var, newindex)\
_(TSETS, var, var, str, newindex)\
_(TSETB, var, var, lit, newindex)\
_(TSETM, base, ___, num, newindex)\
_(TSETR, var, var, var, newindex)\
\
/* Calls and vararg handling. T = tail call. */ \
_(CALLM, base, lit, lit, call)\
_(CALL, base, lit, lit, call)\
_(CALLMT, base, ___, lit, call)\
_(CALLT, base, ___, lit, call)\
_(ITERC, base, lit, lit, call)\
_(ITERN, base, lit, lit, call)\
_(VARG, base, lit, lit, ___)\
_(ISNEXT, base, ___, jump, ___)\
\
/* Returns. */ \
_(RETM, base, ___, lit, ___)\
_(RET, rbase, ___, lit, ___)\
_(RET0, rbase, ___, lit, ___)\
_(RET1, rbase, ___, lit, ___)\
\
/* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
_(FORI, base, ___, jump, ___)\
_(JFORI, base, ___, jump, ___)\
\
_(FORL, base, ___, jump, ___)\
_(IFORL, base, ___, jump, ___)\
_(JFORL, base, ___, lit, ___)\
\
_(ITERL, base, ___, jump, ___)\
_(IITERL, base, ___, jump, ___)\
_(JITERL, base, ___, lit, ___)\
\
_(LOOP, rbase, ___, jump, ___)\
_(ILOOP, rbase, ___, jump, ___)\
_(JLOOP, rbase, ___, lit, ___)\
\
_(JMP, rbase, ___, jump, ___)\
\
/* Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. */ \
_(FUNCF, rbase, ___, ___, ___)\
_(IFUNCF, rbase, ___, ___, ___)\
_(JFUNCF, rbase, ___, lit, ___)\
_(FUNCV, rbase, ___, ___, ___)\
_(IFUNCV, rbase, ___, ___, ___)\
_(JFUNCV, rbase, ___, lit, ___)\
_(FUNCC, rbase, ___, ___, ___)\
_(FUNCCW, rbase, ___, ___, ___)
/*
** Opcode enumeration generated from BCDEF: one BC_<name> enumerator per
** entry, numbered from 0 in list order, followed by BC__MAX.
*/
typedef enum {
#define BCENUM(name, ma, mb, mc,mt) BC_##name,
BCDEF(BCENUM)
#undef BCENUM
BC__MAX  /* One past the last opcode, i.e. the number of opcodes. */
} BCOp;
字节码指令中,第一个字节存放的是opcode,实质是该字节码的opcode在BCOp中的下标,因此修改上述BCDEF宏定义的顺序后,对应字节码opcode的顺序也跟着改变,即生成的字节码文件与标准的字节码文件中指令的opcode会改变
打乱指令顺序后如下(指令顺序随便打乱会影响jit功能:LuaJit中的JIT原理分析):
/*
** Reordered variant of the BCDEF instruction list.
** Whole instruction groups have been moved relative to the stock list, but
** the order of the entries inside each group is unchanged. Because opcode
** numbers are the positions in this list, every opcode number that moved
** differs from the stock numbering.
*/
#define BCDEF(_) \
  /* Unary test and copy ops. */ \
  _(ISTC, dst, ___, var, ___) \
  _(ISFC, dst, ___, var, ___) \
  _(IST, ___, ___, var, ___) \
  _(ISF, ___, ___, var, ___) \
  _(ISTYPE, var, ___, lit, ___) \
  _(ISNUM, var, ___, lit, ___) \
  \
  /* Comparison ops. ORDER OPR. */ \
  _(ISLT, var, ___, var, lt) \
  _(ISGE, var, ___, var, lt) \
  _(ISLE, var, ___, var, le) \
  _(ISGT, var, ___, var, le) \
  \
  _(ISEQV, var, ___, var, eq) \
  _(ISNEV, var, ___, var, eq) \
  _(ISEQS, var, ___, str, eq) \
  _(ISNES, var, ___, str, eq) \
  _(ISEQN, var, ___, num, eq) \
  _(ISNEN, var, ___, num, eq) \
  _(ISEQP, var, ___, pri, eq) \
  _(ISNEP, var, ___, pri, eq) \
  \
  /* Unary ops. */ \
  _(MOV, dst, ___, var, ___) \
  _(NOT, dst, ___, var, ___) \
  _(UNM, dst, ___, var, unm) \
  _(LEN, dst, ___, var, len) \
  \
  /* Upvalue and function ops. */ \
  _(UGET, dst, ___, uv, ___) \
  _(USETV, uv, ___, var, ___) \
  _(USETS, uv, ___, str, ___) \
  _(USETN, uv, ___, num, ___) \
  _(USETP, uv, ___, pri, ___) \
  _(UCLO, rbase, ___, jump, ___) \
  _(FNEW, dst, ___, func, gc) \
  \
  /* Constant ops. */ \
  _(KSTR, dst, ___, str, ___) \
  _(KCDATA, dst, ___, cdata, ___) \
  _(KSHORT, dst, ___, lits, ___) \
  _(KNUM, dst, ___, num, ___) \
  _(KPRI, dst, ___, pri, ___) \
  _(KNIL, base, ___, base, ___) \
  \
  /* Calls and vararg handling. T = tail call. */ \
  _(CALLM, base, lit, lit, call) \
  _(CALL, base, lit, lit, call) \
  _(CALLMT, base, ___, lit, call) \
  _(CALLT, base, ___, lit, call) \
  _(ITERC, base, lit, lit, call) \
  _(ITERN, base, lit, lit, call) \
  _(VARG, base, lit, lit, ___) \
  _(ISNEXT, base, ___, jump, ___) \
  \
  /* Table ops. */ \
  _(TNEW, dst, ___, lit, gc) \
  _(TDUP, dst, ___, tab, gc) \
  _(GGET, dst, ___, str, index) \
  _(GSET, var, ___, str, newindex) \
  _(TGETV, dst, var, var, index) \
  _(TGETS, dst, var, str, index) \
  _(TGETB, dst, var, lit, index) \
  _(TGETR, dst, var, var, index) \
  _(TSETV, var, var, var, newindex) \
  _(TSETS, var, var, str, newindex) \
  _(TSETB, var, var, lit, newindex) \
  _(TSETM, base, ___, num, newindex) \
  _(TSETR, var, var, var, newindex) \
  \
  /* Returns. */ \
  _(RETM, base, ___, lit, ___) \
  _(RET, rbase, ___, lit, ___) \
  _(RET0, rbase, ___, lit, ___) \
  _(RET1, rbase, ___, lit, ___) \
  \
  /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
  _(FORI, base, ___, jump, ___) \
  _(JFORI, base, ___, jump, ___) \
  \
  _(FORL, base, ___, jump, ___) \
  _(IFORL, base, ___, jump, ___) \
  _(JFORL, base, ___, lit, ___) \
  \
  _(ITERL, base, ___, jump, ___) \
  _(IITERL, base, ___, jump, ___) \
  _(JITERL, base, ___, lit, ___) \
  \
  _(LOOP, rbase, ___, jump, ___) \
  _(ILOOP, rbase, ___, jump, ___) \
  _(JLOOP, rbase, ___, lit, ___) \
  \
  _(JMP, rbase, ___, jump, ___) \
  \
  /* Binary ops. ORDER OPR. VV last, POW must be next. */ \
  _(ADDVN, dst, var, num, add) \
  _(SUBVN, dst, var, num, sub) \
  _(MULVN, dst, var, num, mul) \
  _(DIVVN, dst, var, num, div) \
  _(MODVN, dst, var, num, mod) \
  \
  _(ADDNV, dst, var, num, add) \
  _(SUBNV, dst, var, num, sub) \
  _(MULNV, dst, var, num, mul) \
  _(DIVNV, dst, var, num, div) \
  _(MODNV, dst, var, num, mod) \
  \
  _(ADDVV, dst, var, var, add) \
  _(SUBVV, dst, var, var, sub) \
  _(MULVV, dst, var, var, mul) \
  _(DIVVV, dst, var, var, div) \
  _(MODVV, dst, var, var, mod) \
  \
  _(POW, dst, var, var, pow) \
  _(CAT, dst, rbase, rbase, concat) \
  \
  /* Function headers. I/J = interp/JIT, F/V/C = fixarg/vararg/C func. */ \
  _(FUNCF, rbase, ___, ___, ___) \
  _(IFUNCF, rbase, ___, ___, ___) \
  _(JFUNCF, rbase, ___, lit, ___) \
  _(FUNCV, rbase, ___, ___, ___) \
  _(IFUNCV, rbase, ___, ___, ___) \
  _(JFUNCV, rbase, ___, lit, ___) \
  _(FUNCC, rbase, ___, ___, ___) \
  _(FUNCCW, rbase, ___, ___, ___)
同时lj_bc.h中指令定义下方的assert语句要保证正确才能编译成功,不改变同一类型指令中的顺序,一般情况下不用修改assert语句,除了LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT);改为LJ_STATIC_ASSERT(((int)BC_ISLT+3) == (int)BC_ISGT);
/*
** Compile-time checks on the relative positions of related opcodes.
** They must all hold for the chosen BCDEF order or the build fails.
*/
LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV);
LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV);
LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES);
LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN);
LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP);
LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE);
LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT);
/*
** Modified check: the stock form is ((int)BC_ISLT^3) == (int)BC_ISGT.
** NOTE(review): relaxing this only changes what is asserted; confirm that no
** other code still depends on the XOR-3 relation between ISLT and ISGT.
*/
LJ_STATIC_ASSERT(((int)BC_ISLT+3) == (int)BC_ISGT);
LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC);
LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM);
LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT);
LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET);
LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL);
LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL);
LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
LJ_STATIC_ASSERT((int)BC_FUNCF + 1 == (int)BC_IFUNCF);
LJ_STATIC_ASSERT((int)BC_FUNCF + 2 == (int)BC_JFUNCF);
LJ_STATIC_ASSERT((int)BC_FUNCV + 1 == (int)BC_IFUNCV);
LJ_STATIC_ASSERT((int)BC_FUNCV + 2 == (int)BC_JFUNCV);
只是改变指令的顺序,能够正常编译luajit源代码,但是可能运行会出错,因为luajit中存在预先编译的luajit字节码,位于/host/buildvm_libbc.h中,这个文件通过genlibbc.lua脚本生成,下载的源码中已经存在该头文件,正常编译luajit时直接使用,有如下函数已经预编译成了字节码:
/*
** Name -> byte offset of each precompiled library function in libbc_code.
** A function's length is the next entry's offset minus its own; the final
** {NULL,502} entry supplies the end offset of the last function.
*/
static const struct { const char *name; int ofs; } libbc_map[] = {
{"math_deg",0},
{"math_rad",25},
{"string_len",50},
{"table_foreachi",69},
{"table_foreach",136},
{"table_getn",207},
{"table_remove",226},
{"table_move",355},
{NULL,502}
};
字节码如下:
/*
** Precompiled bytecode for the library functions indexed by libbc_map.
** Two variants are provided and selected at compile time by LJ_FR2.
** The opcode bytes inside are numbered according to the BCDEF order that was
** in effect when this table was generated.
*/
static const uint8_t libbc_code[] = {
#if LJ_FR2
0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
6,252,127,76,4,2,0,0
#else
0,1,2,0,0,1,2, 24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,16,
0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,0,0,41,
4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,128,76,6,2,
0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,0,0,18,3,0,0,41,
4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,0,88,8,1,128,76,7,2,
0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,
0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,8,2,0,0,88,3,23,128,59,3,2,0,43,4,
0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,
3,12,128,59,3,1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,
4,252,127,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,
16,2,14,0,16,3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,
4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,18,6,2,0,18,7,1,0,41,8,255,
255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,76,4,2,0,0
#endif
};
libbc_map中指定了每个函数字节码的起始位置,用下一个函数的起始位置减去当前函数的起始位置,即可计算出该函数字节码所占的长度。预编译的字节码区分了FR2和非FR2,区别见 64位与32位字节码区别
libbc_code中存储的每个函数的头部如下:
/*
** Descriptive layout of the header that precedes each function in
** libbc_code. NOTE: uchar and uleb128 are descriptive type names that are
** not declared in this snippet; uleb128 fields are variable-length.
*/
typedef struct{
uchar flags;                   /* Prototype flags. */
uchar arguments_size;          /* Number of parameters. */
uchar frame_size;              /* Frame size. */
uchar upvalue_size;            /* Number of upvalues. */
uleb128 ComplexConstant_size;  /* Number of complex constants. */
uleb128 NumericConstant_size;  /* Number of numeric constants. */
uleb128 ins_size;              /* Number of bytecode instructions. */
}fun_header;
字节码头部存储了函数相关的信息,与luajit字节码文件中原型的头部类似,除了没有原型大小字段,函数头部后面存储的是各个字节码指令,再后面是复杂常量和数值常量,修改luajit中opcode顺序后,需要将这些预编译库函数的指令opcode全部对应修改,替换为修改后的指令集下标,自动替换的Python脚本:
def get_opcode_list(fname):
    """Return the opcode names found in a BCDEF macro listing, in file order.

    Every line that contains an ``_(NAME, ...)`` entry contributes ``NAME``;
    lines without such an entry (comments, blank continuation lines, the
    ``#define`` line) are ignored.

    :param fname: path of a text file holding a BCDEF macro definition.
    :return: list of opcode name strings, index == opcode number.
    """
    opcodes = []
    # The context manager closes the file even if reading raises.
    with open(fname) as f:
        for line in f:
            start = line.find("_(")
            if start == -1:
                continue
            # Search for the comma *after* "_(": a comma earlier on the line
            # (e.g. inside a leading comment) must not cut the name short.
            end = line.find(",", start + 2)
            if end == -1:
                # Not a well-formed entry; skip rather than record garbage.
                continue
            opcodes.append(line[start + 2:end].strip())
    return opcodes
# Text file holding the original BCDEF macro (stock opcode order).
name_1 = "opcodes_1.txt"
# Text file holding the modified BCDEF macro (custom opcode order).
name_2 = "opcodes_2.txt"
opcodes_1 = get_opcode_list(name_1)
opcodes_2 = get_opcode_list(name_2)


def get_replace_code(i):
    """Map opcode number ``i`` of the original order to the modified order.

    :param i: opcode number under the original BCDEF order (index into
        ``opcodes_1``).
    :return: index of the same opcode name in ``opcodes_2``.
    :raises IndexError: if ``i`` is not a valid index into ``opcodes_1``.
    :raises ValueError: if the opcode name does not occur in ``opcodes_2``.
        Previously this case returned ``None``, which the caller would have
        written into the patched bytecode unnoticed.
    """
    str_s = opcodes_1[i]
    try:
        # list.index returns the first match, like the original linear scan.
        return opcodes_2.index(str_s)
    except ValueError:
        raise ValueError(
            "opcode %r (number %d) not found in %s" % (str_s, i, name_2))
# Precompiled library-function bytecode to be patched in place.
# The values match the LJ_FR2 branch of libbc_code; each function starts with
# a small header followed by 4-byte instructions whose first byte is the opcode.
funcs = [0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
2,0,76,3,2,0,75,0,1,0,0,2,
0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
6,252,127,76,4,2,0,0]
# Start offset of each precompiled function inside funcs (the ofs values of
# libbc_map without the terminating entry).
indxs = [0,25,50,69,136,207,226,355]


def _read_uleb128(buf, pos):
    """Decode one unsigned LEB128 value from ``buf`` starting at ``pos``.

    Each byte carries 7 payload bits, least-significant group first; a set
    top bit (byte >= 0x80) means another byte follows.

    :return: tuple ``(value, next_pos)`` where ``next_pos`` is the index of
        the first byte after the encoded value.
    """
    value = 0
    shift = 0
    while True:
        byte = buf[pos]
        pos += 1
        value |= (byte & 0x7f) << shift
        if byte < 0x80:
            return value, pos
        shift += 7


for start in indxs:
    # Skip the four single-byte header fields.
    pos = start + 4
    # Three ULEB128 fields follow: complex-constant count, numeric-constant
    # count, instruction count. Only the last one is needed here.
    _, pos = _read_uleb128(funcs, pos)
    _, pos = _read_uleb128(funcs, pos)
    count, pos = _read_uleb128(funcs, pos)
    # Instructions are 4 bytes each; the opcode is the first byte.
    for _ in range(count):
        funcs[pos] = get_replace_code(funcs[pos])
        pos += 4
print(funcs)
其中funcs中为预编译库函数的字节码,opcodes_1.txt中存放的是原始的BCDEF指令宏定义,opcodes_2.txt中存放的是修改后的BCDEF指令宏定义。用输出的funcs替换原始的libbc_code。
3.1反编译修改后字节码文件
1)标准luajit反汇编:
2)Luajit-decomp反编译:
3)Ljd反编译:
修改指令顺序后出错情况难以预测,需要将指令还原才可以正常反汇编或反编译
4、 修改原型头部
原型头部包括了原型的一些基本信息,如原型占字节大小、flags标志、参数个数、frame大小、upvalue个数、复杂常量个数、数值常量个数、指令个数。这里我们把参数个数和upvalue个数互换,把常量个数、指令个数互换。
1) 互换参数个数和upvalue个数
① 在lj_bcread.c中,替换如下:
// numparams = bcread_byte(ls);
// framesize = bcread_byte(ls);
// sizeuv = bcread_byte(ls);
//替换读取顺序
sizeuv = bcread_byte(ls);
numparams = bcread_byte(ls);
framesize = bcread_byte(ls);
② 在lj_bcwrite.c中,替换如下:
// *p++ = pt->numparams;
// *p++ = pt->framesize;
// *p++ = pt->sizeuv;
//替换写入顺序
*p++ = pt->sizeuv;
*p++ = pt->numparams;
*p++ = pt->framesize;
2) 替换常量个数和指令个数
① 在lj_bcread.c中,替换如下:
// sizekgc = bcread_uleb128(ls);
// sizekn = bcread_uleb128(ls);
// sizebc = bcread_uleb128(ls) + 1;
//替换读取的顺序
sizebc = bcread_uleb128(ls) + 1;
sizekgc = bcread_uleb128(ls);
sizekn = bcread_uleb128(ls);
② 在lj_bcwrite.c中,替换如下:
// p = lj_strfmt_wuleb128(p, pt->sizekgc);
// p = lj_strfmt_wuleb128(p, pt->sizekn);
// p = lj_strfmt_wuleb128(p, pt->sizebc-1);
//替换写入顺序
p = lj_strfmt_wuleb128(p, pt->sizebc-1);
p = lj_strfmt_wuleb128(p, pt->sizekgc);
p = lj_strfmt_wuleb128(p, pt->sizekn);
同样由于预编译了一些字节码,这些字节码的原型头部也要对应修改,这些预编译字节码的加载过程见 LuaJit预编译库函数加载过程