TikTok 反编译思路
dreamland
理解VMP的基本原理
VMP(Virtual Machine Protection,虚拟机保护)将原始 JavaScript 代码转换为自定义字节码,然后通过虚拟机解释执行。反编译的第一步是理解这个过程:
代码被转换为字节码指令
字节码通常会被加密或混淆
虚拟机解释器执行这些指令
提取加密的字节码
1 2 3 4 5
// Placeholder for the function that returns the Base64-encoded, encrypted
// bytecode string; the real payload has been elided and replaced by "".
// NOTE(review): the name Ab78 is taken from the `Ab78()` call in the
// decryption step below — confirm it against the original script.
function Ab78() {
  return "";
}
|
解密字节码
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Decrypts the bytecode payload returned by Ab78().
// Each statement sits on its own line so that it is no longer swallowed by
// the `//` step comment that precedes it.

// 1. Base64-decode the payload into a binary string (one character per byte).
const decoded = atob(Ab78());

// 2. Derive the XOR key: the sum of the four bytes at offsets 4..7.
let XORkey = 0;
for (let i = 4; i < 8; ++i) {
  XORkey += decoded.charCodeAt(i);
}

// 3. XOR-decrypt everything after the 8-byte prefix. The mask for byte i is
//    (XORkey + (XORkey % 10) * i) % 256, so it changes with the position.
const decrypted = Uint8Array.from(decoded.slice(8), (char, i) => {
  return (char.charCodeAt(0) ^ (XORkey + XORkey % 10 * i) % 256) >>> 0;
});

// 4. Inflate the decrypted bytes as a raw DEFLATE stream (no zlib/gzip header).
const decompressed = zlib.inflateRawSync(Buffer.from(decrypted));
|
解析指令集和字符串表
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
/**
 * Parses the decompressed bytecode blob into its string table and its list of
 * instruction sets.
 *
 * Layout, in read order: string count, then that many strings; instruction-set
 * count, then for each set: argument count, strict-mode flag, exception-handler
 * count followed by four integers per handler, instruction count followed by
 * that many integers.
 *
 * The read position lives in a shared cursor object instead of a plain number:
 * a number is passed by value, so the helpers could never advance it and every
 * read would start at offset 0 again.
 *
 * `readLEB128` and `readString` are defined outside this snippet. They are
 * expected to read at `cursor.offset` and advance it past what they consumed
 * — TODO confirm against their actual implementation.
 *
 * @param {Uint8Array|Buffer} data - Decompressed bytecode.
 * @returns {{strings: string[], instructionSets: Array}} Each instruction set
 *   is `[instructions, argsLength, isStrictMode, exceptionHandlers]`.
 */
function parseByteCode(data) {
  const cursor = { offset: 0 };

  const strings = [];
  const stringCount = readLEB128(data, cursor);
  for (let i = 0; i < stringCount; i++) {
    strings.push(readString(data, cursor));
  }

  const instructionSets = [];
  const instructionSetCount = readLEB128(data, cursor);
  for (let i = 0; i < instructionSetCount; i++) {
    const argsLength = readLEB128(data, cursor);
    const isStrictMode = Boolean(readLEB128(data, cursor));

    const exceptionHandlers = [];
    const handlerCount = readLEB128(data, cursor);
    for (let j = 0; j < handlerCount; j++) {
      // Array elements are evaluated left to right, so the four fields are
      // consumed in stream order.
      exceptionHandlers.push([
        readLEB128(data, cursor),
        readLEB128(data, cursor),
        readLEB128(data, cursor),
        readLEB128(data, cursor),
      ]);
    }

    const instructions = [];
    const instructionCount = readLEB128(data, cursor);
    for (let j = 0; j < instructionCount; j++) {
      instructions.push(readLEB128(data, cursor));
    }

    instructionSets.push([instructions, argsLength, isStrictMode, exceptionHandlers]);
  }

  return { strings, instructionSets };
}
|
构建指令映射表
1 2 3 4 5 6 7 8 9 10
// Maps numeric VM opcodes to a human-readable description of what each one
// does. Only opcodes 0 through 7 are listed here.
const opcodeMap = {
  0: "返回(如果执行状态不为0)",
  1: "定义对象属性",
  2: "条件跳转",
  3: "创建全局变量引用",
  4: "比较小于等于",
  5: "后置递减",
  6: "压入常量",
  7: "设置对象属性",
};
|
实现反编译器
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
/**
 * Translates one VM instruction set into the source text of a JavaScript
 * function named `VM<vmIndex>`.
 *
 * The VM's value stack is modelled with the local variables v0..v15;
 * `pointer` tracks the slot that currently holds the top of the stack.
 * Opcodes 0, 1 and 6 are handled; any other opcode is skipped without
 * consuming operands.
 *
 * @param {Array} instructionSet - `[instructions, argsLength, ...]`; only the
 *   first two entries are used.
 * @param {string[]} strings - String table indexed by opcode operands.
 * @param {number} vmIndex - Number used in the generated function's name.
 * @returns {string} Generated source, one statement per line.
 */
function decompile(instructionSet, strings, vmIndex) {
  const [instructions, argsLength] = instructionSet;

  const params = Array.from({ length: argsLength }, (_, i) => `arg${i}`).join(', ');
  const slots = Array.from({ length: 16 }, (_, i) => `v${i}`).join(',');

  const code = [
    `function VM${vmIndex}(${params}) {`,
    ` var ${slots}; // 生成的变量`,
    ` var executionStack = []; // 初始化执行栈`,
  ];

  let index = 0;
  let pointer = -1;

  while (index < instructions.length) {
    const opCode = instructions[index++];

    switch (opCode) {
      case 0:
        code.push(` if (0 !== executionState) return;`);
        break;

      case 1: {
        const stringIndex = instructions[index++];
        // JSON.stringify yields a correctly escaped string literal, so a name
        // containing quotes, backslashes or newlines cannot break the
        // generated source. String() keeps a missing entry rendering as
        // "undefined".
        const propertyName = JSON.stringify(String(strings[stringIndex]));
        code.push(` Object.defineProperty(v${pointer - 1}, ${propertyName}, { value: v${pointer}, writable: true, configurable: true, enumerable: true });`);
        pointer--;
        break;
      }

      case 6: {
        // Braces keep `value` scoped to this case clause.
        pointer++;
        const value = instructions[index++];
        code.push(` v${pointer} = ${value};`);
        break;
      }
    }
  }

  code.push("}");
  return code.join('\n');
}
|
处理控制流和函数调用
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Opcode 2: reads one operand (the jump offset) and emits only a comment line describing the conditional skip and its target; no actual control flow is generated.
| case 2: { const jumpOffset = instructions[index++]; code.push(` // if (!v${pointer}) skip ${jumpOffset} to ${index + jumpOffset}`); break; }
// Opcode 41: reads one operand (the argument count) and emits a call statement: callee v[pointer-argsLen-1], arguments v[pointer-argsLen] .. v[pointer-1], result stored in v[pointer-argsLen-2]; the pointer is then lowered by argsLen + 1.
// NOTE(review): the argument slots end at v[pointer-1] rather than v[pointer], and the result slot sits one below the final pointer — confirm the intended stack-pointer convention.
case 41: { const argsLen = instructions[index++]; code.push(` v${pointer-argsLen-2} = v${pointer-argsLen-1}(${ Array(argsLen).fill(0).map((_, i) => `v${pointer-argsLen+i}`).join(',') });`); pointer = pointer - argsLen - 1; break; }
|
生成和保存反编译代码
1 2 3 4 5
// Decompiles every instruction set and saves each result as
// functions/VM<index>.js.
// The output directory is created first (a no-op when it already exists) so
// the writes do not fail with ENOENT on a fresh working directory.
fs.mkdirSync('functions', { recursive: true });

instructionSets.forEach((instructionSet, index) => {
  const code = decompile(instructionSet, strings, index);
  fs.writeFileSync(`functions/VM${index}.js`, code);
});
|
分析关键函数
反编译后,需要分析关键函数,特别是与签名生成相关的函数:
分析VM86等签名相关函数
理解MD5哈希、RC4加密等算法的实现
研究X-Bogus和_signature参数的生成逻辑
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/**
 * Outlines the URL-signing flow: append `msToken`, then `X-Bogus`, then
 * `_signature`, each step building on the URL produced by the previous one.
 *
 * Every step binds a new local instead of reassigning the `url` parameter.
 * `msToken`, `appendParam`, `getQueryString`, `MD5`, `generateXBogus` and
 * `generateSignature` are defined outside this snippet.
 *
 * @param {string} url - Request URL to sign.
 * @param {string} userAgent - Passed through to `generateSignature`.
 * @returns {string} The URL with the three parameters appended.
 */
function signURL(url, userAgent) {
  const urlWithToken = appendParam(url, 'msToken', msToken);

  const query = getQueryString(urlWithToken);
  // NOTE(review): hash1 is computed but never consumed below; presumably it is
  // meant to be an input to generateXBogus — confirm against the real VM code.
  const hash1 = MD5(query);

  const xBogus = generateXBogus();
  const urlWithBogus = appendParam(urlWithToken, 'X-Bogus', xBogus);

  // The signature is computed over the URL that already carries X-Bogus.
  const signature = generateSignature(urlWithBogus, userAgent);
  return appendParam(urlWithBogus, '_signature', signature);
}
|
总结
反编译VMP是一个复杂的过程,需要深入理解JavaScript虚拟机原理、二进制解析和代码转换技术。