写在前面
本文看下如何模拟解释器执行指令码。需要一些前置内容:
用Java手写jvm之系列 中的前4篇文章,当然如果你已经了解了这部分内容,也可以不看。
1:正文
既然是模拟解释器,我们肯定要先来定义一个解释器类了:
/**
 * Interpreter: runs a method by repeatedly decoding one opcode from its bytecode
 * and executing the matching {@link Instruction}.
 */
public class Interpreter {

    /**
     * Sets up a thread and a frame for the given method and immediately starts
     * interpreting its bytecode.
     *
     * @param m the method to run; its code attribute supplies the bytecode and the
     *          sizes of the local variable table and operand stack
     */
    public Interpreter(MemberInfo m) {
        CodeAttribute code = m.codeAttribute();
        // Sizes used to create the frame: local variable table first, operand stack second.
        int localsSize = code.maxLocals();
        int stackSize = code.maxStack();
        // The method body as raw bytecode.
        byte[] instructions = code.data();
        // Project-defined Thread (it exposes newFrame/pushFrame); stands in for the executing thread.
        Thread executor = new Thread();
        Frame entryFrame = executor.newFrame(localsSize, stackSize);
        executor.pushFrame(entryFrame);
        loop(executor, instructions);
    }

    /**
     * Fetch/decode/execute loop. Runs until an opcode is met for which the factory
     * returns no instruction.
     *
     * @param thread   the thread whose frame is executed and whose PC is updated
     * @param byteCode the bytecode of the method being interpreted
     */
    private void loop(Thread thread, byte[] byteCode) {
        // NOTE(review): popFrame presumably removes the frame from the thread's stack
        // rather than peeking at it - confirm nothing later expects it to still be there.
        Frame current = thread.popFrame();
        // One reader is reused for every instruction; it is re-positioned on each iteration.
        BytecodeReader codeReader = new BytecodeReader();
        for (;;) {
            // The frame says where the next instruction starts; mirror that into the thread's PC.
            int position = current.nextPC();
            thread.setPC(position);
            codeReader.reset(byteCode, position);

            byte opcode = codeReader.readByte();
            Instruction decoded = Factory.newInstruction(opcode);
            if (decoded == null) {
                // No instruction available for this opcode: report it and stop interpreting.
                System.out.println("寄存器(指令)尚未实现 " + byteToHexString(new byte[]{opcode}));
                return;
            }

            // Let the instruction consume its operands (if any) from the reader, then
            // record where the reader ended up as the frame's next PC.
            decoded.fetchOperands(codeReader);
            current.setNextPC(codeReader.pc());

            // Trace line is printed BEFORE execution, so it shows the state the instruction starts from.
            System.out.println("寄存器(指令):" + byteToHexString(new byte[]{opcode})
                    + " -> " + decoded.getClass().getSimpleName()
                    + " => 局部变量表:" + JSON.toJSONString(current.localVars().getSlots())
                    + " 操作数栈:" + JSON.toJSONString(current.operandStack().getSlots()));

            decoded.execute(current);
        }
    }
    // ...
}
BytecodeReader类负责从指令码数组中读取指令码,源码如下:
package com.dahuyou.tryy.too.simulate.interpreter.instructions.base;
/**
 * Cursor over a bytecode array: keeps a program counter and reads big-endian
 * values from the current position, advancing the counter as it goes.
 */
public class BytecodeReader {
    // The bytecode being read.
    private byte[] codes;
    // Program counter: index of the next byte to read.
    private int pc;

    /**
     * Points the reader at a bytecode array and position.
     *
     * @param codes the bytecode array to read from
     * @param pc    index of the first byte to read
     */
    public void reset(byte[] codes, int pc) {
        this.codes = codes;
        this.pc = pc;
    }

    /**
     * @return the current program counter (index of the next byte to be read)
     */
    public int pc() {
        return this.pc;
    }

    /**
     * Reads one signed byte and advances the program counter by 1.
     *
     * @return the byte at the current position
     */
    public byte readByte() {
        byte code = this.codes[this.pc];
        this.pc++;
        return code;
    }

    /**
     * Reads two bytes as a big-endian signed 16-bit value.
     *
     * @return the value {@code (b1 << 8) | b2} with both bytes treated as unsigned
     */
    public short readShort() {
        // Mask to 0..255 first: a sign-extended low byte (>= 0x80) would otherwise
        // set every bit of the high byte when OR-ed in.
        int high = readByte() & 0xFF;
        int low = readByte() & 0xFF;
        return (short) ((high << 8) | low);
    }

    /**
     * Reads four bytes as a big-endian signed 32-bit value.
     *
     * @return the assembled int
     */
    public int readInt() {
        // Same masking as readShort: each byte must contribute only its own 8 bits.
        int b1 = readByte() & 0xFF;
        int b2 = readByte() & 0xFF;
        int b3 = readByte() & 0xFF;
        int b4 = readByte() & 0xFF;
        return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
    }

    /**
     * Reads {@code n} consecutive 32-bit values.
     *
     * @param n number of ints to read
     * @return an array of length {@code n} holding the values in read order
     */
    public int[] readInts(int n) {
        int[] ints = new int[n];
        for (int i = 0; i < n; i++) {
            ints[i] = this.readInt();
        }
        return ints;
    }

    /**
     * Advances the program counter to the next multiple of 4, skipping padding bytes.
     * Used by lookupswitch and tableswitch.
     */
    public void skipPadding() {
        while (this.pc % 4 != 0) {
            this.readByte();
        }
    }
}
代码byte opcode = reader.readByte();
是获取要执行的指令码,程序Instruction inst = Factory.newInstruction(opcode);
是根据指令码获取执行该指令码的指令对象,Instruction是一个接口,定义了执行一个指令需要的操作,如下:
package com.dahuyou.tryy.too.simulate.interpreter.instructions.base;
import com.dahuyou.tryy.too.simulate.interpreter.runtime.area.Frame;
/**
 * Contract for a single bytecode instruction: first read its operands, then execute.
 */
public interface Instruction {

    /**
     * Gives the instruction the chance to read its operands through the supplied reader.
     * Not every instruction has operands to read (iconst_0, for example, only pushes
     * the constant 0).
     *
     * @param reader bytecode reader positioned just after the opcode
     */
    void fetchOperands(BytecodeReader reader);

    /**
     * Performs the instruction's work against the given frame; for IADD, for example,
     * this is the actual addition.
     *
     * @param frame the frame the instruction operates on
     */
    void execute(Frame frame);

    /**
     * Redirects execution: sets the frame's next PC to the owning thread's current PC
     * plus {@code offset}.
     *
     * @param frame  the frame whose next PC is changed
     * @param offset distance to jump, relative to the thread's current PC
     */
    static void branch(Frame frame, int offset) {
        frame.setNextPC(frame.thread().pc() + offset);
    }
}
inst.execute(frame);
就是真正的执行指令了,比如IADD,就是执行两个整数的相加操作了,如下:
/**
 * IADD: replaces the two ints on top of the operand stack with their sum.
 */
public class IADD extends InstructionNoOperands {

    /**
     * Pops two ints from the frame's operand stack and pushes their sum.
     *
     * @param frame the frame whose operand stack is operated on
     */
    @Override
    public void execute(Frame frame) {
        OperandStack operands = frame.operandStack();
        // The first pop yields the second addend, the second pop the first addend.
        int rhs = operands.popInt();
        int lhs = operands.popInt();
        operands.pushInt(lhs + rhs);
    }
}
最后,因为指令是非常多的,当前有两百多个,所以Instruction的子类也非常多,比如数学运算指令IADD,局部变量表、操作数栈的操作指令store、load,大小比较指令if_icmpge,方法执行指令invokespecial、invokevirtual,对象创建指令new等。具体看源码吧!
然后来测试下效果,main类:
package com.dahuyou.tryy.too.simulate.interpreter;
import com.dahuyou.tryy.too.simulate.interpreter.parse.clazz.classpath.Classpath;
import com.dahuyou.tryy.too.simulate.interpreter.parse.clazz.clazzfile.ClassFile;
import com.dahuyou.tryy.too.simulate.interpreter.parse.clazz.clazzfile.MemberInfo;
import com.dahuyou.tryy.too.simulate.interpreter.parse.clazz.cmd.Cmd;
/**
 * Command-line entry point: parses the arguments, loads the target class file,
 * finds its {@code main} method and hands it to the {@link Interpreter}.
 *
 * <p>Example program arguments:
 * -Xthejrepath D:\programs\javas\java1.8/jre -Xthetargetclazz D:\test\itstack-demo-jvm-master\try-too-simulate-interpreter\target\test-classes\org\itstack\demo\test\HelloWorld
 */
public class Main {

    /**
     * Parses the command line and either prints usage/version information or starts
     * the simulated JVM.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        Cmd cmd = Cmd.parse(args);
        if (!cmd.ok || cmd.helpFlag) {
            System.out.println("Usage: <main class> [-options] class [args...]");
            return;
        }
        if (cmd.versionFlag) {
            // The examples are all based on Java 1.8; from JDK 9 on the runtime is
            // modular and there is no rt.jar.
            System.out.println("java version \"1.8.0\"");
            return;
        }
        startJVM(cmd);
    }

    /**
     * Loads the target class, locates its main method and interprets it.
     *
     * @param cmd the parsed command line
     */
    private static void startJVM(Cmd cmd) {
        Classpath cp = new Classpath(cmd.thejrepath, cmd.classpath);
        System.out.printf("classpath:%s parsed class:%s \n", cp, cmd.thetargetclazz);
        try {
            String clazzName = cmd.thetargetclazz.replace(".", "/");
            ClassFile classFile = loadClass(clazzName, cp);
            MemberInfo mainMethod = getMainMethod(classFile);
            if (null == mainMethod) {
                // Report the class that was actually searched, not the classpath option.
                System.out.println("Main method not found in class " + cmd.thetargetclazz);
                return;
            }
            // Core step: the interpreter starts executing main from its constructor.
            new Interpreter(mainMethod);
        } catch (Exception e) {
            System.out.println("Could not find or load main class " + cmd.thetargetclazz);
            e.printStackTrace();
        }
    }

    /**
     * Finds the {@code main(String[])} method of a class file. This program only
     * simulates executing main, but any other method would be handled the same way.
     *
     * @param classFile the parsed class file, may be {@code null}
     * @return the method named {@code main} with descriptor
     *         {@code ([Ljava/lang/String;)V}, or {@code null} if {@code classFile}
     *         is {@code null} or has no such method
     */
    private static MemberInfo getMainMethod(ClassFile classFile) {
        if (null == classFile) {
            return null;
        }
        for (MemberInfo m : classFile.methods()) {
            if ("main".equals(m.name()) && "([Ljava/lang/String;)V".equals(m.descriptor())) {
                return m;
            }
        }
        return null;
    }

    /**
     * Reads a class's bytes through the classpath and parses them into a class file object.
     *
     * @param clazzName name of the class to read, as passed to {@code Classpath.readClass}
     * @param cp        the classpath to read from
     * @return the parsed class file, or {@code null} if reading or parsing failed
     */
    private static ClassFile loadClass(String clazzName, Classpath cp) {
        try {
            byte[] classData = cp.readClass(clazzName);
            return new ClassFile(classData);
        } catch (Exception e) {
            // Best effort: keep returning null, but include the cause so the failure can be diagnosed.
            System.out.println("无法加载到类: " + clazzName + " (" + e + ")");
            return null;
        }
    }
}
测试的类:
/**
 * Sample program used as input for the interpreter: adds up the integers 1 through 10
 * and prints the total.
 */
public class HelloWorld {
/**
 * Computes 1 + 2 + ... + 10 in a loop and prints the result.
 *
 * @param args unused
 */
public static void main(String[] args) {
// Running total.
int sum = 0;
// i takes the values 1..10 inclusive.
for (int i = 1; i <= 10; i++) {
sum += i;
}
System.out.println(sum);
}
}
运行主函数前配置下program argument:
-Xthejrepath
D:\programs\javas\java1.8/jre
-Xthetargetclazz
D:\test\itstack-demo-jvm-master\try-too-simulate-interpreter\target\test-classes\org\itstack\demo\test\HelloWorld
debug Interpreter类可以看下指令码都是啥内容:
也可以通过执行javap 看下是否与实际相符:
最后运行看效果:
classpath:com.dahuyou.tryy.too.simulate.interpreter.parse.clazz.classpath.Classpath@45283ce2 parsed class:D:\test\itstack-demo-jvm-master\try-too-simulate-interpreter\target\test-classes\org\itstack\demo\test\HelloWorld
寄存器(指令):0x03 -> ICONST_0 => 局部变量表:[{"num":0},{"num":0},{"num":0}] 操作数栈:[{"num":0},{"num":0}]
寄存器(指令):0x3c -> ISTORE_1 => 局部变量表:[{"num":0},{"num":0},{"num":0}] 操作数栈:[{"num":0},{"num":0}]
寄存器(指令):0x04 -> ICONST_1 => 局部变量表:[{"num":0},{"num":0},{"num":0}] 操作数栈:[{"num":0},{"num":0}]
寄存器(指令):0x3d -> ISTORE_2 => 局部变量表:[{"num":0},{"num":0},{"num":0}] 操作数栈:[{"num":1},{"num":0}]
寄存器(指令):0x1c -> ILOAD_2 => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":1},{"num":0}]
寄存器(指令):0x10 -> BIPUSH => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":1},{"num":0}]
寄存器(指令):0xa3 -> IF_ICMPGT => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":1},{"num":10}]
寄存器(指令):0x1b -> ILOAD_1 => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":1},{"num":10}]
寄存器(指令):0x1c -> ILOAD_2 => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":0},{"num":10}]
寄存器(指令):0x60 -> IADD => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":0},{"num":1}]
寄存器(指令):0x3c -> ISTORE_1 => 局部变量表:[{"num":0},{"num":0},{"num":1}] 操作数栈:[{"num":1},{"num":1}]
寄存器(指令):0x84 -> IINC => 局部变量表:[{"num":0},{"num":1},{"num":1}] 操作数栈:[{"num":1},{"num":1}]
寄存器(指令):0xa7 -> GOTO => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":1},{"num":1}]
寄存器(指令):0x1c -> ILOAD_2 => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":1},{"num":1}]
寄存器(指令):0x10 -> BIPUSH => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":2},{"num":1}]
寄存器(指令):0xa3 -> IF_ICMPGT => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":2},{"num":10}]
寄存器(指令):0x1b -> ILOAD_1 => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":2},{"num":10}]
寄存器(指令):0x1c -> ILOAD_2 => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":1},{"num":10}]
寄存器(指令):0x60 -> IADD => 局部变量表:[{"num":0},{"num":1},{"num":2}] 操作数栈:[{"num":1},{"num":2}]
...
写在后面
参考文章列表
用Java手写jvm之系列 。
jvm之字节码 。