qemu/kvm架构
cpu虚拟化的示例
Reference: kvmtest.c [LWN.net]
主要步骤:
- QEMU通过/dev/kvm设备文件发起KVM_CREATE_VM ioctl,请求KVM创建一个虚拟机。KVM创建虚拟机相应的结构体,并为QEMU返回一个虚拟机文件描述符
- QEMU通过虚拟机文件描述符发起KVM_CREATE_VCPU ioctl,请求KVM创建一个vCPU。KVM创建vCPU相应的结构体并初始化,返回一个vCPU文件描述符。
- QEMU通过vCPU文件描述符发起KVM_RUN ioctl,vCPU线程执行VMLAUNCH指令进入非根模式,执行虚拟机代码直至发生VM-Exit。
- KVM根据VM-Exit的原因进行相应处理,如果与IO有关,则需要进一步返回到QEMU中进行处理。
运行结果:
代码实现:
/* Sample code for /dev/kvm API */
#include <err.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
/*
 * Minimal /dev/kvm walkthrough: create a VM with one page of guest memory
 * and one vCPU, run a tiny real-mode program, and service its VM exits.
 *
 * The guest adds %bl to %al, converts the sum to an ASCII digit, writes it
 * and a newline to I/O port 0x3f8, then writes "Hello\n" to the same port
 * and halts. With the initial register values set below (rax = 2, rbx = 2)
 * the digit written is '4'.
 *
 * Returns 0 once the guest executes hlt; any failure terminates the process
 * through err()/errx() with exit status 1.
 */
int main(void)
{
    int kvm, vmfd, vcpufd, ret;
    const uint8_t code[] = {
        /* Write the sum digit, then "Hello", to port 0x3f8. */
        0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
        0x00, 0xd8,       /* add %bl, %al */
        0x04, '0',        /* add $'0', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, '\n',       /* mov $'\n', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, 'H',        /* mov $'H', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, 'e',        /* mov $'e', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, 'l',        /* mov $'l', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, 'l',        /* mov $'l', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, 'o',        /* mov $'o', %al */
        0xee,             /* out %al, (%dx) */
        0xb0, '\n',       /* mov $'\n', %al */
        0xee,             /* out %al, (%dx) */
        0xf4,             /* hlt */
    };
    uint8_t *mem;
    struct kvm_sregs sregs;
    size_t mmap_size;
    struct kvm_run *run;

    /* Step 1: open the KVM device file. */
    kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
    if (kvm == -1) {
        err(1, "/dev/kvm");
    }

    /* Make sure we have the stable version of the API (version 12). */
    ret = ioctl(kvm, KVM_GET_API_VERSION, NULL);
    if (ret == -1) {
        err(1, "KVM_GET_API_VERSION");
    }
    if (ret != 12) {
        errx(1, "KVM_GET_API_VERSION %d, expected 12", ret);
    }

    /* Step 2: KVM_CREATE_VM returns a file descriptor for the new VM. */
    vmfd = ioctl(kvm, KVM_CREATE_VM, (unsigned long)0);
    if (vmfd == -1) {
        err(1, "KVM_CREATE_VM");
    }

    /*
     * Allocate one page (0x1000 bytes) of host memory to hold the guest
     * code. mmap() signals failure with MAP_FAILED, not NULL, so that is
     * the value that must be tested.
     */
    mem = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
               MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED) {
        err(1, "allocating guest memory");
    }

    /* Copy the guest machine code into the freshly mapped page. */
    memcpy(mem, code, sizeof(code));

    /*
     * Expose the page to the guest at guest-physical address 0x1000, i.e.
     * the second page frame (to avoid the real-mode IDT at 0).
     */
    struct kvm_userspace_memory_region region = {
        .slot = 0,
        .guest_phys_addr = 0x1000,
        .memory_size = 0x1000,
        .userspace_addr = (uint64_t)(uintptr_t)mem,
    };
    ret = ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
    if (ret == -1) {
        err(1, "KVM_SET_USER_MEMORY_REGION");
    }

    /* Step 3: KVM_CREATE_VCPU returns a file descriptor for vCPU 0. */
    vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, (unsigned long)0);
    if (vcpufd == -1) {
        err(1, "KVM_CREATE_VCPU");
    }

    /*
     * Step 4: query the size of the shared kvm_run area (asked on the
     * /dev/kvm fd) and map it through the vCPU fd.
     */
    ret = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL);
    if (ret == -1) {
        err(1, "KVM_GET_VCPU_MMAP_SIZE");
    }
    mmap_size = (size_t)ret;
    if (mmap_size < sizeof(*run)) {
        errx(1, "KVM_GET_VCPU_MMAP_SIZE unexpectedly small");
    }
    run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
    if (run == MAP_FAILED) {
        err(1, "mmap vcpu");
    }

    /*
     * Step 5: point CS at 0 via a read-modify-write of the special
     * registers, so that execution starts at linear address 0x1000 once
     * RIP is set below.
     */
    ret = ioctl(vcpufd, KVM_GET_SREGS, &sregs);
    if (ret == -1) {
        err(1, "KVM_GET_SREGS");
    }
    sregs.cs.base = 0;
    sregs.cs.selector = 0;
    ret = ioctl(vcpufd, KVM_SET_SREGS, &sregs);
    if (ret == -1) {
        err(1, "KVM_SET_SREGS");
    }

    /*
     * Initialize registers: instruction pointer for our code, addends, and
     * initial flags required by x86 architecture.
     */
    struct kvm_regs regs = {
        .rip = 0x1000,
        .rax = 2,
        .rbx = 2,
        .rflags = 0x2,
    };
    ret = ioctl(vcpufd, KVM_SET_REGS, &regs);
    if (ret == -1) {
        err(1, "KVM_SET_REGS");
    }

    /* Repeatedly run code and handle VM exits. */
    while (1) {
        /* Step 6: KVM_RUN enters the guest until the next VM exit. */
        ret = ioctl(vcpufd, KVM_RUN, NULL);
        if (ret == -1) {
            err(1, "KVM_RUN");
        }

        /* Step 7: dispatch on the reason for the VM exit. */
        switch (run->exit_reason) {
        case KVM_EXIT_HLT:
            /* The guest executed hlt: the program is done. */
            puts("KVM_EXIT_HLT");
            return 0;
        case KVM_EXIT_IO:
            /*
             * Each `out` to port 0x3f8 exits to user space; the byte
             * written sits data_offset bytes into the kvm_run mapping.
             */
            if (run->io.direction == KVM_EXIT_IO_OUT && run->io.size == 1 &&
                run->io.port == 0x3f8 && run->io.count == 1) {
                putchar(*(((char *)run) + run->io.data_offset));
            } else {
                errx(1, "unhandled KVM_EXIT_IO");
            }
            break;
        case KVM_EXIT_FAIL_ENTRY:
            /* errx() does not return, so there is no fallthrough here. */
            errx(1, "KVM_EXIT_FAIL_ENTRY: hardware_entry_failure_reason = 0x%llx",
                 (unsigned long long)run->fail_entry.hardware_entry_failure_reason);
        case KVM_EXIT_INTERNAL_ERROR:
            errx(1, "KVM_EXIT_INTERNAL_ERROR: suberror = 0x%x", run->internal.suberror);
        default:
            errx(1, "exit_reason = 0x%x", run->exit_reason);
        }
    }
}
KVM API
/usr/include/linux/kvm.h
ioctl | KVM API | Description | Example |
---|---|---|---|
ioctls for /dev/kvm fds | KVM_GET_API_VERSION | 获取 KVM API 版本 | kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC) ret = ioctl(kvm, KVM_GET_API_VERSION, NULL) |
KVM_CREATE_VM | 创建虚拟机获得虚拟机文件描述符 | vmfd = ioctl(kvm, KVM_CREATE_VM, 0) | |
ioctls for VM fds | KVM_SET_USER_MEMORY_REGION | 将内存页映射至虚拟机物理地址处 | ret = ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region) |
KVM_CREATE_VCPU | 创建 vCPU 获得 vCPU 文件描述符 | vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0) | |
KVM_GET_VCPU_MMAP_SIZE | 获取 QEMU/KVM 共享内存空间大小(注意:该 ioctl 作用于 /dev/kvm 文件描述符,而不是虚拟机文件描述符) | mmap_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, NULL); 使用 vCPU 文件描述符,映射 kvm_run 结构体 run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0) | |
ioctls for vcpu fds | KVM_GET_SREGS | 获取特殊寄存器(包含 CS 等段寄存器;RIP 属于通用寄存器,通过 KVM_GET_REGS / KVM_SET_REGS 访问) | ret = ioctl(vcpufd, KVM_GET_SREGS, &sregs) |
KVM_SET_SREGS | 设置 CS 寄存器和 RIP 寄存器 | sregs.cs.base = 0 sregs.cs.selector = 0 ret = ioctl(vcpufd, KVM_SET_SREGS, &sregs) struct kvm_regs regs = { .rip = 0x1000, .rax = 2, .rbx = 2, .rflags = 0x2, } ret = ioctl(vcpufd, KVM_SET_REGS, &regs) | |
KVM_RUN | 运行 vCPU | ret = ioctl(vcpufd, KVM_RUN, NULL) |