基于MDEV的PCI设备虚拟化DEMO实现

news2025/1/22 22:53:55

利用周末时间做了一个MDEV虚拟化PCI设备的小试验,简单记录一下:

DEMO架构,此图参考了内核文档:Documentation/driver-api/vfio-mediated-device.rst

host kernel watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>

/* Size in bytes of the buffer that backs the emulated BAR0 window. */
#define IO_BAR0_SIZE 32
/* Size in bytes of the buffer that backs the emulated PCI config space. */
#define IO_CONF_SIZE 0x100
/* Vendor/device IDs written into the emulated config header. */
#define CZL_WDG_DEVICE_VENDOR_ID 0xbeef
#define CZL_WDG_DEVICE_DEVICE_ID 0x1001
/* printk wrapper that prefixes the message with the function name and line. */
#define API_DBG(fmt, ...) do { \
                printk("%s line %d, "fmt, __func__, __LINE__, ##__VA_ARGS__); \
        } while (0)

/* Module-wide state of the parent device that hosts the watchdog mdevs. */
struct czl_wdg_dev {
	dev_t         wdg_devt;   /* char device region from alloc_chrdev_region() */
	struct class *wdg_class;  /* "czl_wdg" class created at module init */
	struct cdev   wdg_cdev;   /* cdev initialised with czl_wdg_fops */
	struct device dev;        /* device registered as the mdev parent */
};

/* Per-region bookkeeping kept for every VFIO PCI region index. */
struct mdev_region_info {
	u64 start;        /* BAR base decoded from config space by mdev_read_base() */
	u64 phys_start;   /* not written by any code visible in this file */
	u32 size;         /* region size reported by wdg_get_region_info() */
	u64 vfio_offset;  /* file offset of the region: index << WDG_VFIO_PCI_OFFSET_SHIFT */
};

/* State of one mediated watchdog instance, allocated in czl_wdg_create(). */
struct wdg_mdev_state {
	u8 *config;                 /* IO_CONF_SIZE bytes of emulated PCI config space */
	u8 *iobase;                 /* IO_BAR0_SIZE bytes backing BAR0 accesses */
	struct mdev_device *mdev;   /* owning mediated device */
	struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
	u32 bar_mask[VFIO_PCI_NUM_REGIONS]; /* only bar_mask[0] is set (BAR sizing mask) */
	struct list_head next;      /* link on wdg_mdev_devices_list */
	struct vfio_device_info dev_info; /* copy cached by VFIO_DEVICE_GET_INFO */
	int index;                  /* 1-based number of the mdev type the instance was created from */
	struct mutex ops_lock;      /* held around region accesses and region-info updates */
};

/* File operations of the parent cdev; only the owner is set, no handlers. */
static const struct file_operations czl_wdg_fops = {
	.owner          = THIS_MODULE,
};

/* Protects wdg_mdev_devices_list. */
static struct mutex wdg_mdev_list_lock;
/* All live wdg_mdev_state instances, linked through their ->next member. */
static struct list_head wdg_mdev_devices_list;

/*
 * The VFIO file offset carries the region index in its upper bits and the
 * offset inside the region in the low WDG_VFIO_PCI_OFFSET_SHIFT bits.
 */
#define WDG_VFIO_PCI_OFFSET_SHIFT   (40)
/* The argument is parenthesised so expressions such as (a + b) expand safely. */
#define WDG_VFIO_PCI_OFFSET_TO_INDEX(off)   ((off) >> WDG_VFIO_PCI_OFFSET_SHIFT)
#define WDG_VFIO_PCI_INDEX_TO_OFFSET(index) \
                                        ((u64)(index) << WDG_VFIO_PCI_OFFSET_SHIFT)
#define WDG_VFIO_PCI_OFFSET_MASK    \
                                (((u64)(1) << WDG_VFIO_PCI_OFFSET_SHIFT) - 1)
/* Instance count that available_instances_show() subtracts from. */
#define MAX_WDGS                    (16)
/* The single parent device of this module. */
static struct czl_wdg_dev czl_wdg;

/*
 * sysfs "czl_wdg_dev" attribute of the parent device: emits a fixed
 * description string and returns the number of bytes written to @buf.
 */
static ssize_t
czl_wdg_dev_show(struct device *dev, struct device_attribute *attr,
                 char *buf)
{
	static const char banner[] =
		"mdev emulated pci watchdog device by caozilong.\n";

	return sprintf(buf, "%s", banner);
}
static DEVICE_ATTR_RO(czl_wdg_dev);

/* Attributes placed in the "czl_wdg" sysfs group of the parent device. */
static struct attribute *wdg_dev_attrs[] = {
	&dev_attr_czl_wdg_dev.attr,
	NULL,
};

static const struct attribute_group wdg_dev_group = {
	.name  = "czl_wdg",
	.attrs = wdg_dev_attrs,
};

/* NULL-terminated list passed as mdev_parent_ops.dev_attr_groups. */
static const struct attribute_group *wdg_dev_groups[] = {
	&wdg_dev_group,
	NULL,
};


/*
 * sysfs "mdev_dev" attribute of an mdev instance: prints the device name when
 * @dev resolves to an mdev, otherwise just an empty line.
 */
static ssize_t
mdev_dev_show(struct device *dev, struct device_attribute *attr,
              char *buf)
{
	if (!mdev_from_dev(dev))
		return sprintf(buf, "\n");

	return sprintf(buf, "This is watchdog %s\n", dev_name(dev));
}

static DEVICE_ATTR_RO(mdev_dev);

/* Attributes placed in the "caozilong" sysfs group of every mdev instance. */
static struct attribute *mdev_dev_attrs[] = {
	&dev_attr_mdev_dev.attr,
	NULL,
};

static const struct attribute_group mdev_dev_group = {
	.name  = "caozilong",
	.attrs = mdev_dev_attrs,
};

/* NULL-terminated list passed as mdev_parent_ops.mdev_attr_groups. */
static const struct attribute_group *mdev_dev_groups[] = {
	&mdev_dev_group,
	NULL,
};


/*
 * mdev type "name" attribute. The type directory is expected to be named
 * "<parent driver string>-<N>" with N in 1..3; the matching human-readable
 * label is printed. Returns -EINVAL when the kobject name matches no type.
 */
static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
{
	static const char * const labels[] = {
		"Soft Watchdog",
		"Hardware Watchdog",
		"Dummy Watchdog",
	};
	char type_name[128];
	int n;

	for (n = 0; n < (int)ARRAY_SIZE(labels); n++) {
		snprintf(type_name, sizeof(type_name), "%s-%d",
		         dev_driver_string(dev), n + 1);
		if (strcmp(kobj->name, type_name) == 0)
			return sprintf(buf, "%s\n", labels[n]);
	}

	return -EINVAL;
}

/* mdev type "device_api" attribute: reports the vfio-pci device API string. */
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
                               char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t
available_instances_show(struct kobject *kobj, struct device *dev, char *buf)
{
	struct wdg_mdev_state *mds;
	int used = 0;

	list_for_each_entry(mds, &wdg_mdev_devices_list, next) {
		used ++;
	}

	return sprintf(buf, "%d\n", (MAX_WDGS - used));
}

/* Read-only attributes shared by every supported mdev type. */
static MDEV_TYPE_ATTR_RO(name);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(available_instances);

static struct attribute *mdev_types_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/*
 * Three mdev types named "1", "2" and "3"; all expose the same attributes.
 * name_show() and czl_wdg_create() match the resulting type directory name
 * against "<parent driver string>-<N>".
 */
static struct attribute_group mdev_type_group1 = {
	.name  = "1",
	.attrs = mdev_types_attrs,
};

static struct attribute_group mdev_type_group2 = {
	.name  = "2",
	.attrs = mdev_types_attrs,
};

static struct attribute_group mdev_type_group3 = {
	.name  = "3",
	.attrs = mdev_types_attrs,
};

/* NULL-terminated list passed as mdev_parent_ops.supported_type_groups. */
static struct attribute_group *mdev_type_groups[] = {
	&mdev_type_group1,
	&mdev_type_group2,
	&mdev_type_group3,
	NULL,
};

/* mdev "open" callback: only logs the event and always succeeds. */
static int czl_wdg_open(struct mdev_device *mdev)
{
	pr_info("%s line %d, wdg device opened.\n", __func__, __LINE__);

	return 0;
}

/* mdev "release" callback: only logs the event. */
static void czl_wdg_close(struct mdev_device *mdev)
{
	pr_info("%s line %d, wdg device close.\n", __func__, __LINE__);
}

// fill pci config space meta data & capabilities.
int wdg_create_config_space(struct wdg_mdev_state *mstate)
{
	// vendor id, device id.
	*((unsigned int *)&mstate->config[0]) = CZL_WDG_DEVICE_VENDOR_ID |
	                                        (CZL_WDG_DEVICE_DEVICE_ID << 16);
	*((unsigned short *)&mstate->config[4]) = 0x0001;
	*((unsigned short *)&mstate->config[6]) = 0x0200;

	mstate->config[0x8] =  0x10;
	mstate->config[0x9] =  0x02;
	mstate->config[0xa] =  0x00;
	mstate->config[0xb] =  0x07;

	*((unsigned int *)&mstate->config[0x10]) = 0x000001;
	mstate->bar_mask[0] = ~(IO_BAR0_SIZE) + 1;
	*((unsigned int *)&mstate->config[0x2c]) = 0x10011af4;

	// cap ptr.
	mstate->config[0x34] =  0x00;
	mstate->config[0x3d] =  0x01;
	mstate->config[0x40] =  0x23;
	mstate->config[0x43] =  0x80;
	mstate->config[0x44] =  0x23;
	mstate->config[0x48] =  0x23;
	mstate->config[0x4c] =  0x23;
	mstate->config[0x60] =  0x50;

	mstate->config[0x61] =  0x43;
	mstate->config[0x62] =  0x49;
	mstate->config[0x63] =  0x20;
	mstate->config[0x64] =  0x53;
	mstate->config[0x65] =  0x65;
	mstate->config[0x66] =  0x72;
	mstate->config[0x67] =  0x69;
	mstate->config[0x68] =  0x61;
	mstate->config[0x69] =  0x6c;
	mstate->config[0x6a] =  0x2f;
	mstate->config[0x6b] =  0x55;
	mstate->config[0x6c] =  0x41;
	mstate->config[0x6d] =  0x52;
	mstate->config[0x6e] =  0x54;

	return 0;
}

static int czl_wdg_create(struct kobject *kobj, struct mdev_device *mdev)
{
	int i;
	struct wdg_mdev_state *mstate;
	char name[32];

	if (!mdev)
		return -EINVAL;

	for (i = 0; i < 3; i++) {
		snprintf(name, 32, "%s-%d", dev_driver_string(mdev_parent_dev(mdev)), i + 1);
		if (!strcmp(kobj->name, name)) {
			break;
		}
	}

	if (i >= 3) {
		return -EINVAL;
	}

	mstate = kzalloc(sizeof(struct wdg_mdev_state), GFP_KERNEL);
	if (mstate == NULL)
		return -ENOMEM;
	// group number in mdev_type.
	mstate->index = i + 1;
	mstate->config = kzalloc(IO_CONF_SIZE, GFP_KERNEL);
	if (mstate->config == NULL) {
		pr_err("%s line %d, alloc pci config buffer failure.\n",
		       __func__, __LINE__);
		kfree(mstate);
		return -ENOMEM;
	}

	mstate->iobase = kzalloc(IO_BAR0_SIZE, GFP_KERNEL);
	if (mstate->iobase == NULL) {
		pr_err("%s line %d, alloc pci io buffer failure.\n",
		       __func__, __LINE__);
		kfree(mstate->config);
		kfree(mstate);
		return -ENOMEM;
	}

	memset(mstate->config, 0x00, IO_CONF_SIZE);

	mutex_init(&mstate->ops_lock);
	mstate->mdev = mdev;
	mdev_set_drvdata(mdev, mstate);
	wdg_create_config_space(mstate);

	mutex_lock(&wdg_mdev_list_lock);
	list_add(&mstate->next, &wdg_mdev_devices_list);
	mutex_unlock(&wdg_mdev_list_lock);

	return 0;
}

/*
 * mdev "remove" callback: unlink and free the state attached to @mdev.
 * Returns 0 when the state was found on the global list, -EINVAL otherwise.
 */
static int czl_wdg_remove(struct mdev_device *mdev)
{
	struct wdg_mdev_state *target = mdev_get_drvdata(mdev);
	struct wdg_mdev_state *cur, *tmp;
	int rc = -EINVAL;

	mutex_lock(&wdg_mdev_list_lock);
	list_for_each_entry_safe(cur, tmp, &wdg_mdev_devices_list, next) {
		if (cur != target)
			continue;

		/* Found it: detach from the list and the mdev, then free. */
		list_del(&cur->next);
		mdev_set_drvdata(mdev, NULL);
		kfree(cur->config);
		kfree(cur->iobase);
		kfree(cur);
		rc = 0;
		break;
	}
	mutex_unlock(&wdg_mdev_list_lock);

	return rc;
}

/*
 * Emulate a guest write to PCI configuration space.
 *
 * @mstate: instance state owning the config buffer.
 * @offset: config space offset of the write.
 * @buf:    data to write, @count bytes long.
 * @count:  number of valid bytes in @buf (callers use 1, 2 or 4).
 *
 * Only the interrupt line and BAR registers are writable; everything else is
 * ignored. The BAR0 path never reads more than @count bytes from @buf, so a
 * 1- or 2-byte write no longer reads past the caller's variable.
 */
static void handle_pci_cfg_space_write(struct wdg_mdev_state *mstate, u16 offset,
                                       u8 *buf, u32 count)
{
	u32 cfg_addr;

	if (!count)
		return;

	switch (offset) {
	case 0x04: /* device control */
	case 0x06: /* device status */
		/* do nothing */
		break;
	case 0x3c: /* interrupt line */
		mstate->config[0x3c] = buf[0];
		break;
	case 0x3d: /* interrupt pin, read-only */
		break;
	case 0x10:  /* BAR0 */
		/*
		 * Start from the current register so that a partial write only
		 * replaces the bytes the guest actually supplied.
		 */
		memcpy(&cfg_addr, &mstate->config[offset], sizeof(cfg_addr));
		memcpy(&cfg_addr, buf, min_t(u32, count, sizeof(cfg_addr)));
		pr_info("BAR0 addr 0x%x\n", cfg_addr);
		/* All-ones is the BAR sizing probe: answer with the size mask. */
		if (cfg_addr == 0xffffffff)
			cfg_addr &= mstate->bar_mask[0];
		/* Preserve the low two flag bits of the register. */
		cfg_addr |= (mstate->config[offset] & 0x3ul);
		memcpy(&mstate->config[offset], &cfg_addr, sizeof(cfg_addr));
		break;
	case 0x14:  /* BAR1 */
	case 0x18:  /* BAR2 */
	case 0x1c:  /* BAR3 */
	case 0x20:  /* BAR4 */
	case 0x24:  /* BAR5 */
		/* Unimplemented BARs always read back as zero. */
		memset(&mstate->config[offset], 0, sizeof(u32));
		break;
	default:
		pr_info("PCI config write @0x%x of %d bytes not handled\n",
		        offset, count);
		break;
	}
}

/*
 * Emulate a guest read from PCI configuration space.
 *
 * Copies up to @count bytes starting at @offset from the config buffer into
 * @buf. Any part of the request that lies beyond IO_CONF_SIZE is returned as
 * zero instead of reading past the end of the allocation.
 */
static void handle_pci_cfg_space_read(struct wdg_mdev_state *mstate, u16 offset,
                                      u8 *buf, u32 count)
{
	u32 avail = 0;

	if (offset < IO_CONF_SIZE)
		avail = min_t(u32, count, IO_CONF_SIZE - offset);

	if (avail)
		memcpy(buf, mstate->config + offset, avail);
	if (avail < count)
		memset(buf + avail, 0, count - avail);
}

static void mdev_read_base(struct wdg_mdev_state *mstate)
{
	int index, pos;
	u32 start_lo, start_hi;
	u32 mem_type;

	pos = PCI_BASE_ADDRESS_0;
	for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++)  {
		if (!mstate->region_info[index].size)
			continue;
		start_lo = (*(u32 *)(mstate->config + pos)) &
		           PCI_BASE_ADDRESS_MEM_MASK;
		mem_type = (*(u32 *)(mstate->config + pos)) &
		           PCI_BASE_ADDRESS_MEM_TYPE_MASK;

		switch (mem_type) {
		case PCI_BASE_ADDRESS_MEM_TYPE_64:
			start_hi = (*(u32 *)(mstate->config + pos + 4));
			pos += 4;
			break;
		case PCI_BASE_ADDRESS_MEM_TYPE_32:
		case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		default:
			start_hi = 0;
			break;
		}
		pos += 4;
		mstate->region_info[index].start = ((u64)start_hi << 32) | start_lo;
	}

	return;
}

/*
 * Emulate a guest write to a BAR: store @count bytes from @buf at @offset in
 * the BAR0 backing buffer.
 *
 * Only BAR0 has backing storage (IO_BAR0_SIZE bytes); writes to other BARs or
 * beyond the end of the buffer are rejected with a log message instead of
 * corrupting adjacent heap memory.
 */
static void handle_bar_write(unsigned int index, struct wdg_mdev_state *mstate,
                             u16 offset, u8 *buf, u32 count)
{
	if (index != VFIO_PCI_BAR0_REGION_INDEX ||
	    offset >= IO_BAR0_SIZE || count > IO_BAR0_SIZE - offset) {
		pr_err("%s line %d, bar %d, write offset 0x%x, count 0x%x out of range.\n",
		       __func__, __LINE__, index, offset, count);
		return;
	}

	pr_info("%s line %d, bar %d, write offset 0x%x, count 0x%x, val 0x%x.\n",
	        __func__, __LINE__, index, offset, count, *buf);
	memcpy(mstate->iobase + offset, buf, count);
}

/*
 * Emulate a guest read from a BAR: copy @count bytes at @offset of the BAR0
 * backing buffer into @buf.
 *
 * Only BAR0 has backing storage (IO_BAR0_SIZE bytes); out-of-range requests
 * are answered with all-ones so the caller never sees uninitialised or
 * out-of-bounds memory. The value is logged after the copy, so the log shows
 * the data actually returned.
 */
static void handle_bar_read(unsigned int index, struct wdg_mdev_state *mstate,
                            u16 offset, u8 *buf, u32 count)
{
	if (index != VFIO_PCI_BAR0_REGION_INDEX ||
	    offset >= IO_BAR0_SIZE || count > IO_BAR0_SIZE - offset) {
		pr_err("%s line %d, bar %d, read offset 0x%x, count 0x%x out of range.\n",
		       __func__, __LINE__, index, offset, count);
		memset(buf, 0xff, count);
		return;
	}

	memcpy(buf, mstate->iobase + offset, count);
	pr_info("%s line %d, bar %d, read offset 0x%x, count 0x%x, val 0x%x.\n",
	        __func__, __LINE__, index, offset, count, *buf);
}

/*
 * Dispatch one region access to the config-space or BAR emulation.
 *
 * @mdev:     mediated device being accessed.
 * @buf:      kernel buffer holding the data to write or receiving the data read.
 * @count:    number of bytes to transfer.
 * @pos:      VFIO file offset; upper bits select the region, low bits the
 *            offset inside it.
 * @is_write: true for a write, false for a read.
 *
 * Returns @count on success or -EINVAL when the arguments are invalid, the
 * region has no backing storage, or the access does not fit inside the
 * region. Validating here keeps the u16 offsets taken by the handlers from
 * silently truncating a larger offset.
 */
static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count,
                           loff_t pos, bool is_write)
{
	struct wdg_mdev_state *mstate;
	unsigned int index;
	size_t region_size;
	u64 offset;

	if (!mdev || !buf)
		return -EINVAL;

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d. get mstate failure.\n", __func__, __LINE__);
		return -EINVAL;
	}

	index = WDG_VFIO_PCI_OFFSET_TO_INDEX(pos);
	offset = pos & WDG_VFIO_PCI_OFFSET_MASK;

	/* Only the config space and BAR0 are backed by a buffer. */
	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		region_size = IO_CONF_SIZE;
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		region_size = IO_BAR0_SIZE;
		break;
	default:
		return -EINVAL;
	}

	if (offset >= region_size || count > region_size - offset)
		return -EINVAL;

	mutex_lock(&mstate->ops_lock);
	if (index == VFIO_PCI_CONFIG_REGION_INDEX) {
		pr_info("%s: PCI config space %s at offset 0x%llx\n",
		        __func__, is_write ? "write" : "read", offset);
		if (is_write)
			handle_pci_cfg_space_write(mstate, offset, buf, count);
		else
			handle_pci_cfg_space_read(mstate, offset, buf, count);
	} else {
		if (!mstate->region_info[index].start)
			mdev_read_base(mstate);
		if (is_write) {
			pr_info("%s: write bar%d offset 0x%llx, val 0x%x.\n",
			        __func__, index, offset, *buf);
			handle_bar_write(index, mstate, offset, buf, count);
		} else {
			/* Log after the read so *buf holds the returned data. */
			handle_bar_read(index, mstate, offset, buf, count);
			pr_info("%s: read bar%d offset 0x%llx, val 0x%x.\n",
			        __func__, index, offset, *buf);
		}
	}
	mutex_unlock(&mstate->ops_lock);

	return count;
}

/*
 * mdev "read" callback: copy @count bytes starting at *@ppos to user space.
 *
 * The request is split into naturally aligned 4-, 2- or 1-byte accesses, each
 * forwarded to mdev_access(). *@ppos is advanced by the bytes transferred.
 *
 * Returns the number of bytes read, -EFAULT when the user buffer cannot be
 * written, or the negative error reported by mdev_access().
 */
static ssize_t czl_wdg_read(struct mdev_device *mdev, char __user *buf,
                            size_t count, loff_t *ppos)
{
	size_t done = 0;
	int ret;

	pr_info("%s line %d, read count 0x%zx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);
	while (count) {
		/* Zeroed so no stale stack bytes are ever logged or copied. */
		u32 val = 0;
		size_t chunk;

		if (count >= 4 && !(*ppos % 4))
			chunk = 4;
		else if (count >= 2 && !(*ppos % 2))
			chunk = 2;
		else
			chunk = 1;

		ret = mdev_access(mdev, (u8 *)&val, chunk, *ppos, false);
		if (ret <= 0) {
			if (!ret)
				ret = -EFAULT;
			goto read_err;
		}
		if (copy_to_user(buf, &val, chunk)) {
			ret = -EFAULT;
			goto read_err;
		}

		count -= chunk;
		done += chunk;
		*ppos += chunk;
		buf += chunk;
	}

	pr_info("%s line %d, read count 0x%zx.\n", __func__, __LINE__, done);
	return done;

read_err:
	pr_err("%s line %d, read err happend.\n", __func__, __LINE__);
	return ret;
}

/*
 * mdev "write" callback: copy @count bytes from user space to the region
 * addressed by *@ppos.
 *
 * The request is split into naturally aligned 4-, 2- or 1-byte accesses, each
 * forwarded to mdev_access(). *@ppos is advanced by the bytes transferred.
 *
 * Returns the number of bytes written, -EFAULT when the user buffer cannot be
 * read, or the negative error reported by mdev_access().
 */
static ssize_t czl_wdg_write(struct mdev_device *mdev, const char __user *buf,
                             size_t count, loff_t *ppos)
{
	size_t done = 0;
	int ret;

	pr_info("%s line %d, write count 0x%zx, pos 0x%llx.\n", __func__, __LINE__, count, *ppos);
	while (count) {
		/* Zeroed so bytes beyond @chunk are well defined. */
		u32 val = 0;
		size_t chunk;

		if (count >= 4 && !(*ppos % 4))
			chunk = 4;
		else if (count >= 2 && !(*ppos % 2))
			chunk = 2;
		else
			chunk = 1;

		if (copy_from_user(&val, buf, chunk)) {
			ret = -EFAULT;
			goto write_err;
		}
		ret = mdev_access(mdev, (u8 *)&val, chunk, *ppos, true);
		if (ret <= 0) {
			if (!ret)
				ret = -EFAULT;
			goto write_err;
		}

		count -= chunk;
		done += chunk;
		*ppos += chunk;
		buf += chunk;
	}

	pr_info("%s line %d, write count 0x%zx.\n", __func__, __LINE__, done);
	return done;

write_err:
	pr_err("%s line %d, write failure.\n", __func__, __LINE__);
	return ret;
}

/*
 * Fill @dev_info for VFIO_DEVICE_GET_INFO: marks the device as vfio-pci and
 * reports the standard vfio-pci region and IRQ counts. Always returns 0.
 */
static int wdg_get_device_info(struct mdev_device *mdev, struct vfio_device_info *dev_info)
{
	dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
	dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
	dev_info->num_irqs = VFIO_PCI_NUM_IRQS;

	return 0;
}

static int wdg_get_region_info(struct mdev_device *mdev, struct vfio_region_info *region_info)
{
	unsigned int size = 0;
	struct wdg_mdev_state *mstate;
	u32 bar_index;

	if (!mdev) {
		pr_err("%s line %d,mdev is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d,mstat is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	bar_index = region_info->index;
	if (bar_index >= VFIO_PCI_NUM_REGIONS) {
		pr_err("%s line %d,bar index %d exceeds.\n", __func__, __LINE__, bar_index);
		return -EINVAL;
	}

	mutex_lock(&mstate->ops_lock);
	switch (bar_index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		size = IO_CONF_SIZE;
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		size = IO_BAR0_SIZE;
		break;
	default:
		size = 0;
		break;
	}

	mstate->region_info[bar_index].size = size;
	mstate->region_info[bar_index].vfio_offset =
	        WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
	region_info->size = size;
	region_info->offset = WDG_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
	region_info->flags = VFIO_REGION_INFO_FLAG_READ |
	                     VFIO_REGION_INFO_FLAG_WRITE;

	mutex_unlock(&mstate->ops_lock);

	return 0;
}

static int wdg_get_irq_info(struct mdev_device *mdev, struct vfio_irq_info *irq_info)
{
	switch (irq_info->index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
	case VFIO_PCI_MSI_IRQ_INDEX:
	case VFIO_PCI_REQ_IRQ_INDEX:
		break;
	default:
		pr_err("%s line %d, irq idx %d is invalid.\n",
		       __func__, __LINE__, irq_info->index);
		return -EINVAL;
	}

	irq_info->flags = VFIO_IRQ_INFO_EVENTFD;
	irq_info->count = 1;
	if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)
		irq_info->flags |= (VFIO_IRQ_INFO_MASKABLE |
		                    VFIO_IRQ_INFO_AUTOMASKED);
	else
		irq_info->flags |= VFIO_IRQ_INFO_NORESIZE;

	return 0;
}

static long czl_wdg_ioctl(struct mdev_device *mdev, unsigned int cmd,
                          unsigned long arg)
{
	int ret = 0;
	unsigned long minsz;
	struct wdg_mdev_state *mstate;

	pr_info("czl wdg ioctl enter.\n");

	if (!mdev) {
		pr_err("%s line %d, mdev is null.\n", __func__, __LINE__);
		return -EINVAL;
	}

	mstate = mdev_get_drvdata(mdev);
	if (!mstate) {
		pr_err("%s line %d, cant find mstate data.\n", __func__, __LINE__);
		return -ENODEV;
	}

	switch (cmd) {
	case VFIO_DEVICE_GET_INFO: {
		struct vfio_device_info info;
		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if (info.argsz < minsz) {
			pr_err("%s line %d, info.argsz %d < minsz %ld.\n",
			       __func__, __LINE__, info.argsz, minsz);
			return -EINVAL;
		}

		ret = wdg_get_device_info(mdev, &info);
		if (ret) {
			pr_err("%s line %d, get device info failure.\n", __func__, __LINE__);
			return ret;
		}
		memcpy(&mstate->dev_info, &info, sizeof(info));
		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_GET_REGION_INFO: {
		struct vfio_region_info info;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if (info.argsz < minsz) {
			pr_err("%s line %d, info.argsz %d < minsz %ld.\n",
			       __func__, __LINE__, info.argsz, minsz);
			return -EINVAL;
		}

		ret = wdg_get_region_info(mdev, &info);
		if (ret) {
			pr_err("%s line %d, get region info failure.\n", __func__, __LINE__);
			return ret;
		}

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_GET_IRQ_INFO: {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);
		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;
		if ((info.argsz < minsz) ||
		    (info.index >= mstate->dev_info.num_irqs))
			return -EINVAL;
		ret = wdg_get_irq_info(mdev, &info);
		if (ret)
			return ret;
		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;
		return 0;
	}
	case VFIO_DEVICE_SET_IRQS: {
		pr_info("%s line %d, set irqs.\n", __func__, __LINE__);
		return 0;
	}
	case VFIO_DEVICE_RESET:
		pr_info("%s line %d, reset.\n", __func__, __LINE__);
		return 0;
	}

	return -EINVAL;
}

/*
 * Callbacks and sysfs groups handed to mdev_register_device() for the parent
 * device: instance lifecycle (create/remove), open/release, and the
 * read/write/ioctl handlers that emulate the PCI device.
 */
static const struct mdev_parent_ops wdg_mdev_fops = {
	.owner                  = THIS_MODULE,
	.dev_attr_groups        = wdg_dev_groups,
	.mdev_attr_groups       = mdev_dev_groups,
	.supported_type_groups  = mdev_type_groups,
	.create                 = czl_wdg_create,
	.remove                 = czl_wdg_remove,
	.open                   = czl_wdg_open,
	.release                = czl_wdg_close,
	.read                   = czl_wdg_read,
	.write                  = czl_wdg_write,
	.ioctl                  = czl_wdg_ioctl,
};

/*
 * Release callback of the parent device. The device is embedded in the static
 * czl_wdg object, so there is nothing to free; the call is only logged.
 */
static void wdg_device_release(struct device *dev)
{
	pr_info("czl wdg devide release.\n");
}

static int mdev_wdg_init(void)
{
	int ret = 0;

	pr_info("czl wdg init.\n");

	memset(&czl_wdg, 0x00, sizeof(czl_wdg));

	ret = alloc_chrdev_region(&czl_wdg.wdg_devt, 0, MINORMASK + 1, "czl_wdg");
	if (ret < 0) {
		pr_err("error: failed to register czl wdg device, err:%d\n", ret);
		return -1;
	}

	cdev_init(&czl_wdg.wdg_cdev, &czl_wdg_fops);
	cdev_add(&czl_wdg.wdg_cdev, czl_wdg.wdg_devt, MINORMASK + 1);

	pr_info("major_number:%d\n", MAJOR(czl_wdg.wdg_devt));

	czl_wdg.wdg_class = class_create(THIS_MODULE, "czl_wdg");
	if (IS_ERR(czl_wdg.wdg_class)) {
		pr_err("error: failed to create wdg class.\n");
		ret = -1;
		goto failed1;
	}

	czl_wdg.dev.class = czl_wdg.wdg_class;
	czl_wdg.dev.release = wdg_device_release;
	dev_set_name(&czl_wdg.dev, "%s", "czl_wdg");
	ret = device_register(&czl_wdg.dev);
	if (ret) {
		pr_err("%s line %d, register wdg device failure.\n", __func__, __LINE__);
		ret = -1;
		goto  failed2;
	}

	ret = mdev_register_device(&czl_wdg.dev, &wdg_mdev_fops);
	if (ret) {
		pr_err("%s line %d, register wdg mdev device failure.\n", __func__, __LINE__);
		ret = -1;
		goto  failed3;
	}

	mutex_init(&wdg_mdev_list_lock);
	INIT_LIST_HEAD(&wdg_mdev_devices_list);

	pr_info("czl wdg init success.\n");
	goto done;
failed3:
	device_unregister(&czl_wdg.dev);
failed2:
	class_destroy(czl_wdg.wdg_class);
failed1:
	cdev_del(&czl_wdg.wdg_cdev);
	unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
done:
	return ret;
}

/*
 * Module exit: undo mdev_wdg_init() in reverse order - unregister from the
 * mdev core, unregister the parent device, remove the cdev and its device
 * number region, then destroy the class.
 */
static void mdev_wdg_exit(void)
{
	/* NOTE(review): the reason for clearing ->bus before unregistering is not evident here. */
	czl_wdg.dev.bus = NULL;
	mdev_unregister_device(&czl_wdg.dev);
	device_unregister(&czl_wdg.dev);
	cdev_del(&czl_wdg.wdg_cdev);
	unregister_chrdev_region(czl_wdg.wdg_devt, MINORMASK + 1);
	class_destroy(czl_wdg.wdg_class);
	czl_wdg.wdg_class = NULL;

	pr_info("czl_wdg_unload.\n");
	return;
}

module_init(mdev_wdg_init)
module_exit(mdev_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine watchdog pci driver:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/cdev.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/uuid.h>
#include <linux/vfio.h>
#include <linux/iommu.h>
#include <linux/sysfs.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/mdev.h>
#include <linux/pci.h>
#include <linux/idr.h>

/* Char device major number returned by register_chrdev(). */
static int devno;
/* Maps a minor number to its struct wdg_pci_state; guarded by wdg_minors_lock. */
static DEFINE_IDR(wdg_minors);
static DEFINE_MUTEX(wdg_minors_lock);
/* Exclusive upper bound passed to idr_alloc() for minor numbers. */
#define WDG_MINORS_COUNT 256

/* Per-device state allocated in wdg_pci_probe(). */
struct wdg_pci_state {
	struct pci_dev *pdev;   /* bound PCI device (reference taken with pci_dev_get) */
	struct device *dev;     /* class device created for the /dev node */
	int iobase;             /* start of PCI resource 0, used as port base for inb/outb */
	int iolen;              /* length of PCI resource 0 */
	int major;              /* copy of devno at probe time */
	int minor;              /* minor number allocated from wdg_minors */
};

/* Device class under which the czl-wdg-N nodes are created. */
static struct class *wdg_class;
/* Matches the vendor/device id pair 0xbeef:0x1001. */
static const struct pci_device_id czl_pci_table[] = {
	{       PCI_DEVICE(0xbeef, 0x1001),       },
	{ 0,                                      }
};

static int czl_wdg_open(struct inode *inode, struct file *file)
{
	int rc = 0;
	int major, minor;

	major = imajor(inode);
	minor = iminor(inode);
	mutex_lock(&wdg_minors_lock);
	file->private_data = idr_find(&wdg_minors, minor);
	mutex_unlock(&wdg_minors_lock);
	if (!file->private_data) {
		pr_err("%s line %d, cant find wdg structure.\n",
		       __func__, __LINE__);
		rc = -1;
	}

	return rc;
}

/* release() handler: nothing to tear down, always returns 0. */
static int czl_wdg_release(struct inode *inode, struct file *file)
{
	return 0;
}

/*
 * read() handler: read bytes from the device's I/O port window with inb() and
 * copy them to user space.
 *
 * The transfer starts at *@ppos and is clamped to the end of the window. The
 * file position is left unchanged, as before, so repeated reads return the
 * same range.
 *
 * Returns the number of bytes read (0 at the end of the window), -ENODEV
 * without device state, -EINVAL for a position outside the window, -ENOMEM on
 * allocation failure or -EFAULT when the user buffer cannot be written.
 */
ssize_t czl_wdg_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
	struct wdg_pci_state *wdgdev = file->private_data;
	unsigned char *kbuf;
	size_t len, i;
	ssize_t ret;

	if (!wdgdev) {
		pr_err("%s line %d, read failure.\n", __func__, __LINE__);
		return -ENODEV;
	}

	if (*ppos < 0 || *ppos > wdgdev->iolen) {
		pr_err("%s line %d, read pos %lld exceed max io len %d.\n",
		       __func__, __LINE__, *ppos, wdgdev->iolen);
		return -EINVAL;
	}

	/* Never allocate more than the window can deliver. */
	len = min_t(size_t, size, wdgdev->iolen - *ppos);
	if (!len)
		return 0;

	/* kzalloc() takes the size first and the GFP flags second. */
	kbuf = kzalloc(len, GFP_KERNEL);
	if (!kbuf) {
		pr_err("%s line %d, alloc kbuf failure.\n",
		       __func__, __LINE__);
		return -ENOMEM;
	}

	for (i = 0; i < len; i++)
		kbuf[i] = inb(wdgdev->iobase + *ppos + i);

	ret = len;
	if (copy_to_user(buf, kbuf, len))
		ret = -EFAULT;

	kfree(kbuf);
	return ret;
}

/*
 * write() handler: copy bytes from user space and write them to the device's
 * I/O port window with outb().
 *
 * The transfer starts at *@ppos and is clamped to the end of the window. The
 * file position is left unchanged, as before.
 *
 * Returns the number of bytes written (0 at the end of the window), -ENODEV
 * without device state, -EINVAL for a position outside the window, -ENOMEM on
 * allocation failure or -EFAULT when the user buffer cannot be read.
 */
static ssize_t czl_wdg_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
	struct wdg_pci_state *wdgdev = file->private_data;
	unsigned char *kbuf;
	size_t len, i;
	ssize_t ret;

	if (!wdgdev) {
		pr_err("%s line %d, write failure.\n", __func__, __LINE__);
		return -ENODEV;
	}

	if (*ppos < 0 || *ppos > wdgdev->iolen) {
		pr_err("%s line %d, write pos %lld exceed max io len %d.\n",
		       __func__, __LINE__, *ppos, wdgdev->iolen);
		return -EINVAL;
	}

	/* Never allocate or copy more than the window can accept. */
	len = min_t(size_t, count, wdgdev->iolen - *ppos);
	if (!len)
		return 0;

	/* kzalloc() takes the size first and the GFP flags second. */
	kbuf = kzalloc(len, GFP_KERNEL);
	if (!kbuf) {
		pr_err("%s line %d, alloc kbuf failure.\n",
		       __func__, __LINE__);
		return -ENOMEM;
	}

	if (copy_from_user(kbuf, buf, len)) {
		ret = -EFAULT;
		goto out;
	}

	for (i = 0; i < len; i++)
		outb((u8)kbuf[i], wdgdev->iobase + *ppos + i);
	ret = len;

out:
	kfree(kbuf);
	return ret;
}

/* File operations of the /dev/czl-wdg-N character devices. */
static const struct file_operations czl_wdg_fops = {
	.owner          = THIS_MODULE,
	.open           = czl_wdg_open,
	.release        = czl_wdg_release,
	.read           = czl_wdg_read,
	.write          = czl_wdg_write,
};

/*
 * Class devnode callback: name the /dev node after the device and make it
 * readable and writable by everyone.
 *
 * The mode is 0666; the previous value 06666 additionally set the
 * setuid/setgid bits, which serve no purpose on a device node.
 * Returns a kasprintf()-allocated name, or NULL on allocation failure.
 */
static char *wdg_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}

static int wdg_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct wdg_pci_state *wdgdev = NULL;

	pr_info("%s line %d, wdg pci device & driver binding.\n", __func__, __LINE__);

	wdgdev = kzalloc(GFP_KERNEL, sizeof(*wdgdev));
	if (!wdgdev) {
		pr_err("%s line %d, fail to alloc buffer.\n",
		       __func__, __LINE__);
		goto err0;
	}

	wdgdev->major = devno;

	wdgdev->pdev = pci_dev_get(pdev);
	wdgdev->iobase = pci_resource_start(pdev, 0);
	wdgdev->iolen = pci_resource_len(pdev, 0);
	mutex_lock(&wdg_minors_lock);
	wdgdev->minor = idr_alloc(&wdg_minors, wdgdev, 0, WDG_MINORS_COUNT, GFP_KERNEL);
	mutex_unlock(&wdg_minors_lock);
	if (wdgdev->minor < 0) {
		pr_err("%s line %d, get minor failure from idr.\n", __func__, __LINE__);
		goto err1;
	}

	pr_info("%s line %d, major %d, minor %d, iobase 0x%x.\n", __func__, __LINE__,
	        devno, wdgdev->minor, wdgdev->iobase);
	wdgdev->dev = device_create(wdg_class, NULL, MKDEV(devno, wdgdev->minor),
	                            NULL, "czl-wdg-%d", wdgdev->minor);
	if (!wdgdev->dev || IS_ERR(wdgdev->dev)) {
		pr_err("%s line %d, create wdg device failure.\n",
		       __func__, __LINE__);
		goto err2;
	}

	pci_set_drvdata(pdev, wdgdev);
	return 0;
err2:
	idr_remove(&wdg_minors, wdgdev->minor);
err1:
	if (wdgdev) {
		kfree(wdgdev);
	}
err0:
	return -1;
}

static void wdg_pci_remove(struct pci_dev *pdev)
{
	struct wdg_pci_state *wdgdev;

	pr_info("%s line %d, wdg pci device & driver removing.\n", __func__, __LINE__);

	wdgdev = pci_get_drvdata(pdev);
	pci_set_drvdata(pdev, NULL);
	pci_dev_put(pdev);
	wdgdev->pdev = NULL;
	device_destroy(wdg_class, MKDEV(devno, wdgdev->minor));
	idr_remove(&wdg_minors, wdgdev->minor);
	kfree(wdgdev);

	return;
}

/* PCI driver bound to the devices listed in czl_pci_table. */
static struct pci_driver czl_wdg_driver = {
	.name           = "czl-mdev-wdg",
	.id_table       = czl_pci_table,
	.probe          = wdg_pci_probe,
	.remove         = wdg_pci_remove,
};
static int czl_wdg_init(void)
{
	int ret;

	wdg_class = class_create(THIS_MODULE, "czl-wdg");
	if (!wdg_class) {
		pr_err("%s line %d, create watchdog class failure.\n",
		       __func__, __LINE__);
		return -1;
	}

	wdg_class->devnode = wdg_devnode;

	devno = register_chrdev(0, "czl-wdg", &czl_wdg_fops);
	if (devno < 0) {
		pr_err("%s line %d, register wdg device chrno failure.\n",
		       __func__, __LINE__);
		class_destroy(wdg_class);
		return -1;
	}

	ret = pci_register_driver(&czl_wdg_driver);

	return ret;
}

/*
 * Module exit: unregister the PCI driver first, then release the character
 * device major, the class and the minor-number idr.
 */
static void czl_wdg_exit(void)
{
	pci_unregister_driver(&czl_wdg_driver);
	unregister_chrdev(devno, "czl-wdg");
	class_destroy(wdg_class);
	idr_destroy(&wdg_minors);
	return;
}

module_init(czl_wdg_init)
module_exit(czl_wdg_exit)
MODULE_LICENSE("GPL v2");

virtual machine user space test case:

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdarg.h>

/*
 * Hex-dump the first @len bytes of @buf to stdout, 16 bytes per row, each row
 * prefixed with its starting offset. A final newline is always printed.
 */
void dump_buf(unsigned char *buf, int len)
{
	int idx = 0;

	while (idx < len) {
		/* A new row starts at every multiple of 16. */
		if ((idx & 0xf) == 0)
			printf("\n0x%04x: ", idx);
		printf("0x%02x ", buf[idx]);
		idx++;
	}

	printf("\n");
}

/*
 * Test program: once per second read the 32-byte BAR0 window through
 * /dev/czl-wdg-0, dump it, then overwrite it with 0x5a.
 *
 * Returns -1 when the device cannot be opened or an I/O call fails; the
 * descriptor is closed on the way out.
 */
int main(void)
{
	int wdgfd;
	int status;
	int ret = 0;
	unsigned char buf[32];

	wdgfd = open("/dev/czl-wdg-0", O_RDWR);
	if (wdgfd < 0) {
		printf("%s line %d, open failure.\n",
		       __func__, __LINE__);
		return -1;
	}

	while (1) {
		memset(buf, 0x00, sizeof(buf));

		/* Always access the window from its start. */
		lseek(wdgfd, 0, SEEK_SET);
		status = read(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, read failure.\n",
			       __func__, __LINE__);
			ret = -1;
			break;
		}

		printf("%s line %d, read %d.\n", __func__, __LINE__, status);

		/* Dump only the bytes that were actually read. */
		dump_buf(buf, status);

		memset(buf, 0x5a, sizeof(buf));
		lseek(wdgfd, 0, SEEK_SET);
		status = write(wdgfd, buf, sizeof(buf));
		if (status < 0) {
			printf("%s line %d, write failure.\n",
			       __func__, __LINE__);
			ret = -1;
			break;
		}
		printf("%s line %d, write %d.\n", __func__, __LINE__, status);

		sleep(1);
	}

	close(wdgfd);
	return ret;
}

测试过程:

1.安装WDG MDEV驱动:

sudo insmod czl-mdev-wdg.ko

2.创建mdev设备

创建两个mdev设备

echo "f422fd86-35c0-11ef-8e50-9342c1138a56" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-1/create
echo "c04de378-35d8-11ef-95c3-339660dfc874" > /sys/devices/virtual/czl_wdg/czl_wdg/mdev_supported_types/czl_wdg-2/create

3.将第二步创建的mdev设备透传给QEMU虚拟机启动:

qemu-system-x86_64 -m 4096 -smp 4 --enable-kvm -drive file=/home/zlcao/Workspace/iso/ps.img -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/f422fd86-35c0-11ef-8e50-9342c1138a56 -device vfio-pci,sysfsdev=/sys/bus/mdev/devices/c04de378-35d8-11ef-95c3-339660dfc874

系统启动后,可以看到虚拟机环境下出现了透传的MDEV PCI设备,设备vendor/device id为0xbeef1001,符合代码设定。

4.虚拟机内安装WDG PCI设备驱动:

上图中可以看到,两个透传的MDEV设备已经和一个名为"serial"的PCI设备驱动绑定,这并不符合预期,需要将默认的"serial"驱动和MDEV设备解绑,在QEMU虚拟机控制台中输入如下命令解绑驱动:

echo -n 0000:00:04.0 > /sys/bus/pci/drivers/serial/unbind
echo -n 0000:00:05.0 > /sys/bus/pci/drivers/serial/unbind

之后就可以安装我们的WDG PCI驱动了:

sudo insmod czl-mdev-drv.ko

安装成功后,虚拟机设备目录下出现了WDG PCI的设备节点:

此时,两个MDEV PCI设备也显示绑定到了正确的驱动:

5.运行测试用例,读写WDG PCI设备的BAR0地址空间:

此时可以看到,虚拟机中对WDG设备BAR0空间的读写调用被“透传”到了HOST机的MDEV PCI设备驱动上,可以基于对BAR0空间的回调实现我们的业务逻辑。


结束

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/1876977.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

【Java】面试必问之Java常见线上故障排查方案详解

一、问题解析 在软件开发过程中,排查和修复产线问题是每一位工程师都需要掌握的基本技能。但是在生产环境中,程序代码、硬件、网络、协作软件等任一因素,都会引发意想不到的问题,所以排查产线问题比较困难,所以问…

关于数据库的ACID几点

首先的话就是关于ACID,最重要的就是原子性了,这是基础。 原子性是指事务包含的所有操作,要么全部完成,要么全部不完成。如果不能保证原子性,可能会出现以下问题: 数据不一致:事务中的部分操作…

QT事件处理及实例(鼠标事件、键盘事件、事件过滤)

这篇文章通过鼠标事件、键盘事件和事件过滤的三个实例介绍事件处理的实现。 鼠标事件及实例 鼠标事件包括鼠标的移动、按下、松开、单击和双击等。 创建一个MouseEvent项目&#xff0c;通过项目介绍如何获得和处理鼠标事件。程序效果如下图所示。 界面布局代码如下&#xff…

【算法训练记录——Day36】

Day36——贪心Ⅳ 1.leetcode_452用最少数量的箭引爆气球2.leetcode_435无重叠区间3.leetcode_763划分字母区间4.leetcode_ 1.leetcode_452用最少数量的箭引爆气球 思路&#xff1a;看了眼题解&#xff0c;局部最优&#xff1a;当气球出现重叠&#xff0c;一起射&#xff0c;所用…

【工具推荐】Nuclei

文章目录 NucleiLinux安装方式Kali安装Windows安装 Nuclei Nuclei 是一款注重于可配置性、可扩展性和易用性的基于模板的快速漏洞验证工具。它使用 Go 语言开发&#xff0c;具有强大的可配置性、可扩展性&#xff0c;并且易于使用。Nuclei 的核心是利用模板&#xff08;表示为简…

多机调度问题

#include<iostream> #include<string> using namespace std; struct work {int time;int number; }; int setwork0(int m,int n,int a[],struct work w[]) {int maxtime0;for(int i1; i<m; i){cout<<i<<"号设备处理作业"<<w[i].num…

AD9026芯片开发实录5-ADRV9026 - FAQ

1. What information should I provide to help speed resolution of my issue?  Please provide as much detail as possible including all of the detail described in the table below 2. What are the key specifications of ADRV9026 chip?  The ADRV9026 is a 4…

kafka学习笔记08

Springboot项目整合spring-kafka依赖包配置 有这种方式&#xff0c;就是可以是把之前test里的配置在这写上&#xff0c;用Bean注解上。 现在来介绍第二种方式&#xff1a; 1.添加kafka依赖&#xff1a; 2.添加kafka配置方式: 编写代码发送消息&#xff1a; 测试&#xff1a; …

ROS2自定义接口Python实现机器人移动

1.创建机器人节点接口 cd chapt3_ws/ ros2 pkg create example_interfaces_rclpy --build-type ament_python --dependencies rclpy example_ros2_interfaces --destination-directory src --node-name example_interfaces_robot_02 --maintainer-name "Joe Chen" …

【STM32】在标准库中使用定时器

1.TIM简介 STM32F407系列控制器有2个高级控制定时器、10个通用定时器和2个基本定时器。通常情况下&#xff0c;先看定时器挂在哪个总线上APB1或者APB2&#xff0c;然后定时器时钟需要在此基础上乘以2。 2.标准库实现定时中断 #ifndef __BSP_TIMER_H #define __BSP_TIMER_H#if…

计算机基础知识——C基础+C指针+char类型

指针 这里讲的很细 https://blog.csdn.net/weixin_43624626/article/details/130715839 内存地址&#xff1a;内存中每个字节单位都有一个编号&#xff08;一般用十六进制表示&#xff09; 存储类型 数据类型 *指针变量名&#xff1b;int *p; //定义了一个指针变量p,指向的数…

自研Eclipse插件的生成及安装和使用

说明&#xff1a; 本处是使用个人自研的Eclipse插件为例&#xff0c;创建了一个菜单式的插件组&#xff0c;插件组下&#xff0c;有一个生成右击Jakarta EE服务端点类后&#xff0c;生成端点对应的Restful客户端。有什么问题&#xff0c;欢迎大家交流&#xff01;&#xff01;…

仓库管理系统11--物资设置

1、添加用户控件 <UserControl x:Class"West.StoreMgr.View.GoodsTypeView"xmlns"http://schemas.microsoft.com/winfx/2006/xaml/presentation"xmlns:x"http://schemas.microsoft.com/winfx/2006/xaml"xmlns:mc"http://schemas.openxm…

[小试牛刀-习题练]《计算机组成原理》之计算机系统概述【详解过程】

【计算机系统概述】 1、【冯诺伊曼结构】计算机中数据采用二进制编码表示&#xff0c;其主要原因是&#xff08;D&#xff09; I、二进制运算规则简单II、制造两个稳态的物理器件较为容易III、便于逻辑门电路实现算术运算 A.仅I、Ⅱ B.仅I、Ⅲ C.仅Ⅱ、Ⅲ D. I、Ⅱ、Ⅲ I…

redis 单节点数据如何平滑迁移到集群中

目的 如何把一个redis单节点的数据迁移到 redis集群中 方案&#xff1a; 使用命令redis-cli --cluster import 导入数据至集群 --cluster-from <arg>--cluster-from-user <arg> 数据源用户--cluster-from-pass <arg> 数据源密码--cluster-from-askpass--c…

Linux开发讲课20--- QSPI

SPI 是英语 Serial Peripheral Interface 的缩写,顾名思义就是串行外围设备接口,一种高速的,全双工,同步的通信总线,并且在芯片的管脚上只占用四根线,节约了芯片的管脚,为 PCB 的布局上节省空间…

《SpringBoot+Vue》Chapter04 SpringBoot整合Web开发

返回JSON数据 默认实现 依赖 <dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-web</artifactId></dependency>在springboot web依赖中加入了jackson-databind作为JSON处理器 创建一个实体类对象…

为什么Modbus链接/从机不通?From 摩尔信使MThings

为了回应用户平日里关于摩尔信使&#xff08;MThings&#xff09;使用过程中最常见的问题&#xff0c;包括“网络链接连不上”、“为什么不能增加串口”和“为什么从机不通”&#xff0c;我们在此统一介绍解决方法。 1、具备哪些通信能力 支持串口和网络两种通信方式。 需要…

我们后端程序员不是操作MyBatis的CRUD Boy

大家好&#xff0c;我是南哥。 一个对Java程序员进阶成长颇有研究的人&#xff0c;今天我们接着新的一篇Java进阶指南。 为啥都戏称后端是CRUD Boy&#xff1f;难道就因为天天怼着数据库CRUD吗&#xff1f;要我说&#xff0c;是这个岗位的位置要的就是你CRUD&#xff0c;你不…

FastAPI教程II

本文参考FastAPI教程https://fastapi.tiangolo.com/zh/tutorial Cookie参数 定义Cookie参数与定义Query和Path参数一样。 具体步骤如下&#xff1a; 导入Cookie&#xff1a;from fastapi import Cookie声明Cookie参数&#xff0c;声明Cookie参数的方式与声明Query和Path参数…