前言
CXL 是一项比较新的技术,所以我研究的内核源码选用了当前比较新的版本 Linux 6.0。打算把内核中与 CXL 相关的驱动逐一解析一遍,一步一步慢慢来。
在阅读之前,希望读者能有一定的 PCIe 基础知识。由于精力有限,无法把所有知识点都讲得很详细,需要一定的基础才能理解;同时,希望在学习的过程中,手边能有 PCIe 5.0 Spec 以及 CXL 2.0 Spec,以便随时查看。当然,我也会尽量把重点的部分截图放在博文中。
最后,如果有问题请留言讨论。
Ref
《PCI_Express_Base_5.0r1.0》
《CXL Specification_rev2p0_ver1p0_2020Oct26》
正文
涉及的部分寄存器,Ref CXL 2.0 Spec :
1. CXL Subsystem Component Register Ranges
2. CXL.cache and CXL.mem Architectural Register Header
3. CXL HDM Decoder Capability Header
4. CXL HDM Decoder Capability Structure
// linux-6.0\drivers\cxl\core\regs.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include <cxlpci.h>
/**
* DOC: cxl registers
*
* CXL device capabilities are enumerated by PCI DVSEC (Designated
* Vendor-specific) and / or descriptors provided by platform firmware.
* They can be defined as a set like the device and component registers
* mandated by CXL Section 8.1.12.2 Memory Device PCIe Capabilities and
* Extended Capabilities, or they can be individual capabilities
* appended to bridged and endpoint devices.
*
* Provide common infrastructure for enumerating and mapping these
* discrete capabilities.
*/
/**
 * cxl_probe_component_regs() - Detect CXL Component register blocks
 * @dev: Host device of the @base mapping
 * @base: Mapping containing the HDM Decoder Capability Header
 * @map: Map object describing the register block information found
 *
 * See CXL 2.0 8.2.4 Component Register Layout and Definition
 * See CXL 2.0 8.2.5.5 CXL Device Register Interface
 *
 * Probe for component register information and return it in map object.
 *
 * @base is the already-mapped (virtual) start of the component register
 * block. @map is zeroed before probing, so map->hdm_decoder.valid stays
 * false when the capability array header is not recognised or when no
 * HDM decoder capability is listed. Only the HDM decoder capability is
 * recorded; every other capability ID is merely logged.
 *
 * Related spec sections quoted by the original annotation: CXL 2.0 8.1.9
 * Register Locator DVSEC and 8.2.5 CXL.cache and CXL.mem Registers.
 */
void cxl_probe_component_regs(struct device *dev, void __iomem *base,
			      struct cxl_component_reg_map *map)
{
	u32 array_hdr;
	int nr_caps, idx;

	*map = (struct cxl_component_reg_map) { 0 };

	/*
	 * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in
	 * CXL 2.0 8.2.4 Table 141; CXL_CM_OFFSET moves @base to that range.
	 */
	base += CXL_CM_OFFSET;

	/*
	 * The first dword is the CXL Capability Header Register (CXL 2.0
	 * 8.2.5.1). Its capability ID field must equal CM_CAP_HDR_CAP_ID,
	 * otherwise this is not a CXL.cache/CXL.mem capability array.
	 */
	array_hdr = readl(base + CXL_CM_CAP_HDR_OFFSET);
	if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, array_hdr) != CM_CAP_HDR_CAP_ID) {
		dev_err(dev,
			"Couldn't locate the CXL.cache and CXL.mem capability array header.\n");
		return;
	}

	/* It's assumed that future versions will be backward compatible */

	/* Array_Size field: number of capability headers that follow. */
	nr_caps = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, array_hdr);

	/*
	 * Each capability header is one dword. Index 0 is the array header
	 * read above, so the walk starts at index 1.
	 */
	for (idx = 1; idx <= nr_caps; idx++) {
		u32 entry = readl(base + idx * 0x4);
		/* Capability ID and the capability's offset from @base. */
		u16 id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, entry);
		u16 ptr = FIELD_GET(CXL_CM_CAP_PTR_MASK, entry);
		int nr_decoders;
		u32 span;

		/* Only the HDM decoder capability is of interest here. */
		if (id != CXL_CM_CAP_CAP_ID_HDM) {
			dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", id,
				ptr);
			continue;
		}

		dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
			ptr);

		/*
		 * The first dword of the HDM Decoder Capability Structure
		 * (CXL 2.0 8.2.5.12) is handed to cxl_hdm_decoder_count()
		 * to obtain the decoder count.
		 */
		nr_decoders = cxl_hdm_decoder_count(readl(base + ptr));

		/*
		 * 0x10 bytes plus 0x20 bytes per decoder -- presumably the
		 * common registers followed by one register set per
		 * decoder; confirm against the spec section above.
		 */
		span = 0x20 * nr_decoders + 0x10;

		/* Offset is recorded relative to the unadjusted @base. */
		map->hdm_decoder.valid = true;
		map->hdm_decoder.offset = CXL_CM_OFFSET + ptr;
		map->hdm_decoder.size = span;
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_component_regs, CXL);
5. CXL Device Capability Header Register
/**
 * cxl_probe_device_regs() - Detect CXL Device register blocks
 * @dev: Host device of the @base mapping
 * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface
 * @map: Map object describing the register block information found
 *
 * Probe for device register information and return it in map object.
 *
 * @base is the already-mapped (virtual) start of the device register
 * block. @map is zeroed first; for each recognised capability the
 * offset, size and a valid flag are recorded. If the capability array
 * ID does not match, the function returns silently with @map all zero.
 */
void cxl_probe_device_regs(struct device *dev, void __iomem *base,
			   struct cxl_device_reg_map *map)
{
	u64 array_reg;
	int nr_caps, idx;

	*map = (struct cxl_device_reg_map){ 0 };

	/*
	 * CXL Device Capabilities Array Register (CXL 2.0 8.2.8.1), located
	 * at CXLDEV_CAP_ARRAY_OFFSET from @base. Its capability ID must be
	 * CXLDEV_CAP_ARRAY_CAP_ID.
	 */
	array_reg = readq(base + CXLDEV_CAP_ARRAY_OFFSET);
	if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, array_reg) !=
	    CXLDEV_CAP_ARRAY_CAP_ID)
		return;

	/* Number of capability headers that follow the array register. */
	nr_caps = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, array_reg);

	/*
	 * Every slot is 0x10 bytes wide. Slot 0 holds the array register
	 * read above, so capability headers start at slot 1.
	 */
	for (idx = 1; idx <= nr_caps; idx++) {
		/*
		 * CXL Device Capability Header Register (CXL 2.0 8.2.8.2):
		 * capability ID at +0x0, offset at +0x4, length at +0x8.
		 */
		void __iomem *entry = base + idx * 0x10;
		u16 id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK, readl(entry));
		u32 blk_off = readl(entry + 0x4);
		u32 blk_len = readl(entry + 0x8);

		/*
		 * Record offset/size/valid for the capabilities the driver
		 * maps later (CXL 2.0 8.2.8.2.1); everything else is only
		 * logged.
		 */
		switch (id) {
		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
			dev_dbg(dev, "found Status capability (0x%x)\n",
				blk_off);
			map->status.valid = true;
			map->status.offset = blk_off;
			map->status.size = blk_len;
			break;
		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
			dev_dbg(dev, "found Mailbox capability (0x%x)\n",
				blk_off);
			map->mbox.valid = true;
			map->mbox.offset = blk_off;
			map->mbox.size = blk_len;
			break;
		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
			/* Reported but not recorded in @map. */
			dev_dbg(dev,
				"found Secondary Mailbox capability (0x%x)\n",
				blk_off);
			break;
		case CXLDEV_CAP_CAP_ID_MEMDEV:
			dev_dbg(dev,
				"found Memory Device capability (0x%x)\n",
				blk_off);
			map->memdev.valid = true;
			map->memdev.offset = blk_off;
			map->memdev.size = blk_len;
			break;
		default:
			/* IDs from 0x8000 upward are logged as vendor IDs. */
			if (id >= 0x8000)
				dev_dbg(dev,
					"Vendor cap ID: %#x offset: %#x\n",
					id, blk_off);
			else
				dev_dbg(dev,
					"Unknown cap ID: %#x offset: %#x\n",
					id, blk_off);
			break;
		}
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
/*
 * devm_cxl_iomap_block() - claim and map a register block
 * @dev: device the region is requested and mapped on behalf of
 * @addr: start address of the block (PCI bus / physical address)
 * @length: size of the block in bytes
 *
 * Requests the memory region [@addr, @addr + @length - 1] under the name
 * of @dev and then ioremaps it, both through the devm_* helpers.
 *
 * Return: the mapped virtual base address, or NULL if either the region
 * request or the ioremap fails (an error is logged in both cases).
 */
void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
				   resource_size_t length)
{
	struct resource *region;
	resource_size_t last;
	void __iomem *vaddr;

	/* Claim the range first; this fails if it cannot be reserved. */
	region = devm_request_mem_region(dev, addr, length, dev_name(dev));
	if (region) {
		/* Range is ours: create the I/O mapping. */
		vaddr = devm_ioremap(dev, addr, length);
		if (!vaddr)
			dev_err(dev, "Failed to map region %pr\n", region);

		return vaddr;
	}

	/* Request failed: report the inclusive range that was asked for. */
	last = addr + length - 1;
	dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &last);

	return NULL;
}
/*
 * cxl_map_component_regs() - map the HDM decoder register block
 * @pdev: PCI device that owns the BAR
 * @regs: receives the mapped virtual address in regs->hdm_decoder
 * @map: register block location (BAR number, block offset) together with
 *       the HDM decoder offset/size stored in map->component_map
 *
 * NOTE(review): map->component_map.hdm_decoder.valid is not checked here,
 * unlike the per-block checks in cxl_map_device_regs().
 *
 * Return: 0 on success, -ENOMEM if the block could not be mapped.
 */
int cxl_map_component_regs(struct pci_dev *pdev,
			   struct cxl_component_regs *regs,
			   struct cxl_register_map *map)
{
	/* Start of the BAR that holds the register block. */
	resource_size_t hdm_base = pci_resource_start(pdev, map->barno);

	/* Step to the register block, then to the HDM decoder registers. */
	hdm_base += map->block_offset;
	hdm_base += map->component_map.hdm_decoder.offset;

	regs->hdm_decoder =
		devm_cxl_iomap_block(&pdev->dev, hdm_base,
				     map->component_map.hdm_decoder.size);

	return regs->hdm_decoder ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
/*
 * cxl_map_device_regs() - map the device register blocks
 * @pdev: PCI device that owns the BAR
 * @regs: receives the mapped virtual addresses (status, mbox, memdev)
 * @map: register block location (BAR number, block offset) together with
 *       the per-capability offset/size/valid data in map->device_map
 *
 * Each of the status, mailbox and memory-device blocks is mapped only if
 * its valid flag is set in map->device_map. Mapping stops at the first
 * failure; blocks mapped before the failure are not undone here.
 *
 * Return: 0 on success, -ENOMEM if any requested block fails to map.
 */
int cxl_map_device_regs(struct pci_dev *pdev,
			struct cxl_device_regs *regs,
			struct cxl_register_map *map)
{
	struct device *host = &pdev->dev;
	/* BAR start plus the offset of the register block inside it. */
	resource_size_t blk_base = pci_resource_start(pdev, map->barno);

	blk_base += map->block_offset;

	/* Device status registers. */
	if (map->device_map.status.valid) {
		regs->status = devm_cxl_iomap_block(host,
				blk_base + map->device_map.status.offset,
				map->device_map.status.size);
		if (!regs->status)
			return -ENOMEM;
	}

	/* Primary mailbox registers. */
	if (map->device_map.mbox.valid) {
		regs->mbox = devm_cxl_iomap_block(host,
				blk_base + map->device_map.mbox.offset,
				map->device_map.mbox.size);
		if (!regs->mbox)
			return -ENOMEM;
	}

	/* Memory device registers. */
	if (map->device_map.memdev.valid) {
		regs->memdev = devm_cxl_iomap_block(host,
				blk_base + map->device_map.memdev.offset,
				map->device_map.memdev.size);
		if (!regs->memdev)
			return -ENOMEM;
	}

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
6. Register Offset Low
/*
 * cxl_decode_regblock() - decode one Register Locator DVSEC entry
 * @reg_lo: Register Offset Low dword of the entry
 * @reg_hi: Register Offset High dword of the entry
 * @map: receives the decoded block offset, BAR number and register type
 *
 * See CXL 2.0 8.1.9.1 Register Offset Low.
 */
static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
				struct cxl_register_map *map)
{
	/* The high dword supplies bits 63:32 of the block offset. */
	u64 off_hi = (u64)reg_hi << 32;
	/* The low dword contributes only the bits selected by the mask. */
	u64 off_lo = reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK;

	map->block_offset = off_hi | off_lo;

	/* BAR indicator: which BAR the register block lives in. */
	map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);

	/* Register block identifier, i.e. the type of the block. */
	map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
}
7. Register Locator DVSEC - Header
8. Register Locator DVSEC with 3 Register Block Entries
/**
 * cxl_find_regblock() - Locate register blocks by type
 * @pdev: The CXL PCI device to enumerate.
 * @type: Register Block Indicator id
 * @map: Enumeration output, clobbered on error
 *
 * Return: 0 if register block enumerated, negative error code otherwise
 *
 * A CXL DVSEC may point to one or more register blocks, search for them
 * by @type.
 *
 * -ENXIO is returned when the device has no Register Locator DVSEC and
 * -ENODEV when the DVSEC has no entry of the requested @type. In both
 * cases map->block_offset is left at U64_MAX.
 */
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
		      struct cxl_register_map *map)
{
	u32 regloc_size, regblocks;
	int regloc, i;

	map->block_offset = U64_MAX;

	/*
	 * Find the DVSEC whose vendor ID is PCI_DVSEC_VENDOR_ID_CXL and
	 * whose DVSEC ID is CXL_DVSEC_REG_LOCATOR (see CXL 2.0 Table 124,
	 * CXL DVSEC ID Assignment). A return of 0 means it is absent.
	 */
	regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
					   CXL_DVSEC_REG_LOCATOR);
	if (!regloc)
		return -ENXIO;

	/*
	 * Read DVSEC header 1 and extract the DVSEC Length field, i.e. the
	 * size of the whole Register Locator DVSEC (CXL 2.0 Table 131).
	 */
	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);

	/*
	 * Register block entries start CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET
	 * bytes into the DVSEC and take 8 bytes each (Register Offset Low
	 * followed by Register Offset High), so the remaining length divided
	 * by 8 gives the number of entries.
	 *
	 * NOTE(review): a DVSEC length smaller than
	 * CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET would make this unsigned
	 * subtraction wrap; that case is not guarded against here.
	 */
	regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET;
	regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8;

	/* Decode each entry into @map; stop at the first one of @type. */
	for (i = 0; i < regblocks; i++, regloc += 8) {
		u32 reg_lo, reg_hi;

		pci_read_config_dword(pdev, regloc, &reg_lo);
		pci_read_config_dword(pdev, regloc + 4, &reg_hi);

		cxl_decode_regblock(reg_lo, reg_hi, map);

		if (map->reg_type == type)
			return 0;
	}

	/* No match: invalidate the offset left behind by the last decode. */
	map->block_offset = U64_MAX;
	return -ENODEV;
}
EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);