一. Linux DMA简介
我们知道DMA是Direct Memory Access, 不需要CPU的参与,也可以直接访问内存中的数据。
CPU 虚拟地址:内核空间由于有MMU内存管理,正常使用的是虚拟地址
CPU物理地址:virtual memory system (TLB, page tables, etc) 会将CPU虚拟地址转换为物理地址。
总线地址: IO设备主要使用的是总线地址,如果一个设备在MMIO地址上有寄存器,或者如果它执行DMA来读取或写入系统存储器,那么该设备使用的地址就是总线地址。在一些系统中,总线地址与CPU物理地址相同,但通常情况下并非如此。IOMMU和主机桥可以在物理地址和总线地址之间产生任意映射。
在一些简单的系统中,设备可以直接对物理地址Y进行DMA。但在许多其他系统中,有IOMMU硬件可以将DMA地址转换为物理地址,例如,它将Z转换为Y。这也是需要DMA API的部分原因:驱动程序可以向dma_map_single()等接口提供虚拟地址X,该接口会设置好所需的IOMMU映射并返回DMA地址Z。然后驱动程序告诉设备对Z进行DMA,IOMMU再将其映射到系统RAM中地址Y处的缓冲区。
为了让Linux能够使用动态DMA映射,它需要来自驱动程序的一些帮助,也就是说,它必须考虑到DMA地址应该只在实际使用时映射,并且在DMA传输后取消映射。
二. UFS DMA 概述
UFS作为块设备,读写操作时的数据传输非常耗时,如果一直需要CPU参与,势必会影响系统整体的调度,所以UFS子系统也引入了DMA机制:
(1)在数据准备阶段CPU参与,
(2)数据传输阶段CPU不参与,将控制权移交给DMA处理,
(3)数据传输处理完成后,DMA返还控制权给CPU。
三. UFS DMA 详解
UFS作为块设备传输,非常占用CPU的时间,为了解放CPU,UFS数据传输采用DMA的方式。
1 . 设置DMA的寻址能力
目前主流平台都是ARM64架构的64位系统,所以当控制器支持64位寻址(capabilities中带有MASK_64_ADDRESSING_SUPPORT)时,优先将DMA寻址能力设置为64位,以最大化DMA的寻址能力;如果控制器不支持或64位设置失败,则回退到32位寻址。
ufshcd_set_dma_mask
/**
 * ufshcd_set_dma_mask - Set dma mask based on the controller
 *			 addressing capability
 * @hba: per adapter instance
 *
 * A 64-bit mask is attempted only when the controller capabilities carry
 * MASK_64_ADDRESSING_SUPPORT. If that bit is absent, or the 64-bit request
 * is rejected, the function falls back to requesting a 32-bit mask.
 *
 * Returns 0 for success, non-zero for failure
 */
static int ufshcd_set_dma_mask(struct ufs_hba *hba)
{
	/* Preferred path: controller advertises 64-bit addressing. */
	if ((hba->capabilities & MASK_64_ADDRESSING_SUPPORT) &&
	    dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(64)) == 0)
		return 0;

	/* Fallback: the result of the 32-bit request is the final verdict. */
	return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32));
}
2 .分配UFS Host 数据结构的DMA内存
主要包括UCD(Command UPIU, Response UPIU, PRDT), UTP Transfer Request Descriptor List(UTRDL), UTP Task Management Request Descriptor List(UTMRDL)
(1) UTP Command Descriptor Base Address: 128Byte内存对齐
(2) UTP Transfer Request List Base Addr: 1KB内存对齐
/**
 * ufshcd_memory_alloc - allocate memory for host memory space data structures
 * @hba: per adapter instance
 *
 * Allocation order:
 * 1. DMA memory for the Command Descriptor array (one descriptor per
 *    transfer request slot; each holds Command UPIU, Response UPIU and PRDT).
 * 2. DMA memory for the UTP Transfer Request Descriptor List (UTRDL).
 * 3. DMA memory for the UTP Task Management Request Descriptor List
 *    (UTMRDL), unless hba->utmrdl_base_addr is already set.
 * 4. Memory for the local reference block (lrb) array.
 *
 * Each DMA allocation is also checked for the alignment UFSHCI requires;
 * a misaligned DMA address triggers WARN_ON and is treated as a failure.
 *
 * Returns 0 for success, -ENOMEM in case of failure
 */
static int ufshcd_memory_alloc(struct ufs_hba *hba)
{
	size_t ucd_bytes, utrd_bytes;

	/* Allocate memory for UTP command descriptors */
	ucd_bytes = ufshcd_get_ucd_size(hba) * hba->nutrs;
	hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucd_bytes,
						  &hba->ucdl_dma_addr,
						  GFP_KERNEL);

	/* UFSHCI requires UTP command descriptor to be 128 byte aligned. */
	if (!hba->ucdl_base_addr ||
	    WARN_ON(hba->ucdl_dma_addr & (128 - 1))) {
		dev_err(hba->dev,
			"Command Descriptor Memory allocation failed\n");
		return -ENOMEM;
	}

	/*
	 * Allocate memory for UTP Transfer descriptors
	 * UFSHCI requires 1KB alignment of UTRD
	 */
	utrd_bytes = (sizeof(struct utp_transfer_req_desc) * hba->nutrs);
	hba->utrdl_base_addr = dmam_alloc_coherent(hba->dev, utrd_bytes,
						   &hba->utrdl_dma_addr,
						   GFP_KERNEL);
	if (!hba->utrdl_base_addr ||
	    WARN_ON(hba->utrdl_dma_addr & (SZ_1K - 1))) {
		dev_err(hba->dev,
			"Transfer Descriptor Memory allocation failed\n");
		return -ENOMEM;
	}

	/*
	 * The UTMRDL may have been allocated during a first pass and not
	 * released during MCQ memory allocation, so only allocate it when
	 * it is not there yet.
	 * See ufshcd_release_sdb_queue() and ufshcd_config_mcq()
	 */
	if (!hba->utmrdl_base_addr) {
		size_t utmrd_bytes;

		/*
		 * Allocate memory for UTP Task Management descriptors
		 * UFSHCI requires 1KB alignment of UTMRD
		 */
		utmrd_bytes = sizeof(struct utp_task_req_desc) * hba->nutmrs;
		hba->utmrdl_base_addr = dmam_alloc_coherent(hba->dev,
							    utmrd_bytes,
							    &hba->utmrdl_dma_addr,
							    GFP_KERNEL);
		if (!hba->utmrdl_base_addr ||
		    WARN_ON(hba->utmrdl_dma_addr & (SZ_1K - 1))) {
			dev_err(hba->dev,
				"Task Management Descriptor Memory allocation failed\n");
			return -ENOMEM;
		}
	}

	/* Allocate memory for local reference block */
	hba->lrb = devm_kcalloc(hba->dev,
				hba->nutrs, sizeof(struct ufshcd_lrb),
				GFP_KERNEL);
	if (!hba->lrb) {
		dev_err(hba->dev, "LRB Memory allocation failed\n");
		return -ENOMEM;
	}

	return 0;
}
(3)dmam_alloc_coherent :dma_alloc_coherent的设备资源托管(devres)版本,驱动卸载时内存会自动释放。它从CMA分配内存:如果DTS定义了UFS Dev的CMA内存,就从UFS Dev的CMA内存分配;如果DTS没有定义,就从默认的CMA内存区域分配。默认区域在开机启动的时候就设置好了,区域大小一般是16M,可以通过相关的CONFIG来修改。
3. 配置UFS Host 内存空间
(1) UTP Transfer Request Descriptor:
Note:
UTRD contains a pointer for a data structure called UTP Command Descriptor (UCD)。
说明UCD存放的是具体的Command Descriptor数据,包括UTP Command UPIU, UTP Response UPIU, PRDT(可选)。
UTRD: UTP Transfer Request Descriptor
UTRD中的DW4/DW5保存UCD首地址(低32位/高32位),该地址也就是UCD中Command UPIU的地址
UTRD中的DW6保存Response UPIU的长度以及它相对UCD首地址的偏移,用于定位UCD中的Response UPIU
UTRD中的DW7保存PRDT的长度以及它相对UCD首地址的偏移,用于定位UCD中的PRDT
(2) UTP Command Descriptor:
(3)
/**
* ufshcd_host_memory_configure - configure local reference block with
* memory offsets
* @hba: per adapter instance
*
* Configure Host memory space
* 1. Update Corresponding UTRD.UCDBA and UTRD.UCDBAU with UCD DMA
* address.
* 2. Update each UTRD with Response UPIU offset, Response UPIU length
* and PRDT offset.
* 3. Save the corresponding addresses of UTRD, UCD.CMD, UCD.RSP and UCD.PRDT
* into local reference block.
*/
static void ufshcd_host_memory_configure(struct ufs_hba *hba)
{
struct utp_transfer_cmd_desc *cmd_descp;
struct utp_transfer_req_desc *utrdlp; // UTRD structure
dma_addr_t cmd_desc_dma_addr;
dma_addr_t cmd_desc_element_addr;
u16 response_offset;
u16 prdt_offset;
int cmd_desc_size;
int i;
utrdlp = hba->utrdl_base_addr;
cmd_descp = hba->ucdl_base_addr;
response_offset =
offsetof(struct utp_transfer_cmd_desc, response_upiu);
prdt_offset =
offsetof(struct utp_transfer_cmd_desc, prd_table);
cmd_desc_size = sizeof(struct utp_transfer_cmd_desc);
cmd_desc_dma_addr = hba->ucdl_dma_addr;
for (i = 0; i < hba->nutrs; i++) {
/* Configure UTRD with command descriptor base address */
cmd_desc_element_addr =
(cmd_desc_dma_addr + (cmd_desc_size * i));
utrdlp[i].command_desc_base_addr_lo =
cpu_to_le32(lower_32_bits(cmd_desc_element_addr)); //DW-4
utrdlp[i].command_desc_base_addr_hi =
cpu_to_le32(upper_32_bits(cmd_desc_element_addr)); //DW-5
/* Response upiu and prdt offset should be in double words */
if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) {
utrdlp[i].response_upiu_offset =
cpu_to_le16(response_offset);
utrdlp[i].prd_table_offset =
cpu_to_le16(prdt_offset);
utrdlp[i].response_upiu_length =
cpu_to_le16(ALIGNED_UPIU_SIZE);
} else {
utrdlp[i].response_upiu_offset =
cpu_to_le16((response_offset >> 2)); // DW-6 low 16Bit
utrdlp[i].prd_table_offset =
cpu_to_le16((prdt_offset >> 2)); // DW-7 Upper 16Bit
utrdlp[i].response_upiu_length =
cpu_to_le16(ALIGNED_UPIU_SIZE >> 2); // DW-6 Upper 16Bit
}
hba->lrb[i].utr_descriptor_ptr = (utrdlp + i); // UTRD Struct Address
hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr +
(i * sizeof(struct utp_transfer_req_desc)); // UTRD DNA Address
hba->lrb[i].ucd_req_ptr =
(struct utp_upiu_req *)(cmd_descp + i); // UCD Address
hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr; // UCD DMA Addr
hba->lrb[i].ucd_rsp_ptr =
(struct utp_upiu_rsp *)cmd_descp[i].response_upiu; // UCD Reponse UPIU Addr
hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr +
response_offset; // UCD Response UPIU DMA Addr
hba->lrb[i].ucd_prdt_ptr =
(struct ufshcd_sg_entry *)cmd_descp[i].prd_table; // UCD PRDT Addr
hba->lrb[i].ucd_prdt_dma_addr = cmd_desc_element_addr +
prdt_offset; // UCD PRDT DMA Addr
}
}
/**
* struct utp_transfer_req_desc - UTRD structure
*
* One UTP Transfer Request Descriptor. All multi-byte fields are stored
* little-endian. The descriptor points at a command descriptor (UCD) and
* says where the Response UPIU and the PRDT sit inside it.
*
* @header: UTRD header DW-0 to DW-3
* @command_desc_base_addr_lo: UCD base address, low 32 bits, DW-4
* @command_desc_base_addr_hi: UCD base address, high 32 bits, DW-5
* @response_upiu_length: response UPIU length, lower half of DW-6
* @response_upiu_offset: response UPIU offset from the UCD base, upper
*	half of DW-6
* @prd_table_length: Physical region descriptor length, lower half of DW-7;
*	ufshcd_map_sg() writes the number of entries here, or the table size
*	in bytes under UFSHCD_QUIRK_PRDT_BYTE_GRAN
* @prd_table_offset: Physical region descriptor offset from the UCD base,
*	upper half of DW-7
*
* ufshcd_host_memory_configure() writes the response length and both
* offsets in double words, or in bytes under UFSHCD_QUIRK_PRDT_BYTE_GRAN.
*/
struct utp_transfer_req_desc {
/* DW 0-3: request descriptor header */
struct request_desc_header header;
/* DW 4-5: 64-bit UCD base address split into two 32-bit halves */
__le32 command_desc_base_addr_lo;
__le32 command_desc_base_addr_hi;
/* DW 6: response UPIU length (low 16 bits) then offset (high 16 bits) */
__le16 response_upiu_length;
__le16 response_upiu_offset;
/* DW 7: PRDT length (low 16 bits) then offset (high 16 bits) */
__le16 prd_table_length;
__le16 prd_table_offset;
};
4. UFS Host DMA 数据传输
分配并配置好UFS Host的DMA内存,UFS Host侧就可以使用DMA进行数据传输了,
(1) UFS 数据是存放在sg list列表的内存区域,sg list的好处是使用链表串起来不连续的内存
(2) 调用dma_map_sg建立 ufs数据内存区域(sg list的内存区域)到DMA 内存的映射,建立好映射之后,用户只需要把需要传输的数据放到sg list内存区域,就可以通过DMA进行传输了
(3) 建立 sg list映射的dma地址和prdt table的映射,ufs hci会从prdt table表示的位置获取ufs传输下来的数据
(4) ufs hci 会通过data out 将prdt table 指向的数据传输给ufs device, 或者通过data in将数据传输到prdt table指向的位置
Note: 在做read10操作时,如果没有调用scsi_dma_unmap,可能会出现data in传输的数据没有及时更新到DMA区域的情况。可以使用DMA sync接口(例如dma_sync_sg_for_cpu())同步数据,或者一开始就直接把数据区域内存映射为DMA地址,不要再进行memcpy的动作。
/**
 * ufshcd_map_sg - Map scatter-gather list to prdt
 * @hba: per adapter instance
 * @lrbp: pointer to local reference block
 *
 * DMA-maps the scatter-gather list of lrbp->cmd through scsi_dma_map() and
 * copies every mapped segment (DMA address and length) into the PRDT that
 * lrbp->ucd_prdt_ptr points to. The UTRD prd_table_length is set to the
 * number of segments, or to the table size in bytes when
 * UFSHCD_QUIRK_PRDT_BYTE_GRAN is set; it is cleared to 0 when the command
 * has no data segments.
 *
 * Returns 0 in case of success, non-zero value in case of failure
 */
static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
{
	struct scsi_cmnd *cmd = lrbp->cmd;
	struct ufshcd_sg_entry *prd;
	struct scatterlist *sg;
	int nseg;
	int i;

	nseg = scsi_dma_map(cmd);
	if (nseg < 0)
		return nseg;

	/* No data phase: leave an empty PRDT. */
	if (!nseg) {
		lrbp->utr_descriptor_ptr->prd_table_length = 0;
		return 0;
	}

	if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN)
		/* Quirk: length is expressed in bytes, not entries */
		lrbp->utr_descriptor_ptr->prd_table_length =
			cpu_to_le16((u16)(nseg *
					sizeof(struct ufshcd_sg_entry)));
	else
		lrbp->utr_descriptor_ptr->prd_table_length =
			cpu_to_le16((u16) (nseg));

	prd = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr;

	scsi_for_each_sg(cmd, sg, nseg, i) {
		struct ufshcd_sg_entry *entry = &prd[i];

		/* The size field is written as (segment length - 1) */
		entry->size = cpu_to_le32(((u32) sg_dma_len(sg))-1);
		entry->base_addr =
			cpu_to_le32(lower_32_bits(sg->dma_address));
		entry->upper_addr =
			cpu_to_le32(upper_32_bits(sg->dma_address));
		entry->reserved = 0;
	}

	return 0;
}
/**
 * scsi_dma_map - perform DMA mapping against command's sg lists
 * @cmd: scsi command
 *
 * The mapping is done on the host's dma_dev with the command's
 * sc_data_direction.
 *
 * Returns the number of sg entries actually mapped, zero if the command
 * has no sg list, or -ENOMEM if the mapping failed.
 */
int scsi_dma_map(struct scsi_cmnd *cmd)
{
	struct device *dev;
	int mapped;

	/* Nothing to map for commands without a data buffer. */
	if (!scsi_sg_count(cmd))
		return 0;

	dev = cmd->device->host->dma_dev;
	mapped = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
			    cmd->sc_data_direction);
	/* dma_map_sg() reports failure by returning 0 mapped entries. */
	if (unlikely(!mapped))
		return -ENOMEM;

	return mapped;
}
EXPORT_SYMBOL(scsi_dma_map);
/**
 * scsi_dma_unmap - unmap command's sg lists mapped by scsi_dma_map
 * @cmd: scsi command
 *
 * Does nothing when the command has no sg list. Otherwise the list is
 * unmapped on the host's dma_dev using the command's sg count and
 * sc_data_direction.
 */
void scsi_dma_unmap(struct scsi_cmnd *cmd)
{
	struct device *dev;

	if (!scsi_sg_count(cmd))
		return;

	dev = cmd->device->host->dma_dev;
	dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
		     cmd->sc_data_direction);
}
四. 参考资料
1. UFS协议
2. kernel4.19官方源码