文章目录
- 用户态协议栈01-udp收发
- 前期准备
- DPDK初始化
- 开始搓udp协议栈
- 配置dpdk
- 定义udp相关变量
- 接受udp数据&&读取包内容
- 接口层
- 拼接udp数据包
- 完整代码
- 如何启动实验
- 如何编译
- 使用效果
用户态协议栈01-udp收发
实现用户态协议栈最最简单的就是实现Udp的收发,下面逐步完成一个基于dpdk的Udp协议栈,达到收发的目的。
前期准备
- 以太网协议(ether)
- IPv4协议(ip)
- UDP协议(udp)
这些协议的图解会在后面我们拆解和拼接数据包的时候用到,先放在这里。
DPDK初始化
DPDK初始化分为以下两个部分:
- 启动dpdk
参考我之前的博客
- 配置dpdk端口和收发队列
开始搓udp协议栈
配置dpdk
// Id of the DPDK port (NIC) used for all RX/TX in this example.
int gDpdkPortId = 0;

// Initialize the DPDK Environment Abstraction Layer; abort with a message on failure.
if(rte_eal_init(argc, argv) < 0) {
    rte_exit(EXIT_FAILURE, "Error with EAL init\n");
}

// Number of Ethernet ports available to DPDK; at least one is required.
uint16_t nb_dev_ports = rte_eth_dev_count_avail();
if(nb_dev_ports == 0) {
    rte_exit(EXIT_FAILURE, "Error with dev count\n");
}

// Packet buffer pool shared by the RX queue and by the packets we build for TX.
struct rte_mempool* mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NUM_MBUFS, 0, 0,
                                                        RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if(!mbuf_pool) {
    rte_exit(EXIT_FAILURE, "Error with mbuf init\n");
}

// Query the port so that its default TX queue configuration can be reused below.
struct rte_eth_dev_info dev_info;
rte_eth_dev_info_get(gDpdkPortId, &dev_info);

const int num_rx_queue = 1; // number of receive queues
const int num_tx_queue = 1; // number of transmit queues
struct rte_eth_conf port_conf = port_conf_default;

// Configure the port with the chosen number of RX/TX queues.
if(rte_eth_dev_configure(gDpdkPortId, num_rx_queue, num_tx_queue, &port_conf) < 0) {
    rte_exit(EXIT_FAILURE, "Error with dev configure\n");
}

// Set up RX queue 0 with 1024 descriptors; NULL selects the default RX configuration.
if(rte_eth_rx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool) < 0) {
    rte_exit(EXIT_FAILURE, "Error with rx queue setup\n");
}

// Set up TX queue 0 with 1024 descriptors. The queue's offloads must mirror the
// port-level TX offloads (txmode), not the RX ones.
struct rte_eth_txconf txq_conf = dev_info.default_txconf;
txq_conf.offloads = port_conf.txmode.offloads;
if(rte_eth_tx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId), &txq_conf) < 0) {
    rte_exit(EXIT_FAILURE, "Error with tx queue setup\n");
}

// Start the port: from here on packets can be received and transmitted.
if(rte_eth_dev_start(gDpdkPortId) < 0) {
    rte_exit(EXIT_FAILURE, "Error with dev start\n");
}
这里首先配置了dpdk并启动它。我们需要一个网卡端口(gDpdkPortId)、一个发送队列、一个接收队列和一个内存池,后面发送和接收数据包时用到的mbuf都从这个内存池中分配。下面说明其中几个函数的参数。
int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id ,const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool);
- port_id:以太网端口的ID。
- rx_queue_id:接收队列的ID。
- nb_rx_desc:接收队列中描述符(descriptor)的数量。
- socket_id:指定内存分配所使用的NUMA节点(socket)ID,注意这里的socket指CPU插槽/NUMA节点,不是网络套接字。
- rx_conf:一个指向结构体rte_eth_rxconf的指针,包含接收队列的配置参数,如接收阈值(rx_thresh)、空闲描述符阈值(rx_free_thresh)、丢包开关(rx_drop_en)、offloads等;传NULL表示使用默认的接收配置。
- mb_pool:一个指向内存池(mempool)对象的指针,用于分配接收缓冲区。
int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf);
port_id:要配置的以太网端口标识符。
tx_queue_id:要配置的发送队列标识符。
nb_tx_desc:发送队列中的描述符数量。
socket_id:用于内存分配的NUMA节点(socket)标识符,不是网络套接字。
tx_conf:指向rte_eth_txconf结构体的指针,包含有关发送队列配置的详细信息
struct rte_mempool *rte_pktmbuf_pool_create(const char *name, unsigned n, unsigned cache_size, uint16_t priv_size, uint16_t data_room_size, int socket_id);
name:缓冲池的名称。
n:缓冲池中缓冲区的数量。
cache_size:每个CPU核心的本地缓存大小。如果设为0,则禁用本地缓存。
priv_size:每个数据包缓冲区保留的私有数据大小。
data_room_size:每个数据包缓冲区可用于存储数据的空间大小。
socket_id:用于内存分配的NUMA节点(socket)标识符,不是网络套接字。
定义udp相关变量
// UDP reply state. The receive loop fills these from an incoming datagram with
// source/destination swapped, and the packet builder reads them when it
// assembles the reply. Values are stored exactly as they appear in the packet
// headers (no byte-order conversion is applied when they are copied).
uint8_t gSrcMac[RTE_ETHER_ADDR_LEN]; // source MAC written into outgoing Ethernet headers
uint8_t gDstMac[RTE_ETHER_ADDR_LEN]; // destination MAC: the sender's MAC of the received frame
uint32_t gSrcIp;                     // our IP: the destination address of the received packet
uint32_t gDstIp;                     // peer IP: the source address of the received packet
uint16_t gSrcPort;                   // our port: the destination port of the received datagram
uint16_t gDstPort;                   // peer port: the source port of the received datagram
这些是我们使用UDP协议发送数据包时需要的参数,当我们接收到一个udp数据包的时候,我们从数据包中读取数据,然后保存到这些变量中;在创建新的数据包时使用这些变量来构建发回的数据包。
接受udp数据&&读取包内容
// Main polling loop: receive bursts of packets, echo every UDP datagram back
// to its sender, and release every received mbuf exactly once.
while(1) {
    struct rte_mbuf* mbufs[BURST_SIZE];
    // Read up to BURST_SIZE packets from RX queue 0 of the uio/vfio-bound NIC.
    unsigned nb_recvd = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, BURST_SIZE);
    if(nb_recvd > BURST_SIZE) {
        rte_exit(EXIT_FAILURE, "Error with rx burst\n");
    }

    unsigned i = 0;
    for(i = 0; i < nb_recvd; i++) {
        struct rte_mbuf* rxbuf = mbufs[i];
        const uint32_t rx_len = rte_pktmbuf_data_len(rxbuf);

        // Ethernet header. Frames that are too short to hold an IPv4 header, or
        // that are not IPv4, are dropped (and their mbuf returned to the pool).
        struct rte_ether_hdr* ehdr = rte_pktmbuf_mtod(rxbuf, struct rte_ether_hdr*);
        if(rx_len < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) ||
           ehdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
            rte_pktmbuf_free(rxbuf);
            continue;
        }

        // IPv4 header sits right behind the Ethernet header.
        struct rte_ipv4_hdr* iphdr = rte_pktmbuf_mtod_offset(rxbuf, struct rte_ipv4_hdr*, sizeof(struct rte_ether_hdr));
        if(iphdr->next_proto_id == IPPROTO_UDP) {
            printf("get udp\n");

            // The low nibble of version_ihl is the header length in 32-bit words;
            // IP options make the header longer than the fixed 20 bytes.
            const uint32_t ip_hdr_len = (uint32_t)(iphdr->version_ihl & 0x0f) * 4;
            const uint32_t udp_off = sizeof(struct rte_ether_hdr) + ip_hdr_len;
            if(ip_hdr_len < sizeof(struct rte_ipv4_hdr) || rx_len < udp_off + sizeof(struct rte_udp_hdr)) {
                rte_pktmbuf_free(rxbuf);
                continue;
            }

            struct rte_udp_hdr* udphdr = rte_pktmbuf_mtod_offset(rxbuf, struct rte_udp_hdr*, udp_off);
            // dgram_len covers the UDP header plus payload and comes from the
            // wire, so it must be checked against what was actually received.
            const uint16_t dgram_len = ntohs(udphdr->dgram_len);
            if(dgram_len < sizeof(struct rte_udp_hdr) || udp_off + dgram_len > rx_len) {
                rte_pktmbuf_free(rxbuf);
                continue;
            }
            // Payload length only: the UDP header must not be echoed as data.
            const uint16_t length = (uint16_t)(dgram_len - sizeof(struct rte_udp_hdr));

            // Remember the peer, swapping source and destination for the reply.
            rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
            gSrcIp = iphdr->dst_addr;
            gDstIp = iphdr->src_addr;
            gSrcPort = udphdr->dst_port;
            gDstPort = udphdr->src_port;

            // Log the addresses of the received datagram.
            struct in_addr addr;
            addr.s_addr = iphdr->src_addr;
            printf("src: %s:%d, ", inet_ntoa(addr), ntohs(udphdr->src_port));
            addr.s_addr = iphdr->dst_addr;
            printf("dst: %s:%d\n", inet_ntoa(addr), ntohs(udphdr->dst_port));

            // Build the reply (ustack_send is implemented below) and transmit it.
            struct rte_mbuf* txbuf = ustack_send(mbuf_pool, (char*)(udphdr + 1), length);
            uint16_t res = rte_eth_tx_burst(gDpdkPortId, 0, &txbuf, 1);
            printf("send res: %d\n", res);
            // A packet accepted by rte_eth_tx_burst is owned and later freed by
            // the driver; only a rejected packet is still ours to free.
            if(res == 0) {
                rte_pktmbuf_free(txbuf);
            }
        }
        else if(iphdr->next_proto_id == IPPROTO_TCP) {
            printf("get tcp\n");
            // TODO: TCP handling is not implemented in this UDP-only example.
        }

        rte_pktmbuf_free(rxbuf);
    }
}
关于 rte_pktmbuf_mtod(m, t) 这个宏,在源代码中的实现是这样的:
#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
#define rte_pktmbuf_mtod_offset(m, t, o) \
((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
它的底层实现是对一个地址进行偏移,当我们获取一个udp/ip协议的以太网数据包的时候,如果偏移值为0,那就可以获得以太网头,如果偏移值为sizeof(以太网头长度)就可以获取IP数据包头。从下面这张图可以看出来。 ![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/1cd5249f24de4c29ac2ae83da5613b18.png#pic_center)
接口层
static struct rte_mbuf* ustack_send(struct rte_mempool* mbuf_pool, char* data, uint16_t length) {
// uint16_t total_length = length + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr);
//整个udp/ip数据包的长度
uint16_t total_length = length + 42;
//从内存池中申请一块内存
struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool);
if(!mbuf) {
rte_exit(EXIT_FAILURE, "Error with EAL init\n");
}
mbuf->data_len = total_length;
mbuf->pkt_len = total_length;
uint8_t* pktdata = rte_pktmbuf_mtod(mbuf, uint8_t*);
//拼接函数
ustack_encode_udp_pkt(pktdata, data, total_length);
return mbuf;
}
这里是一个封装的中间层,方便后续实现其他协议时复用同样的接口。它会返回装有udp数据包的mbuf指针,方便我们把它交给rte_eth_tx_burst,经由uio/vfio接管的网卡发送出去。
拼接udp数据包
static int ustack_encode_udp_pkt(uint8_t* msg, char* data, uint32_t total_len) {
//以太网头
struct rte_ether_hdr* eth = (struct rte_ether_hdr*)msg;
rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);
//ip头
struct rte_ipv4_hdr* iphdr = (struct rte_ipv4_hdr*)(msg + sizeof(struct rte_ether_hdr));
iphdr->version_ihl = 0x45;
iphdr->type_of_service = 0x00;
iphdr->total_length = htons(total_len - sizeof(struct rte_ether_hdr));
iphdr->packet_id = 0;
iphdr->fragment_offset = 0;
iphdr->time_to_live = 64;
iphdr->next_proto_id = IPPROTO_UDP;
iphdr->src_addr = gSrcIp;
iphdr->dst_addr = gDstIp;
iphdr->hdr_checksum = 0;
iphdr->hdr_checksum = rte_ipv4_cksum(iphdr);
struct rte_udp_hdr* udphdr = (struct rte_udp_hdr*)(msg + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr));
uint16_t udplen = total_len - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr);
udphdr->dst_port = gDstPort;
udphdr->src_port = gSrcPort;
udphdr->dgram_len = htons(udplen);
rte_memcpy((uint8_t*)(udphdr + 1), data, udplen);
udphdr->dgram_cksum = 0;
udphdr->dgram_cksum = rte_ipv4_udptcp_cksum(iphdr, udphdr);
struct in_addr addr;
addr.s_addr = gSrcIp;
printf(" --> src: %s:%d, ", inet_ntoa(addr), ntohs(gSrcPort));
addr.s_addr = gDstIp;
printf("dst: %s:%d\n", inet_ntoa(addr), ntohs(gDstPort));
return total_len;
}
这里需要参考上文的几张图,我再放一下:
这里需要分开看,首先是以太网部分
以太网协议作为链路层协议,它的主要信息就是MAC地址。我们只要将准备好的MAC地址拷贝到数据包中即可,最后设置一下协议类型。
IP部分
这里需要比较细致的解读:
version_ihl如何计算:从上面的IP数据包图可以看出:长方形的长度为32位,首部长度(宽)为20字节,注意32和20的单位是不一样的。32位是4字节;20 / 4 = 5;所以首部长度字段的值是5。再加上高4位的版本号4(IPv4),合起来version_ihl就是0x45。
time_to_live:我们可以做一个实验,ping一下baidu.com,得到的结果如上图。ttl就是数据包的生命周期,每经过一个路由器(网关)就减1。我这里收到的应答ttl=48,假设对端发出应答包时的初始ttl是64,64-48=16,说明应答包从百度服务器到我这里,经历了16个网关。
checksum:在计算校验和之前,首先将hdr_checksum字段设置为0的目的是确保不会将旧的校验和值包含在计算中。因为校验和是通过对报文头部进行累加求和得到的,如果不将其初始值设置为0,则可能会导致计算结果与实际期望的校验和不一致。
UDP部分
和上面原理一样,拷贝一下数据。
完整代码
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
// Number of mbufs in the packet buffer pool created in main().
#define NUM_MBUFS (4096-1)
// Maximum number of packets fetched by one rte_eth_rx_burst() call.
#define BURST_SIZE 128
// NOTE(review): not referenced anywhere in the code shown here.
#define ENABLE_SEND 1
// Id of the DPDK port (NIC) used for all RX/TX in this program.
int gDpdkPortId = 0;
// UDP reply state. main() fills gSrcMac from the port's own MAC address and the
// remaining fields from each received datagram (source/destination swapped);
// ustack_encode_udp_pkt() reads them when building the reply. Addresses and
// ports are stored exactly as they appear in the packet headers.
uint8_t gSrcMac[RTE_ETHER_ADDR_LEN]; // our MAC, source of outgoing frames
uint8_t gDstMac[RTE_ETHER_ADDR_LEN]; // peer MAC, destination of outgoing frames
uint32_t gSrcIp;                     // our IP: destination address of the received packet
uint32_t gDstIp;                     // peer IP: source address of the received packet
uint16_t gSrcPort;                   // our port: destination port of the received datagram
uint16_t gDstPort;                   // peer port: source port of the received datagram
static int ustack_encode_udp_pkt(uint8_t* msg, char* data, uint32_t total_len) {
struct rte_ether_hdr* eth = (struct rte_ether_hdr*)msg;
rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);
struct rte_ipv4_hdr* iphdr = (struct rte_ipv4_hdr*)(msg + sizeof(struct rte_ether_hdr));
iphdr->version_ihl = 0x45;
iphdr->type_of_service = 0x00;
iphdr->total_length = htons(total_len - sizeof(struct rte_ether_hdr));
iphdr->packet_id = 0;
iphdr->fragment_offset = 0;
iphdr->time_to_live = 64;
iphdr->next_proto_id = IPPROTO_UDP;
iphdr->src_addr = gSrcIp;
iphdr->dst_addr = gDstIp;
iphdr->hdr_checksum = 0;
iphdr->hdr_checksum = rte_ipv4_cksum(iphdr);
struct rte_udp_hdr* udphdr = (struct rte_udp_hdr*)(msg + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr));
uint16_t udplen = total_len - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr);
udphdr->dst_port = gDstPort;
udphdr->src_port = gSrcPort;
udphdr->dgram_len = htons(udplen);
rte_memcpy((uint8_t*)(udphdr + 1), data, udplen);
udphdr->dgram_cksum = 0;
udphdr->dgram_cksum = rte_ipv4_udptcp_cksum(iphdr, udphdr);
struct in_addr addr;
addr.s_addr = gSrcIp;
printf(" --> src: %s:%d, ", inet_ntoa(addr), ntohs(gSrcPort));
addr.s_addr = gDstIp;
printf("dst: %s:%d\n", inet_ntoa(addr), ntohs(gDstPort));
return total_len;
}
static struct rte_mbuf* ustack_send(struct rte_mempool* mbuf_pool, char* data, uint16_t length) {
// uint16_t total_length = length + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr);
uint16_t total_length = length + 42;
struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool);
if(!mbuf) {
rte_exit(EXIT_FAILURE, "Error with EAL init\n");
}
mbuf->data_len = total_length;
mbuf->pkt_len = total_length;
uint8_t* pktdata = rte_pktmbuf_mtod(mbuf, uint8_t*);
ustack_encode_udp_pkt(pktdata, data, total_length);
return mbuf;
}
// Baseline port configuration that main() copies into its local port_conf.
// Only the maximum RX packet length is set explicitly; every other field is
// left zero-initialized.
static const struct rte_eth_conf port_conf_default = {
.rxmode = {.max_rx_pkt_len = RTE_ETHER_MAX_LEN}
};
int main(int argc, char** argv) {
if(rte_eal_init(argc, argv) < 0) {
rte_exit(EXIT_FAILURE, "Error with EAL init\n");
}
uint16_t nb_dev_ports = rte_eth_dev_count_avail();
if(nb_dev_ports == 0) {
rte_exit(EXIT_FAILURE, "Error with dev count\n");
}
struct rte_mempool* mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NUM_MBUFS, 0, 0,
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if(!mbuf_pool) {
rte_exit(EXIT_FAILURE, "Error with mbuf init\n");
}
struct rte_eth_dev_info dev_info;
rte_eth_dev_info_get(gDpdkPortId, &dev_info);
const int num_rx_queue = 1;
const int num_tx_queue = 1;
struct rte_eth_conf port_conf = port_conf_default;
if(rte_eth_dev_configure(gDpdkPortId, num_rx_queue, num_tx_queue, &port_conf) < 0) {
rte_exit(EXIT_FAILURE, "Error with dev configure");
}
if(rte_eth_rx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool) < 0) {
rte_exit(EXIT_FAILURE, "Error with rx queue setup\n");
}
struct rte_eth_txconf txq_conf = dev_info.default_txconf;
txq_conf.offloads = port_conf.rxmode.offloads;
if(rte_eth_tx_queue_setup(gDpdkPortId, 0, 1024, rte_eth_dev_socket_id(gDpdkPortId), &txq_conf) < 0) {
rte_exit(EXIT_FAILURE, "Error with tx queue setup\n");
}
if(rte_eth_dev_start(gDpdkPortId) < 0) {
rte_exit(EXIT_FAILURE, "Error with dev start\n");
}
rte_eth_macaddr_get(gDpdkPortId, (struct rte_ether_addr*)gSrcMac);
printf("dev start success\n");
while(1) {
struct rte_mbuf* mbufs[BURST_SIZE];
unsigned nb_recvd = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, BURST_SIZE);
if(nb_recvd > BURST_SIZE) {
rte_exit(EXIT_FAILURE, "Error with rx burst\n");
}
unsigned i = 0;
for(i = 0; i < nb_recvd; i++) {
struct rte_ether_hdr* ehdr = rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr*);
if(ehdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
continue;
}
struct rte_ipv4_hdr* iphdr = rte_pktmbuf_mtod_offset(mbufs[i], struct rte_ipv4_hdr*, sizeof(struct rte_ether_hdr));
if(iphdr->next_proto_id == IPPROTO_UDP) {
printf("get udp\n");
struct rte_udp_hdr* udphdr = (struct rte_udp_hdr*)(iphdr + 1);
rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));
rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));
// uint16_t length = ntohs(udphdr->dgram_len) - sizeof(struct rte_udp_hdr);
uint16_t length = ntohs(udphdr->dgram_len);
// printf("length: %d, content: %s\n", length, (char*)(udphdr + 1));
struct in_addr addr;
addr.s_addr = iphdr->src_addr;
printf("src: %s:%d, ", inet_ntoa(addr), ntohs(udphdr->src_port));
addr.s_addr = iphdr->dst_addr;
printf("dst: %s:%d\n", inet_ntoa(addr), ntohs(udphdr->dst_port));
struct rte_mbuf* txbuf = ustack_send(mbuf_pool, (char*)(udphdr + 1), length);
uint16_t res = rte_eth_tx_burst(gDpdkPortId, 0, &txbuf, 1);
printf("send res: %d\n", res);
rte_pktmbuf_free(txbuf);
rte_pktmbuf_free(mbufs[i]);
}
}
}
}
如何启动实验
**这一步还是比较重要的,建议看一下**
在我之前写的配置过程的基础上,我们需要将我们的虚拟机网卡添加到我们物理机的arp表中。
这是我的arp表,他现在已经添加过了,框出来的就是我添加的。
首先你要注意dpdk接管网卡的ip和mac地址,然后查看一下你的网络数据:
我这里是WIFI的局域网所以是插入到8-WLAN
里面,你可能是以太网(一般直接插网线就是)。
netsh -c i i add neighbors 23 192.168.0.120 00-0c-29-85-2e-88
按照以上格式将dpdk控制的网卡的ip和mac添加到arp表中。
如何编译
我们这里选择Makefile来编译我们的文件,如果你有别的库或者包含目录,自行添加。
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2010-2014 Intel Corporation

# binary name (name of the executable that gets built)
APP = ustack

# all source are stored in SRCS-y (list your source files here)
SRCS-y := main.c

# Build using pkg-config variables if possible
ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)

all: shared
.PHONY: shared static
shared: build/$(APP)-shared
	ln -sf $(APP)-shared build/$(APP)
static: build/$(APP)-static
	ln -sf $(APP)-static build/$(APP)

PKGCONF=pkg-config --define-prefix

PC_FILE := $(shell $(PKGCONF) --path libdpdk)
CFLAGS += -O3 -g $(shell $(PKGCONF) --cflags libdpdk)
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)

# Recipe lines below must start with a TAB character, not spaces.
build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)

build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)

build:
	@mkdir -p $@

.PHONY: clean
clean:
	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
	test -d build && rmdir -p build || true

else

ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, detect a build directory, by looking for a path with a .config
RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))

include $(RTE_SDK)/mk/rte.vars.mk

CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)

include $(RTE_SDK)/mk/rte.extapp.mk

endif