网络协议栈简单设计
操作系统内核中实现了网络协议栈,但今天利用netmap(也可利用dpdk)绕过内核协议栈进行网络数据的收发
netmap
- 内核协议栈加载数据:
数据从网卡到内核再到内存,需要经过两次拷贝
- netmap映射数据:
直接将网卡数据映射到内存,应用程序可以直接通过mmap操作相应内存的数据,实现0拷贝;
所谓零拷贝,指的是数据的搬运不由CPU完成:普通的copy动作是CPU执行move指令实现的,而零拷贝把搬运交给DMA管理。DMA允许外设与内存直接进行数据传输,这个过程不需要CPU的参与。
netmap api:
- nm_open():
- 通过 d->fd = open(NETMAP_DEVICE_NAME, O_RDWR); 打开一个特殊的设备 /dev/netmap 来创建文件描述符 d->fd。
- 这个 fd 对应 /dev/netmap 设备(所以只要存在 /dev/netmap 这个设备,就说明 netmap 启动成功了)。网卡只要来了数据,这个 fd 上就会有 EPOLLIN 事件,因此它是用来检测网卡有没有数据的;又因为使用了 mmap,只要网卡有数据,内存中就有数据。
- 对于消息检测,一般就是两种方法:
- 轮询:适合大量数据收发
- 事件通知:适合数据较为稀疏的情景(netmap采用poll、epoll事件通知)
- 所以,数据检测是通过指向网卡的 fd 实现的,而操作数据是操作内存中的数据;内存和网卡的数据是同步的,而 CPU 只能操作内存,不能直接操作外设。
- 调用该函数后,该网卡上的网络数据就不走内核协议栈了,这时候最好在虚拟机中建两个网卡,一个用于 netmap,一个用于 ssh 等应用程序的正常工作。
- 通过d->fd =open(NETMAP_DEVICE_NAME, O_RDWR);打开一个特殊的设备/dev/netmap来创建文件描述符 d->
- nm_nextpkt():
- 用来接收网卡上到来的数据包,nm_nextpkt会将所有 rx 环都检查一遍,当发现有一个 rx 环有需要接收的数据包时,得到这个数据包的地址,并返回。所以 nm_nextpkt()每次只能取一个数据包。 因为接收到的数据包没有经过协议栈处理,因此需要在用户程序中自己解析。
- rx:环形缓冲区 参考文章
- nm_inject():
- 往共享内存中写入待发送的数据包数据的。数据包经共享内存拷贝到网卡,然后发送出去
- nm_close():
- 回收动态内存,回收共享内存,关闭文件描述符
基于UDP的协议栈设计
注:只实现udp服务器接收客户端数据,并进行回应的功能
udp包需要从应用层开始经过层层包装:以客户端send(buff)为例,其包装后是:【以太网头】【网络层头】【传输层头】【buff】
而这些头部信息需要我们去定义:
#pragma pack(1) // 1-byte packing: no padding is inserted, so the structs below match the packet layout byte for byte
struct ethhdr { // Ethernet header (data link layer): destination/source MAC addresses and protocol
unsigned char h_dst[ETH_ADDR_LENGTH]; // destination MAC address
unsigned char h_src[ETH_ADDR_LENGTH]; // source MAC address
unsigned short h_proto; // payload protocol; callers compare ntohs(h_proto) against PROTO_IP / PROTO_ARP
}; // 14 bytes, assuming ETH_ADDR_LENGTH is 6
struct iphdr { // IP header (network layer)
unsigned char hdrlen:4,
version:4; // both fields share one byte; the version comes first on the wire, so it is declared second here (NOTE(review): bit-field order is implementation-defined — confirm for the target compiler)
unsigned char tos;
unsigned short totlen;
unsigned short id;
unsigned short flag_offset; // NOTE(review): presumably flags plus fragment offset packed into 16 bits
unsigned char ttl; // time to live
// multi-byte fields arrive in network byte order (e.g. 0x1234); convert with ntohs()/htons()
unsigned char type; // payload protocol; compared against PROTO_UDP / PROTO_ICMP by the receive loop
unsigned short check;
unsigned int sip; // source IP address
unsigned int dip; // destination IP address
}; // 20 bytes
struct udphdr { // UDP header (transport layer): carries ports only, no source/destination IP
unsigned short sport; // source port
unsigned short dport; // destination port
unsigned short length; // read with ntohs(); the receive loop subtracts these 8 header bytes to get the payload size
unsigned short check;
}; // 8 bytes
/*
结构体对齐:以最大成员的类型对齐
结构体中含有数组:只需比较数组值类型
结构体S2中含有结构体S1:取S1的最大类型与S2的类型比较
*/
struct udppkt {
struct ethhdr eh; // 14 =》它后面会有2字节的空间,数据容易读取错误,统一一字节对齐
struct iphdr ip; // 20 4直接对齐
struct udphdr udp; // 8
// 这里不能用指针,因为包发出去了,带走了指针,但是没带走数据,而且使用也不方便
// 定义0长数据,其实就是指向传输层尾部的偏移量,数组并不占空间
// 0长数组经常写在结构体的后面,使得结构体长度可变 =》 需要编译器支持
unsigned char data[0]; // 应用层数据
};
- 在同一个字节内用位域声明两个字段(如 hdrlen 和 version)时,注意大小端对位域顺序的影响
- 设置单字节对齐,不然udppkt会以4字节对齐,struct ethhdr eh后会有2字节的空缺,造成数据读取出错
- 设置0长数组,使得udp包可以不定长(UDP的长度字段只有16位,所以整个数据报不能超过64KB,但大小最好不超过MTU)
然后,接收udp包并解析:
int main() {
struct nm_pkthdr h; // ringbuff
// netmap 通过打开一个特殊的设备/dev/netmap来创建文件描述符 d->fd
// 注意这个fd是/dev/netmap这个网卡设备,网卡只要来数据了,相应的这个fd就会有EPOLLIN事件,这个fd是检测网卡有没有数据的,因为是mmap,只要网卡有数据了,那么内存就有数据的
struct nm_desc *nmr = nm_open("netmap:eth0", NULL, 0, NULL);
if (nmr == NULL) return -1;
struct pollfd pfd = {0};
pfd.fd = nmr->fd;
pfd.events = POLLIN;
while (1) {
int ret = poll(&pfd, 1, -1);
if (ret < 0) continue;
if (pfd.revents & POLLIN) {
unsigned char *stream = nm_nextpkt(nmr, &h);
struct ethhdr *eh = (struct ethhdr *)stream;
if (ntohs(eh->h_proto) == PROTO_IP) {
struct udppkt *udp = (struct udppkt *)stream; // 网络字节序转本机字节序
if (udp->ip.type == PROTO_UDP) { // udp包
int udplength = ntohs(udp->udp.length);
udp->data[udplength-8] = '\0'; // 去除udp的8个字节,注意这里已经把以太网头和网络层头去掉了
printf("udp --> %s\n", udp->data); // 打印包中的应用层数据
} else if (udp->ip.type == PROTO_ICMP) {
}
} else if (ntohs(eh->h_proto) == PROTO_ARP) {
}
}
}
}
进一步,实现arp协议和ICMP协议:
ARP:
struct arphdr { // ARP header: fields follow the ARP protocol layout
unsigned short h_type;
unsigned short h_proto;
unsigned char h_addrlen; // hardware address length; the reply builder sets it to 6
unsigned char h_protolen; // protocol address length; the reply builder sets it to 4
unsigned short oper; // operation code in network byte order; the reply builder sets htons(2)
unsigned char smac[ETH_ADDR_LENGTH]; // sender MAC address
unsigned int sip; // sender IP address
unsigned char dmac[ETH_ADDR_LENGTH]; // target MAC address
unsigned int dip; // target IP address
};
/* A complete ARP frame: Ethernet header followed by the ARP header. */
struct arppkt {
struct ethhdr eh;
struct arphdr arp;
};
/*
 * Build an ARP reply for the request `arp` into `arp_rt`.
 * The reply starts as a copy of the request; the request's sender becomes the
 * target, and `mac` (text form "xx:xx:xx:xx:xx:xx") becomes the sender MAC in
 * both the Ethernet and the ARP header.
 */
void echo_arp_pkt(struct arppkt *arp, struct arppkt *arp_rt, char *mac) {
    memcpy(arp_rt, arp, sizeof(*arp_rt));

    /* Ethernet header: send back to the requester, from our own MAC. */
    memcpy(arp_rt->eh.h_dst, arp->eh.h_src, ETH_ADDR_LENGTH);
    str2mac(arp_rt->eh.h_src, mac);
    arp_rt->eh.h_proto = arp->eh.h_proto;

    /* ARP header: fixed sizes and the reply opcode. */
    arp_rt->arp.h_addrlen = 6;
    arp_rt->arp.h_protolen = 4;
    arp_rt->arp.oper = htons(2);

    /* Sender is us (the address that was asked for) ... */
    str2mac(arp_rt->arp.smac, mac);
    arp_rt->arp.sip = arp->arp.dip;

    /* ... and the target is whoever asked. */
    memcpy(arp_rt->arp.dmac, arp->arp.smac, ETH_ADDR_LENGTH);
    arp_rt->arp.dip = arp->arp.sip;
}
int main() {
struct nm_pkthdr h; // ringbuff
// netmap 通过打开一个特殊的设备/dev/netmap来创建文件描述符 d->fd
// 注意这个fd是/dev/netmap这个网卡设备,网卡只要来数据了,相应的这个fd就会有EPOLLIN事件,这个fd是检测网卡有没有数据的,因为是mmap,只要网卡有数据了,那么内存就有数据的
struct nm_desc *nmr = nm_open("netmap:eth0", NULL, 0, NULL);
if (nmr == NULL) return -1;
struct pollfd pfd = {0};
pfd.fd = nmr->fd;
pfd.events = POLLIN;
while (1) {
int ret = poll(&pfd, 1, -1);
if (ret < 0) continue;
if (pfd.revents & POLLIN) {
unsigned char *stream = nm_nextpkt(nmr, &h);
struct ethhdr *eh = (struct ethhdr *)stream;
if (ntohs(eh->h_proto) == PROTO_IP) {
struct udppkt *udp = (struct udppkt *)stream; // 网络字节序转本机字节序
if (udp->ip.type == PROTO_UDP) { // udp包
int udplength = ntohs(udp->udp.length);
udp->data[udplength-8] = '\0'; // 去除udp的8个字节,注意这里已经把以太网头和网络层头去掉了
printf("udp --> %s\n", udp->data); // 打印包中的应用层数据
} else if (udp->ip.type == PROTO_ICMP) {
}
} else if (ntohs(eh->h_proto) == PROTO_ARP) { // 因为开启了net_map,就不用系统的协议栈了
struct arppkt *arp = (struct arppkt *)stream;
struct arppkt arp_rt; // 回包
if (arp->arp.dip == inet_addr("192.168.0.123")) { // 是否是发送给我本机的
echo_arp_pkt(arp, &arp_rt, "00:50:56:33:1c:ca");
nm_inject(nmr, &arp_rt, sizeof(arp_rt));
printf("arp ret\n");
}
}
}
}
}
arp攻击:攻击者通过响应客户端发出的arp请求包,更改arp表的IP-MAC条目,造成网络中断或中间人攻击,在上述代码中,把if (ntohs(eh->h_proto) == PROTO_ARP)去掉就可对别的主机造成arp攻击了
ICMP:
/* ICMP message as handled here: 8 header bytes plus a fixed 32-byte data area. */
struct icmphdr {
unsigned char type; // message type; the echo reply builder sets 0
unsigned char code; // the echo reply builder sets 0
unsigned short check; // checksum, computed by in_cksum() over the whole struct with this field zeroed
unsigned short identifier;
unsigned short seq;
unsigned char data[32]; // fixed-size payload area
};
/* A complete ICMP frame: Ethernet header, IP header, then the ICMP message. */
struct icmppkt {
struct ethhdr eh;
struct iphdr ip;
struct icmphdr icmp;
};
/*
 * 16-bit one's-complement checksum over `len` bytes starting at `addr`.
 *
 * Words are summed in host memory order, a trailing odd byte is added as the
 * first byte of a zero-padded word, carries are folded back into the low
 * 16 bits, and the complement of the result is returned.
 *
 * The accumulator is unsigned so that large buffers cannot trigger signed
 * overflow, and only standard types are used (no `u_char`).
 */
unsigned short in_cksum(unsigned short *addr, int len)
{
    unsigned long sum = 0;
    unsigned short *w = addr;
    int nleft = len;

    while (nleft > 1) {
        sum += *w++;
        nleft -= 2;
    }

    /* Odd trailing byte: place it in the first byte of an otherwise zero word. */
    if (nleft == 1) {
        unsigned short last = 0;
        *(unsigned char *)&last = *(unsigned char *)w;
        sum += last;
    }

    /* Fold every carry above bit 15 back in until none remain. */
    while (sum >> 16) {
        sum = (sum >> 16) + (sum & 0xffff);
    }

    return (unsigned short)~sum;
}
/*
 * Build an ICMP echo reply for the request `icmp` into `icmp_rt`.
 * The reply is a copy of the request with type/code set to 0, IP and MAC
 * source/destination swapped, and the ICMP checksum recomputed.
 */
void echo_icmp_pkt(struct icmppkt *icmp, struct icmppkt *icmp_rt) {
    memcpy(icmp_rt, icmp, sizeof(*icmp_rt));

    /* Link layer: swap MAC addresses. */
    memcpy(icmp_rt->eh.h_source, icmp->eh.h_dest, ETH_ALEN);
    memcpy(icmp_rt->eh.h_dest, icmp->eh.h_source, ETH_ALEN);

    /* Network layer: swap IP addresses. */
    icmp_rt->ip.daddr = icmp->ip.saddr;
    icmp_rt->ip.saddr = icmp->ip.daddr;

    /* ICMP: type 0 / code 0, checksum computed with the field zeroed first. */
    icmp_rt->icmp.type = 0x0;
    icmp_rt->icmp.code = 0x0;
    icmp_rt->icmp.check = 0x0;
    icmp_rt->icmp.check = in_cksum((unsigned short *)&icmp_rt->icmp, sizeof(struct icmphdr));
}
可以看出,协议族一个一个互不影响,独立实现
完整代码
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <sys/poll.h>
#include <arpa/inet.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#pragma pack(1) // 1-byte packing for every struct below, so they overlay raw frames without padding
#define ETH_ALEN 6 // size in bytes of the MAC address arrays
#define PROTO_IP 0x0800 // h_proto value (after ntohs) that main() treats as an IP frame
#define PROTO_ARP 0x0806 // h_proto value (after ntohs) that main() treats as an ARP frame
#define PROTO_UDP 17 // ip.protocol value for UDP
#define PROTO_ICMP 1 // ip.protocol value for ICMP
#define PROTO_IGMP 2 // ip.protocol value for IGMP (recognised but ignored by main())
/* Ethernet header: 14 bytes when packed. */
struct ethhdr {
unsigned char h_dest[ETH_ALEN]; // destination MAC address
unsigned char h_source[ETH_ALEN]; // source MAC address
unsigned short h_proto; // payload protocol in network byte order; compare via ntohs()
};
/* IP header. Multi-byte fields are read with ntohs() where the code needs host order. */
struct iphdr {
unsigned char version; // NOTE(review): a single byte here, whereas the tutorial struct splits it into version/header-length bit-fields
unsigned char tos;
unsigned short tot_len; // printed via ntohs() in main()
unsigned short id;
unsigned short flag_off;
unsigned char ttl;
unsigned char protocol; // compared against PROTO_UDP / PROTO_ICMP / PROTO_IGMP
unsigned short check;
unsigned int saddr; // source IP address
unsigned int daddr; // destination IP address
};
/* UDP header: 8 bytes. */
struct udphdr {
unsigned short source; // source port
unsigned short dest; // destination port
unsigned short len; // read with ntohs(); main() subtracts 8 to get the payload size
unsigned short check;
};
struct udppkt {
struct ethhdr eh;
struct iphdr ip;
struct udphdr udp;
unsigned char body[0];
};
/* ARP header. */
struct arphdr {
unsigned short h_type;
unsigned short h_proto;
unsigned char h_addrlen; // hardware address length; echo_arp_pkt() sets 6
unsigned char protolen; // protocol address length; echo_arp_pkt() sets 4
unsigned short oper; // operation code in network byte order; echo_arp_pkt() sets htons(2)
unsigned char smac[ETH_ALEN]; // sender MAC address
unsigned int sip; // sender IP address
unsigned char dmac[ETH_ALEN]; // target MAC address
unsigned int dip; // target IP address; main() compares it with the local address
};
/* A complete ARP frame: Ethernet header followed by the ARP header. */
struct arppkt {
struct ethhdr eh;
struct arphdr arp;
};
/* ICMP message as handled here: 8 header bytes plus a fixed 32-byte data area. */
struct icmphdr {
unsigned char type; // main() replies when this is 0x08; echo_icmp_pkt() sets 0 in the reply
unsigned char code;
unsigned short check; // checksum computed by in_cksum() over the whole struct with this field zeroed
unsigned short identifier;
unsigned short seq;
unsigned char data[32]; // fixed-size payload area
};
/* A complete ICMP frame: Ethernet header, IP header, then the ICMP message. */
struct icmppkt {
struct ethhdr eh;
struct iphdr ip;
struct icmphdr icmp;
};
/* Print the ETH_ALEN bytes at `mac` to stdout as colon-separated two-digit hex. */
void print_mac(unsigned char *mac) {
    int pos = 0;

    /* Every byte except the last is followed by a colon. */
    while (pos < ETH_ALEN - 1) {
        printf("%02x:", mac[pos]);
        ++pos;
    }
    printf("%02x", mac[pos]);
}
/* Print the four bytes at `ip` to stdout in dotted-decimal form. */
void print_ip(unsigned char *ip) {
    unsigned char *octet = ip;
    unsigned char *last = ip + 3;

    /* The first three octets are each followed by a dot. */
    while (octet < last) {
        printf("%d.", *octet);
        ++octet;
    }
    printf("%d", *octet);
}
void print_arp(struct arppkt *arp) {
print_mac(arp->eh.h_dest);
printf(" ");
print_mac(arp->eh.h_source);
printf(" ");
printf("0x%04x ", ntohs(arp->eh.h_proto));
printf(" ");
}
/*
 * Parse a textual MAC address ("00:50:56:33:1c:ca") into six raw bytes.
 *
 * mac: output buffer of at least 6 bytes.
 * str: NUL-terminated text; hex digits of either case, groups separated by ':'.
 *
 * Parsing stops at the end of the string or at the first character that is
 * neither a hex digit nor ':'; the group accumulated so far is still stored.
 *
 * Returns 0 on success, or -1 if an argument is NULL or the text contains more
 * than six groups. In the latter case the first six octets are stored and
 * nothing is written past mac[5].
 */
int str2mac(char *mac, char *str) {
    enum { MAC_OCTETS = 6 };
    unsigned char value = 0x0;
    int i = 0;
    char *p;

    if (!mac || !str) {
        return -1;
    }

    /* Test the character, not the pointer, for the terminator. */
    for (p = str; *p != '\0'; p++) {
        unsigned char temp = (unsigned char)*p;

        if (temp == ':') {
            mac[i] = (char)value;
            if (i == MAC_OCTETS - 1) {
                return -1; /* a seventh group would overflow the buffer */
            }
            i++;
            value = 0x0;
            continue;
        }

        if (temp >= '0' && temp <= '9') {
            temp -= '0';
        } else if (temp >= 'a' && temp <= 'f') {
            temp = (unsigned char)(temp - 'a' + 10);
        } else if (temp >= 'A' && temp <= 'F') {
            temp = (unsigned char)(temp - 'A' + 10);
        } else {
            break;
        }

        /* Shift in the next hex digit; only the low 8 bits are kept. */
        value = (unsigned char)((value << 4) | temp);
    }

    mac[i] = (char)value;
    return 0;
}
/*
 * Build an ARP reply for the request `arp` into `arp_rt`.
 * The reply starts as a copy of the request; the requester becomes the target,
 * and `hmac` (text form "xx:xx:xx:xx:xx:xx") is parsed into the sender MAC of
 * both the Ethernet and the ARP header.
 */
void echo_arp_pkt(struct arppkt *arp, struct arppkt *arp_rt, char *hmac) {
    memcpy(arp_rt, arp, sizeof(*arp_rt));

    /* Ethernet header: back to the requester, from our own MAC. */
    memcpy(arp_rt->eh.h_dest, arp->eh.h_source, ETH_ALEN);
    str2mac(arp_rt->eh.h_source, hmac);
    arp_rt->eh.h_proto = arp->eh.h_proto;

    /* ARP header: fixed address sizes and the reply opcode. */
    arp_rt->arp.h_addrlen = 6;
    arp_rt->arp.protolen = 4;
    arp_rt->arp.oper = htons(2);

    /* Sender is the address that was asked for ... */
    str2mac(arp_rt->arp.smac, hmac);
    arp_rt->arp.sip = arp->arp.dip;

    /* ... and the target is whoever asked. */
    memcpy(arp_rt->arp.dmac, arp->arp.smac, ETH_ALEN);
    arp_rt->arp.dip = arp->arp.sip;
}
/*
 * Build the headers of a UDP echo reply for `udp` into `udp_rt`.
 * Copies sizeof(struct udppkt) bytes (headers only, no payload) and swaps the
 * source/destination MAC addresses, IP addresses and ports.
 */
void echo_udp_pkt(struct udppkt *udp, struct udppkt *udp_rt) {
    memcpy(udp_rt, udp, sizeof(*udp_rt));

    /* Transport layer: swap ports. */
    udp_rt->udp.dest = udp->udp.source;
    udp_rt->udp.source = udp->udp.dest;

    /* Network layer: swap IP addresses. */
    udp_rt->ip.daddr = udp->ip.saddr;
    udp_rt->ip.saddr = udp->ip.daddr;

    /* Link layer: swap MAC addresses. */
    memcpy(udp_rt->eh.h_source, udp->eh.h_dest, ETH_ALEN);
    memcpy(udp_rt->eh.h_dest, udp->eh.h_source, ETH_ALEN);
}
/*
 * 16-bit one's-complement checksum over `len` bytes starting at `addr`.
 *
 * Words are summed in host memory order, a trailing odd byte is added as the
 * first byte of a zero-padded word, carries are folded back into the low
 * 16 bits, and the complement of the result is returned.
 *
 * The accumulator is unsigned so that large buffers cannot trigger signed
 * overflow, and only standard types are used (no `u_char`).
 */
unsigned short in_cksum(unsigned short *addr, int len)
{
    unsigned long sum = 0;
    unsigned short *w = addr;
    int nleft = len;

    while (nleft > 1) {
        sum += *w++;
        nleft -= 2;
    }

    /* Odd trailing byte: place it in the first byte of an otherwise zero word. */
    if (nleft == 1) {
        unsigned short last = 0;
        *(unsigned char *)&last = *(unsigned char *)w;
        sum += last;
    }

    /* Fold every carry above bit 15 back in until none remain. */
    while (sum >> 16) {
        sum = (sum >> 16) + (sum & 0xffff);
    }

    return (unsigned short)~sum;
}
/*
 * Build an ICMP echo reply for the request `icmp` into `icmp_rt`.
 * The reply is a copy of the request with type/code set to 0, IP and MAC
 * source/destination swapped, and the ICMP checksum recomputed over
 * sizeof(struct icmphdr) bytes.
 */
void echo_icmp_pkt(struct icmppkt *icmp, struct icmppkt *icmp_rt) {
    memcpy(icmp_rt, icmp, sizeof(*icmp_rt));

    /* Link layer: swap MAC addresses. */
    memcpy(icmp_rt->eh.h_source, icmp->eh.h_dest, ETH_ALEN);
    memcpy(icmp_rt->eh.h_dest, icmp->eh.h_source, ETH_ALEN);

    /* Network layer: swap IP addresses. */
    icmp_rt->ip.daddr = icmp->ip.saddr;
    icmp_rt->ip.saddr = icmp->ip.daddr;

    /* ICMP: type 0 / code 0, checksum computed with the field zeroed first. */
    icmp_rt->icmp.type = 0x0;
    icmp_rt->icmp.code = 0x0;
    icmp_rt->icmp.check = 0x0;
    icmp_rt->icmp.check = in_cksum((unsigned short *)&icmp_rt->icmp, sizeof(struct icmphdr));
}
int main() {
struct ethhdr *eh;
struct pollfd pfd = {0};
struct nm_pkthdr h;
unsigned char *stream = NULL;
struct nm_desc *nmr = nm_open("netmap:eth0", NULL, 0, NULL);
if (nmr == NULL) {
return -1;
}
pfd.fd = nmr->fd;
pfd.events = POLLIN;
while (1) {
int ret = poll(&pfd, 1, -1);
if (ret < 0) continue;
if (pfd.revents & POLLIN) {
stream = nm_nextpkt(nmr, &h);
eh = (struct ethhdr*)stream;
if (ntohs(eh->h_proto) == PROTO_IP) {
struct udppkt *udp = (struct udppkt*)stream;
if (udp->ip.protocol == PROTO_UDP) {
struct in_addr addr;
addr.s_addr = udp->ip.saddr;
int udp_length = ntohs(udp->udp.len);
printf("%s:%d:length:%d, ip_len:%d --> ", inet_ntoa(addr), udp->udp.source,
udp_length, ntohs(udp->ip.tot_len));
udp->body[udp_length-8] = '\0';
printf("udp --> %s\n", udp->body);
#if 1
struct udppkt udp_rt;
echo_udp_pkt(udp, &udp_rt);
nm_inject(nmr, &udp_rt, sizeof(struct udppkt));
#endif
} else if (udp->ip.protocol == PROTO_ICMP) {
struct icmppkt *icmp = (struct icmppkt*)stream;
printf("icmp ---------- --> %d, %x\n", icmp->icmp.type, icmp->icmp.check);
if (icmp->icmp.type == 0x08) {
struct icmppkt icmp_rt = {0};
echo_icmp_pkt(icmp, &icmp_rt);
//printf("icmp check %x\n", icmp_rt.icmp.check);
nm_inject(nmr, &icmp_rt, sizeof(struct icmppkt));
}
} else if (udp->ip.protocol == PROTO_IGMP) {
} else {
printf("other ip packet");
}
} else if (ntohs(eh->h_proto) == PROTO_ARP) {
struct arppkt *arp = (struct arppkt *)stream;
struct arppkt arp_rt;
if (arp->arp.dip == inet_addr("192.168.0.123")) {
echo_arp_pkt(arp, &arp_rt, "00:50:56:33:1c:ca");
nm_inject(nmr, &arp_rt, sizeof(struct arppkt));
}
}
}
}
}