- 论坛徽章:
- 0
|
目前项目需要处理gtpu的数据,将修改原始报文的(src+dst)ip,同时修改隧道的teid值,现在数据都走通了,不过上压力的时候系统就oops了,悲剧的事情始终逃都逃不掉。望牛人给予帮助。
实现方案是在3.0.85的内核上使用netfilter在NF_INET_LOCAL_IN添加钩子函数,匹配udp报文后钩子函数最终会调用data_target_tran,data_target_tran将分析端口是不是GTPU端口(2152),是的话进入gtpu_handle处理,gtpu_handle继续根据ip地址和端口号查找hash转换表rab_get(),查到后根据转换表的记录修改skb后将skb发送出去data_send(),data_route_packet()。
整个过程和iptables的nat类似,不过转换表的记录由内部程序管理。
主要的代码如下:
static inline void data_add_udph(struct sk_buff *skb,
struct iphdr* orig_iph,
const RabTabInfo *pinfo)
{
int udph_len= sizeof(struct udphdr);
struct udphdr* pudph;
//add udp header
pudph = (struct udphdr*)skb_push(skb,udph_len);
pudph->source = htons(pinfo->LocalPort);
pudph->dest = htons(pinfo->PeerPort);
pudph->len = htons(skb->len);
pudph->check = 0;
pudph->check = csum_tcpudp_magic(pinfo->LocalIP,
pinfo->PeerIP,
skb->len,
IPPROTO_UDP,
csum_partial((char*)pudph,skb->len,0));
skb_set_transport_header(skb, 0);
}
static inline void data_add_iph(struct sk_buff *skb,
struct iphdr* orig_iph,
const RabTabInfo *pinfo)
{
uint32 iph_len = sizeof(struct iphdr);
uint32 payload_len;
struct iphdr* iph;
payload_len = skb->len + iph_len;
iph = (struct iphdr*)skb_push(skb,iph_len);
iph->ihl = 5;
iph->version = 4;
iph->tos = orig_iph->tos;
iph->tot_len = htons(payload_len);
iph->id = atomic_add_return(1, &g_ipID);
iph->frag_off = orig_iph->frag_off;
iph->ttl = orig_iph->ttl-1;
iph->protocol = IPPROTO_UDP;
iph->check = 0;
iph->saddr = pinfo->LocalIP;
iph->daddr = pinfo->PeerIP;
iph->check = ip_fast_csum((unsigned char *)iph,iph->ihl);
skb_set_network_header(skb, 0);
}
static bool data_route_packet(struct sk_buff *skb)
{
int err = 0;
struct rtable *rt = NULL;
struct iphdr *iph = ip_hdr(skb);
struct flowi4 fl;
memset(&fl, 0, sizeof(fl));
fl.daddr = iph->daddr;
fl.saddr = iph->saddr;
fl.flowi4_scope = RT_SCOPE_UNIVERSE;
fl.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl.flowi4_proto = IPPROTO_UDP;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl));
rt = ip_route_output_key(&init_net, &fl);
if (IS_ERR(rt)) return DATA_FAILURE;
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
skb->dev = skb_dst(skb)->dev;
if(__LINK_STATE_LINKWATCH_PENDING != skb->dev->state) return DATA_FAILURE;
skb->protocol = htons(ETH_P_IP);
err = dst_output(skb);
if(err == 0)
return DATA_SUCCESS;
else
return DATA_FAILURE;
}
static inline int data_send(struct sk_buff *skb,struct iphdr* orig_iph,const RabTabInfo *pinfo)
{
//add udp header
data_add_udph(skb,orig_iph,pinfo);
//add ip header
data_add_iph(skb,orig_iph,pinfo);
if(unlikely(skb->tail>skb->end || skb->data<skb->head))
{
log_error("skb error: len:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
skb->len,skb->head, skb->data,(unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
return 0;
}
if(data_route_packet(skb) == DATA_SUCCESS)
return skb->len;
else
return 0;
}
static inline int gtpu_handle(struct sk_buff *gtpu_skb,struct iphdr* orig_iph,
RabTabInfo *pinfo, int packet_from)
{
struct gtpuhdr *pgtpuh;
RabTabInfo *send_info;
if(gtpu_skb->len < sizeof(struct gtpuhdr)) return 0;
pgtpuh = (struct gtpuhdr*)gtpu_skb->data;
if(0 == pgtpuh->tunid)
{
return 0;
}
else
{
RabTabKey key;
RabTabItem item;
key = rab_make_key(pinfo->LocalIP,pinfo->LocalPort);
if(!rab_get(RAB_TAB_GTPU,&key,&item))
{
GTPU_STAT_INC_MATCH_FAIL();
return 0;
}
//判断发往iuh口还是iu口
if(ADDR_TYPE_IUH_PS == packet_from)
send_info = &item.CnSide;
else
send_info= &item.AuSide;
pgtpuh->tunid = htonl(send_info->PeerPort);
}
send_info->LocalPort = GTPU_PORT;
send_info->PeerPort = GTPU_PORT;
return data_send(gtpu_skb,orig_iph,send_info);
}
static inline void data_stat(int pktFrom,int rxLen,int txLen)
{
bool isSendOK = (txLen>0);
if(ADDR_TYPE_IUH_PS == pktFrom)
{
GTPU_STAT_INC_RX_PKT_FROM_AU();
GTPU_STAT_INC_RX_BYTE_FROM_AU(rxLen);
if(!isSendOK)
{
GTPU_STAT_INC_TX_PKT_FAIL_TO_CN();
}
else
{
GTPU_STAT_INC_TX_PKT_TO_CN();
GTPU_STAT_INC_TX_BYTE_TO_CN(txLen);
}
}
else if(ADDR_TYPE_CN_PS == pktFrom)
{
GTPU_STAT_INC_RX_PKT_FROM_CN();
GTPU_STAT_INC_RX_BYTE_FROM_CN(rxLen);
if(!isSendOK)
{
GTPU_STAT_INC_TX_PKT_FAIL_TO_AU();
}
else
{
GTPU_STAT_INC_TX_PKT_TO_AU();
GTPU_STAT_INC_TX_BYTE_TO_AU(txLen);
}
}
else if(ADDR_TYPE_IUH_CS == pktFrom)
{
RTP_STAT_INC_RX_PKT_FROM_AU();
RTP_STAT_INC_RX_BYTE_FROM_AU(rxLen);
if(!isSendOK)
{
RTP_STAT_INC_TX_PKT_FAIL_TO_CN();
}
else
{
RTP_STAT_INC_TX_PKT_TO_CN();
RTP_STAT_INC_TX_BYTE_TO_CN(txLen);
}
}
else if(ADDR_TYPE_CN_CS == pktFrom)
{
RTP_STAT_INC_RX_PKT_FROM_CN();
RTP_STAT_INC_RX_BYTE_FROM_CN(rxLen);
if(!isSendOK)
{
RTP_STAT_INC_TX_PKT_FAIL_TO_AU();
}
else
{
RTP_STAT_INC_TX_PKT_TO_AU();
RTP_STAT_INC_TX_BYTE_TO_AU(txLen);
}
}
}
static unsigned int data_target_tran(struct sk_buff *skb, const struct data_target_info *tgi)
{
int packet_from = ADDR_TYPE_UNKNOW;
int iph_len;
int udph_len;
int recv_len;
int send_len;
uint32 src_port;
uint32 dest_port;
RabTabInfo info;
uint8 handle = 0;
struct in_addr ip;
struct iphdr orig_iph;
struct iphdr *piph;
struct udphdr *pudph;
piph = (struct iphdr *)ip_hdr(skb);
iph_len = piph->ihl*4;
udph_len = sizeof(struct udphdr);
//len check
if (unlikely(!pskb_may_pull(skb, iph_len + udph_len)))
{
log_warning("DATA: len check err, len = %u\n", skb->len);
return NF_ACCEPT;
}
ip.s_addr = piph->daddr;
pudph = (struct udphdr*)skb_get_pos(skb,iph_len);
src_port = ntohs(pudph->source);
dest_port = ntohs(pudph->dest);
if(GTPU_PORT == src_port && GTPU_PORT == dest_port)
{
struct gtpuhdr *pgtpuh;
if (unlikely(!pskb_may_pull(skb, iph_len + udph_len + sizeof(struct gtpuhdr))))
{
log_warning("DATA: len check err2, len = %u\n", skb->len);
return NF_ACCEPT;
}
pgtpuh = (struct gtpuhdr*)skb_get_pos(skb,iph_len+udph_len);
if(unlikely(NULL == pgtpuh))
{
printk("The packet is wrong.length %d is too short.",skb->len);
return NF_ACCEPT;
}
handle = 1;
src_port = dest_port = ntohl(pgtpuh->tunid);
packet_from = IPConfig_ChkAddrType(ip,2);
}
else if(UDP_TEST_PORT_MIN<dest_port && UDP_TEST_PORT_MAX>dest_port)//udp ping
{
handle = 2;
packet_from = IPConfig_ChkAddrType(ip,3);
}
else if(rtp_is_user_pkt(dest_port))//deal with rtp
{
handle = 3;
packet_from = IPConfig_ChkAddrType(ip,1);
}
if(unlikely(ADDR_TYPE_UNKNOW == packet_from)) return NF_ACCEPT;//not user plan ip
info.LocalPort = dest_port;
info.PeerPort = src_port;
info.LocalIP = piph->daddr;
info.PeerIP = piph->saddr;
recv_len = skb->len;
//skb_info(skb,"Recv");
//remove ip header
skb_pull_to(skb, &orig_iph, iph_len);
//remove udp header
skb_pull(skb,udph_len);
if(1 == handle)
send_len = gtpu_handle(skb, &orig_iph, &info, packet_from);
else if(2 == handle)//udp ping
send_len = data_send(skb, &orig_iph, &info);
else
return NF_DROP;
//kpi stat
if(2 != handle) data_stat(packet_from, recv_len, send_len);
if(0 < send_len)
return NF_STOLEN;
else
return NF_DROP;
}
oops如下:
[<c06defaa>]? dst_input+0x12/0x15
[<c06df4f5>]? ip_rcv+0x1f8/0x1f8
[<c074a713>]? error_code+0x67/0x6c
[<c043f96c>]? spin_unlock+0x8/0xa
[<c04300d8>]? pirnt_cfs_group_stats+0xa7/0x585
[<c06aeda6>]? skb_over_panic+0x3f/0x46
[<f85901e2>]? tg3_rx+0x175/0x3b7 [tg3]
[<c06b030c>]? skb_put+0x3a/0x40
[<f85901e2>]? tg3_rx+0x175/0x3b7 [tg3]
[<f859048f>]? tg3_poll_work+0x6b/0x132 [tg3]
[<f8590643>]? tg3_poll_msix+0x2a/0xa5 [tg3]
[<c06bbd45>]? net_rx_action+0x7b/0x11b
[<c04447b7>]? __do_softirq+0x8b/0x110
[<c044472c>]? local_bh_enable+0x12/0x12
<IRQ>[<c8444669>]? invoke_softirq+0x11/0x31
[<c0445091>]? irq_exit+0x2e/0x6c
[<c040383d>]? do_IRQ+0x72/0x88
[<c074f1f0>]? common_interrupt+0x30/0x38
[<c069538b>]? test_ti_thread_flag+0x7/0xb
[<c06953a3>]? need_resched+0x14/0x1e
[<c06957f8>]? poll_idle+0x22/0x4b
[<c0695993>]? cupidle_idle_call+0x8e/0xd7
[<c0401f13>]? cpu_idle+0x8f/0xb8
[<c074518f>]? start_secondary+0xe2/0xe7
由于电脑串口的问题,只能通过屏幕看到oops的部分信息,但是可以肯定的是oops由skb_over_panic导致,同时导致的模块是tg3的网卡驱动,现在无法确定的是自己的钩子函数处理流程有哪个地方弄错了,导致对tg3的驱动有影响,希望大牛们帮忙分析下上面的几个函数是否有问题!
其他相关信息:
在data_route_packet()中如果不加
if(__LINK_STATE_LINKWATCH_PENDING != skb->dev->state) return DATA_FAILURE;
的话,对网卡插拔网线的话系统一样会挂,不过是挂在应用程序收udp报的过程中。我查过很多内核网络协议栈的函数,发现没有哪个地方有对网卡状态有判断的,怀疑是代码对资源没有进行保护,但又不知道哪里出错了。挂的oops信息如下:
Oops: 0002 [#1] PREEMPT SMP
Pid: 6403, comm: ntf Not tainted 3.0.85-1.el5PAE #3 RadiSys Corp. ATCA-4500/ATCA-4500
EIP: 0060:[<c06b26ec>] EFLAGS: 00010046 CPU: 0
EIP is at __skb_unlink+0x18/0x1f
EAX: f2b99cc0 EBX: f2b99cc0 ECX: 00000000 EDX: 00000000
ESI: 00000282 EDI: f2ebc094 EBP: edc5dd08 ESP: edc5dd08
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process ntf (pid: 6403, ti=edc5c000 task=ee3e3a70 task.ti=edc5c000)
Stack:
edc5dd2c c06b334e edc5dd4c 00000000 f2ebc040 7fffffff 00000000 f2ebc040
c08d1b40 edc5dd5c c0700e59 edc5dd48 edc5deac edc5ded8 00000002 00000030
00000030 00000000 00000000 f2ebc040 c08d1b40 edc5dd8c c070613d 00001000
Call Trace:
[<c06b334e>] __skb_recv_datagram+0x81/0xc7
[<c0700e59>] udp_recvmsg+0x69/0x1e3
[<c070613d>] inet_recvmsg+0x47/0x63
[<c06a8c2c>] __sock_recvmsg_nosec+0x45/0x4d
[<c06a91c4>] __sock_recvmsg+0x35/0x3f
[<c06a966e>] sock_recvmsg+0x9e/0xb5
[<c046b95e>] ? __raw_spin_unlock_irq+0x1f/0x2a
[<c0749e11>] ? _raw_spin_unlock_irq+0xd/0xf
[<c04348fa>] ? finish_task_switch+0x56/0x71
[<c04f6208>] ? rcu_read_unlock+0x8/0xa
[<c04f6a68>] ? fget_light+0x7b/0x82
[<c06a9b88>] sys_recvfrom+0xd7/0x140
[<c042f6cd>] ? ttwu_stat+0x8d/0xd4
[<c041ae37>] ? apic_write+0xf/0x11
[<c041b149>] ? lapic_next_event+0x14/0x18
[<c04650c5>] ? clockevents_program_event+0xc4/0xd3
[<c046657b>] ? tick_dev_program_event+0x2d/0x90
[<c05a3848>] ? __copy_from_user_ll+0x16/0xd2
[<c06ab15a>] sys_socketcall+0x118/0x1bd
[<c074ebdf>] sysenter_do_call+0x12/0x28
在线等! |
|