int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
struct iphdr *iph;
int raw = 0;
int ptr;
struct net_device *dev;
struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
__be16 not_last_frag;
/* 取得路由表 */
struct rtable *rt = skb->rtable;
int err = 0;
/* 网络设备 */
dev = rt->u.dst.dev;
/*
* Point into the IP datagram header.
*/
/* 取得ip头 */
iph = ip_hdr(skb);
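/*
 * Check the DF (Don't Fragment) bit. If DF is set (and local_df is not),
 * the packet must not be fragmented: an ICMP "fragmentation needed"
 * message is sent back to the source host and the packet is dropped.
 * This check mainly matters for forwarded packets.
 */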
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
return -EMSGSIZE;
}
/*
* Setup starting values.
*/
/* 得到ip头的长度 */
hlen = iph->ihl * 4;
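/*
 * Get the MTU. Note that hlen (the IP header length) is subtracted from it,
 * so mtu here is the space left for payload data in each fragment.
 */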
mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
* one, it is not prohibited. In this case fall back to copying.
*
* LATER: this step can be merged to real generation of fragments,
* we can switch to copy when see the first bad fragment.
*/
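/*
 * If layer 4 has already split the data into pieces, those pieces are
 * chained on the skb's frag_list. So first check whether frag_list is
 * empty; if it is empty, slow-path fragmentation is used.
 */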
if (skb_shinfo(skb)->frag_list) {
struct sk_buff *frag;
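/*
 * Length of the first packet. After sk_write_queue has been flushed,
 * every pre-cut packet except the first one is appended to frag_list;
 * what is needed here is the length of the first packet (i.e. this
 * sk_buff itself).
 */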
int first_len = skb_pagelen(skb);
int truesizes = 0;
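/*
 * The checks that follow are meant to establish whether fast-path
 * fragmentation is possible. Fragments must not be shared, because the
 * fast path gives each fragment its own IP header in place (without
 * copying the fragment), so a shared fragment is unacceptable there.
 * On the slow path sharing does not matter, because every fragment is
 * copied into a newly allocated buffer.
 * NOTE(review): those fast-path checks do not appear in the code that
 * follows this comment here - confirm against the complete function.
 */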
slow_path:
/* 分片的数据剩余长度 */
left = skb->len - hlen; /* Space per frame */
/* 而ptr就是分片开始的数据指针 */
ptr = raw + hlen; /* Where to start from */
/* for bridged IP traffic encapsulated inside f.e. a vlan header,
* we need to make room for the encapsulating header
*/
/* 处理桥接、VLAN、PPPOE相关MTU */
pad = nf_bridge_pad(skb);
ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
mtu -= pad;
/*
* Keep copying data until we run out.
*/
/* 开始为循环处理,每一个分片创建一个skb buffer */
while (left > 0) {
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
/* 如果len大于mtu,设置当前的将要分片的数据大小为mtu */
if (len > mtu)
len = mtu;
/* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
/* Round the fragment length down to a multiple of 8 bytes */
if (len left) {
len &= ~7;
}
/*
* Allocate buffer.
*/
/* malloc一个新的buff,它的大小包括ip payload,ip head,以及L2 head */
if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
err = -ENOMEM;
goto fail;
}
/*
* Set up data on packet
*/
/* 调用ip_copy_metadata复制一些相同的值的域 */
ip_copy_metadata(skb2, skb);
/* 保留L2 header空间 */
skb_reserve(skb2, ll_rs);
/* Make room for the IP header (plus the DDoS header, per the original note) and the IP payload */
skb_put(skb2, len + hlen);
skb_reset_network_header(skb2);
/* L4 header指针为ip header + ddos header数据偏移位置,用于复制原始payload */
skb2->transport_header = skb2->network_header + hlen;
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
/* 将每一个分片的ip包都关联到源包的socket */
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
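/*
 * Copy the packet header into the new buffer.
 * NOTE(review): no header-copy statement follows this comment in the
 * code shown here; only the payload is copied below - confirm that the
 * IP header of the new fragment is filled in before it is modified.
 */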
/*
* Copy a block of the IP datagram.
*/
/* 拷贝ip payload数据 */
if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
BUG();
/* 分片的数据剩余长度 */
left -= len;
/*
* Fill in the new header fields.
*/
/* 填充相应的ip头 */
iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST (otherwise,
* options are already fixed) and make it ONCE
* on the initial skb, so that all the following fragments
* will inherit fixed options.
*/
/* 第一个包,因此进行ip_option处理 */
if (offset == 0)
ip_options_fragment(skb);
/*
* Added AC : If we are fragmenting a fragment that's not the
* last fragment then keep MF on each bit
*/
/* 不是最后一个包,因此设置mf位 */
if (left > 0 || not_last_frag)
iph->frag_off |= htons(IP_MF);
/* 移动数据指针以及更改数据偏移 */
ptr += len;
offset += len;
/*
* Put this fragment into the sending queue.
*/
/* 增加ddos header 长度 */
// hlen += DDOS_HDR_LEN;