quhr 发表于 2011-12-21 08:43

Linux内核中PF_KEY协议族的实现(1)

本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。<br>msn: <a href="mailto:yfydz_no1@hotmail.com" target="_blank">yfydz_no1@hotmail.com</a><br>来源:<a href="http://yfydz.cublog.cn/" target="_blank">http://yfydz.cublog.cn</a>
<div><br>1. 前言</div>
<div><br>在Linux2.6内核中自带了PF_KEY协议族的实现,这样就不用像2.4那样打补丁来实现了。内核中PF_KEY实现要完成的功能是维护内核的安全联盟(SA)和安全策略(SP)数据库, 以及提供和用户空间的接口。</div>
<div><br>以下内核代码版本为2.6.19.2,
PF_KEY相关代码在net/key/目录下,定义了内核中PF_KEY与用户空间的接口,这个接口是RFC定义的,因此各种实现都基本类似;但具体关
于SA和SP的内部的实现和管理则是与实现相关的,各种实现各自不同,在linux内核是使用xfrm库来实现的,代码在net/xfrm/目录下定义。</div>
<div>&nbsp;</div>
<div>2. 数据结构</div>
<div>关于SA和SP的数据结构已经在RFC2367中定义, 头文件为include/linux/pfkeyv2.h,
这些是用户空间和内核空间共享的,只是作为接口的数据结构;而内核中具体使用的数据结构为xfrm定义的结构,在include/net/xfrm.h中
定义。</div>
<div><br>2.1 PF_KEY类型的sock</div>
<div><br>struct pfkey_sock {<br>&nbsp;/* struct sock must be the first member of struct pfkey_sock */<br>&nbsp;struct sock&nbsp;sk;<br>// 比普通sock添加两个参数<br>// 是否进行登记<br>&nbsp;int&nbsp;&nbsp;registered;<br>// 是否是混杂模式<br>&nbsp;int&nbsp;&nbsp;promisc;<br>};</div>
<div><br>2.2 状态(SA)</div>
<div><br>xfrm状态用来描述SA在内核中的具体实现:</div>
<div>struct xfrm_state<br>{<br>&nbsp;/* Note: bydst is re-used during gc */<br>// 每个状态结构挂接到三个HASH链表中<br>&nbsp;struct hlist_node&nbsp;bydst; // 按目的地址HASH<br>&nbsp;struct hlist_node&nbsp;bysrc; // 按源地址HASH<br>&nbsp;struct hlist_node&nbsp;byspi; // 按SPI值HASH</div>
<div>&nbsp;atomic_t&nbsp;&nbsp;refcnt; // 所有使用计数<br>&nbsp;spinlock_t&nbsp;&nbsp;lock;&nbsp;&nbsp; // 状态锁</div>
<div>&nbsp;struct xfrm_id&nbsp;&nbsp;id; // ID<br>&nbsp;struct xfrm_selector&nbsp;sel; // 状态选择子</div>
<div>&nbsp;u32&nbsp;&nbsp;&nbsp;genid;</div>
<div>&nbsp;/* Key manger bits */<br>&nbsp;struct {<br>&nbsp;&nbsp;u8&nbsp;&nbsp;state;<br>&nbsp;&nbsp;u8&nbsp;&nbsp;dying;<br>&nbsp;&nbsp;u32&nbsp;&nbsp;seq;<br>&nbsp;} km; </div>
<div>&nbsp;/* Parameters of this state. */<br>&nbsp;struct {<br>&nbsp;&nbsp;u32&nbsp;&nbsp;reqid;<br>&nbsp;&nbsp;u8&nbsp;&nbsp;mode;<br>&nbsp;&nbsp;u8&nbsp;&nbsp;replay_window;<br>&nbsp;&nbsp;u8&nbsp;&nbsp;aalgo, ealgo, calgo;<br>&nbsp;&nbsp;u8&nbsp;&nbsp;flags;<br>&nbsp;&nbsp;u16&nbsp;&nbsp;family;<br>&nbsp;&nbsp;xfrm_address_t&nbsp;saddr;<br>&nbsp;&nbsp;int&nbsp;&nbsp;header_len;<br>&nbsp;&nbsp;int&nbsp;&nbsp;trailer_len;<br>&nbsp;} props;</div>
<div>&nbsp;struct xfrm_lifetime_cfg lft; // 生存时间</div>
<div>&nbsp;/* Data for transformer */<br>&nbsp;struct xfrm_algo&nbsp;*aalg; // hash算法<br>&nbsp;struct xfrm_algo&nbsp;*ealg; // 加密算法<br>&nbsp;struct xfrm_algo&nbsp;*calg; // 压缩算法</div>
<div>&nbsp;/* Data for encapsulator */<br>&nbsp;struct xfrm_encap_tmpl&nbsp;*encap; // NAT-T封装信息</div>
<div>&nbsp;/* Data for care-of address */<br>&nbsp;xfrm_address_t&nbsp;*coaddr;</div>
<div>&nbsp;/* IPComp needs an IPIP tunnel for handling uncompressed packets */<br>&nbsp;struct xfrm_state&nbsp;*tunnel; </div>
<div>&nbsp;/* If a tunnel, number of users + 1 */<br>&nbsp;atomic_t&nbsp;&nbsp;tunnel_users;</div>
<div>&nbsp;/* State for replay detection */<br>&nbsp;struct xfrm_replay_state replay;</div>
<div>&nbsp;/* Replay detection state at the time we sent the last notification */<br>&nbsp;struct xfrm_replay_state preplay;</div>
<div>&nbsp;/* internal flag that only holds state for delayed aevent at the<br>&nbsp; * moment<br>&nbsp;*/<br>&nbsp;u32&nbsp;&nbsp;&nbsp;xflags;</div>
<div>&nbsp;/* Replay detection notification settings */<br>&nbsp;u32&nbsp;&nbsp;&nbsp;replay_maxage;<br>&nbsp;u32&nbsp;&nbsp;&nbsp;replay_maxdiff;</div>
<div>&nbsp;/* Replay detection notification timer */<br>&nbsp;struct timer_list&nbsp;rtimer;</div>
<div>&nbsp;/* Statistics */<br>&nbsp;struct xfrm_stats&nbsp;stats;</div>
<div>&nbsp;struct xfrm_lifetime_cur curlft;<br>&nbsp;struct timer_list&nbsp;timer;</div>
<div>&nbsp;/* Last used time */<br>&nbsp;u64&nbsp;&nbsp;&nbsp;lastused;</div>
<div>&nbsp;/* Reference to data common to all the instances of this<br>&nbsp; * transformer. */<br>&nbsp;struct xfrm_type&nbsp;*type;<br>&nbsp;struct xfrm_mode&nbsp;*mode;</div>
<div>&nbsp;/* Security context */<br>&nbsp;struct xfrm_sec_ctx&nbsp;*security;</div>
<div>&nbsp;/* Private data of this transformer, format is opaque,<br>&nbsp; * interpreted by xfrm_type methods. */<br>&nbsp;void&nbsp;&nbsp;&nbsp;*data;<br>};</div>
<div>&nbsp;</div>
<div>2.3 策略(SP)</div>
<div><br>struct xfrm_policy<br>{<br>&nbsp;struct xfrm_policy&nbsp;*next; // 下一个策略<br>&nbsp;struct hlist_node&nbsp;bydst; // 按目的地址HASH的链表<br>&nbsp;struct hlist_node&nbsp;byidx; // 按索引号HASH的链表</div>
<div>&nbsp;/* This lock only affects elements except for entry. */<br>&nbsp;rwlock_t&nbsp;&nbsp;lock;<br>&nbsp;atomic_t&nbsp;&nbsp;refcnt;<br>&nbsp;struct timer_list&nbsp;timer;</div>
<div>&nbsp;u8&nbsp;&nbsp;&nbsp;type;<br>&nbsp;u32&nbsp;&nbsp;&nbsp;priority;<br>&nbsp;u32&nbsp;&nbsp;&nbsp;index;<br>&nbsp;struct xfrm_selector&nbsp;selector;<br>&nbsp;struct xfrm_lifetime_cfg lft;<br>&nbsp;struct xfrm_lifetime_cur curlft;<br>&nbsp;struct dst_entry&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; *bundles;<br>&nbsp;__u16&nbsp;&nbsp;&nbsp;family;<br>&nbsp;__u8&nbsp;&nbsp;&nbsp;action;<br>&nbsp;__u8&nbsp;&nbsp;&nbsp;flags;<br>&nbsp;__u8&nbsp;&nbsp;&nbsp;dead;<br>&nbsp;__u8&nbsp;&nbsp;&nbsp;xfrm_nr;<br>&nbsp;struct xfrm_sec_ctx&nbsp;*security;<br>&nbsp;struct xfrm_tmpl&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;xfrm_vec[XFRM_MAX_DEPTH];<br>};</div>
<div><br>2.4 事件</div>
<div>struct km_event<br>{<br>&nbsp;union {<br>&nbsp;&nbsp;u32 hard;<br>&nbsp;&nbsp;u32 proto;<br>&nbsp;&nbsp;u32 byid;<br>&nbsp;&nbsp;u32 aevent;<br>&nbsp;&nbsp;u32 type;<br>&nbsp;} data;</div>
<div>&nbsp;u32&nbsp;seq;<br>&nbsp;u32&nbsp;pid;<br>&nbsp;u32&nbsp;event;<br>};</div>
<div><br>3. 初始化</div>
<div>/* net/key/af_key.c */</div>
<div>static int __init ipsec_pfkey_init(void)<br>{<br>// 登记key_proto结构, 该结构定义如下:<br>// static struct proto key_proto = {<br>//&nbsp;.name&nbsp;&nbsp; = "KEY",<br>//&nbsp;.owner&nbsp;&nbsp; = THIS_MODULE,<br>//&nbsp;.obj_size = sizeof(struct pfkey_sock),<br>//};<br>// 最后一个参数为0, 表示不进行slab的分配, 只是简单的将key_proto结构<br>// 挂接到系统的网络协议链表中,这个结构最主要是告知了pfkey sock结构的大小<br>&nbsp;int err = proto_register(&amp;key_proto, 0);</div>
<div>&nbsp;if (err != 0)<br>&nbsp;&nbsp;goto out;</div>
<div>// 登记pfkey协议族的的操作结构<br>&nbsp;err = sock_register(&amp;pfkey_family_ops);<br>&nbsp;if (err != 0)<br>&nbsp;&nbsp;goto out_unregister_key_proto;<br>#ifdef CONFIG_PROC_FS<br>&nbsp;err = -ENOMEM;<br>// 建立只读的pfkey的PROC文件: /proc/net/pfkey<br>&nbsp;if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)<br>&nbsp;&nbsp;goto out_sock_unregister;<br>#endif<br>// 登记通知(notify)处理pfkeyv2_mgr<br>&nbsp;err = xfrm_register_km(&amp;pfkeyv2_mgr);<br>&nbsp;if (err != 0)<br>&nbsp;&nbsp;goto out_remove_proc_entry;<br>out:<br>&nbsp;return err;<br>out_remove_proc_entry:<br>#ifdef CONFIG_PROC_FS<br>&nbsp;remove_proc_entry("net/pfkey", NULL);<br>out_sock_unregister:<br>#endif<br>&nbsp;sock_unregister(PF_KEY);<br>out_unregister_key_proto:<br>&nbsp;proto_unregister(&amp;key_proto);<br>&nbsp;goto out;<br>}</div>
<div><br>4. pfkey套接口操作</div>
<div><br>4.1&nbsp; 建立套接口</div>
<div><br>/* net/key/af_key.c */</div>
<div>// pfkey协议族操作, 在用户程序使用socket打开pfkey类型的socket时调用,<br>// 相应的create函数在__sock_create(net/socket.c)函数中调用:<br>static struct net_proto_family pfkey_family_ops = {<br>&nbsp;.family&nbsp;=&nbsp;PF_KEY,<br>&nbsp;.create&nbsp;=&nbsp;pfkey_create,<br>&nbsp;.owner&nbsp;=&nbsp;THIS_MODULE,<br>};</div>
<div>// 在用户空间每次打开pfkey socket时都会调用此函数: </div>
<div>static int pfkey_create(struct socket *sock, int protocol)<br>{<br>&nbsp;struct sock *sk;<br>&nbsp;int err;</div>
<div>// 建立PFKEY的socket必须有ROOT权限<br>&nbsp;if (!capable(CAP_NET_ADMIN))<br>&nbsp;&nbsp;return -EPERM;<br>// socket类型必须是RAW, 协议为PF_KEY_V2<br>&nbsp;if (sock-&gt;type != SOCK_RAW)<br>&nbsp;&nbsp;return -ESOCKTNOSUPPORT;<br>&nbsp;if (protocol != PF_KEY_V2)<br>&nbsp;&nbsp;return -EPROTONOSUPPORT;</div>
<div>&nbsp;err = -ENOMEM;<br>// 分配sock结构, 并清零<br>&nbsp;sk = sk_alloc(PF_KEY, GFP_KERNEL, &amp;key_proto, 1);<br>&nbsp;if (sk == NULL)<br>&nbsp;&nbsp;goto out;</div>
<div>// PFKEY类型socket的操作<br>&nbsp;sock-&gt;ops = &amp;pfkey_ops;<br>// 初始化socket参数<br>&nbsp;sock_init_data(sock, sk);</div>
<div>// 初始化sock的族类型和释放函数<br>&nbsp;sk-&gt;sk_family = PF_KEY;<br>&nbsp;sk-&gt;sk_destruct = pfkey_sock_destruct;<br>// 增加使用数<br>&nbsp;atomic_inc(&amp;pfkey_socks_nr);</div>
<div>// 将sock挂接到系统的sock链表<br>&nbsp;pfkey_insert(sk);</div>
<div>&nbsp;return 0;<br>out:<br>&nbsp;return err;<br>}</div>
<div>&nbsp;</div>
<div><br>4.2 PF_KEY套接口操作</div>
<div><br>static const struct proto_ops pfkey_ops = {<br>&nbsp;.family&nbsp;&nbsp;=&nbsp;PF_KEY,<br>&nbsp;.owner&nbsp;&nbsp;=&nbsp;THIS_MODULE,<br>&nbsp;/* Operations that make no sense on pfkey sockets. */<br>&nbsp;.bind&nbsp;&nbsp;=&nbsp;sock_no_bind,<br>&nbsp;.connect&nbsp;=&nbsp;sock_no_connect,<br>&nbsp;.socketpair&nbsp;=&nbsp;sock_no_socketpair,<br>&nbsp;.accept&nbsp;&nbsp;=&nbsp;sock_no_accept,<br>&nbsp;.getname&nbsp;=&nbsp;sock_no_getname,<br>&nbsp;.ioctl&nbsp;&nbsp;=&nbsp;sock_no_ioctl,<br>&nbsp;.listen&nbsp;&nbsp;=&nbsp;sock_no_listen,<br>&nbsp;.shutdown&nbsp;=&nbsp;sock_no_shutdown,<br>&nbsp;.setsockopt&nbsp;=&nbsp;sock_no_setsockopt,<br>&nbsp;.getsockopt&nbsp;=&nbsp;sock_no_getsockopt,<br>&nbsp;.mmap&nbsp;&nbsp;=&nbsp;sock_no_mmap,<br>&nbsp;.sendpage&nbsp;=&nbsp;sock_no_sendpage,</div>
<div>&nbsp;/* Now the operations that really occur. */<br>&nbsp;.release&nbsp;=&nbsp;pfkey_release,<br>&nbsp;.poll&nbsp;&nbsp;=&nbsp;datagram_poll,<br>&nbsp;.sendmsg&nbsp;=&nbsp;pfkey_sendmsg,<br>&nbsp;.recvmsg&nbsp;=&nbsp;pfkey_recvmsg,<br>};</div>
<div><br>PF_KEY类型的sock中大多数操作都没有定义, 这是因为PF_KEY的数据都是本机内的内核空间与用户空间的交换, 因此实际和网络相关的操作都不用定义, 所谓发送和接收数据也只是内核与用户空间之间的通信。</div>
<div><br>4.2.1 释放套接口</div>
<div><br>static int pfkey_release(struct socket *sock)<br>{<br>// 从socket到sock结构转换<br>&nbsp;struct sock *sk = sock-&gt;sk;</div>
<div>&nbsp;if (!sk)<br>&nbsp;&nbsp;return 0;<br>// 将sock从系统的sock链表断开<br>&nbsp;pfkey_remove(sk);</div>
<div>// 设置sock状态为DEAD, 清空sock中的socket和sleep指针<br>&nbsp;sock_orphan(sk);</div>
<div>&nbsp;sock-&gt;sk = NULL;<br>// 清除当前数据队列<br>&nbsp;skb_queue_purge(&amp;sk-&gt;sk_write_queue);<br>// 释放sock<br>&nbsp;sock_put(sk);</div>
<div>&nbsp;return 0;<br>}</div>
<div><br>4.2.2 描述符选择</div>
<div><br>使用的是标准的数据报选择函数: datagram_poll</div>
<div><br>4.2.3 发送数据</div>
<div><br>sendmsg由用户空间程序调用, 实际是将数据(PF_KEY消息)从用户空间发送给内核空间处理:</div>
<div>static int pfkey_sendmsg(struct kiocb *kiocb,<br>&nbsp;&nbsp;&nbsp; struct socket *sock, struct msghdr *msg, size_t len)<br>{<br>&nbsp;struct sock *sk = sock-&gt;sk;<br>&nbsp;struct sk_buff *skb = NULL;<br>&nbsp;struct sadb_msg *hdr = NULL;<br>&nbsp;int err;</div>
<div>&nbsp;err = -EOPNOTSUPP;<br>// PF_KEY不支持MSG_OOB标志<br>&nbsp;if (msg-&gt;msg_flags &amp; MSG_OOB)<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;err = -EMSGSIZE;<br>// 一次发送的数据长度不能太大<br>&nbsp;if ((unsigned)len &gt; sk-&gt;sk_sndbuf - 32)<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;err = -ENOBUFS;<br>// 获取一个空闲的skbuff<br>&nbsp;skb = alloc_skb(len, GFP_KERNEL);<br>&nbsp;if (skb == NULL)<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;err = -EFAULT;<br>// 从缓冲区中拷贝数据到skbuff中<br>&nbsp;if (memcpy_fromiovec(skb_put(skb,len), msg-&gt;msg_iov, len))<br>&nbsp;&nbsp;goto out;<br>// 获取SADB数据头的指针<br>&nbsp;hdr = pfkey_get_base_msg(skb, &amp;err);<br>&nbsp;if (!hdr)<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;mutex_lock(&amp;xfrm_cfg_mutex);<br>// 处理PFKEY数据的发送<br>&nbsp;err = pfkey_process(sk, skb, hdr);<br>&nbsp;mutex_unlock(&amp;xfrm_cfg_mutex);</div>
<div>out:<br>&nbsp;if (err &amp;&amp; hdr &amp;&amp; pfkey_error(hdr, err, sk) == 0)<br>&nbsp;&nbsp;err = 0;<br>&nbsp;if (skb)<br>&nbsp;&nbsp;kfree_skb(skb);</div>
<div>&nbsp;return err ? : len;<br>}</div>
<div><br>static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)<br>{<br>&nbsp;void *ext_hdrs[SADB_EXT_MAX];<br>&nbsp;int err;<br>// 向混杂模式的sock发送SA消息<br>&nbsp;pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,<br>&nbsp;&nbsp;&nbsp;BROADCAST_PROMISC_ONLY, NULL);</div>
<div>&nbsp;memset(ext_hdrs, 0, sizeof(ext_hdrs));<br>// 解析SADB消息中的各扩展头, 结果存入ext_hdrs数组<br>&nbsp;err = parse_exthdrs(skb, hdr, ext_hdrs);<br>&nbsp;if (!err) {<br>&nbsp;&nbsp;err = -EOPNOTSUPP;<br>// 根据消息类型调用相关的处理函数进行处理<br>&nbsp;&nbsp;if (pfkey_funcs[hdr-&gt;sadb_msg_type])<br>&nbsp;&nbsp;&nbsp;err = pfkey_funcs[hdr-&gt;sadb_msg_type](sk, skb, hdr, ext_hdrs);<br>&nbsp;}<br>&nbsp;return err;<br>}</div>
<div>4.2.4 接收数据</div>
<div><br>recvmsg由用户空间程序调用, 实际是将内核放入该sock接收队列的数据从内核空间传给用户空间:</div>
<div>static int pfkey_recvmsg(struct kiocb *kiocb,<br>&nbsp;&nbsp;&nbsp; struct socket *sock, struct msghdr *msg, size_t len,<br>&nbsp;&nbsp;&nbsp; int flags)<br>{<br>&nbsp;struct sock *sk = sock-&gt;sk;<br>&nbsp;struct sk_buff *skb;<br>&nbsp;int copied, err;</div>
<div>&nbsp;err = -EINVAL;<br>// 只支持4类标志<br>&nbsp;if (flags &amp; ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;msg-&gt;msg_namelen = 0;<br>// 接收数据包<br>&nbsp;skb = skb_recv_datagram(sk, flags, flags &amp; MSG_DONTWAIT, &amp;err);<br>&nbsp;if (skb == NULL)<br>&nbsp;&nbsp;goto out;</div>
<div>&nbsp;copied = skb-&gt;len;<br>// 接收到的数据超过了接收缓冲区长度, 设置截断标志<br>&nbsp;if (copied &gt; len) {<br>&nbsp;&nbsp;msg-&gt;msg_flags |= MSG_TRUNC;<br>&nbsp;&nbsp;copied = len;<br>&nbsp;}</div>
<div>&nbsp;skb-&gt;h.raw = skb-&gt;data;<br>// 将数据包中信息拷贝到接收缓冲区<br>&nbsp;err = skb_copy_datagram_iovec(skb, 0, msg-&gt;msg_iov, copied);<br>&nbsp;if (err)<br>&nbsp;&nbsp;goto out_free;<br>// 设置时间戳<br>&nbsp;sock_recv_timestamp(msg, sk, skb);</div>
<div>&nbsp;err = (flags &amp; MSG_TRUNC) ? skb-&gt;len : copied;</div>
<div>out_free:<br>&nbsp;skb_free_datagram(sk, skb);<br>out:<br>&nbsp;return err;<br>}</div>
<div>4.2.5 pfkey广播</div>
<div><br>pfkey广播用于将内核的回应信息发送到用户空间, 所有打开了PF_KEY类型socket的用户空间程序都可能收到, 所以用户空间程序在收到消息的时候要判断该消息是否是给自己的, 不是就忽略掉, 这和netlink的广播比较类似。</div>
<div>/* Send SKB to all pfkey sockets matching selected criteria.&nbsp; */<br>#define BROADCAST_ALL&nbsp;&nbsp;0<br>#define BROADCAST_ONE&nbsp;&nbsp;1<br>#define BROADCAST_REGISTERED&nbsp;2<br>#define BROADCAST_PROMISC_ONLY&nbsp;4<br>static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int broadcast_flags, struct sock *one_sk)<br>{<br>&nbsp;struct sock *sk;<br>&nbsp;struct hlist_node *node;<br>&nbsp;struct sk_buff *skb2 = NULL;<br>&nbsp;int err = -ESRCH;</div>
<div>&nbsp;/* XXX Do we need something like netlink_overrun?&nbsp; I think<br>&nbsp; * XXX PF_KEY socket apps will not mind current behavior.<br>&nbsp; */<br>&nbsp;if (!skb)<br>&nbsp;&nbsp;return -ENOMEM;</div>
<div>&nbsp;pfkey_lock_table();<br>// 遍历所有的pfkey sock表, <br>&nbsp;sk_for_each(sk, node, &amp;pfkey_table) {<br>// 获取pfkey sock用于发送消息<br>&nbsp;&nbsp;struct pfkey_sock *pfk = pfkey_sk(sk);<br>&nbsp;&nbsp;int err2;</div>
<div>&nbsp;&nbsp;/* Yes, it means that if you are meant to receive this<br>&nbsp;&nbsp; * pfkey message you receive it twice as promiscuous<br>&nbsp;&nbsp; * socket.<br>&nbsp;&nbsp; */<br>// 该pfkey sock是混杂模式, 先发送一次, 由于后面还会广播发送, 所以设置了混杂模式的pfkey<br>// sock一般情况下会收到两次<br>&nbsp;&nbsp;if (pfk-&gt;promisc)<br>&nbsp;&nbsp;&nbsp;pfkey_broadcast_one(skb, &amp;skb2, allocation, sk);</div>
<div>&nbsp;&nbsp;/* the exact target will be processed later */<br>// 指定了one_sk的话这个one_sk对应的用户程序将最后才收到包, 现在在循环中不发<br>// 以后才发<br>&nbsp;&nbsp;if (sk == one_sk)<br>&nbsp;&nbsp;&nbsp;continue;<br>// 如果不是广播给所有的进程, #define BROADCAST_ALL&nbsp; 0<br>&nbsp;&nbsp;if (broadcast_flags != BROADCAST_ALL) {<br>// 如果只广播给pfkey混杂模式的进程, 跳过, 继续循环<br>&nbsp;&nbsp;&nbsp;if (broadcast_flags &amp; BROADCAST_PROMISC_ONLY)<br>&nbsp;&nbsp;&nbsp;&nbsp;continue;<br>// 如果只广播给登记的进程而该sock没登记, 跳过, 继续循环<br>&nbsp;&nbsp;&nbsp;if ((broadcast_flags &amp; BROADCAST_REGISTERED) &amp;&amp;<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; !pfk-&gt;registered)<br>&nbsp;&nbsp;&nbsp;&nbsp;continue;<br>// 只广播给一个, 和one_sk配合使用, 这样消息就只会发送给one_sk和所有混杂模式的pfkey sock<br>&nbsp;&nbsp;&nbsp;if (broadcast_flags &amp; BROADCAST_ONE)<br>&nbsp;&nbsp;&nbsp;&nbsp;continue;<br>&nbsp;&nbsp;}<br>// 发送给该pfkey sock<br>&nbsp;&nbsp;err2 = pfkey_broadcast_one(skb, &amp;skb2, allocation, sk);</div>
<div>&nbsp;&nbsp;/* Error is cleare after succecful sending to at least one<br>&nbsp;&nbsp; * registered KM */<br>&nbsp;&nbsp;if ((broadcast_flags &amp; BROADCAST_REGISTERED) &amp;&amp; err)<br>&nbsp;&nbsp;&nbsp;err = err2;<br>&nbsp;}<br>&nbsp;pfkey_unlock_table();</div>
<div>// 如果指定one_sk, 再向该pfkey sock发送, 该sock是最后一个收到消息的<br>&nbsp;if (one_sk != NULL)<br>&nbsp;&nbsp;err = pfkey_broadcast_one(skb, &amp;skb2, allocation, one_sk);</div>
<div>// 释放skb<br>&nbsp;if (skb2)<br>&nbsp;&nbsp;kfree_skb(skb2);<br>&nbsp;kfree_skb(skb);<br>&nbsp;return err;<br>}</div>
<div><br>// 发送一个包<br>static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; gfp_t allocation, struct sock *sk)<br>{<br>&nbsp;int err = -ENOBUFS;</div>
<div>&nbsp;sock_hold(sk);<br>&nbsp;if (*skb2 == NULL) {<br>// skb2是skb的一个克隆包<br>&nbsp;&nbsp;if (atomic_read(&amp;skb-&gt;users) != 1) {<br>&nbsp;&nbsp;&nbsp;*skb2 = skb_clone(skb, allocation);<br>&nbsp;&nbsp;} else {<br>&nbsp;&nbsp;&nbsp;*skb2 = skb;<br>// 因为发送会减少skb的使用计数<br>&nbsp;&nbsp;&nbsp;atomic_inc(&amp;skb-&gt;users);<br>&nbsp;&nbsp;}<br>&nbsp;}<br>&nbsp;if (*skb2 != NULL) {<br>// 实际发送的是skb2<br>&nbsp;&nbsp;if (atomic_read(&amp;sk-&gt;sk_rmem_alloc) &lt;= sk-&gt;sk_rcvbuf) {<br>&nbsp;&nbsp;&nbsp;skb_orphan(*skb2);<br>&nbsp;&nbsp;&nbsp;skb_set_owner_r(*skb2, sk);<br>&nbsp;&nbsp;&nbsp;skb_queue_tail(&amp;sk-&gt;sk_receive_queue, *skb2);<br>&nbsp;&nbsp;&nbsp;sk-&gt;sk_data_ready(sk, (*skb2)-&gt;len);<br>&nbsp;&nbsp;&nbsp;*skb2 = NULL;<br>&nbsp;&nbsp;&nbsp;err = 0;<br>&nbsp;&nbsp;}<br>&nbsp;}<br>&nbsp;sock_put(sk);<br>&nbsp;return err;<br>}</div>
<div>...... 待续 ......</div>
页: [1]
查看完整版本: Linux内核中PF_KEY协议族的实现(1)