- 论坛徽章:
- 0
|
好久没有来发帖,今天看了一下l7filter过滤的实现,发上来讨论一下:
模版初始化,注册proc和netfilter match:- /*
-  * Module entry point. Calls need_conntrack() and layer7_init_proc(), clamps
-  * maxdatalen into the range [1, 65536] (printing a warning whenever it had
-  * to be adjusted) and registers the xt_layer7_match table.
-  *
-  * Returns the value of xt_register_matches(). When that call reports an
-  * error, the proc setup is undone with layer7_cleanup_proc() -- the same
-  * call xt_layer7_fini() makes -- so it does not outlive a failed load.
-  */
- static int __init xt_layer7_init(void)
- {
-     int ret;
-     need_conntrack();
-     layer7_init_proc();
-     if (maxdatalen < 1) {
-         printk(KERN_WARNING "layer7: maxdatalen can't be < 1, "
-                "using 1\n");
-         maxdatalen = 1;
-     } else if (maxdatalen > 65536) {
-         /* This is not a hard limit. It's just here to prevent people
-            from bringing their slow machines to a grinding halt. */
-         printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, "
-                "using 65536\n");
-         maxdatalen = 65536;
-     }
-     ret = xt_register_matches(xt_layer7_match,
-                               ARRAY_SIZE(xt_layer7_match));
-     /* The exit hook never runs for a module whose init failed, so the
-        proc cleanup has to happen here. */
-     if (ret != 0)
-         layer7_cleanup_proc();
-     return ret;
- }
复制代码 maxdatalen是每个连接的数据流缓冲区的最大长度(字节)。layer7使用数据流检测:同一连接的若干个报文的载荷被依次追加到一个最长为maxdatalen的缓冲区中,再对整个缓冲区进行匹配。其值默认为2048。
相应的模块注销,完成proc清理和netfilter match的注销:- static void __exit xt_layer7_fini(void)
- { /* Module exit hook: calls layer7_cleanup_proc() and then unregisters the
- xt_layer7_match table with the same array/size pair that xt_layer7_init()
- passed to xt_register_matches(). */
- layer7_cleanup_proc();
- xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match));
- }
复制代码 layer7_match是核心的匹配函数,layer7扩展了nf_conn结构,引入了协议名称、数据缓冲区和数据长度。当一个报文被匹配时,协议名称将记录这个协议,
以方便该会话的后续报文匹备检测:- static bool
- layer7_match(const struct sk_buff *skbin, const struct xt_match_param *mt_para)
- { /* Core matching function (presumably the match hook stored in
- xt_layer7_match; that table is not shown here). The whole body runs with
- l7_lock held and every return path releases it.
- Returns info->invert ("no match", subject to inversion) when the packet
- cannot be handled, has no conntrack, cannot be linearized, the per-connection
- buffer is missing or cannot be allocated, or no new payload bytes were
- appended. Otherwise returns pattern_result ^ info->invert.
- skb->cb[0] serves as a per-packet "seen" flag so that several layer7 rules do
- not append the same payload more than once. */
- /* sidestep const without getting a compiler warning... */
- struct sk_buff * skb = (struct sk_buff *)skbin;
- const struct xt_layer7_info * info = mt_para->matchinfo;
- enum ip_conntrack_info master_ctinfo, ctinfo;
- struct nf_conn *master_conntrack, *conntrack;
- unsigned char * app_data;
- unsigned int pattern_result, appdatalen;
- regexp * comppattern;
- /* Be paranoid/incompetent - lock the entire match function. */
- // take the global lock
- spin_lock_bh(&l7_lock);
- // make sure this is a packet type we can handle
- if(!can_handle(skb)){
- DPRINTK("layer7: This is some protocol I can't handle.\n");
- spin_unlock_bh(&l7_lock);
- return info->invert;
- }
- /* Treat parent & all its children together as one connection, except
- for the purpose of setting conntrack->layer7.app_proto in the actual
- connection. This makes /proc/net/ip_conntrack more satisfying. */
- // look up the conntrack entry for this packet (both pointers start out equal)
- if(!(conntrack = nf_ct_get(skb, &ctinfo)) ||
- !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){
- DPRINTK("layer7: couldn't get conntrack.\n");
- spin_unlock_bh(&l7_lock);
- return info->invert;
- }
- /* Try to get a master conntrack (and its master etc) for FTP, etc. */
- /**
- * If the connection has a master, master_conntrack is walked up the chain
- * until no further master exists. So for a connection without a master,
- * conntrack and master_conntrack refer to the same connection; otherwise
- * conntrack is the child itself and master_conntrack is its topmost parent.
- */
- while (master_ct(master_conntrack) != NULL)
- master_conntrack = master_ct(master_conntrack);
- /* if we've classified it or seen too many packets */
- // taken when the connection has seen more than num_packets packets (the original annotation gives 10 as the default; not shown here), or the master connection already carries a protocol name
- if(TOTAL_PACKETS > num_packets ||
- master_conntrack->layer7.app_proto) {
- // match_no_append(): if the master's protocol name is set but the child's is not,
- // the name is copied to the child and then compared with the rule's protocol.
- // Otherwise the master's protocol name is set to "unknown".
- // Without a child both pointers refer to the same conntrack, so later packets
- // are decided by the protocol name alone, with no regex run.
- pattern_result = match_no_append(conntrack, master_conntrack,
- ctinfo, master_ctinfo, info);
- /* skb->cb[0] == seen. Don't do things twice if there are
- multiple l7 rules. I'm not sure that using cb for this purpose
- is correct, even though it says "put your private variables
- there". But it doesn't look like it is being used for anything
- else in the skbs that make it here. */
- skb->cb[0] = 1; /* marking it seen here's probably irrelevant */
- spin_unlock_bh(&l7_lock);
- // return the match result
- return (pattern_result ^ info->invert);
- }
- // the payload has to be contiguous (linear) before it can be inspected
- if(skb_is_nonlinear(skb)){
- if(skb_linearize(skb) != 0){
- if (net_ratelimit())
- printk(KERN_ERR "layer7: failed to linearize "
- "packet, bailing.\n");
- spin_unlock_bh(&l7_lock);
- return info->invert;
- }
- }
- /* now that the skb is linearized, it's safe to set these. */
- // app_data points at the start of the application payload
- app_data = skb->data + app_data_offset(skb);
- // appdatalen is the payload length (bytes from app_data up to the skb tail)
- appdatalen = skb_tail_pointer(skb) - app_data;
- /* the return value gets checked later, when we're ready to use it */
- // compile the rule's regex, or fetch it from the cache
- comppattern = compile_and_cache(info->pattern, info->protocol);
- /* On the first packet of a connection, allocate space for app data */
- // first packet, not yet seen, no buffer yet: allocate the connection's app_data buffer
- if(TOTAL_PACKETS == 1 && !skb->cb[0] &&
- !master_conntrack->layer7.app_data){
- master_conntrack->layer7.app_data =
- kmalloc(maxdatalen, GFP_ATOMIC);
- // check the allocation
- if(!master_conntrack->layer7.app_data){
- if (net_ratelimit())
- printk(KERN_ERR "layer7: out of memory in "
- "match, bailing.\n");
- spin_unlock_bh(&l7_lock);
- return info->invert;
- }
- master_conntrack->layer7.app_data[0] = '\0';
- }
- /* Can be here, but unallocated, if numpackets is increased near
- the beginning of a connection */
- // per the original annotation this can also happen when the l7filter module is loaded after the connection was already established
- if(master_conntrack->layer7.app_data == NULL){
- spin_unlock_bh(&l7_lock);
- return (info->invert); /* unmatched */
- }
- if(!skb->cb[0]){
- int newbytes;
- // append the payload to the connection's buffer; app_data accumulates the connection's data, bounded by maxdatalen
- // (the original annotation gives 2048 as the default).
- // NOTE(review): checking the remaining space before calling add_data might be preferable; it would save a function call for most packets that do not match
- newbytes = add_data(master_conntrack, app_data, appdatalen);
- // nothing was appended (e.g. the connection's buffer is already full)
- if(newbytes == 0) { /* didn't add any data */
- skb->cb[0] = 1;
- /* Didn't match before, not going to match now */
- spin_unlock_bh(&l7_lock);
- return info->invert;
- }
- }
- // protocol matching: "unknown" and "unset" are special protocol names; the former never matches here, the latter always matches
- /* If looking for "unknown", then never match. "Unknown" means that
- we've given up; we're still trying with these packets. */
- if(!strcmp(info->protocol, "unknown")) {
- pattern_result = 0;
- /* If looking for "unset", then always match. "Unset" means that we
- haven't yet classified the connection. */
- } else if(!strcmp(info->protocol, "unset")) {
- pattern_result = 2;
- DPRINTK("layer7: matched unset: not yet classified "
- "(%d/%d packets)\n", TOTAL_PACKETS, num_packets);
- /* If the regexp failed to compile, don't bother running it */
- } else if(comppattern &&
- regexec(comppattern, master_conntrack->layer7.app_data)){
- // regexec() returned non-zero, which is treated as a successful match.
- // Every match runs against the connection's accumulated data; per the original annotation the buffer is finally released in destroy_conntrack (not shown here).
- DPRINTK("layer7: matched %s\n", info->protocol);
- pattern_result = 1;
- } else pattern_result = 0;
- if(pattern_result == 1) {
- // matched: record the protocol name on the master connection
- master_conntrack->layer7.app_proto =
- kmalloc(strlen(info->protocol)+1, GFP_ATOMIC);
- if(!master_conntrack->layer7.app_proto){
- if (net_ratelimit())
- printk(KERN_ERR "layer7: out of memory in "
- "match, bailing.\n");
- spin_unlock_bh(&l7_lock);
- return (pattern_result ^ info->invert);
- }
- strcpy(master_conntrack->layer7.app_proto, info->protocol);
- } else if(pattern_result > 1) { /* cleanup from "unset" */
- // only reached for "unset": fold the marker value 2 back to 1
- pattern_result = 1;
- }
- /* mark the packet seen */
- // flag the packet as already processed
- skb->cb[0] = 1;
- spin_unlock_bh(&l7_lock);
- // return the match result
- return (pattern_result ^ info->invert);
- }
复制代码 can_handle返回l7filter支持处理的协议,包括tcp/udp/icmp:- /*
-  * Tell whether the packet is one this module inspects.
-  * Returns 1 for an IP packet whose protocol field is TCP, UDP or ICMP,
-  * and 0 when ip_hdr() yields NULL or the protocol is anything else.
-  */
- static int can_handle(const struct sk_buff *skb)
- {
-     if (ip_hdr(skb) == NULL) /* not IP */
-         return 0;
-     switch (ip_hdr(skb)->protocol) {
-     case IPPROTO_TCP:
-     case IPPROTO_UDP:
-     case IPPROTO_ICMP:
-         return 1;
-     default:
-         return 0;
-     }
- }
复制代码 compile_and_cache函数初始化正则表达式,将用户空间的表达式转换为内核可识别的形式。
为了加速正则表达式的转换(不可能每个报文都转换一次),函数引入了缓存机制,不过这个缓存是单向链表形式的。这意味着加了若干条规则后,
这个链将变得很长。- /*
-  * Return the compiled form of regex_string, compiling it on first use.
-  *
-  * Compiled patterns are kept in the singly linked list that starts at
-  * first_pattern_cache, keyed by the regex text; the list is searched
-  * linearly. protocol is only used in the error message.
-  *
-  * Returns the cached or freshly compiled pattern, or NULL when memory
-  * runs out (nothing is cached then) or when the regex does not compile
-  * (the NULL result is cached, so compilation is not retried).
-  *
-  * layer7_match() calls this with l7_lock held; the list is not otherwise
-  * protected here. All allocations use GFP_ATOMIC.
-  */
- static regexp * compile_and_cache(const char * regex_string,
-                                   const char * protocol)
- {
-     struct pattern_cache *node;
-     struct pattern_cache *tail = NULL; /* last node seen, NULL if list is empty */
-     unsigned int len = strlen(regex_string);
-     /* Look for an existing entry, remembering the tail on the way. */
-     for (node = first_pattern_cache; node != NULL; node = node->next) {
-         if (!strcmp(node->regex_string, regex_string))
-             return node->pattern;
-         tail = node;
-     }
-     /* Not cached yet. Build the node completely before linking it in, so
-        the list never contains a half-initialised entry. */
-     node = kmalloc(sizeof(*node), GFP_ATOMIC);
-     if (node != NULL)
-         node->regex_string = kmalloc(len + 1, GFP_ATOMIC);
-     if (node == NULL || node->regex_string == NULL) {
-         if (net_ratelimit())
-             printk(KERN_ERR "layer7: out of memory in "
-                    "compile_and_cache, bailing.\n");
-         kfree(node); /* kfree(NULL) is a no-op */
-         return NULL;
-     }
-     strcpy(node->regex_string, regex_string);
-     node->next = NULL;
-     /* No buffer is pre-allocated for the pattern: node->pattern simply
-        takes whatever regcomp() returns. Allocating one first and then
-        overwriting the pointer would leak it on every cache miss. */
-     DPRINTK("About to compile this: \"%s\"\n", regex_string);
-     node->pattern = regcomp((char *)regex_string, &len);
-     if (!node->pattern) {
-         if (net_ratelimit())
-             printk(KERN_ERR "layer7: Error compiling regexp "
-                    "\"%s\" (%s)\n",
-                    regex_string, protocol);
-         /* pattern is now cached as NULL, so we won't try again. */
-     }
-     /* Append the finished node to the list. */
-     if (tail == NULL)
-         first_pattern_cache = node;
-     else
-         tail->next = node;
-     return node->pattern;
- }
复制代码 match_no_append函数用于匹备已经标识存在协议名的连接,或者是超过最大检测数据包限制的会话:- /*
-  * Decide a packet purely from the protocol name stored on the connection,
-  * without appending any payload.
-  *
-  * Side effects: frees the master's payload buffer if it is still there;
-  * labels a master that has no protocol name "unknown"; copies the master's
-  * protocol name onto conntrack when conntrack has none.
-  *
-  * Returns 1 when the master's protocol name equals info->protocol, 0 when
-  * it differs or the master has just been labelled "unknown", and 1 when a
-  * name could not be allocated. ctinfo and master_ctinfo are not used.
-  */
- static int match_no_append(struct nf_conn * conntrack,
-                            struct nf_conn * master_conntrack,
-                            enum ip_conntrack_info ctinfo,
-                            enum ip_conntrack_info master_ctinfo,
-                            const struct xt_layer7_info * info)
- {
-     /* Once we are in here the collected payload is no longer needed. */
-     if (master_conntrack->layer7.app_data != NULL) {
- #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
-         if (!master_conntrack->layer7.app_proto) {
-             char * printable =
-                 friendly_print(master_conntrack->layer7.app_data);
-             char * hex =
-                 hex_print(master_conntrack->layer7.app_data);
-             DPRINTK("\nl7-filter gave up after %d bytes "
-                     "(%d packets):\n%s\n",
-                     strlen(printable), TOTAL_PACKETS, printable);
-             kfree(printable);
-             DPRINTK("In hex: %s\n", hex);
-             kfree(hex);
-         }
- #endif
-         kfree(master_conntrack->layer7.app_data);
-         master_conntrack->layer7.app_data = NULL; /* guards against a second free */
-     }
-     /* Never classified: label the master "unknown" so it can be told apart
-        from connections that are still being tested. */
-     if (!master_conntrack->layer7.app_proto) {
-         master_conntrack->layer7.app_proto =
-             kmalloc(strlen("unknown")+1, GFP_ATOMIC);
-         if (!master_conntrack->layer7.app_proto) {
-             if (net_ratelimit())
-                 printk(KERN_ERR "layer7: out of memory in "
-                        "match_no_append, bailing.\n");
-             return 1;
-         }
-         strcpy(master_conntrack->layer7.app_proto, "unknown");
-         return 0;
-     }
-     /* The master is classified. A connection without its own name takes a
-        copy of the master's (this is what shows up in /proc). */
-     if (!conntrack->layer7.app_proto) {
-         conntrack->layer7.app_proto =
-             kmalloc(strlen(master_conntrack->layer7.app_proto)+1,
-                     GFP_ATOMIC);
-         if (!conntrack->layer7.app_proto) {
-             if (net_ratelimit())
-                 printk(KERN_ERR "layer7: out of memory "
-                        "in match_no_append, "
-                        "bailing.\n");
-             return 1;
-         }
-         strcpy(conntrack->layer7.app_proto,
-                master_conntrack->layer7.app_proto);
-     }
-     /* Compare the stored name with the protocol this rule asks for. */
-     return (!strcmp(master_conntrack->layer7.app_proto,
-                     info->protocol));
- }
复制代码 app_data_offset计算载荷的偏移位置,跳过传输层首部:- /*
-  * Return the offset into skb->data at which the application payload
-  * starts: the IP header length plus the transport header length.
-  *
-  * TCP:  header length is read from the upper nibble of byte 12 of the
-  *       TCP header (in 32-bit words).
-  * UDP:  fixed 8 bytes.
-  * ICMP: fixed 8 bytes.
-  * Any other protocol logs a rate-limited error and also assumes 8 bytes.
-  *
-  * NOTE(review): skb->data is indexed without a length check; this appears
-  * to assume the header bytes are present -- confirm against callers.
-  */
- static int app_data_offset(const struct sk_buff *skb)
- {
-     /* In case we are ported somewhere (ebtables?) where ip_hdr(skb)
-        isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */
-     int iphdr_len = 4*ip_hdr(skb)->ihl;
-     switch (ip_hdr(skb)->protocol) {
-     case IPPROTO_TCP:
-         /* 12 == offset into TCP header for the header length field.
-            Can't get this with skb->h.th->doff because the tcphdr
-            struct doesn't get set when routing (this is confirmed to be
-            true in Netfilter as well as QoS.) */
-         return iphdr_len + 4*(skb->data[iphdr_len + 12] >> 4);
-     case IPPROTO_UDP:
-         return iphdr_len + 8; /* UDP header is always 8 bytes */
-     case IPPROTO_ICMP:
-         return iphdr_len + 8; /* ICMP header is 8 bytes */
-     default:
-         if (net_ratelimit())
-             printk(KERN_ERR "layer7: tried to handle unknown "
-                    "protocol!\n");
-         return iphdr_len + 8; /* something reasonable */
-     }
- }
复制代码 add_data函数用于构建待检测的数据流,它将数据报文追加到连接会话的数据缓冲区中,令人疑惑的是,为什么没有区分数据流方向呢?
- /*
-  * Append packet payload to the master connection's layer7 buffer.
-  *
-  * At most appdatalen input bytes are examined, and never more than the
-  * buffer has room for (maxdatalen minus the current length minus one for
-  * the terminator). NUL bytes are dropped, ASCII bytes are lower-cased
-  * (the regex library has no case-insensitive mode), all other bytes are
-  * copied unchanged. The buffer stays NUL-terminated and
-  * layer7.app_data_len is updated.
-  *
-  * Returns the number of bytes actually stored; 0 when there is no buffer.
-  */
- static int add_data(struct nf_conn * master_conntrack,
-                     char * app_data, int appdatalen)
- {
-     int stored = 0, pos;
-     int used = master_conntrack->layer7.app_data_len;
-     int room;
-     /* This is a fix for a race condition by Deti Fliegl. However, I'm not
-        clear on whether the race condition exists or whether this really
-        fixes it. I might just be being dense... Anyway, if it's not really
-        a fix, all it does is waste a very small amount of time. */
-     if (master_conntrack->layer7.app_data == NULL)
-         return 0;
-     /* Input bytes we may look at while still leaving space for '\0'. */
-     room = maxdatalen - used - 1;
-     for (pos = 0; pos < room && pos < appdatalen; pos++) {
-         if (app_data[pos] == '\0')
-             continue; /* strip nulls */
-         /* the kernel version of tolower mungs 'upper ascii' */
-         if (isascii(app_data[pos]))
-             master_conntrack->layer7.app_data[stored + used] =
-                 tolower(app_data[pos]);
-         else
-             master_conntrack->layer7.app_data[stored + used] =
-                 app_data[pos];
-         stored++;
-     }
-     /* Terminate the string and record the new length. */
-     master_conntrack->layer7.app_data[stored + used] = '\0';
-     master_conntrack->layer7.app_data_len = stored + used;
-     return stored;
- }
复制代码 个人认为,layer7的性能低,是各个方面造成的,不知道优化匹配模式,能不能提高其效率——类似snort规则,引入更多的检测项。 |
评分
-
查看全部评分
|