- 论坛徽章:
- 0
|
原帖由 Au_Hank 于 2008-10-30 19:01 发表
你可能有多次的数据修改,这样一来你需要记住多个的 (ORI_SEQ,N)偶对。如何消除旧的(ORI_SEQ,N)偶对是个问题。我觉得原则是已经被ACK过的ORI_SEQ可以被删除,但是如何知道 ACK被另外一端收到了是个问题(因为ACK可能被你的设备收到了,但是RELAY给接收方的时候丢失了)。
还有一个难办的是selective ack,要崩溃了。我觉得还不如在用户空间socket转接算了,简单多了,但是性能差了。
我在想,内核代码中有 NAT 代码,NAT 部分是重新计算 TCP 的 SEQ 和 WIN 的,能否直接利用他们的东西呢?
不同的是,我是桥,不是 NAT
kernel/net/netfilter/nf_conntrack_proto_tcp.c
- /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
- in IP Filter' by Guido van Rooij.
- [url]http://www.nluug.nl/events/sane2000/papers.html[/url]
- [url]http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz[/url]
- The boundaries and the conditions are changed according to RFC793:
- the packet must intersect the window (i.e. segments may be
- after the right or before the left edge) and thus receivers may ACK
- segments after the right edge of the window.
- td_maxend = max(sack + max(win,1)) seen in reply packets
- td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
- td_maxwin += seq + len - sender.td_maxend
- if seq + len > sender.td_maxend
- td_end = max(seq + len) seen in sent packets
- I. Upper bound for valid data: seq <= sender.td_maxend
- II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
- III. Upper bound for valid ack: sack <= receiver.td_end
- IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
- where sack is the highest right edge of sack block found in the packet.
- The upper bound limit for a valid ack is not ignored -
- we doesn't have to deal with fragments.
- */
- static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- unsigned int dataoff,
- struct tcphdr *tcph)
- {
- /* XXX Should I use payload length field in IP/IPv6 header ?
- * - YK */
- return (seq + len - dataoff - tcph->doff*4
- + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
- }
- /* Fixme: what about big packets? */
- #define MAXACKWINCONST 66000
- #define MAXACKWINDOW(sender) \
- ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
- : MAXACKWINCONST)
复制代码
- static int tcp_in_window(struct nf_conn *ct,
- struct ip_ct_tcp *state,
- enum ip_conntrack_dir dir,
- unsigned int index,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct tcphdr *tcph,
- int pf)
- {
- struct ip_ct_tcp_state *sender = &state->seen[dir];
- struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
- __u32 seq, ack, sack, end, win, swin;
- int res;
- /*
- * Get the required data from the packet.
- */
- seq = ntohl(tcph->seq);
- ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
- end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
- if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(skb, dataoff, tcph, &sack);
- pr_debug("tcp_in_window: START\n");
- pr_debug("tcp_in_window: ");
- NF_CT_DUMP_TUPLE(tuple);
- pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
- seq, ack, sack, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- if (sender->td_end == 0) {
- /*
- * Initialize sender data.
- */
- if (tcph->syn && tcph->ack) {
- /*
- * Outgoing SYN-ACK in reply to a SYN.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- tcp_options(skb, dataoff, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- sender->td_end = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- sender->td_maxend = end + sender->td_maxwin;
- }
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
- /*
- * RFC 793: "if a TCP is reinitialized ... then it need
- * not wait at all; it must only be sure to use sequence
- * numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- tcp_options(skb, dataoff, tcph, sender);
- }
- if (!(tcph->ack)) {
- /*
- * If there is no ACK, just pretend it was set and OK.
- */
- ack = sack = receiver->td_end;
- } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
- (TCP_FLAG_ACK|TCP_FLAG_RST))
- && (ack == 0)) {
- /*
- * Broken TCP stacks, that set ACK in RST packets as well
- * with zero ack value.
- */
- ack = sack = receiver->td_end;
- }
- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
- /*
- * Packets contains no data: we assume it is valid
- * and check the ack value only.
- * However RST segments are always validated by their
- * SEQ number, except when seq == 0 (reset sent answering
- * SYN.
- */
- seq = end = sender->td_end;
- pr_debug("tcp_in_window: ");
- NF_CT_DUMP_TUPLE(tuple);
- pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
- seq, ack, sack, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- after(end, sender->td_end - receiver->td_maxwin - 1),
- before(sack, receiver->td_end + 1),
- after(ack, receiver->td_end - MAXACKWINDOW(sender)));
- if (before(seq, sender->td_maxend + 1) &&
- after(end, sender->td_end - receiver->td_maxwin - 1) &&
- before(sack, receiver->td_end + 1) &&
- after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
- /*
- * Take into account window scaling (RFC 1323).
- */
- if (!tcph->syn)
- win <<= sender->td_scale;
- /*
- * Update sender data.
- */
- swin = win + (sack - ack);
- if (sender->td_maxwin < swin)
- sender->td_maxwin = swin;
- if (after(end, sender->td_end))
- sender->td_end = end;
- /*
- * Update receiver data.
- */
- if (after(end, sender->td_maxend))
- receiver->td_maxwin += end - sender->td_maxend;
- if (after(sack + win, receiver->td_maxend - 1)) {
- receiver->td_maxend = sack + win;
- if (win == 0)
- receiver->td_maxend++;
- }
- /*
- * Check retransmissions.
- */
- if (index == TCP_ACK_SET) {
- if (state->last_dir == dir
- && state->last_seq == seq
- && state->last_ack == ack
- && state->last_end == end
- && state->last_win == win)
- state->retrans++;
- else {
- state->last_dir = dir;
- state->last_seq = seq;
- state->last_ack = ack;
- state->last_end = end;
- state->last_win = win;
- state->retrans = 0;
- }
- }
- res = 1;
- } else {
- res = 0;
- if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
- nf_ct_tcp_be_liberal)
- res = 1;
- if (!res && LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: %s ",
- before(seq, sender->td_maxend + 1) ?
- after(end, sender->td_end - receiver->td_maxwin - 1) ?
- before(sack, receiver->td_end + 1) ?
- after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
- : "ACK is under the lower bound (possible overly delayed ACK)"
- : "ACK is over the upper bound (ACKed data not seen yet)"
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
- }
- pr_debug("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
- return res;
- }
复制代码
- #ifdef CONFIG_NF_NAT_NEEDED
- /* Update sender->td_end after NAT successfully mangled the packet */
- /* Caller must linearize skb at tcp header. */
- void nf_conntrack_tcp_update(struct sk_buff *skb,
- unsigned int dataoff,
- struct nf_conn *conntrack,
- int dir)
- {
- struct tcphdr *tcph = (void *)skb->data + dataoff;
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
- __u32 end;
- end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
- write_lock_bh(&tcp_lock);
- /*
- * We have to worry for the ack in the reply packet only...
- */
- if (after(end, conntrack->proto.tcp.seen[dir].td_end))
- conntrack->proto.tcp.seen[dir].td_end = end;
- conntrack->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
- pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- }
- EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
- #endif
复制代码
根据提示,我找到了这个文档,里面详细讲了应如何计算 SEQ 和 WIN
没想到这样的问题会导致像自己做一个 TCP 栈一样……
内核目前有这些,我能否利用他们呢?能力有限没能滤清思路…… |
|