- 论坛徽章:
- 0
|
TCP拥塞控制算法内核实现剖析(二)
内核版本:2.6.37
主要源文件:linux-2.6.37/net/ipv4/tcp_bic.c
======================================================================================================- /* BIC TCP Parameters */
-
- struct bictcp {
-
- u32 cnt ; /* increase cwnd by 1 after ACKs */
-
- u32 last_max_cwnd ; /* last maximum snd_cwnd */
-
- u32 loss_cwnd ; /* congestion window at last loss */
-
- u32 last_cwnd ; /* the last snd_cwnd */
-
- u32 last_time ; /* time when updated last_cwnd */
-
- u32 epoch_start ; /* beginning of an epoch */
-
- #define ACK_RATIO_SHIFT 4
-
- u32 delayed_ack ; /* estimate the ratio of Packets/ACKs << 4 */
-
- } ;
-
-
-
/*
 * Scale factor beta calculation
 * max_cwnd = snd_cwnd * beta
 */
#define BICTCP_BETA_SCALE	1024

/*
 * In binary search,
 * go to point (max+min)/N
 *
 * Article author's note: with a divisor of 4 this is not a true bisection.
 */
#define BICTCP_B		4
/* Global variables */

/* On/off switch: lets BIC converge quickly toward an equilibrium value. */
static int fast_convergence = 1;

/*
 * Upper bound (in MSS) on a single window increase, so that growth does not
 * become too aggressive.
 */
static int max_increment = 16;

/* Lower bound on congestion window, for TCP friendliness. */
static int low_window = 14;

/*
 * = 819 / 1024 (BICTCP_BETA_SCALE), i.e. about 0.8.
 * The original note reads "beta for multiplicative increase" and the article
 * author marked it as doubtful. Presumably this is the multiplicative
 * *decrease* factor (max_cwnd = snd_cwnd * beta) -- TODO confirm.
 */
static int beta = 819;

/*
 * Initial slow-start threshold.
 * NOTE(review): the article states the initial threshold is 2^31-1
 * (2147483647). As declared here this variable has no initializer and is
 * therefore zero; that value must be applied elsewhere -- confirm.
 */
static int initial_ssthresh;

/*
 * log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wmax-B to Wmax.
 * NOTE(review): the article author flagged this formula as unclear.
 */
static int smooth_part = 20;
复制代码 ==========================================================================================================- struct inet_connection_sock {
-
- ...
-
- u32 icsk_ca_priv[16] ;
-
- #define ICSK_CA_PRIV_SIZE (16*sizeof(u32))
-
- }
-
-
-
- static inline void *inet_csk_ca( const struct sock *sk )
-
- {
-
- return (void *)inet_csk(sk)->icsk_ca_priv ;
-
- }
复制代码 ============================================================================================================
不明白?!-
- /* Slow start with delack produces 3 packets of burst , so that it is safe "de facto". This will be
-
- * default - same as the default reordering threshold - but if reordering increases , we must
-
- * be able to allow cwnd to burst at least this much in order to not pull it back when holes
-
- * are filled.
-
- */
-
- static __inline__ __u32 tcp_max_burst ( const struct tcp_sock *sk )
-
- {
-
- return tp->reordering ;
-
- }
-
- /* u8 reordering ; Packets reordering metric */
-
-
-
- /* RFC2681 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
-
- */
-
- /* 返回0,不需要增加cwnd ; 返回1,cwnd被限制,需要增加 */
-
- int tcp_is_cwnd_limited ( const struct sock *sk , u32 in_flight )
-
- {
-
- const struct tcp_sock *tp = tcp_sk(sk) ;
-
- u32 left ;
-
- if( in_flight >= tp->snd_cwnd ) /* 不是规定in_flight < snd_cwnd ? */
-
- return 1 ;
-
- left = tp->snd_cwnd - in_flight ;
-
- if( sk_can_gso(sk) &&
- left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-
- left * tp->mss_cache < sk->sk_gso_max_size )
-
- return 1 ;
-
- return left <= tcp_max_busrt( tp ) ;
-
- }
复制代码 =============================================================================================================- static void bictcp_cong_avoid ( struct sock *sk , u32 ack , u32 in_flight )
-
- {
-
- struct tcp_sock *tp = tcp_sk(sk) ;
-
- struct bictcp *ca = inet_csk_ca(sk) ;
-
- /* 如果发送拥塞窗口不被限制,不能再增加,则返回 */
-
- if( !tcp_is_cwnd_limited(sk , in_flight))
-
- return ;
-
- if( tp->snd_cwnd < tp->snd_ssthresh )
-
- tcp_slow_start( tp ) ;
-
- else {
-
- bictcp_update(ca , tp->snd_cwnd ) ;
-
- tcp_cong_avoid_ai( tp , ca->cnt ) ;
-
- }
-
- }
复制代码 从以上函数可以看出,BIC的慢启动和reno相同。在拥塞避免阶段,当snd_cwnd <= low_window ,两者也采用相同方法。
只有当snd_cwnd > low_window时,BIC才开始显示出它的特性。
在include/net/tcp.h中(原文此处的代码清单似乎缺失)。从以上函数可以看出,和reno相比,BIC在拥塞避免阶段snd_cwnd增长极快。
当ca->last_max_cwnd - snd_cwnd >= 4 时,snd_cwnd最慢的增长率为 1/16 。
而当ca->last_max_cwnd - snd_cwnd <4 时,增长率非常低,可以使当前的snd_cwnd维持很长一段时间,
即以最合适的snd_cwnd发送数据。
这两点使BIC在高带宽、长时延的环境下能达到较高的吞吐量。
1. 搜索阶段(cwnd < last_max_cwnd),其中 dist = (last_max_cwnd - cwnd) / BICTCP_B:
- (1) cwnd < last_max_cwnd - 64,则 cnt = cwnd / 16
-
- (2) last_max_cwnd - 64 <= cwnd < last_max_cwnd - 4,则 cnt = cwnd / dist
-
- (3) last_max_cwnd - 4 <= cwnd < last_max_cwnd,则 cnt = 5 * cwnd
复制代码 总体来说,snd_cwnd增长先快后慢,趋于稳定。
2. max probing阶段-
- (1) last_max_cwnd <= cwnd < last_max_cwnd + 4,则cnt = 5*cwnd
-
- (2) last_max_cwnd + 4 <= cwnd < last_max_cwnd + 48 ,则cnt = 3*cwnd / (cwnd - last_max_cwnd)
-
- (3) cwnd >= last_max_cwnd + 48 ,则cnt = cwnd / 16
复制代码 总体来说,snd_cwnd的增长先慢后快,越来越快。 |
|