patch-2.4.0-test12 linux/net/ipv4/tcp_input.c
Next file: linux/net/ipv4/tcp_ipv4.c
Previous file: linux/net/ipv4/tcp.c
Back to the patch index
Back to the overall index
- Lines: 374
- Date:
Tue Nov 28 21:53:45 2000
- Orig file:
v2.4.0-test11/linux/net/ipv4/tcp_input.c
- Orig date:
Sun Oct 8 10:50:39 2000
diff -u --recursive --new-file v2.4.0-test11/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.202 2000/09/21 01:05:38 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.203 2000/11/28 17:04:09 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -56,6 +56,10 @@
* Andi Kleen: Process packets with PSH set in the
* fast path.
* J Hadi Salim: ECN support
+ * Andrei Gurtov,
+ * Pasi Sarolahti,
+ * Panu Kuhlberg: Experimental audit of TCP (re)transmission
+ * engine. Lots of bugs were found.
*/
#include <linux/config.h>
@@ -1259,7 +1263,7 @@
static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
{
return !tp->retrans_stamp ||
- (tp->saw_tstamp &&
+ (tp->saw_tstamp && tp->rcv_tsecr &&
(__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
}
@@ -1378,10 +1382,8 @@
NET_INC_STATS_BH(TCPLossUndo);
tp->retransmits = 0;
tp->undo_marker = 0;
- if (!IsReno(tp)) {
+ if (!IsReno(tp))
tp->ca_state = TCP_CA_Open;
- tp->backoff = 0;
- }
return 1;
}
return 0;
@@ -1479,7 +1481,6 @@
tp->retransmits = 0;
if (tcp_try_undo_recovery(sk, tp))
return;
- tp->backoff = 0;
break;
case TCP_CA_CWR:
@@ -1579,7 +1580,7 @@
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Superceeds RFC1323)
*/
-static void tcp_ack_saw_tstamp(struct tcp_opt *tp)
+static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag)
{
__u32 seq_rtt;
@@ -1594,7 +1595,12 @@
seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
tcp_rtt_estimator(tp, seq_rtt);
tcp_set_rto(tp);
- tp->rto <<= tp->backoff;
+ if (tp->backoff) {
+ if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED))
+ tp->backoff = 0;
+ else
+ tp->rto <<= tp->backoff;
+ }
tcp_bound_rto(tp);
}
@@ -1609,20 +1615,27 @@
* I.e. Karn's algorithm. (SIGCOMM '87, p5.)
*/
- if (!tp->retransmits && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- tp->backoff = 0;
- tcp_rtt_estimator(tp, seq_rtt);
- tcp_set_rto(tp);
- tcp_bound_rto(tp);
+ tcp_rtt_estimator(tp, seq_rtt);
+ tcp_set_rto(tp);
+ if (tp->backoff) {
+ /* Relax this? We have a valid sample as soon as we are
+ * here. Why not clear backoff?
+ */
+ if (!tp->retransmits || !(flag & FLAG_RETRANS_DATA_ACKED))
+ tp->backoff = 0;
+ else
+ tp->rto <<= tp->backoff;
}
+ tcp_bound_rto(tp);
}
static __inline__ void
-tcp_ack_update_rtt(struct tcp_opt *tp, int flag, u32 seq_rtt)
+tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
{
- if (tp->saw_tstamp)
- tcp_ack_saw_tstamp(tp);
- else
+ /* Note that the peer MAY send a zero echo. In this case it is ignored. (RFC 1323) */
+ if (tp->saw_tstamp && tp->rcv_tsecr)
+ tcp_ack_saw_tstamp(tp, flag);
+ else if (seq_rtt >= 0)
tcp_ack_no_tstamp(tp, seq_rtt, flag);
}
@@ -1669,7 +1682,7 @@
struct sk_buff *skb;
__u32 now = tcp_time_stamp;
int acked = 0;
- __u32 seq_rtt = 0; /* F..g gcc... */
+ __s32 seq_rtt = -1;
while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -1700,16 +1713,23 @@
if(sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out--;
acked |= FLAG_RETRANS_DATA_ACKED;
- }
+ seq_rtt = -1;
+ } else if (seq_rtt < 0)
+ seq_rtt = now - scb->when;
if(sacked & TCPCB_SACKED_ACKED)
tp->sacked_out--;
if(sacked & TCPCB_LOST)
tp->lost_out--;
- }
+ if(sacked & TCPCB_URG) {
+ if (tp->urg_mode &&
+ !before(scb->end_seq, tp->snd_up))
+ tp->urg_mode = 0;
+ }
+ } else if (seq_rtt < 0)
+ seq_rtt = now - scb->when;
if(tp->fackets_out)
tp->fackets_out--;
tp->packets_out--;
- seq_rtt = now - scb->when;
__skb_unlink(skb, skb->list);
tcp_free_skb(sk, skb);
}
@@ -1821,7 +1841,8 @@
#ifdef TCP_DEBUG
if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) {
- if (net_ratelimit())
+ if ((tp->snd_una + tp->snd_wnd)-tp->snd_nxt >= (1<<tp->snd_wscale)
+ && net_ratelimit())
printk(KERN_DEBUG "TCP: peer %u.%u.%u.%u:%u/%u shrinks window %u:%u:%u. Bad, what else can I say?\n",
NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
tp->snd_una, tp->snd_wnd, tp->snd_nxt);
@@ -1929,7 +1950,7 @@
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp)
+void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp, int estab)
{
unsigned char *ptr;
struct tcphdr *th = skb->h.th;
@@ -1956,7 +1977,7 @@
return; /* don't parse partial options */
switch(opcode) {
case TCPOPT_MSS:
- if(opsize==TCPOLEN_MSS && th->syn) {
+ if(opsize==TCPOLEN_MSS && th->syn && !estab) {
u16 in_mss = ntohs(*(__u16 *)ptr);
if (in_mss) {
if (tp->user_mss && tp->user_mss < in_mss)
@@ -1966,7 +1987,7 @@
}
break;
case TCPOPT_WINDOW:
- if(opsize==TCPOLEN_WINDOW && th->syn)
+ if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
if (sysctl_tcp_window_scaling) {
tp->wscale_ok = 1;
tp->snd_wscale = *(__u8 *)ptr;
@@ -1981,8 +2002,8 @@
break;
case TCPOPT_TIMESTAMP:
if(opsize==TCPOLEN_TIMESTAMP) {
- if (sysctl_tcp_timestamps) {
- tp->tstamp_ok = 1;
+ if ((estab && tp->tstamp_ok) ||
+ (!estab && sysctl_tcp_timestamps)) {
tp->saw_tstamp = 1;
tp->rcv_tsval = ntohl(*(__u32 *)ptr);
tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
@@ -1990,7 +2011,7 @@
}
break;
case TCPOPT_SACK_PERM:
- if(opsize==TCPOLEN_SACK_PERM && th->syn) {
+ if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
if (sysctl_tcp_sack) {
tp->sack_ok = 1;
tcp_sack_reset(tp);
@@ -2019,7 +2040,8 @@
if (th->doff == sizeof(struct tcphdr)>>2) {
tp->saw_tstamp = 0;
return 0;
- } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+ } else if (tp->tstamp_ok &&
+ th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
__u32 *ptr = (__u32 *)(th + 1);
if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -2031,7 +2053,7 @@
return 1;
}
}
- tcp_parse_options(skb, tp);
+ tcp_parse_options(skb, tp, 1);
return 1;
}
@@ -3329,8 +3351,9 @@
struct tcphdr *th, unsigned len)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ int saved_clamp = tp->mss_clamp;
- tcp_parse_options(skb, tp);
+ tcp_parse_options(skb, tp, 0);
if (th->ack) {
/* rfc793:
@@ -3345,24 +3368,12 @@
* test reduces to:
*/
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
- return 1;
+ goto reset_and_undo;
- if (tp->saw_tstamp) {
- if (tp->rcv_tsecr == 0) {
- /* Workaround for bug in linux-2.1 and early
- * 2.2 kernels. Let's pretend that we did not
- * see such timestamp to avoid bogus rtt value,
- * calculated by tcp_ack().
- */
- tp->saw_tstamp = 0;
-
- /* But do not forget to store peer's timestamp! */
- if (th->syn)
- tcp_store_ts_recent(tp);
- } else if (!between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) {
- NET_INC_STATS_BH(PAWSActiveRejected);
- return 1;
- }
+ if (tp->saw_tstamp && tp->rcv_tsecr &&
+ !between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) {
+ NET_INC_STATS_BH(PAWSActiveRejected);
+ goto reset_and_undo;
}
/* Now ACK is acceptable.
@@ -3386,7 +3397,7 @@
* --ANK(990513)
*/
if (!th->syn)
- goto discard;
+ goto discard_and_undo;
/* rfc793:
* "If the SYN bit is on ...
@@ -3419,14 +3430,16 @@
tp->window_clamp = min(tp->window_clamp,65535);
}
- if (tp->tstamp_ok) {
+ if (tp->saw_tstamp) {
+ tp->tstamp_ok = 1;
tp->tcp_header_len =
sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- } else
- tp->tcp_header_len = sizeof(struct tcphdr);
- if (tp->saw_tstamp)
tcp_store_ts_recent(tp);
+ } else {
+ tp->tcp_header_len = sizeof(struct tcphdr);
+ }
+
if (tp->sack_ok && sysctl_tcp_fack)
tp->sack_ok |= 2;
@@ -3467,7 +3480,10 @@
tp->ack.lrcvtime = tcp_time_stamp;
tcp_enter_quickack_mode(tp);
tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
- goto discard;
+
+discard:
+ __kfree_skb(skb);
+ return 0;
} else {
tcp_send_ack(sk);
}
@@ -3483,12 +3499,12 @@
* Otherwise (no ACK) drop the segment and return."
*/
- goto discard;
+ goto discard_and_undo;
}
/* PAWS check. */
if (tp->ts_recent_stamp && tp->saw_tstamp && tcp_paws_check(tp, 0))
- goto discard;
+ goto discard_and_undo;
if (th->syn) {
/* We see SYN without ACK. It is attempt of
@@ -3496,8 +3512,15 @@
* Particularly, it can be connect to self.
*/
tcp_set_state(sk, TCP_SYN_RECV);
- if (tp->saw_tstamp)
+
+ if (tp->saw_tstamp) {
+ tp->tstamp_ok = 1;
tcp_store_ts_recent(tp);
+ tp->tcp_header_len =
+ sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+ tp->tcp_header_len = sizeof(struct tcphdr);
+ }
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
@@ -3526,15 +3549,23 @@
* Uncomment this return to process the data.
*/
return -1;
+#else
+ goto discard;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*/
-discard:
- __kfree_skb(skb);
- return 0;
+discard_and_undo:
+ tcp_clear_options(tp);
+ tp->mss_clamp = saved_clamp;
+ goto discard;
+
+reset_and_undo:
+ tcp_clear_options(tp);
+ tp->mss_clamp = saved_clamp;
+ return 1;
}
@@ -3671,8 +3702,8 @@
* and does not calculate rtt.
* Fix it at least with timestamps.
*/
- if (tp->saw_tstamp && !tp->srtt)
- tcp_ack_saw_tstamp(tp);
+ if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt)
+ tcp_ack_saw_tstamp(tp, 0);
if (tp->tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)