[TCP]: Abstract tp->highest_sack access & point it to the next skb

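Pointing tp->highest_sack to the skb that follows the highest SACKed
one is necessary to avoid referencing already SACKed skbs, which will
soon be on a separate list. When there is no such next skb,
tp->highest_sack is NULL and callers must check for that.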

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6e392ba..5ec1cac 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1267,8 +1267,12 @@
 	__tcp_add_write_queue_tail(sk, skb);
 
 	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL)
+	if (sk->sk_send_head == NULL) {
 		sk->sk_send_head = skb;
+
+		if (tcp_sk(sk)->highest_sack == NULL)
+			tcp_sk(sk)->highest_sack = skb;
+	}
 }
 
 static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
@@ -1318,9 +1322,38 @@
 {
 	if (!tp->sacked_out)
 		return tp->snd_una;
+
+	if (tp->highest_sack == NULL)
+		return tp->snd_nxt;
+
 	return TCP_SKB_CB(tp->highest_sack)->seq;
 }
 
+static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
+{	/* Point highest_sack past @skb: next skb in queue, or NULL if @skb is last */
+	tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
+						tcp_write_queue_next(sk, skb);
+}
+
+static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
+{	/* Plain accessor; result may be NULL (see tcp_advance_highest_sack) */
+	return tcp_sk(sk)->highest_sack;
+}
+
+static inline void tcp_highest_sack_reset(struct sock *sk)
+{	/* Restart highest_sack at whatever tcp_write_queue_head() returns */
+	tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+}
+
+/* Called when old skb is about to be deleted (combined with new skb): if SACKs are outstanding and highest_sack is old, repoint it to new */
+static inline void tcp_highest_sack_combine(struct sock *sk,
+					    struct sk_buff *old,
+					    struct sk_buff *new)
+{
+	if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
+		tcp_sk(sk)->highest_sack = new;
+}
+
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 263c536..bc2d5f7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1125,7 +1125,7 @@
 	struct sk_buff *skb;
 	int cnt = 0;
 	u32 new_low_seq = tp->snd_nxt;
-	u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq;
+	u32 received_upto = tcp_highest_sack_seq(tp);
 
 	if (!tcp_is_fack(tp) || !tp->retrans_out ||
 	    !after(received_upto, tp->lost_retrans_low) ||
@@ -1236,9 +1236,10 @@
 	return in_sack;
 }
 
-static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp,
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 			   int *reord, int dup_sack, int fack_count)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	u8 sacked = TCP_SKB_CB(skb)->sacked;
 	int flag = 0;
 
@@ -1307,8 +1308,8 @@
 		if (fack_count > tp->fackets_out)
 			tp->fackets_out = fack_count;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
-			tp->highest_sack = skb;
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+			tcp_advance_highest_sack(sk, skb);
 	}
 
 	/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1330,8 +1331,6 @@
 					int dup_sack_in, int *fack_count,
 					int *reord, int *flag)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-
 	tcp_for_write_queue_from(skb, sk) {
 		int in_sack = 0;
 		int dup_sack = dup_sack_in;
@@ -1358,7 +1357,7 @@
 			break;
 
 		if (in_sack)
-			*flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
+			*flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count);
 
 		*fack_count += tcp_skb_pcount(skb);
 	}
@@ -1429,7 +1428,7 @@
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
 			tp->fackets_out = 0;
-		tp->highest_sack = tcp_write_queue_head(sk);
+		tcp_highest_sack_reset(sk);
 	}
 
 	found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
@@ -1552,9 +1551,11 @@
 						       &fack_count, &reord, &flag);
 
 			/* ...tail remains todo... */
-			if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) {
+			if (tcp_highest_sack_seq(tp) == cache->end_seq) {
 				/* ...but better entrypoint exists! */
-				skb = tcp_write_queue_next(sk, tp->highest_sack);
+				skb = tcp_highest_sack(sk);
+				if (skb == NULL)
+					break;
 				fack_count = tp->fackets_out;
 				cache++;
 				goto walk;
@@ -1566,8 +1567,10 @@
 			continue;
 		}
 
-		if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
-			skb = tcp_write_queue_next(sk, tp->highest_sack);
+		if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+			skb = tcp_highest_sack(sk);
+			if (skb == NULL)
+				break;
 			fack_count = tp->fackets_out;
 		}
 		skb = tcp_sacktag_skip(skb, sk, start_seq);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d8583a..9a985b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -667,7 +667,7 @@
 	if (!tp->sacked_out || tcp_is_reno(tp))
 		return;
 
-	if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
+	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
 }
 
@@ -711,9 +711,6 @@
 	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
-	if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack))
-		tp->highest_sack = buff;
-
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
@@ -1707,9 +1704,7 @@
 		BUG_ON(tcp_skb_pcount(skb) != 1 ||
 		       tcp_skb_pcount(next_skb) != 1);
 
-		if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
-		    (next_skb == tp->highest_sack)))
-			return;
+		tcp_highest_sack_combine(sk, next_skb, skb);
 
 		/* Ok.	We will be able to collapse the packet. */
 		tcp_unlink_write_queue(next_skb, sk);
@@ -2019,7 +2014,7 @@
 			break;
 		tp->forward_skb_hint = skb;
 
-		if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+		if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 			break;
 
 		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)