tcp/dccp: do not touch listener sk_refcnt under synflood

When a SYNFLOOD targets a non SO_REUSEPORT listener, multiple
cpus contend on sk->sk_refcnt and sk->sk_wmem_alloc changes.

By letting listeners use SOCK_RCU_FREE infrastructure,
we can relax TCP_LISTEN lookup rules and avoid touching sk_refcnt

Note that we still use SLAB_DESTROY_BY_RCU rules for other sockets,
only listeners are impacted by this change.

Peak performance under SYNFLOOD is increased by ~33% :

On my test machine, I could process 3.2 Mpps instead of 2.4 Mpps

Most consuming functions are now skb_set_owner_w() and sock_wfree()
contending on sk->sk_wmem_alloc when cooking SYNACK and freeing them.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 711d209..f0422e7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -858,6 +858,7 @@
 		return;
 
 #ifdef CONFIG_TCP_MD5SIG
+	rcu_read_lock();
 	hash_location = tcp_parse_md5sig_option(th);
 	if (sk && sk_fullsock(sk)) {
 		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
@@ -875,16 +876,15 @@
 					   th->source, &ipv6h->daddr,
 					   ntohs(th->source), tcp_v6_iif(skb));
 		if (!sk1)
-			return;
+			goto out;
 
-		rcu_read_lock();
 		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
 		if (!key)
-			goto release_sk1;
+			goto out;
 
 		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
 		if (genhash || memcmp(hash_location, newhash, 16) != 0)
-			goto release_sk1;
+			goto out;
 	}
 #endif
 
@@ -898,11 +898,8 @@
 	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
 
 #ifdef CONFIG_TCP_MD5SIG
-release_sk1:
-	if (sk1) {
-		rcu_read_unlock();
-		sock_put(sk1);
-	}
+out:
+	rcu_read_unlock();
 #endif
 }
 
@@ -1351,6 +1348,7 @@
 {
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
+	bool refcounted;
 	struct sock *sk;
 	int ret;
 	struct net *net = dev_net(skb->dev);
@@ -1381,7 +1379,8 @@
 
 lookup:
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
-				th->source, th->dest, inet6_iif(skb));
+				th->source, th->dest, inet6_iif(skb),
+				&refcounted);
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1404,6 +1403,7 @@
 			goto lookup;
 		}
 		sock_hold(sk);
+		refcounted = true;
 		nsk = tcp_check_req(sk, skb, req, false);
 		if (!nsk) {
 			reqsk_put(req);
@@ -1460,7 +1460,8 @@
 	bh_unlock_sock(sk);
 
 put_and_return:
-	sock_put(sk);
+	if (refcounted)
+		sock_put(sk);
 	return ret ? -1 : 0;
 
 no_tcp_socket:
@@ -1483,7 +1484,8 @@
 	return 0;
 
 discard_and_relse:
-	sock_put(sk);
+	if (refcounted)
+		sock_put(sk);
 	goto discard_it;
 
 do_time_wait:
@@ -1514,6 +1516,7 @@
 			inet_twsk_deschedule_put(tw);
 			sk = sk2;
 			tcp_v6_restore_cb(skb);
+			refcounted = false;
 			goto process;
 		}
 		/* Fall through to ACK */