commit 89fc9814c36abb126dd86caad75ed0f3665aac21
Author: KOVACS Krisztian <hidden@balabit.hu>
Date:   Fri Dec 1 11:52:08 2006 +0100

    Delete TIME_WAIT conntrack entries when needed.
    
    %patch

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 81392e5..b727243 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -81,6 +81,9 @@ #if defined(CONFIG_IP_NF_TPROXY) || defi
 
 	IPS_TPROXY_RELATED_BIT = 12,
 	IPS_TPROXY_RELATED = (1 << IPS_TPROXY_RELATED_BIT),
+
+	IPS_MAY_DELETE_BIT = 12,
+	IPS_MAY_DELETE = (1 << IPS_MAY_DELETE_BIT),
 #endif
 };
 
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 4369150..44c19ad 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -244,6 +244,8 @@ static inline void ip_ct_refresh(struct
 	__ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
+extern void __death_by_timeout(unsigned long ul_conntrack);
+
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
 extern void ip_conntrack_tcp_update(struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index b96427b..4ee301a 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -366,6 +366,50 @@ destroy_conntrack(struct nf_conntrack *n
 	ip_conntrack_free(ct);
 }
 
+static void
+__destroy_conntrack(struct nf_conntrack *nfct)
+{
+	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
+	struct ip_conntrack_protocol *proto;
+
+	DEBUGP("destroy_conntrack(%p)\n", ct);
+	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
+	IP_NF_ASSERT(!timer_pending(&ct->timeout));
+
+	ip_conntrack_event(IPCT_DESTROY, ct);
+	set_bit(IPS_DYING_BIT, &ct->status);
+
+	/* To make sure we don't get any weird locking issues here:
+	 * destroy_conntrack() MUST NOT be called with a write lock
+	 * to ip_conntrack_lock!!! -HW */
+	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	if (proto && proto->destroy)
+		proto->destroy(ct);
+
+	if (ip_conntrack_destroyed)
+		ip_conntrack_destroyed(ct);
+
+	/* Expectations will have been removed in clean_from_lists,
+	 * except TFTP can create an expectation on the first packet,
+	 * before connection is in the list, so we need to clean here,
+	 * too. */
+	ip_ct_remove_expectations(ct);
+
+	/* We overload first tuple to link into unconfirmed list. */
+	if (!is_confirmed(ct)) {
+		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
+		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	}
+
+	CONNTRACK_STAT_INC(delete);
+
+	if (ct->master)
+		ip_conntrack_put(ct->master);
+
+	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+	ip_conntrack_free(ct);
+}
+
 static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct ip_conntrack *ct = (void *)ul_conntrack;
@@ -379,6 +423,19 @@ static void death_by_timeout(unsigned lo
 	ip_conntrack_put(ct);
 }
 
+void __death_by_timeout(unsigned long ul_conntrack)
+{
+	struct ip_conntrack *ct = (void *)ul_conntrack;
+
+	/* Inside lock so preempt is disabled on module removal path.
+	 * Otherwise we can get spurious warnings. */
+	CONNTRACK_STAT_INC(delete_list);
+	clean_from_lists(ct);
+
+        if (atomic_dec_and_test(&ct->ct_general.use))
+		__destroy_conntrack((struct nf_conntrack *)ct);
+}
+
 static inline int
 conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
 		    const struct ip_conntrack_tuple *tuple,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fb920e7..fd51229 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -31,6 +31,7 @@ #include <linux/spinlock.h>
 
 #include <net/tcp.h>
 
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -983,6 +984,15 @@ static int tcp_packet(struct ip_conntrac
 					      NULL, "ip_ct_tcp: invalid SYN");
 			return -NF_ACCEPT;
 		}
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+	case TCP_CONNTRACK_TIME_WAIT:
+		/* Set MAY_DELETE if NAT subsystem may drop connection when it is clashing */
+		if (test_bit(IPS_TPROXY_BIT, &conntrack->status)) {
+			DEBUGP(KERN_DEBUG "Marking TPROXY-related TIME_WAIT conntrack entry MAY_DELETE\n");
+			set_bit(IPS_MAY_DELETE_BIT, &conntrack->status);
+		}
+		break;
+#endif
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 260c281..6f7c072 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -972,3 +972,6 @@ #if defined(CONFIG_IP_NF_CONNTRACK_NETLI
 EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
 EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
 #endif
+#if defined(CONFIG_IP_NF_TPROXY) || defined(CONFIG_IP_NF_TPROXY_MODULE)
+EXPORT_SYMBOL_GPL(__death_by_timeout);
+#endif
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 262f36e..9abdc63 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -26,6 +26,7 @@ #include <linux/jhash.h>
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
 
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter_ipv4/ip_conntrack.h>
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
@@ -122,6 +123,15 @@ static void ip_nat_cleanup_conntrack(str
 	write_unlock_bh(&ip_nat_lock);
 }
 
+static void __ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
+{
+	if (!(conn->status & IPS_NAT_DONE_MASK))
+		return;
+
+	list_del(&conn->nat.info.bysource);
+}
+
+
 /* We do checksum mangling, so if they were wrong before they're still
  * wrong.  Also works for incomplete packets (eg. ICMP dest
  * unreachables.) */
@@ -251,7 +261,8 @@ __ip_nat_reserved_new_hash(const struct
 			   const struct ip_conntrack_manip *peer)
 {
 	struct ip_nat_reserved *res;
-	unsigned int h;
+	struct ip_conntrack_tuple_hash *h = NULL;
+	unsigned int hash;
 
 	DEBUGP("__ip_nat_reserved_new_hash: manip proto %u %u.%u.%u.%u:%u\n",
 			proto, NIPQUAD(manip->ip), ntohs(manip->u.all));
@@ -283,25 +294,61 @@ __ip_nat_reserved_new_hash(const struct
 					       .dst = {.protonum = proto,
 						       .ip = manip->ip,
 						       .u = {.all = manip->u.all}}};
+		struct ip_conntrack *ctrack;
+
+		h = ip_conntrack_find_get(&t, NULL);
+
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+		if ((h != NULL) &&
+		    (ctrack = tuplehash_to_ctrack(h)) &&
+		    test_bit(IPS_MAY_DELETE_BIT, &ctrack->status)) {
+			DEBUGP("Deleting old conntrack entry for NAT\n");
+			__ip_nat_cleanup_conntrack(ctrack);
+			ctrack->status &= ~IPS_NAT_DONE_MASK;
+			if (del_timer(&ctrack->timeout))
+				ctrack->timeout.function((unsigned long)ctrack);
+			ip_conntrack_put(ctrack);
+			h = NULL;
+		}
+#endif
 
-		if (ip_conntrack_tuple_taken(&t, NULL)) {
+		if (h) {
 			DEBUGP("__ip_nat_reserved_new_hash: manip clashes with an already existing connection\n");
+			ip_conntrack_put(tuplehash_to_ctrack(h));
 			return NULL;
 		}
 	} else {
 		/* Strong check: we have only a manip, unfortunately we scan the whole conntrack
 		 * hash for possible clashing connections... */
-		struct ip_conntrack_tuple_hash *h = NULL;
 		unsigned int i;
+		int repeat;
+		struct ip_conntrack *ctrack;
 
-		read_lock_bh(&ip_conntrack_lock);
+		write_lock_bh(&ip_conntrack_lock);
 		for (i = 0; !h && i < ip_conntrack_htable_size; i++) {
-			h = LIST_FIND(&ip_conntrack_hash[i], clashing_ct_cmp,
-				      struct ip_conntrack_tuple_hash *, manip);
+			do {
+				repeat = 0;
+				h = LIST_FIND(&ip_conntrack_hash[i], clashing_ct_cmp,
+					      struct ip_conntrack_tuple_hash *, manip);
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+				if ((h != NULL) &&
+				    (ctrack = tuplehash_to_ctrack(h)) &&
+				    test_bit(IPS_MAY_DELETE_BIT, &ctrack->status)) {
+					DEBUGP("Deleting old conntrack entry for NAT\n");
+					__ip_nat_cleanup_conntrack(ctrack);
+					ctrack->status &= ~IPS_NAT_DONE_MASK;
+					if (del_timer(&ctrack->timeout))
+						__death_by_timeout((unsigned long)ctrack);
+					h = NULL;
+					repeat = 1;
+				}
+#endif
+			} while (repeat);
+			/* there's a clashing connection, break */
 			if (h)
 				break;
 		}
-		read_unlock_bh(&ip_conntrack_lock);
+		write_unlock_bh(&ip_conntrack_lock);
 		if (h) {
 			DEBUGP("__ip_nat_reserved_new_hash: manip clashes with an already existing connection\n");
 			return NULL;
@@ -320,9 +367,9 @@ __ip_nat_reserved_new_hash(const struct
 		res->peer = *peer;
 
 	/* put it into the hash */
-	h = hash_nat_reserved(manip, peer, proto);
+	hash = hash_nat_reserved(manip, peer, proto);
 	atomic_inc(&ip_nat_reserved_count);
-	list_prepend(&natreserved[h], &res->hash);
+	list_prepend(&natreserved[hash], &res->hash);
 	DEBUGP("__ip_nat_reserved_new_hash: hashed manip proto %u %u.%u.%u.%u:%u\n",
 			proto, NIPQUAD(manip->ip), ntohs(manip->u.all));
 
@@ -497,6 +544,8 @@ ip_nat_used_tuple(const struct ip_conntr
 
 	   We could keep a separate hash if this proves too slow. */
 	struct ip_conntrack_tuple reply;
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack *ctrack;
 #ifdef CONFIG_IP_NF_NAT_NRES
 	struct ip_nat_reserved *res;
 
@@ -512,8 +561,29 @@ #ifdef CONFIG_IP_NF_NAT_NRES
 	}
 #endif
 
+	/* check if it's taken by an existing connection */
 	invert_tuplepr(&reply, tuple);
-	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
+	h = ip_conntrack_find_get(&reply, ignored_conntrack);
+
+#if defined(CONFIG_IP_NF_TPROXY) || defined (CONFIG_IP_NF_TPROXY_MODULE)
+	/* check if that conntrack is marked MAY_DELETE, if so, get rid of it... */
+	if ((h != NULL) &&
+	    (ctrack = tuplehash_to_ctrack(h)) &&
+	    test_bit(IPS_MAY_DELETE_BIT, &ctrack->status)) {
+		DEBUGP("Deleting old conntrack entry for NAT\n");
+		__ip_nat_cleanup_conntrack(ctrack);
+		ctrack->status &= ~IPS_NAT_DONE_MASK;
+		if (del_timer(&ctrack->timeout))
+			ctrack->timeout.function((unsigned long)ctrack);
+		ip_conntrack_put(tuplehash_to_ctrack(h));
+		h = NULL;
+	}
+#endif
+
+	if (h)
+		ip_conntrack_put(tuplehash_to_ctrack(h));
+
+	return h != NULL;
 }
 EXPORT_SYMBOL(ip_nat_used_tuple);
 
diff --git a/net/ipv4/netfilter/iptable_tproxy.c b/net/ipv4/netfilter/iptable_tproxy.c
index b037007..50d8be9 100644
--- a/net/ipv4/netfilter/iptable_tproxy.c
+++ b/net/ipv4/netfilter/iptable_tproxy.c
@@ -1340,8 +1340,8 @@ ip_tproxy_setsockopt_unassign(struct soc
 
 	if (!sr) {
 		DEBUGP(KERN_DEBUG "IP_TPROXY: IP_TPROXY_UNASSIGN not unhashing socket, "
-		       "%08x:%04x, proto=%d, sk->state=%d\n",
-		       inet->rcv_saddr, inet->sport, proto, sk->sk_socket ? sk->sk_socket->state : -1);
+		       "%08x:%04x, proto=%d\n", 
+		       inet->rcv_saddr, inet->sport, proto);
 		res = -ENOENT;
 		goto write_unlk;
 	}
