author | Mike Pagano <mpagano@gentoo.org> | 2019-02-08 10:20:49 -0500
committer | Mike Pagano <mpagano@gentoo.org> | 2019-02-08 10:20:49 -0500
commit | 32d8aab8c0070a58fbb2a4f1d9cda28915ec17c2 (patch)
tree | fd6ddd914b3d50d48809fece8521e8cdc1356d50
parent | proj/linux-patches: Linux patch 4.4.173 (diff)
download | linux-patches-32d8aab8c0070a58fbb2a4f1d9cda28915ec17c2.tar.gz linux-patches-32d8aab8c0070a58fbb2a4f1d9cda28915ec17c2.tar.bz2 linux-patches-32d8aab8c0070a58fbb2a4f1d9cda28915ec17c2.zip
proj/linux-patches: Linux patch 4.4.174
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4
-rw-r--r-- | 1173_linux-4.4.174.patch | 3075
2 files changed, 3079 insertions, 0 deletions
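The bulk of this patch backports the upstream rework of IP fragment reassembly: frag queues keyed by an rhashtable instead of a hand-rolled hash, and received fragments kept as an rb-tree of adjacent "runs". With it comes the stricter overlap policy visible in the ip_fragment.c hunks below — a fragment that partially overlaps an already-queued one now kills the whole queue (extending RFC 5722's IPv6 rule to IPv4 and bumping the new ReasmOverlaps counter), while a fragment carrying no new data is silently dropped. A minimal userspace sketch of that classification step follows; it is illustrative only, and the names (`struct frag`, `classify`) are hypothetical and do not appear in the kernel patch.

```c
#include <stdio.h>

/* One queued "run": a maximal span [offset, end) of adjacent fragments,
 * as kept at each rb-tree node by the patched ip_frag_queue(). */
struct frag { int offset, end; };

enum verdict {
	FRAG_DISJOINT,	/* no overlap: descend further / insert here */
	FRAG_DUP,	/* fully contained: no new data, ignore skb */
	FRAG_OVERLAP	/* partial overlap: discard the whole queue */
};

/* Mirrors one step of the binary search in the patched ip_frag_queue():
 * compare the new fragment's [offset, end) against a run's interval. */
static enum verdict classify(struct frag nf, struct frag run)
{
	if (nf.end <= run.offset || nf.offset >= run.end)
		return FRAG_DISJOINT;
	if (nf.offset >= run.offset && nf.end <= run.end)
		return FRAG_DUP;
	return FRAG_OVERLAP;
}

int main(void)
{
	struct frag run = { 0, 1400 };	/* first 1400 bytes already queued */
	struct frag tests[] = { { 1400, 2800 },	/* adjacent: insert */
				{ 200, 1400 },	/* subset: duplicate */
				{ 1000, 2000 } };	/* partial: drop queue */

	for (int i = 0; i < 3; i++)
		printf("[%d,%d) -> %d\n", tests[i].offset, tests[i].end,
		       (int)classify(tests[i], run));
	return 0;
}
```

In the real code the disjoint case continues the rb-tree descent (left or right child depending on which side the fragment falls), which is what keeps per-packet cost logarithmic even under the adversarial tiny-fragment floods this series was written to defend against.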
diff --git a/0000_README b/0000_README index b00cafe6..e836b734 100644 --- a/0000_README +++ b/0000_README @@ -735,6 +735,10 @@ Patch: 1172_linux-4.4.173.patch From: http://www.kernel.org Desc: Linux 4.4.173 +Patch: 1173_linux-4.4.174.patch +From: http://www.kernel.org +Desc: Linux 4.4.174 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1173_linux-4.4.174.patch b/1173_linux-4.4.174.patch new file mode 100644 index 00000000..3060cab7 --- /dev/null +++ b/1173_linux-4.4.174.patch @@ -0,0 +1,3075 @@ +diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt +index 2ea4c45cf1c8..7c229f59016f 100644 +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -112,14 +112,11 @@ min_adv_mss - INTEGER + + IP Fragmentation: + +-ipfrag_high_thresh - INTEGER +- Maximum memory used to reassemble IP fragments. When +- ipfrag_high_thresh bytes of memory is allocated for this purpose, +- the fragment handler will toss packets until ipfrag_low_thresh +- is reached. This also serves as a maximum limit to namespaces +- different from the initial one. +- +-ipfrag_low_thresh - INTEGER ++ipfrag_high_thresh - LONG INTEGER ++ Maximum memory used to reassemble IP fragments. ++ ++ipfrag_low_thresh - LONG INTEGER ++ (Obsolete since linux-4.17) + Maximum memory used to reassemble IP fragments before the kernel + begins to remove incomplete fragment queues to free up resources. + The kernel still accepts new fragments for defragmentation. +diff --git a/Makefile b/Makefile +index db7665e32da8..1fa281069379 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,6 +1,6 @@ + VERSION = 4 + PATCHLEVEL = 4 +-SUBLEVEL = 173 ++SUBLEVEL = 174 + EXTRAVERSION = + NAME = Blurry Fish Butt + +diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h +index e50b31d18462..e97cdfd6cba9 100644 +--- a/include/linux/rhashtable.h ++++ b/include/linux/rhashtable.h +@@ -133,23 +133,23 @@ struct rhashtable_params { + /** + * struct rhashtable - Hash table handle + * @tbl: Bucket table +- * @nelems: Number of elements in table + * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash + * @p: Configuration parameters + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list ++ * @nelems: Number of elements in table + */ + struct rhashtable { + struct bucket_table __rcu *tbl; +- atomic_t nelems; + unsigned int key_len; + unsigned int elasticity; + struct rhashtable_params p; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; ++ atomic_t nelems; + }; + + /** +@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht, + struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, + const void *key, + struct rhash_head *obj, +- struct bucket_table *old_tbl); ++ struct bucket_table *old_tbl, ++ void **data); + int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); + + int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +@@ -514,18 +515,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); + } + +-/** +- * rhashtable_lookup_fast - search hash table, inlined version +- * @ht: hash table +- * @key: the pointer to the key +- * @params: hash table parameters +- * +- * 
Computes the hash value for the key and traverses the bucket chain looking +- * for a entry with an identical key. The first matching entry is returned. +- * +- * Returns the first entry on which the compare function returned true. +- */ +-static inline void *rhashtable_lookup_fast( ++/* Internal function, do not use. */ ++static inline struct rhash_head *__rhashtable_lookup( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) + { +@@ -537,8 +528,6 @@ static inline void *rhashtable_lookup_fast( + struct rhash_head *he; + unsigned int hash; + +- rcu_read_lock(); +- + tbl = rht_dereference_rcu(ht->tbl, ht); + restart: + hash = rht_key_hashfn(ht, tbl, key, params); +@@ -547,8 +536,7 @@ restart: + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; +- rcu_read_unlock(); +- return rht_obj(ht, he); ++ return he; + } + + /* Ensure we see any new tables. */ +@@ -557,13 +545,64 @@ restart: + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; +- rcu_read_unlock(); + + return NULL; + } + +-/* Internal function, please use rhashtable_insert_fast() instead */ +-static inline int __rhashtable_insert_fast( ++/** ++ * rhashtable_lookup - search hash table ++ * @ht: hash table ++ * @key: the pointer to the key ++ * @params: hash table parameters ++ * ++ * Computes the hash value for the key and traverses the bucket chain looking ++ * for a entry with an identical key. The first matching entry is returned. ++ * ++ * This must only be called under the RCU read lock. ++ * ++ * Returns the first entry on which the compare function returned true. ++ */ ++static inline void *rhashtable_lookup( ++ struct rhashtable *ht, const void *key, ++ const struct rhashtable_params params) ++{ ++ struct rhash_head *he = __rhashtable_lookup(ht, key, params); ++ ++ return he ? rht_obj(ht, he) : NULL; ++} ++ ++/** ++ * rhashtable_lookup_fast - search hash table, without RCU read lock ++ * @ht: hash table ++ * @key: the pointer to the key ++ * @params: hash table parameters ++ * ++ * Computes the hash value for the key and traverses the bucket chain looking ++ * for a entry with an identical key. The first matching entry is returned. ++ * ++ * Only use this function when you have other mechanisms guaranteeing ++ * that the object won't go away after the RCU read lock is released. ++ * ++ * Returns the first entry on which the compare function returned true. ++ */ ++static inline void *rhashtable_lookup_fast( ++ struct rhashtable *ht, const void *key, ++ const struct rhashtable_params params) ++{ ++ void *obj; ++ ++ rcu_read_lock(); ++ obj = rhashtable_lookup(ht, key, params); ++ rcu_read_unlock(); ++ ++ return obj; ++} ++ ++/* Internal function, please use rhashtable_insert_fast() instead. This ++ * function returns the existing element already in hashes in there is a clash, ++ * otherwise it returns an error via ERR_PTR(). 
++ */ ++static inline void *__rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) + { +@@ -576,6 +615,7 @@ static inline int __rhashtable_insert_fast( + spinlock_t *lock; + unsigned int elasticity; + unsigned int hash; ++ void *data = NULL; + int err; + + restart: +@@ -600,11 +640,14 @@ restart: + + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(new_tbl)) { +- tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); ++ tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); + if (!IS_ERR_OR_NULL(tbl)) + goto slow_path; + + err = PTR_ERR(tbl); ++ if (err == -EEXIST) ++ err = 0; ++ + goto out; + } + +@@ -618,25 +661,25 @@ slow_path: + err = rhashtable_insert_rehash(ht, tbl); + rcu_read_unlock(); + if (err) +- return err; ++ return ERR_PTR(err); + + goto restart; + } + +- err = -EEXIST; ++ err = 0; + elasticity = ht->elasticity; + rht_for_each(head, tbl, hash) { + if (key && + unlikely(!(params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : +- rhashtable_compare(&arg, rht_obj(ht, head))))) ++ rhashtable_compare(&arg, rht_obj(ht, head))))) { ++ data = rht_obj(ht, head); + goto out; ++ } + if (!--elasticity) + goto slow_path; + } + +- err = 0; +- + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); +@@ -651,7 +694,7 @@ out: + spin_unlock_bh(lock); + rcu_read_unlock(); + +- return err; ++ return err ? ERR_PTR(err) : data; + } + + /** +@@ -674,7 +717,13 @@ static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) + { +- return __rhashtable_insert_fast(ht, NULL, obj, params); ++ void *ret; ++ ++ ret = __rhashtable_insert_fast(ht, NULL, obj, params); ++ if (IS_ERR(ret)) ++ return PTR_ERR(ret); ++ ++ return ret == NULL ? 0 : -EEXIST; + } + + /** +@@ -703,11 +752,15 @@ static inline int rhashtable_lookup_insert_fast( + const struct rhashtable_params params) + { + const char *key = rht_obj(ht, obj); ++ void *ret; + + BUG_ON(ht->p.obj_hashfn); + +- return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, +- params); ++ ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); ++ if (IS_ERR(ret)) ++ return PTR_ERR(ret); ++ ++ return ret == NULL ? 0 : -EEXIST; + } + + /** +@@ -735,6 +788,32 @@ static inline int rhashtable_lookup_insert_fast( + static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) ++{ ++ void *ret; ++ ++ BUG_ON(!ht->p.obj_hashfn || !key); ++ ++ ret = __rhashtable_insert_fast(ht, key, obj, params); ++ if (IS_ERR(ret)) ++ return PTR_ERR(ret); ++ ++ return ret == NULL ? 0 : -EEXIST; ++} ++ ++/** ++ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table ++ * @ht: hash table ++ * @obj: pointer to hash head inside object ++ * @params: hash table parameters ++ * @data: pointer to element data already in hashes ++ * ++ * Just like rhashtable_lookup_insert_key(), but this function returns the ++ * object if it exists, NULL if it does not and the insertion was successful, ++ * and an ERR_PTR otherwise. 
++ */ ++static inline void *rhashtable_lookup_get_insert_key( ++ struct rhashtable *ht, const void *key, struct rhash_head *obj, ++ const struct rhashtable_params params) + { + BUG_ON(!ht->p.obj_hashfn || !key); + +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 6d39d81d3c38..502787c29ce9 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -556,9 +556,14 @@ struct sk_buff { + struct skb_mstamp skb_mstamp; + }; + }; +- struct rb_node rbnode; /* used in netem & tcp stack */ ++ struct rb_node rbnode; /* used in netem, ip4 defrag, and tcp stack */ + }; +- struct sock *sk; ++ ++ union { ++ struct sock *sk; ++ int ip_defrag_offset; ++ }; ++ + struct net_device *dev; + + /* +@@ -2273,7 +2278,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) + kfree_skb(skb); + } + +-void skb_rbtree_purge(struct rb_root *root); ++unsigned int skb_rbtree_purge(struct rb_root *root); + + void *netdev_alloc_frag(unsigned int fragsz); + +@@ -2791,6 +2796,7 @@ static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, + return skb->data; + } + ++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); + /** + * pskb_trim_rcsum - trim received skb and update checksum + * @skb: buffer to trim +@@ -2805,9 +2811,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) + { + if (likely(len >= skb->len)) + return 0; +- if (skb->ip_summed == CHECKSUM_COMPLETE) +- skb->ip_summed = CHECKSUM_NONE; +- return __pskb_trim(skb, len); ++ return pskb_trim_rcsum_slow(skb, len); + } + + #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h +index c26a6e4dc306..6260ec146142 100644 +--- a/include/net/inet_frag.h ++++ b/include/net/inet_frag.h +@@ -1,13 +1,19 @@ + #ifndef __NET_FRAG_H__ + #define __NET_FRAG_H__ + ++#include <linux/rhashtable.h> ++ + struct netns_frags { +- /* Keep atomic mem on separate cachelines in structs that include it */ +- atomic_t mem ____cacheline_aligned_in_smp; + /* sysctls */ ++ long high_thresh; ++ long low_thresh; + int timeout; +- int high_thresh; +- int low_thresh; ++ struct inet_frags *f; ++ ++ struct rhashtable rhashtable ____cacheline_aligned_in_smp; ++ ++ /* Keep atomic mem on separate cachelines in structs that include it */ ++ atomic_long_t mem ____cacheline_aligned_in_smp; + }; + + /** +@@ -23,74 +29,68 @@ enum { + INET_FRAG_COMPLETE = BIT(2), + }; + ++struct frag_v4_compare_key { ++ __be32 saddr; ++ __be32 daddr; ++ u32 user; ++ u32 vif; ++ __be16 id; ++ u16 protocol; ++}; ++ ++struct frag_v6_compare_key { ++ struct in6_addr saddr; ++ struct in6_addr daddr; ++ u32 user; ++ __be32 id; ++ u32 iif; ++}; ++ + /** + * struct inet_frag_queue - fragment queue + * +- * @lock: spinlock protecting the queue ++ * @node: rhash node ++ * @key: keys identifying this frag. + * @timer: queue expiration timer +- * @list: hash bucket list ++ * @lock: spinlock protecting this frag + * @refcnt: reference count of the queue + * @fragments: received fragments head ++ * @rb_fragments: received fragments rb-tree root + * @fragments_tail: received fragments tail ++ * @last_run_head: the head of the last "run". see ip_fragment.c + * @stamp: timestamp of the last received fragment + * @len: total length of the original datagram + * @meat: length of received fragments so far + * @flags: fragment queue flags + * @max_size: maximum received fragment size + * @net: namespace that this frag belongs to +- * @list_evictor: list of queues to forcefully evict (e.g. 
due to low memory) ++ * @rcu: rcu head for freeing deferall + */ + struct inet_frag_queue { +- spinlock_t lock; ++ struct rhash_head node; ++ union { ++ struct frag_v4_compare_key v4; ++ struct frag_v6_compare_key v6; ++ } key; + struct timer_list timer; +- struct hlist_node list; ++ spinlock_t lock; + atomic_t refcnt; +- struct sk_buff *fragments; ++ struct sk_buff *fragments; /* Used in IPv6. */ ++ struct rb_root rb_fragments; /* Used in IPv4. */ + struct sk_buff *fragments_tail; ++ struct sk_buff *last_run_head; + ktime_t stamp; + int len; + int meat; + __u8 flags; + u16 max_size; +- struct netns_frags *net; +- struct hlist_node list_evictor; +-}; +- +-#define INETFRAGS_HASHSZ 1024 +- +-/* averaged: +- * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / +- * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or +- * struct frag_queue)) +- */ +-#define INETFRAGS_MAXDEPTH 128 +- +-struct inet_frag_bucket { +- struct hlist_head chain; +- spinlock_t chain_lock; ++ struct netns_frags *net; ++ struct rcu_head rcu; + }; + + struct inet_frags { +- struct inet_frag_bucket hash[INETFRAGS_HASHSZ]; +- +- struct work_struct frags_work; +- unsigned int next_bucket; +- unsigned long last_rebuild_jiffies; +- bool rebuild; +- +- /* The first call to hashfn is responsible to initialize +- * rnd. This is best done with net_get_random_once. +- * +- * rnd_seqlock is used to let hash insertion detect +- * when it needs to re-lookup the hash chain to use. +- */ +- u32 rnd; +- seqlock_t rnd_seqlock; + int qsize; + +- unsigned int (*hashfn)(const struct inet_frag_queue *); +- bool (*match)(const struct inet_frag_queue *q, +- const void *arg); + void (*constructor)(struct inet_frag_queue *q, + const void *arg); + void (*destructor)(struct inet_frag_queue *); +@@ -98,56 +98,47 @@ struct inet_frags { + void (*frag_expire)(unsigned long data); + struct kmem_cache *frags_cachep; + const char *frags_cache_name; ++ struct rhashtable_params rhash_params; + }; + + int inet_frags_init(struct inet_frags *); + void inet_frags_fini(struct inet_frags *); + +-static inline void inet_frags_init_net(struct netns_frags *nf) ++static inline int inet_frags_init_net(struct netns_frags *nf) + { +- atomic_set(&nf->mem, 0); ++ atomic_long_set(&nf->mem, 0); ++ return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); + } +-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); ++void inet_frags_exit_net(struct netns_frags *nf); + +-void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); +-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f); +-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, +- struct inet_frags *f, void *key, unsigned int hash); ++void inet_frag_kill(struct inet_frag_queue *q); ++void inet_frag_destroy(struct inet_frag_queue *q); ++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); + +-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, +- const char *prefix); ++/* Free all skbs in the queue; return the sum of their truesizes. */ ++unsigned int inet_frag_rbtree_purge(struct rb_root *root); + +-static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) ++static inline void inet_frag_put(struct inet_frag_queue *q) + { + if (atomic_dec_and_test(&q->refcnt)) +- inet_frag_destroy(q, f); +-} +- +-static inline bool inet_frag_evicting(struct inet_frag_queue *q) +-{ +- return !hlist_unhashed(&q->list_evictor); ++ inet_frag_destroy(q); + } + + /* Memory Tracking Functions. 
*/ + +-static inline int frag_mem_limit(struct netns_frags *nf) +-{ +- return atomic_read(&nf->mem); +-} +- +-static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) ++static inline long frag_mem_limit(const struct netns_frags *nf) + { +- atomic_sub(i, &nf->mem); ++ return atomic_long_read(&nf->mem); + } + +-static inline void add_frag_mem_limit(struct netns_frags *nf, int i) ++static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) + { +- atomic_add(i, &nf->mem); ++ atomic_long_sub(val, &nf->mem); + } + +-static inline int sum_frag_mem_limit(struct netns_frags *nf) ++static inline void add_frag_mem_limit(struct netns_frags *nf, long val) + { +- return atomic_read(&nf->mem); ++ atomic_long_add(val, &nf->mem); + } + + /* RFC 3168 support : +diff --git a/include/net/ip.h b/include/net/ip.h +index 0530bcdbc212..7b968927477d 100644 +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -524,7 +524,6 @@ static inline struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *s + return skb; + } + #endif +-int ip_frag_mem(struct net *net); + + /* + * Functions provided by ip_forward.c +diff --git a/include/net/ipv6.h b/include/net/ipv6.h +index 0e01d570fa22..c07cf9596b6f 100644 +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -320,13 +320,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev) + idev->cnf.accept_ra; + } + +-#if IS_ENABLED(CONFIG_IPV6) +-static inline int ip6_frag_mem(struct net *net) +-{ +- return sum_frag_mem_limit(&net->ipv6.frags); +-} +-#endif +- + #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ + #define IPV6_FRAG_LOW_THRESH (3 * 1024*1024) /* 3145728 */ + #define IPV6_FRAG_TIMEOUT (60 * HZ) /* 60 seconds */ +@@ -505,17 +498,8 @@ enum ip6_defrag_users { + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, + }; + +-struct ip6_create_arg { +- __be32 id; +- u32 user; +- const struct in6_addr *src; +- const struct in6_addr *dst; +- int iif; +- u8 ecn; +-}; +- + void ip6_frag_init(struct inet_frag_queue *q, const void *a); +-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); ++extern const struct rhashtable_params ip6_rhash_params; + + /* + * Equivalent of ipv4 struct ip +@@ -523,19 +507,13 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a); + struct frag_queue { + struct inet_frag_queue q; + +- __be32 id; /* fragment id */ +- u32 user; +- struct in6_addr saddr; +- struct in6_addr daddr; +- + int iif; + unsigned int csum; + __u16 nhoffset; + u8 ecn; + }; + +-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, +- struct inet_frags *frags); ++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); + + static inline bool ipv6_addr_any(const struct in6_addr *a) + { +diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h +index 25a9ad8bcef1..9de808ebce05 100644 +--- a/include/uapi/linux/snmp.h ++++ b/include/uapi/linux/snmp.h +@@ -55,6 +55,7 @@ enum + IPSTATS_MIB_ECT1PKTS, /* InECT1Pkts */ + IPSTATS_MIB_ECT0PKTS, /* InECT0Pkts */ + IPSTATS_MIB_CEPKTS, /* InCEPkts */ ++ IPSTATS_MIB_REASM_OVERLAPS, /* ReasmOverlaps */ + __IPSTATS_MIB_MAX + }; + +diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c +index 8a62cbfe1f2f..4e886ccd40db 100644 +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -3817,7 +3817,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) + continue; + rdp = per_cpu_ptr(rsp->rda, cpu); + pr_cont(" %d-%c%c%c", cpu, +- "O."[cpu_online(cpu)], ++ "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask 
& rnp->expmaskinit)], + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); + } +diff --git a/lib/rhashtable.c b/lib/rhashtable.c +index 37ea94b636a3..7bb8649429bf 100644 +--- a/lib/rhashtable.c ++++ b/lib/rhashtable.c +@@ -250,8 +250,10 @@ static int rhashtable_rehash_table(struct rhashtable *ht) + if (!new_tbl) + return 0; + +- for (old_hash = 0; old_hash < old_tbl->size; old_hash++) ++ for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + rhashtable_rehash_chain(ht, old_hash); ++ cond_resched(); ++ } + + /* Publish the new table pointer. */ + rcu_assign_pointer(ht->tbl, new_tbl); +@@ -441,7 +443,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); + struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, + const void *key, + struct rhash_head *obj, +- struct bucket_table *tbl) ++ struct bucket_table *tbl, ++ void **data) + { + struct rhash_head *head; + unsigned int hash; +@@ -452,8 +455,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, + spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + + err = -EEXIST; +- if (key && rhashtable_lookup_fast(ht, key, ht->p)) +- goto exit; ++ if (key) { ++ *data = rhashtable_lookup_fast(ht, key, ht->p); ++ if (*data) ++ goto exit; ++ } + + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) +@@ -838,6 +844,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + ++ cond_resched(); + for (pos = rht_dereference(tbl->buckets[i], ht), + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 8a57bbaf7452..fea7c24e99d0 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -1502,6 +1502,21 @@ done: + } + EXPORT_SYMBOL(___pskb_trim); + ++/* Note : use pskb_trim_rcsum() instead of calling this directly ++ */ ++int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) ++{ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) { ++ int delta = skb->len - len; ++ ++ skb->csum = csum_block_sub(skb->csum, ++ skb_checksum(skb, len, delta, 0), ++ len); ++ } ++ return __pskb_trim(skb, len); ++} ++EXPORT_SYMBOL(pskb_trim_rcsum_slow); ++ + /** + * __pskb_pull_tail - advance tail of skb header + * @skb: buffer to reallocate +@@ -2380,23 +2395,27 @@ EXPORT_SYMBOL(skb_queue_purge); + /** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty ++ * Return value: the sum of truesizes of all purged skbs. + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). 
+ */ +-void skb_rbtree_purge(struct rb_root *root) ++unsigned int skb_rbtree_purge(struct rb_root *root) + { + struct rb_node *p = rb_first(root); ++ unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); ++ sum += skb->truesize; + kfree_skb(skb); + } ++ return sum; + } + + /** +diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h +index b4e17a7c0df0..fdbebe51446f 100644 +--- a/net/ieee802154/6lowpan/6lowpan_i.h ++++ b/net/ieee802154/6lowpan/6lowpan_i.h +@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result; + #define LOWPAN_DISPATCH_FRAG1 0xc0 + #define LOWPAN_DISPATCH_FRAGN 0xe0 + +-struct lowpan_create_arg { ++struct frag_lowpan_compare_key { + u16 tag; + u16 d_size; +- const struct ieee802154_addr *src; +- const struct ieee802154_addr *dst; ++ struct ieee802154_addr src; ++ struct ieee802154_addr dst; + }; + +-/* Equivalent of ipv4 struct ip ++/* Equivalent of ipv4 struct ipq + */ + struct lowpan_frag_queue { + struct inet_frag_queue q; +- +- u16 tag; +- u16 d_size; +- struct ieee802154_addr saddr; +- struct ieee802154_addr daddr; + }; + +-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) +-{ +- switch (a->mode) { +- case IEEE802154_ADDR_LONG: +- return (((__force u64)a->extended_addr) >> 32) ^ +- (((__force u64)a->extended_addr) & 0xffffffff); +- case IEEE802154_ADDR_SHORT: +- return (__force u32)(a->short_addr); +- default: +- return 0; +- } +-} +- + /* private device info */ + struct lowpan_dev_info { + struct net_device *wdev; /* wpan device ptr */ +diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c +index 12e8cf4bda9f..6183730d38db 100644 +--- a/net/ieee802154/6lowpan/reassembly.c ++++ b/net/ieee802154/6lowpan/reassembly.c +@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags; + static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, + struct sk_buff *prev, struct net_device *ldev); + +-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, +- const struct ieee802154_addr *saddr, +- const struct ieee802154_addr *daddr) +-{ +- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd)); +- return jhash_3words(ieee802154_addr_hash(saddr), +- ieee802154_addr_hash(daddr), +- (__force u32)(tag + (d_size << 16)), +- lowpan_frags.rnd); +-} +- +-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q) +-{ +- const struct lowpan_frag_queue *fq; +- +- fq = container_of(q, struct lowpan_frag_queue, q); +- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr); +-} +- +-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a) +-{ +- const struct lowpan_frag_queue *fq; +- const struct lowpan_create_arg *arg = a; +- +- fq = container_of(q, struct lowpan_frag_queue, q); +- return fq->tag == arg->tag && fq->d_size == arg->d_size && +- ieee802154_addr_equal(&fq->saddr, arg->src) && +- ieee802154_addr_equal(&fq->daddr, arg->dst); +-} +- + static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) + { +- const struct lowpan_create_arg *arg = a; ++ const struct frag_lowpan_compare_key *key = a; + struct lowpan_frag_queue *fq; + + fq = container_of(q, struct lowpan_frag_queue, q); + +- fq->tag = arg->tag; +- fq->d_size = arg->d_size; +- fq->saddr = *arg->src; +- fq->daddr = *arg->dst; ++ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); ++ memcpy(&q->key, key, sizeof(*key)); + } + + static void lowpan_frag_expire(unsigned long data) +@@ 
-93,10 +61,10 @@ static void lowpan_frag_expire(unsigned long data) + if (fq->q.flags & INET_FRAG_COMPLETE) + goto out; + +- inet_frag_kill(&fq->q, &lowpan_frags); ++ inet_frag_kill(&fq->q); + out: + spin_unlock(&fq->q.lock); +- inet_frag_put(&fq->q, &lowpan_frags); ++ inet_frag_put(&fq->q); + } + + static inline struct lowpan_frag_queue * +@@ -104,25 +72,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, + const struct ieee802154_addr *src, + const struct ieee802154_addr *dst) + { +- struct inet_frag_queue *q; +- struct lowpan_create_arg arg; +- unsigned int hash; + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); ++ struct frag_lowpan_compare_key key = {}; ++ struct inet_frag_queue *q; + +- arg.tag = cb->d_tag; +- arg.d_size = cb->d_size; +- arg.src = src; +- arg.dst = dst; +- +- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); ++ key.tag = cb->d_tag; ++ key.d_size = cb->d_size; ++ key.src = *src; ++ key.dst = *dst; + +- q = inet_frag_find(&ieee802154_lowpan->frags, +- &lowpan_frags, &arg, hash); +- if (IS_ERR_OR_NULL(q)) { +- inet_frag_maybe_warn_overflow(q, pr_fmt()); ++ q = inet_frag_find(&ieee802154_lowpan->frags, &key); ++ if (!q) + return NULL; +- } ++ + return container_of(q, struct lowpan_frag_queue, q); + } + +@@ -229,7 +192,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, + struct sk_buff *fp, *head = fq->q.fragments; + int sum_truesize; + +- inet_frag_kill(&fq->q, &lowpan_frags); ++ inet_frag_kill(&fq->q); + + /* Make the one we just received the head. */ + if (prev) { +@@ -408,7 +371,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) + struct lowpan_frag_queue *fq; + struct net *net = dev_net(skb->dev); + struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); +- struct ieee802154_hdr hdr; ++ struct ieee802154_hdr hdr = {}; + int err; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) +@@ -437,7 +400,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) + ret = lowpan_frag_queue(fq, skb, frag_type); + spin_unlock(&fq->q.lock); + +- inet_frag_put(&fq->q, &lowpan_frags); ++ inet_frag_put(&fq->q); + return ret; + } + +@@ -447,24 +410,22 @@ err: + } + + #ifdef CONFIG_SYSCTL +-static int zero; + + static struct ctl_table lowpan_frags_ns_ctl_table[] = { + { + .procname = "6lowpanfrag_high_thresh", + .data = &init_net.ieee802154_lowpan.frags.high_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh + }, + { + .procname = "6lowpanfrag_low_thresh", + .data = &init_net.ieee802154_lowpan.frags.low_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = &zero, ++ .proc_handler = proc_doulongvec_minmax, + .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh + }, + { +@@ -580,14 +541,20 @@ static int __net_init lowpan_frags_init_net(struct net *net) + { + struct netns_ieee802154_lowpan *ieee802154_lowpan = + net_ieee802154_lowpan(net); ++ int res; + + ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; ++ ieee802154_lowpan->frags.f = &lowpan_frags; + +- inet_frags_init_net(&ieee802154_lowpan->frags); +- +- return lowpan_frags_ns_sysctl_register(net); ++ res = 
inet_frags_init_net(&ieee802154_lowpan->frags); ++ if (res < 0) ++ return res; ++ res = lowpan_frags_ns_sysctl_register(net); ++ if (res < 0) ++ inet_frags_exit_net(&ieee802154_lowpan->frags); ++ return res; + } + + static void __net_exit lowpan_frags_exit_net(struct net *net) +@@ -596,7 +563,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) + net_ieee802154_lowpan(net); + + lowpan_frags_ns_sysctl_unregister(net); +- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags); ++ inet_frags_exit_net(&ieee802154_lowpan->frags); + } + + static struct pernet_operations lowpan_frags_ops = { +@@ -604,33 +571,64 @@ static struct pernet_operations lowpan_frags_ops = { + .exit = lowpan_frags_exit_net, + }; + +-int __init lowpan_net_frag_init(void) ++static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed) + { +- int ret; ++ return jhash2(data, ++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); ++} + +- ret = lowpan_frags_sysctl_register(); +- if (ret) +- return ret; ++static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed) ++{ ++ const struct inet_frag_queue *fq = data; + +- ret = register_pernet_subsys(&lowpan_frags_ops); +- if (ret) +- goto err_pernet; ++ return jhash2((const u32 *)&fq->key, ++ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); ++} ++ ++static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) ++{ ++ const struct frag_lowpan_compare_key *key = arg->key; ++ const struct inet_frag_queue *fq = ptr; ++ ++ return !!memcmp(&fq->key, key, sizeof(*key)); ++} ++ ++static const struct rhashtable_params lowpan_rhash_params = { ++ .head_offset = offsetof(struct inet_frag_queue, node), ++ .hashfn = lowpan_key_hashfn, ++ .obj_hashfn = lowpan_obj_hashfn, ++ .obj_cmpfn = lowpan_obj_cmpfn, ++ .automatic_shrinking = true, ++}; ++ ++int __init lowpan_net_frag_init(void) ++{ ++ int ret; + +- lowpan_frags.hashfn = lowpan_hashfn; + lowpan_frags.constructor = lowpan_frag_init; + lowpan_frags.destructor = NULL; + lowpan_frags.skb_free = NULL; + lowpan_frags.qsize = sizeof(struct frag_queue); +- lowpan_frags.match = lowpan_frag_match; + lowpan_frags.frag_expire = lowpan_frag_expire; + lowpan_frags.frags_cache_name = lowpan_frags_cache_name; ++ lowpan_frags.rhash_params = lowpan_rhash_params; + ret = inet_frags_init(&lowpan_frags); + if (ret) +- goto err_pernet; ++ goto out; + ++ ret = lowpan_frags_sysctl_register(); ++ if (ret) ++ goto err_sysctl; ++ ++ ret = register_pernet_subsys(&lowpan_frags_ops); ++ if (ret) ++ goto err_pernet; ++out: + return ret; + err_pernet: + lowpan_frags_sysctl_unregister(); ++err_sysctl: ++ inet_frags_fini(&lowpan_frags); + return ret; + } + +diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c +index b2001b20e029..c03e5f5859e1 100644 +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -25,12 +25,6 @@ + #include <net/inet_frag.h> + #include <net/inet_ecn.h> + +-#define INETFRAGS_EVICT_BUCKETS 128 +-#define INETFRAGS_EVICT_MAX 512 +- +-/* don't rebuild inetfrag table with new secret more often than this */ +-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ) +- + /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. 
+ * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field +@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = { + }; + EXPORT_SYMBOL(ip_frag_ecn_table); + +-static unsigned int +-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) +-{ +- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1); +-} +- +-static bool inet_frag_may_rebuild(struct inet_frags *f) +-{ +- return time_after(jiffies, +- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL); +-} +- +-static void inet_frag_secret_rebuild(struct inet_frags *f) +-{ +- int i; +- +- write_seqlock_bh(&f->rnd_seqlock); +- +- if (!inet_frag_may_rebuild(f)) +- goto out; +- +- get_random_bytes(&f->rnd, sizeof(u32)); +- +- for (i = 0; i < INETFRAGS_HASHSZ; i++) { +- struct inet_frag_bucket *hb; +- struct inet_frag_queue *q; +- struct hlist_node *n; +- +- hb = &f->hash[i]; +- spin_lock(&hb->chain_lock); +- +- hlist_for_each_entry_safe(q, n, &hb->chain, list) { +- unsigned int hval = inet_frag_hashfn(f, q); +- +- if (hval != i) { +- struct inet_frag_bucket *hb_dest; +- +- hlist_del(&q->list); +- +- /* Relink to new hash chain. */ +- hb_dest = &f->hash[hval]; +- +- /* This is the only place where we take +- * another chain_lock while already holding +- * one. As this will not run concurrently, +- * we cannot deadlock on hb_dest lock below, if its +- * already locked it will be released soon since +- * other caller cannot be waiting for hb lock +- * that we've taken above. +- */ +- spin_lock_nested(&hb_dest->chain_lock, +- SINGLE_DEPTH_NESTING); +- hlist_add_head(&q->list, &hb_dest->chain); +- spin_unlock(&hb_dest->chain_lock); +- } +- } +- spin_unlock(&hb->chain_lock); +- } +- +- f->rebuild = false; +- f->last_rebuild_jiffies = jiffies; +-out: +- write_sequnlock_bh(&f->rnd_seqlock); +-} +- +-static bool inet_fragq_should_evict(const struct inet_frag_queue *q) +-{ +- if (!hlist_unhashed(&q->list_evictor)) +- return false; +- +- return q->net->low_thresh == 0 || +- frag_mem_limit(q->net) >= q->net->low_thresh; +-} +- +-static unsigned int +-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) +-{ +- struct inet_frag_queue *fq; +- struct hlist_node *n; +- unsigned int evicted = 0; +- HLIST_HEAD(expired); +- +- spin_lock(&hb->chain_lock); +- +- hlist_for_each_entry_safe(fq, n, &hb->chain, list) { +- if (!inet_fragq_should_evict(fq)) +- continue; +- +- if (!del_timer(&fq->timer)) +- continue; +- +- hlist_add_head(&fq->list_evictor, &expired); +- ++evicted; +- } +- +- spin_unlock(&hb->chain_lock); +- +- hlist_for_each_entry_safe(fq, n, &expired, list_evictor) +- f->frag_expire((unsigned long) fq); +- +- return evicted; +-} +- +-static void inet_frag_worker(struct work_struct *work) +-{ +- unsigned int budget = INETFRAGS_EVICT_BUCKETS; +- unsigned int i, evicted = 0; +- struct inet_frags *f; +- +- f = container_of(work, struct inet_frags, frags_work); +- +- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); +- +- local_bh_disable(); +- +- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { +- evicted += inet_evict_bucket(f, &f->hash[i]); +- i = (i + 1) & (INETFRAGS_HASHSZ - 1); +- if (evicted > INETFRAGS_EVICT_MAX) +- break; +- } +- +- f->next_bucket = i; +- +- local_bh_enable(); +- +- if (f->rebuild && inet_frag_may_rebuild(f)) +- inet_frag_secret_rebuild(f); +-} +- +-static void inet_frag_schedule_worker(struct inet_frags *f) +-{ +- if (unlikely(!work_pending(&f->frags_work))) +- schedule_work(&f->frags_work); +-} +- + int inet_frags_init(struct inet_frags *f) + { +- int i; +- +- 
INIT_WORK(&f->frags_work, inet_frag_worker); +- +- for (i = 0; i < INETFRAGS_HASHSZ; i++) { +- struct inet_frag_bucket *hb = &f->hash[i]; +- +- spin_lock_init(&hb->chain_lock); +- INIT_HLIST_HEAD(&hb->chain); +- } +- +- seqlock_init(&f->rnd_seqlock); +- f->last_rebuild_jiffies = 0; + f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, + NULL); + if (!f->frags_cachep) +@@ -214,73 +59,53 @@ EXPORT_SYMBOL(inet_frags_init); + + void inet_frags_fini(struct inet_frags *f) + { +- cancel_work_sync(&f->frags_work); ++ /* We must wait that all inet_frag_destroy_rcu() have completed. */ ++ rcu_barrier(); ++ + kmem_cache_destroy(f->frags_cachep); ++ f->frags_cachep = NULL; + } + EXPORT_SYMBOL(inet_frags_fini); + +-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) ++static void inet_frags_free_cb(void *ptr, void *arg) + { +- unsigned int seq; +- int i; ++ struct inet_frag_queue *fq = ptr; + +- nf->low_thresh = 0; +- +-evict_again: +- local_bh_disable(); +- seq = read_seqbegin(&f->rnd_seqlock); +- +- for (i = 0; i < INETFRAGS_HASHSZ ; i++) +- inet_evict_bucket(f, &f->hash[i]); +- +- local_bh_enable(); +- cond_resched(); +- +- if (read_seqretry(&f->rnd_seqlock, seq) || +- sum_frag_mem_limit(nf)) +- goto evict_again; +-} +-EXPORT_SYMBOL(inet_frags_exit_net); +- +-static struct inet_frag_bucket * +-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f) +-__acquires(hb->chain_lock) +-{ +- struct inet_frag_bucket *hb; +- unsigned int seq, hash; +- +- restart: +- seq = read_seqbegin(&f->rnd_seqlock); +- +- hash = inet_frag_hashfn(f, fq); +- hb = &f->hash[hash]; ++ /* If we can not cancel the timer, it means this frag_queue ++ * is already disappearing, we have nothing to do. ++ * Otherwise, we own a refcount until the end of this function. 
++ */ ++ if (!del_timer(&fq->timer)) ++ return; + +- spin_lock(&hb->chain_lock); +- if (read_seqretry(&f->rnd_seqlock, seq)) { +- spin_unlock(&hb->chain_lock); +- goto restart; ++ spin_lock_bh(&fq->lock); ++ if (!(fq->flags & INET_FRAG_COMPLETE)) { ++ fq->flags |= INET_FRAG_COMPLETE; ++ atomic_dec(&fq->refcnt); + } ++ spin_unlock_bh(&fq->lock); + +- return hb; ++ inet_frag_put(fq); + } + +-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) ++void inet_frags_exit_net(struct netns_frags *nf) + { +- struct inet_frag_bucket *hb; ++ nf->high_thresh = 0; /* prevent creation of new frags */ + +- hb = get_frag_bucket_locked(fq, f); +- hlist_del(&fq->list); +- fq->flags |= INET_FRAG_COMPLETE; +- spin_unlock(&hb->chain_lock); ++ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); + } ++EXPORT_SYMBOL(inet_frags_exit_net); + +-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) ++void inet_frag_kill(struct inet_frag_queue *fq) + { + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->flags & INET_FRAG_COMPLETE)) { +- fq_unlink(fq, f); ++ struct netns_frags *nf = fq->net; ++ ++ fq->flags |= INET_FRAG_COMPLETE; ++ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); + atomic_dec(&fq->refcnt); + } + } +@@ -294,11 +119,23 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, + kfree_skb(skb); + } + +-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) ++static void inet_frag_destroy_rcu(struct rcu_head *head) ++{ ++ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, ++ rcu); ++ struct inet_frags *f = q->net->f; ++ ++ if (f->destructor) ++ f->destructor(q); ++ kmem_cache_free(f->frags_cachep, q); ++} ++ ++void inet_frag_destroy(struct inet_frag_queue *q) + { + struct sk_buff *fp; + struct netns_frags *nf; + unsigned int sum, sum_truesize = 0; ++ struct inet_frags *f; + + WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); + WARN_ON(del_timer(&q->timer) != 0); +@@ -306,64 +143,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) + /* Release all fragment data. */ + fp = q->fragments; + nf = q->net; +- while (fp) { +- struct sk_buff *xp = fp->next; +- +- sum_truesize += fp->truesize; +- frag_kfree_skb(nf, f, fp); +- fp = xp; ++ f = nf->f; ++ if (fp) { ++ do { ++ struct sk_buff *xp = fp->next; ++ ++ sum_truesize += fp->truesize; ++ frag_kfree_skb(nf, f, fp); ++ fp = xp; ++ } while (fp); ++ } else { ++ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); + } + sum = sum_truesize + f->qsize; + +- if (f->destructor) +- f->destructor(q); +- kmem_cache_free(f->frags_cachep, q); ++ call_rcu(&q->rcu, inet_frag_destroy_rcu); + + sub_frag_mem_limit(nf, sum); + } + EXPORT_SYMBOL(inet_frag_destroy); + +-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, +- struct inet_frag_queue *qp_in, +- struct inet_frags *f, +- void *arg) +-{ +- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); +- struct inet_frag_queue *qp; +- +-#ifdef CONFIG_SMP +- /* With SMP race we have to recheck hash table, because +- * such entry could have been created on other cpu before +- * we acquired hash bucket lock. 
+- */ +- hlist_for_each_entry(qp, &hb->chain, list) { +- if (qp->net == nf && f->match(qp, arg)) { +- atomic_inc(&qp->refcnt); +- spin_unlock(&hb->chain_lock); +- qp_in->flags |= INET_FRAG_COMPLETE; +- inet_frag_put(qp_in, f); +- return qp; +- } +- } +-#endif +- qp = qp_in; +- if (!mod_timer(&qp->timer, jiffies + nf->timeout)) +- atomic_inc(&qp->refcnt); +- +- atomic_inc(&qp->refcnt); +- hlist_add_head(&qp->list, &hb->chain); +- +- spin_unlock(&hb->chain_lock); +- +- return qp; +-} +- + static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, + struct inet_frags *f, + void *arg) + { + struct inet_frag_queue *q; + ++ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) ++ return NULL; ++ + q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); + if (!q) + return NULL; +@@ -374,75 +182,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, + + setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + spin_lock_init(&q->lock); +- atomic_set(&q->refcnt, 1); ++ atomic_set(&q->refcnt, 3); + + return q; + } + + static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, +- struct inet_frags *f, +- void *arg) ++ void *arg, ++ struct inet_frag_queue **prev) + { ++ struct inet_frags *f = nf->f; + struct inet_frag_queue *q; + + q = inet_frag_alloc(nf, f, arg); +- if (!q) +- return NULL; +- +- return inet_frag_intern(nf, q, f, arg); +-} +- +-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, +- struct inet_frags *f, void *key, +- unsigned int hash) +-{ +- struct inet_frag_bucket *hb; +- struct inet_frag_queue *q; +- int depth = 0; +- +- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) { +- inet_frag_schedule_worker(f); ++ if (!q) { ++ *prev = ERR_PTR(-ENOMEM); + return NULL; + } +- +- if (frag_mem_limit(nf) > nf->low_thresh) +- inet_frag_schedule_worker(f); +- +- hash &= (INETFRAGS_HASHSZ - 1); +- hb = &f->hash[hash]; +- +- spin_lock(&hb->chain_lock); +- hlist_for_each_entry(q, &hb->chain, list) { +- if (q->net == nf && f->match(q, key)) { +- atomic_inc(&q->refcnt); +- spin_unlock(&hb->chain_lock); +- return q; +- } +- depth++; +- } +- spin_unlock(&hb->chain_lock); +- +- if (depth <= INETFRAGS_MAXDEPTH) +- return inet_frag_create(nf, f, key); +- +- if (inet_frag_may_rebuild(f)) { +- if (!f->rebuild) +- f->rebuild = true; +- inet_frag_schedule_worker(f); ++ mod_timer(&q->timer, jiffies + nf->timeout); ++ ++ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, ++ &q->node, f->rhash_params); ++ if (*prev) { ++ q->flags |= INET_FRAG_COMPLETE; ++ inet_frag_kill(q); ++ inet_frag_destroy(q); ++ return NULL; + } +- +- return ERR_PTR(-ENOBUFS); ++ return q; + } +-EXPORT_SYMBOL(inet_frag_find); ++EXPORT_SYMBOL(inet_frag_create); + +-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, +- const char *prefix) ++/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ ++struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) + { +- static const char msg[] = "inet_frag_find: Fragment hash bucket" +- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) +- ". 
Dropping fragment.\n"; ++ struct inet_frag_queue *fq = NULL, *prev; + +- if (PTR_ERR(q) == -ENOBUFS) +- net_dbg_ratelimited("%s%s", prefix, msg); ++ rcu_read_lock(); ++ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); ++ if (!prev) ++ fq = inet_frag_create(nf, key, &prev); ++ if (prev && !IS_ERR(prev)) { ++ fq = prev; ++ if (!atomic_inc_not_zero(&fq->refcnt)) ++ fq = NULL; ++ } ++ rcu_read_unlock(); ++ return fq; + } +-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); ++EXPORT_SYMBOL(inet_frag_find); +diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c +index 72915658a6b1..9b09a9b5a4fe 100644 +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -58,27 +58,64 @@ + static int sysctl_ipfrag_max_dist __read_mostly = 64; + static const char ip_frag_cache_name[] = "ip4-frags"; + +-struct ipfrag_skb_cb +-{ ++/* Use skb->cb to track consecutive/adjacent fragments coming at ++ * the end of the queue. Nodes in the rb-tree queue will ++ * contain "runs" of one or more adjacent fragments. ++ * ++ * Invariants: ++ * - next_frag is NULL at the tail of a "run"; ++ * - the head of a "run" has the sum of all fragment lengths in frag_run_len. ++ */ ++struct ipfrag_skb_cb { + struct inet_skb_parm h; +- int offset; ++ struct sk_buff *next_frag; ++ int frag_run_len; + }; + +-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) ++#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) ++ ++static void ip4_frag_init_run(struct sk_buff *skb) ++{ ++ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); ++ ++ FRAG_CB(skb)->next_frag = NULL; ++ FRAG_CB(skb)->frag_run_len = skb->len; ++} ++ ++/* Append skb to the last "run". */ ++static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, ++ struct sk_buff *skb) ++{ ++ RB_CLEAR_NODE(&skb->rbnode); ++ FRAG_CB(skb)->next_frag = NULL; ++ ++ FRAG_CB(q->last_run_head)->frag_run_len += skb->len; ++ FRAG_CB(q->fragments_tail)->next_frag = skb; ++ q->fragments_tail = skb; ++} ++ ++/* Create a new "run" with the skb. */ ++static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) ++{ ++ if (q->last_run_head) ++ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, ++ &q->last_run_head->rbnode.rb_right); ++ else ++ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); ++ rb_insert_color(&skb->rbnode, &q->rb_fragments); ++ ++ ip4_frag_init_run(skb); ++ q->fragments_tail = skb; ++ q->last_run_head = skb; ++} + + /* Describe an entry in the "incomplete datagrams" queue. 
*/ + struct ipq { + struct inet_frag_queue q; + +- u32 user; +- __be32 saddr; +- __be32 daddr; +- __be16 id; +- u8 protocol; + u8 ecn; /* RFC3168 support */ + u16 max_df_size; /* largest frag with DF set seen */ + int iif; +- int vif; /* L3 master device index */ + unsigned int rid; + struct inet_peer *peer; + }; +@@ -90,49 +127,9 @@ static u8 ip4_frag_ecn(u8 tos) + + static struct inet_frags ip4_frags; + +-int ip_frag_mem(struct net *net) +-{ +- return sum_frag_mem_limit(&net->ipv4.frags); +-} +- +-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, +- struct net_device *dev); +- +-struct ip4_create_arg { +- struct iphdr *iph; +- u32 user; +- int vif; +-}; ++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, ++ struct sk_buff *prev_tail, struct net_device *dev); + +-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) +-{ +- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd)); +- return jhash_3words((__force u32)id << 16 | prot, +- (__force u32)saddr, (__force u32)daddr, +- ip4_frags.rnd); +-} +- +-static unsigned int ip4_hashfn(const struct inet_frag_queue *q) +-{ +- const struct ipq *ipq; +- +- ipq = container_of(q, struct ipq, q); +- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); +-} +- +-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a) +-{ +- const struct ipq *qp; +- const struct ip4_create_arg *arg = a; +- +- qp = container_of(q, struct ipq, q); +- return qp->id == arg->iph->id && +- qp->saddr == arg->iph->saddr && +- qp->daddr == arg->iph->daddr && +- qp->protocol == arg->iph->protocol && +- qp->user == arg->user && +- qp->vif == arg->vif; +-} + + static void ip4_frag_init(struct inet_frag_queue *q, const void *a) + { +@@ -141,17 +138,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) + frags); + struct net *net = container_of(ipv4, struct net, ipv4); + +- const struct ip4_create_arg *arg = a; ++ const struct frag_v4_compare_key *key = a; + +- qp->protocol = arg->iph->protocol; +- qp->id = arg->iph->id; +- qp->ecn = ip4_frag_ecn(arg->iph->tos); +- qp->saddr = arg->iph->saddr; +- qp->daddr = arg->iph->daddr; +- qp->vif = arg->vif; +- qp->user = arg->user; ++ q->key.v4 = *key; ++ qp->ecn = 0; + qp->peer = sysctl_ipfrag_max_dist ? +- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : ++ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : + NULL; + } + +@@ -169,7 +161,7 @@ static void ip4_frag_free(struct inet_frag_queue *q) + + static void ipq_put(struct ipq *ipq) + { +- inet_frag_put(&ipq->q, &ip4_frags); ++ inet_frag_put(&ipq->q); + } + + /* Kill ipq entry. 
It is not destroyed immediately, +@@ -177,7 +169,7 @@ static void ipq_put(struct ipq *ipq) + */ + static void ipq_kill(struct ipq *ipq) + { +- inet_frag_kill(&ipq->q, &ip4_frags); ++ inet_frag_kill(&ipq->q); + } + + static bool frag_expire_skip_icmp(u32 user) +@@ -194,8 +186,11 @@ static bool frag_expire_skip_icmp(u32 user) + */ + static void ip_expire(unsigned long arg) + { +- struct ipq *qp; ++ const struct iphdr *iph; ++ struct sk_buff *head = NULL; + struct net *net; ++ struct ipq *qp; ++ int err; + + qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + net = container_of(qp->q.net, struct net, ipv4.frags); +@@ -208,51 +203,65 @@ static void ip_expire(unsigned long arg) + + ipq_kill(qp); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); ++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); + +- if (!inet_frag_evicting(&qp->q)) { +- struct sk_buff *clone, *head = qp->q.fragments; +- const struct iphdr *iph; +- int err; +- +- IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); ++ if (!(qp->q.flags & INET_FRAG_FIRST_IN)) ++ goto out; + +- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) ++ /* sk_buff::dev and sk_buff::rbnode are unionized. So we ++ * pull the head out of the tree in order to be able to ++ * deal with head->dev. ++ */ ++ if (qp->q.fragments) { ++ head = qp->q.fragments; ++ qp->q.fragments = head->next; ++ } else { ++ head = skb_rb_first(&qp->q.rb_fragments); ++ if (!head) + goto out; ++ if (FRAG_CB(head)->next_frag) ++ rb_replace_node(&head->rbnode, ++ &FRAG_CB(head)->next_frag->rbnode, ++ &qp->q.rb_fragments); ++ else ++ rb_erase(&head->rbnode, &qp->q.rb_fragments); ++ memset(&head->rbnode, 0, sizeof(head->rbnode)); ++ barrier(); ++ } ++ if (head == qp->q.fragments_tail) ++ qp->q.fragments_tail = NULL; + +- head->dev = dev_get_by_index_rcu(net, qp->iif); +- if (!head->dev) +- goto out; ++ sub_frag_mem_limit(qp->q.net, head->truesize); ++ ++ head->dev = dev_get_by_index_rcu(net, qp->iif); ++ if (!head->dev) ++ goto out; + + +- /* skb has no dst, perform route lookup again */ +- iph = ip_hdr(head); +- err = ip_route_input_noref(head, iph->daddr, iph->saddr, ++ /* skb has no dst, perform route lookup again */ ++ iph = ip_hdr(head); ++ err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); +- if (err) +- goto out; ++ if (err) ++ goto out; + +- /* Only an end host needs to send an ICMP +- * "Fragment Reassembly Timeout" message, per RFC792. +- */ +- if (frag_expire_skip_icmp(qp->user) && +- (skb_rtable(head)->rt_type != RTN_LOCAL)) +- goto out; ++ /* Only an end host needs to send an ICMP ++ * "Fragment Reassembly Timeout" message, per RFC792. ++ */ ++ if (frag_expire_skip_icmp(qp->q.key.v4.user) && ++ (skb_rtable(head)->rt_type != RTN_LOCAL)) ++ goto out; + +- clone = skb_clone(head, GFP_ATOMIC); ++ spin_unlock(&qp->q.lock); ++ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); ++ goto out_rcu_unlock; + +- /* Send an ICMP "Fragment Reassembly Timeout" message. 
*/ +- if (clone) { +- spin_unlock(&qp->q.lock); +- icmp_send(clone, ICMP_TIME_EXCEEDED, +- ICMP_EXC_FRAGTIME, 0); +- consume_skb(clone); +- goto out_rcu_unlock; +- } +- } + out: + spin_unlock(&qp->q.lock); + out_rcu_unlock: + rcu_read_unlock(); ++ if (head) ++ kfree_skb(head); + ipq_put(qp); + } + +@@ -262,21 +271,20 @@ out_rcu_unlock: + static struct ipq *ip_find(struct net *net, struct iphdr *iph, + u32 user, int vif) + { ++ struct frag_v4_compare_key key = { ++ .saddr = iph->saddr, ++ .daddr = iph->daddr, ++ .user = user, ++ .vif = vif, ++ .id = iph->id, ++ .protocol = iph->protocol, ++ }; + struct inet_frag_queue *q; +- struct ip4_create_arg arg; +- unsigned int hash; +- +- arg.iph = iph; +- arg.user = user; +- arg.vif = vif; +- +- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); + +- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); +- if (IS_ERR_OR_NULL(q)) { +- inet_frag_maybe_warn_overflow(q, pr_fmt()); ++ q = inet_frag_find(&net->ipv4.frags, &key); ++ if (!q) + return NULL; +- } ++ + return container_of(q, struct ipq, q); + } + +@@ -296,7 +304,7 @@ static int ip_frag_too_far(struct ipq *qp) + end = atomic_inc_return(&peer->rid); + qp->rid = end; + +- rc = qp->q.fragments && (end - start) > max; ++ rc = qp->q.fragments_tail && (end - start) > max; + + if (rc) { + struct net *net; +@@ -310,7 +318,6 @@ static int ip_frag_too_far(struct ipq *qp) + + static int ip_frag_reinit(struct ipq *qp) + { +- struct sk_buff *fp; + unsigned int sum_truesize = 0; + + if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { +@@ -318,21 +325,16 @@ static int ip_frag_reinit(struct ipq *qp) + return -ETIMEDOUT; + } + +- fp = qp->q.fragments; +- do { +- struct sk_buff *xp = fp->next; +- +- sum_truesize += fp->truesize; +- kfree_skb(fp); +- fp = xp; +- } while (fp); ++ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); + sub_frag_mem_limit(qp->q.net, sum_truesize); + + qp->q.flags = 0; + qp->q.len = 0; + qp->q.meat = 0; + qp->q.fragments = NULL; ++ qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; ++ qp->q.last_run_head = NULL; + qp->iif = 0; + qp->ecn = 0; + +@@ -342,11 +344,13 @@ static int ip_frag_reinit(struct ipq *qp) + /* Add new segment to existing queue. */ + static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + { +- struct sk_buff *prev, *next; ++ struct net *net = container_of(qp->q.net, struct net, ipv4.frags); ++ struct rb_node **rbn, *parent; ++ struct sk_buff *skb1, *prev_tail; ++ int ihl, end, skb1_run_end; + struct net_device *dev; + unsigned int fragsize; + int flags, offset; +- int ihl, end; + int err = -ENOENT; + u8 ecn; + +@@ -405,94 +409,68 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + if (err) + goto err; + +- /* Find out which fragments are in front and at the back of us +- * in the chain of fragments so far. We must know where to put +- * this fragment, right? +- */ +- prev = qp->q.fragments_tail; +- if (!prev || FRAG_CB(prev)->offset < offset) { +- next = NULL; +- goto found; +- } +- prev = NULL; +- for (next = qp->q.fragments; next != NULL; next = next->next) { +- if (FRAG_CB(next)->offset >= offset) +- break; /* bingo! */ +- prev = next; +- } +- +-found: +- /* We found where to put this one. Check for overlap with +- * preceding fragment, and, if needed, align things so that +- * any overlaps are eliminated. ++ /* Note : skb->rbnode and skb->dev share the same location. 
*/ ++ dev = skb->dev; ++ /* Makes sure compiler wont do silly aliasing games */ ++ barrier(); ++ ++ /* RFC5722, Section 4, amended by Errata ID : 3089 ++ * When reassembling an IPv6 datagram, if ++ * one or more its constituent fragments is determined to be an ++ * overlapping fragment, the entire datagram (and any constituent ++ * fragments) MUST be silently discarded. ++ * ++ * We do the same here for IPv4 (and increment an snmp counter) but ++ * we do not want to drop the whole queue in response to a duplicate ++ * fragment. + */ +- if (prev) { +- int i = (FRAG_CB(prev)->offset + prev->len) - offset; +- +- if (i > 0) { +- offset += i; +- err = -EINVAL; +- if (end <= offset) +- goto err; +- err = -ENOMEM; +- if (!pskb_pull(skb, i)) +- goto err; +- if (skb->ip_summed != CHECKSUM_UNNECESSARY) +- skb->ip_summed = CHECKSUM_NONE; +- } +- } + +- err = -ENOMEM; +- +- while (next && FRAG_CB(next)->offset < end) { +- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ +- +- if (i < next->len) { +- /* Eat head of the next overlapped fragment +- * and leave the loop. The next ones cannot overlap. +- */ +- if (!pskb_pull(next, i)) +- goto err; +- FRAG_CB(next)->offset += i; +- qp->q.meat -= i; +- if (next->ip_summed != CHECKSUM_UNNECESSARY) +- next->ip_summed = CHECKSUM_NONE; +- break; +- } else { +- struct sk_buff *free_it = next; +- +- /* Old fragment is completely overridden with +- * new one drop it. +- */ +- next = next->next; +- +- if (prev) +- prev->next = next; ++ err = -EINVAL; ++ /* Find out where to put this fragment. */ ++ prev_tail = qp->q.fragments_tail; ++ if (!prev_tail) ++ ip4_frag_create_run(&qp->q, skb); /* First fragment. */ ++ else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { ++ /* This is the common case: skb goes to the end. */ ++ /* Detect and discard overlaps. */ ++ if (offset < prev_tail->ip_defrag_offset + prev_tail->len) ++ goto discard_qp; ++ if (offset == prev_tail->ip_defrag_offset + prev_tail->len) ++ ip4_frag_append_to_last_run(&qp->q, skb); ++ else ++ ip4_frag_create_run(&qp->q, skb); ++ } else { ++ /* Binary search. Note that skb can become the first fragment, ++ * but not the last (covered above). ++ */ ++ rbn = &qp->q.rb_fragments.rb_node; ++ do { ++ parent = *rbn; ++ skb1 = rb_to_skb(parent); ++ skb1_run_end = skb1->ip_defrag_offset + ++ FRAG_CB(skb1)->frag_run_len; ++ if (end <= skb1->ip_defrag_offset) ++ rbn = &parent->rb_left; ++ else if (offset >= skb1_run_end) ++ rbn = &parent->rb_right; ++ else if (offset >= skb1->ip_defrag_offset && ++ end <= skb1_run_end) ++ goto err; /* No new data, potential duplicate */ + else +- qp->q.fragments = next; +- +- qp->q.meat -= free_it->len; +- sub_frag_mem_limit(qp->q.net, free_it->truesize); +- kfree_skb(free_it); +- } ++ goto discard_qp; /* Found an overlap */ ++ } while (*rbn); ++ /* Here we have parent properly set, and rbn pointing to ++ * one of its NULL left/right children. Insert skb. ++ */ ++ ip4_frag_init_run(skb); ++ rb_link_node(&skb->rbnode, parent, rbn); ++ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); + } + +- FRAG_CB(skb)->offset = offset; +- +- /* Insert this fragment in the chain of fragments. 
*/ +- skb->next = next; +- if (!next) +- qp->q.fragments_tail = skb; +- if (prev) +- prev->next = skb; +- else +- qp->q.fragments = skb; +- +- dev = skb->dev; +- if (dev) { ++ if (dev) + qp->iif = dev->ifindex; +- skb->dev = NULL; +- } ++ skb->ip_defrag_offset = offset; ++ + qp->q.stamp = skb->tstamp; + qp->q.meat += skb->len; + qp->ecn |= ecn; +@@ -514,7 +492,7 @@ found: + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; +- err = ip_frag_reasm(qp, prev, dev); ++ err = ip_frag_reasm(qp, skb, prev_tail, dev); + skb->_skb_refdst = orefdst; + return err; + } +@@ -522,20 +500,23 @@ found: + skb_dst_drop(skb); + return -EINPROGRESS; + ++discard_qp: ++ inet_frag_kill(&qp->q); ++ IP_INC_STATS_BH(net, IPSTATS_MIB_REASM_OVERLAPS); + err: + kfree_skb(skb); + return err; + } + +- + /* Build a new IP datagram from all its fragments. */ +- +-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, +- struct net_device *dev) ++static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, ++ struct sk_buff *prev_tail, struct net_device *dev) + { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct iphdr *iph; +- struct sk_buff *fp, *head = qp->q.fragments; ++ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); ++ struct sk_buff **nextp; /* To build frag_list. */ ++ struct rb_node *rbn; + int len; + int ihlen; + int err; +@@ -549,26 +530,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + goto out_fail; + } + /* Make the one we just received the head. */ +- if (prev) { +- head = prev->next; +- fp = skb_clone(head, GFP_ATOMIC); ++ if (head != skb) { ++ fp = skb_clone(skb, GFP_ATOMIC); + if (!fp) + goto out_nomem; +- +- fp->next = head->next; +- if (!fp->next) ++ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; ++ if (RB_EMPTY_NODE(&skb->rbnode)) ++ FRAG_CB(prev_tail)->next_frag = fp; ++ else ++ rb_replace_node(&skb->rbnode, &fp->rbnode, ++ &qp->q.rb_fragments); ++ if (qp->q.fragments_tail == skb) + qp->q.fragments_tail = fp; +- prev->next = fp; +- +- skb_morph(head, qp->q.fragments); +- head->next = qp->q.fragments->next; +- +- consume_skb(qp->q.fragments); +- qp->q.fragments = head; ++ skb_morph(skb, head); ++ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; ++ rb_replace_node(&head->rbnode, &skb->rbnode, ++ &qp->q.rb_fragments); ++ consume_skb(head); ++ head = skb; + } + +- WARN_ON(!head); +- WARN_ON(FRAG_CB(head)->offset != 0); ++ WARN_ON(head->ip_defrag_offset != 0); + + /* Allocate a new buffer for the datagram. 
*/ + ihlen = ip_hdrlen(head); +@@ -592,35 +574,61 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + goto out_nomem; +- clone->next = head->next; +- head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->len = clone->data_len = head->data_len - plen; +- head->data_len -= clone->len; +- head->len -= clone->len; ++ head->truesize += clone->truesize; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + add_frag_mem_limit(qp->q.net, clone->truesize); ++ skb_shinfo(head)->frag_list = clone; ++ nextp = &clone->next; ++ } else { ++ nextp = &skb_shinfo(head)->frag_list; + } + +- skb_shinfo(head)->frag_list = head->next; + skb_push(head, head->data - skb_network_header(head)); + +- for (fp=head->next; fp; fp = fp->next) { +- head->data_len += fp->len; +- head->len += fp->len; +- if (head->ip_summed != fp->ip_summed) +- head->ip_summed = CHECKSUM_NONE; +- else if (head->ip_summed == CHECKSUM_COMPLETE) +- head->csum = csum_add(head->csum, fp->csum); +- head->truesize += fp->truesize; ++ /* Traverse the tree in order, to build frag_list. */ ++ fp = FRAG_CB(head)->next_frag; ++ rbn = rb_next(&head->rbnode); ++ rb_erase(&head->rbnode, &qp->q.rb_fragments); ++ while (rbn || fp) { ++ /* fp points to the next sk_buff in the current run; ++ * rbn points to the next run. ++ */ ++ /* Go through the current run. */ ++ while (fp) { ++ *nextp = fp; ++ nextp = &fp->next; ++ fp->prev = NULL; ++ memset(&fp->rbnode, 0, sizeof(fp->rbnode)); ++ fp->sk = NULL; ++ head->data_len += fp->len; ++ head->len += fp->len; ++ if (head->ip_summed != fp->ip_summed) ++ head->ip_summed = CHECKSUM_NONE; ++ else if (head->ip_summed == CHECKSUM_COMPLETE) ++ head->csum = csum_add(head->csum, fp->csum); ++ head->truesize += fp->truesize; ++ fp = FRAG_CB(fp)->next_frag; ++ } ++ /* Move to the next run. 
*/ ++ if (rbn) { ++ struct rb_node *rbnext = rb_next(rbn); ++ ++ fp = rb_to_skb(rbn); ++ rb_erase(rbn, &qp->q.rb_fragments); ++ rbn = rbnext; ++ } + } + sub_frag_mem_limit(qp->q.net, head->truesize); + ++ *nextp = NULL; + head->next = NULL; ++ head->prev = NULL; + head->dev = dev; + head->tstamp = qp->q.stamp; + IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); +@@ -648,7 +656,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, + + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); + qp->q.fragments = NULL; ++ qp->q.rb_fragments = RB_ROOT; + qp->q.fragments_tail = NULL; ++ qp->q.last_run_head = NULL; + return 0; + + out_nomem: +@@ -656,7 +666,7 @@ out_nomem: + err = -ENOMEM; + goto out_fail; + out_oversize: +- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr); ++ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr); + out_fail: + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); + return err; +@@ -734,25 +744,46 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) + } + EXPORT_SYMBOL(ip_check_defrag); + ++unsigned int inet_frag_rbtree_purge(struct rb_root *root) ++{ ++ struct rb_node *p = rb_first(root); ++ unsigned int sum = 0; ++ ++ while (p) { ++ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); ++ ++ p = rb_next(p); ++ rb_erase(&skb->rbnode, root); ++ while (skb) { ++ struct sk_buff *next = FRAG_CB(skb)->next_frag; ++ ++ sum += skb->truesize; ++ kfree_skb(skb); ++ skb = next; ++ } ++ } ++ return sum; ++} ++EXPORT_SYMBOL(inet_frag_rbtree_purge); ++ + #ifdef CONFIG_SYSCTL +-static int zero; ++static int dist_min; + + static struct ctl_table ip4_frags_ns_ctl_table[] = { + { + .procname = "ipfrag_high_thresh", + .data = &init_net.ipv4.frags.high_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + .extra1 = &init_net.ipv4.frags.low_thresh + }, + { + .procname = "ipfrag_low_thresh", + .data = &init_net.ipv4.frags.low_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = &zero, ++ .proc_handler = proc_doulongvec_minmax, + .extra2 = &init_net.ipv4.frags.high_thresh + }, + { +@@ -781,7 +812,7 @@ static struct ctl_table ip4_frags_ctl_table[] = { + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, +- .extra1 = &zero ++ .extra1 = &dist_min, + }, + { } + }; +@@ -853,6 +884,8 @@ static void __init ip4_frags_ctl_register(void) + + static int __net_init ipv4_frags_init_net(struct net *net) + { ++ int res; ++ + /* Fragment cache limits. 
+ * + * The fragment memory accounting code, (tries to) account for +@@ -876,15 +909,21 @@ static int __net_init ipv4_frags_init_net(struct net *net) + */ + net->ipv4.frags.timeout = IP_FRAG_TIME; + +- inet_frags_init_net(&net->ipv4.frags); ++ net->ipv4.frags.f = &ip4_frags; + +- return ip4_frags_ns_ctl_register(net); ++ res = inet_frags_init_net(&net->ipv4.frags); ++ if (res < 0) ++ return res; ++ res = ip4_frags_ns_ctl_register(net); ++ if (res < 0) ++ inet_frags_exit_net(&net->ipv4.frags); ++ return res; + } + + static void __net_exit ipv4_frags_exit_net(struct net *net) + { + ip4_frags_ns_ctl_unregister(net); +- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); ++ inet_frags_exit_net(&net->ipv4.frags); + } + + static struct pernet_operations ip4_frags_ops = { +@@ -892,18 +931,50 @@ static struct pernet_operations ip4_frags_ops = { + .exit = ipv4_frags_exit_net, + }; + ++ ++static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed) ++{ ++ return jhash2(data, ++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); ++} ++ ++static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed) ++{ ++ const struct inet_frag_queue *fq = data; ++ ++ return jhash2((const u32 *)&fq->key.v4, ++ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed); ++} ++ ++static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) ++{ ++ const struct frag_v4_compare_key *key = arg->key; ++ const struct inet_frag_queue *fq = ptr; ++ ++ return !!memcmp(&fq->key, key, sizeof(*key)); ++} ++ ++static const struct rhashtable_params ip4_rhash_params = { ++ .head_offset = offsetof(struct inet_frag_queue, node), ++ .key_offset = offsetof(struct inet_frag_queue, key), ++ .key_len = sizeof(struct frag_v4_compare_key), ++ .hashfn = ip4_key_hashfn, ++ .obj_hashfn = ip4_obj_hashfn, ++ .obj_cmpfn = ip4_obj_cmpfn, ++ .automatic_shrinking = true, ++}; ++ + void __init ipfrag_init(void) + { +- ip4_frags_ctl_register(); +- register_pernet_subsys(&ip4_frags_ops); +- ip4_frags.hashfn = ip4_hashfn; + ip4_frags.constructor = ip4_frag_init; + ip4_frags.destructor = ip4_frag_free; + ip4_frags.skb_free = NULL; + ip4_frags.qsize = sizeof(struct ipq); +- ip4_frags.match = ip4_frag_match; + ip4_frags.frag_expire = ip_expire; + ip4_frags.frags_cache_name = ip_frag_cache_name; ++ ip4_frags.rhash_params = ip4_rhash_params; + if (inet_frags_init(&ip4_frags)) + panic("IP: failed to allocate ip4_frags cache\n"); ++ ip4_frags_ctl_register(); ++ register_pernet_subsys(&ip4_frags_ops); + } +diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c +index 3abd9d7a3adf..b001ad668108 100644 +--- a/net/ipv4/proc.c ++++ b/net/ipv4/proc.c +@@ -52,7 +52,6 @@ + static int sockstat_seq_show(struct seq_file *seq, void *v) + { + struct net *net = seq->private; +- unsigned int frag_mem; + int orphans, sockets; + + local_bh_disable(); +@@ -72,8 +71,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) + sock_prot_inuse_get(net, &udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", + sock_prot_inuse_get(net, &raw_prot)); +- frag_mem = ip_frag_mem(net); +- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem); ++ seq_printf(seq, "FRAG: inuse %u memory %lu\n", ++ atomic_read(&net->ipv4.frags.rhashtable.nelems), ++ frag_mem_limit(&net->ipv4.frags)); + return 0; + } + +@@ -132,6 +132,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { + SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS), + SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS), + SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS), ++ SNMP_MIB_ITEM("ReasmOverlaps", 
IPSTATS_MIB_REASM_OVERLAPS), + SNMP_MIB_SENTINEL + }; + +diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c +index 5a9ae56e7868..664c84e47bab 100644 +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -64,7 +64,6 @@ struct nf_ct_frag6_skb_cb + static struct inet_frags nf_frags; + + #ifdef CONFIG_SYSCTL +-static int zero; + + static struct ctl_table nf_ct_frag6_sysctl_table[] = { + { +@@ -77,18 +76,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { + { + .procname = "nf_conntrack_frag6_low_thresh", + .data = &init_net.nf_frag.frags.low_thresh, +- .maxlen = sizeof(unsigned int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = &zero, ++ .proc_handler = proc_doulongvec_minmax, + .extra2 = &init_net.nf_frag.frags.high_thresh + }, + { + .procname = "nf_conntrack_frag6_high_thresh", + .data = &init_net.nf_frag.frags.high_thresh, +- .maxlen = sizeof(unsigned int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + .extra1 = &init_net.nf_frag.frags.low_thresh + }, + { } +@@ -153,23 +151,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); + } + +-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, +- const struct in6_addr *daddr) +-{ +- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); +- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), +- (__force u32)id, nf_frags.rnd); +-} +- +- +-static unsigned int nf_hashfn(const struct inet_frag_queue *q) +-{ +- const struct frag_queue *nq; +- +- nq = container_of(q, struct frag_queue, q); +- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); +-} +- + static void nf_skb_free(struct sk_buff *skb) + { + if (NFCT_FRAG6_CB(skb)->orig) +@@ -184,34 +165,26 @@ static void nf_ct_frag6_expire(unsigned long data) + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, nf_frag.frags); + +- ip6_expire_frag_queue(net, fq, &nf_frags); ++ ip6_expire_frag_queue(net, fq); + } + + /* Creation primitives. 
*/ +-static inline struct frag_queue *fq_find(struct net *net, __be32 id, +- u32 user, struct in6_addr *src, +- struct in6_addr *dst, int iif, u8 ecn) ++static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, ++ const struct ipv6hdr *hdr, int iif) + { ++ struct frag_v6_compare_key key = { ++ .id = id, ++ .saddr = hdr->saddr, ++ .daddr = hdr->daddr, ++ .user = user, ++ .iif = iif, ++ }; + struct inet_frag_queue *q; +- struct ip6_create_arg arg; +- unsigned int hash; +- +- arg.id = id; +- arg.user = user; +- arg.src = src; +- arg.dst = dst; +- arg.iif = iif; +- arg.ecn = ecn; +- +- local_bh_disable(); +- hash = nf_hash_frag(id, src, dst); +- +- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); +- local_bh_enable(); +- if (IS_ERR_OR_NULL(q)) { +- inet_frag_maybe_warn_overflow(q, pr_fmt()); ++ ++ q = inet_frag_find(&net->nf_frag.frags, &key); ++ if (!q) + return NULL; +- } ++ + return container_of(q, struct frag_queue, q); + } + +@@ -362,7 +335,7 @@ found: + return 0; + + discard_fq: +- inet_frag_kill(&fq->q, &nf_frags); ++ inet_frag_kill(&fq->q); + err: + return -1; + } +@@ -383,7 +356,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) + int payload_len; + u8 ecn; + +- inet_frag_kill(&fq->q, &nf_frags); ++ inet_frag_kill(&fq->q); + + WARN_ON(head == NULL); + WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); +@@ -454,6 +427,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; ++ fp->sk = NULL; + } + sub_frag_mem_limit(fq->q.net, head->truesize); + +@@ -472,6 +446,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) + head->csum); + + fq->q.fragments = NULL; ++ fq->q.rb_fragments = RB_ROOT; + fq->q.fragments_tail = NULL; + + /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ +@@ -601,9 +576,13 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use + hdr = ipv6_hdr(clone); + fhdr = (struct frag_hdr *)skb_transport_header(clone); + ++ if (clone->len - skb_network_offset(clone) < IPV6_MIN_MTU && ++ fhdr->frag_off & htons(IP6_MF)) ++ goto ret_orig; ++ + skb_orphan(skb); +- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, +- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); ++ fq = fq_find(net, fhdr->identification, user, hdr, ++ skb->dev ? 
skb->dev->ifindex : 0); + if (fq == NULL) { + pr_debug("Can't find and can't create new queue\n"); + goto ret_orig; +@@ -614,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use + if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + spin_unlock_bh(&fq->q.lock); + pr_debug("Can't insert skb to queue\n"); +- inet_frag_put(&fq->q, &nf_frags); ++ inet_frag_put(&fq->q); + goto ret_orig; + } + +@@ -626,7 +605,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use + } + spin_unlock_bh(&fq->q.lock); + +- inet_frag_put(&fq->q, &nf_frags); ++ inet_frag_put(&fq->q); + return ret_skb; + + ret_orig: +@@ -650,18 +629,26 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); + + static int nf_ct_net_init(struct net *net) + { ++ int res; ++ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; +- inet_frags_init_net(&net->nf_frag.frags); +- +- return nf_ct_frag6_sysctl_register(net); ++ net->nf_frag.frags.f = &nf_frags; ++ ++ res = inet_frags_init_net(&net->nf_frag.frags); ++ if (res < 0) ++ return res; ++ res = nf_ct_frag6_sysctl_register(net); ++ if (res < 0) ++ inet_frags_exit_net(&net->nf_frag.frags); ++ return res; + } + + static void nf_ct_net_exit(struct net *net) + { + nf_ct_frags6_sysctl_unregister(net); +- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); ++ inet_frags_exit_net(&net->nf_frag.frags); + } + + static struct pernet_operations nf_ct_net_ops = { +@@ -673,14 +660,13 @@ int nf_ct_frag6_init(void) + { + int ret = 0; + +- nf_frags.hashfn = nf_hashfn; + nf_frags.constructor = ip6_frag_init; + nf_frags.destructor = NULL; + nf_frags.skb_free = nf_skb_free; + nf_frags.qsize = sizeof(struct frag_queue); +- nf_frags.match = ip6_frag_match; + nf_frags.frag_expire = nf_ct_frag6_expire; + nf_frags.frags_cache_name = nf_frags_cache_name; ++ nf_frags.rhash_params = ip6_rhash_params; + ret = inet_frags_init(&nf_frags); + if (ret) + goto out; +diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c +index 679253d0af84..73e766e7bc37 100644 +--- a/net/ipv6/proc.c ++++ b/net/ipv6/proc.c +@@ -33,7 +33,6 @@ + static int sockstat6_seq_show(struct seq_file *seq, void *v) + { + struct net *net = seq->private; +- unsigned int frag_mem = ip6_frag_mem(net); + + seq_printf(seq, "TCP6: inuse %d\n", + sock_prot_inuse_get(net, &tcpv6_prot)); +@@ -43,7 +42,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) + sock_prot_inuse_get(net, &udplitev6_prot)); + seq_printf(seq, "RAW6: inuse %d\n", + sock_prot_inuse_get(net, &rawv6_prot)); +- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem); ++ seq_printf(seq, "FRAG6: inuse %u memory %lu\n", ++ atomic_read(&net->ipv6.frags.rhashtable.nelems), ++ frag_mem_limit(&net->ipv6.frags)); + return 0; + } + +diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c +index 58f2139ebb5e..ec917f58d105 100644 +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -79,94 +79,58 @@ static struct inet_frags ip6_frags; + static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); + +-/* +- * callers should be careful not to use the hash value outside the ipfrag_lock +- * as doing so could race with ipfrag_hash_rnd being recalculated. 
+- */ +-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, +- const struct in6_addr *daddr) +-{ +- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); +- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), +- (__force u32)id, ip6_frags.rnd); +-} +- +-static unsigned int ip6_hashfn(const struct inet_frag_queue *q) +-{ +- const struct frag_queue *fq; +- +- fq = container_of(q, struct frag_queue, q); +- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); +-} +- +-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) +-{ +- const struct frag_queue *fq; +- const struct ip6_create_arg *arg = a; +- +- fq = container_of(q, struct frag_queue, q); +- return fq->id == arg->id && +- fq->user == arg->user && +- ipv6_addr_equal(&fq->saddr, arg->src) && +- ipv6_addr_equal(&fq->daddr, arg->dst) && +- (arg->iif == fq->iif || +- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | +- IPV6_ADDR_LINKLOCAL))); +-} +-EXPORT_SYMBOL(ip6_frag_match); +- + void ip6_frag_init(struct inet_frag_queue *q, const void *a) + { + struct frag_queue *fq = container_of(q, struct frag_queue, q); +- const struct ip6_create_arg *arg = a; ++ const struct frag_v6_compare_key *key = a; + +- fq->id = arg->id; +- fq->user = arg->user; +- fq->saddr = *arg->src; +- fq->daddr = *arg->dst; +- fq->ecn = arg->ecn; ++ q->key.v6 = *key; ++ fq->ecn = 0; + } + EXPORT_SYMBOL(ip6_frag_init); + +-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, +- struct inet_frags *frags) ++void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) + { + struct net_device *dev = NULL; ++ struct sk_buff *head; + ++ rcu_read_lock(); + spin_lock(&fq->q.lock); + + if (fq->q.flags & INET_FRAG_COMPLETE) + goto out; + +- inet_frag_kill(&fq->q, frags); ++ inet_frag_kill(&fq->q); + +- rcu_read_lock(); + dev = dev_get_by_index_rcu(net, fq->iif); + if (!dev) +- goto out_rcu_unlock; ++ goto out; + + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); +- +- if (inet_frag_evicting(&fq->q)) +- goto out_rcu_unlock; +- + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + + /* Don't send error if the first segment did not arrive. */ +- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) +- goto out_rcu_unlock; ++ head = fq->q.fragments; ++ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) ++ goto out; + + /* But use as source device on which LAST ARRIVED + * segment was received. And do not use fq->dev + * pointer directly, device might already disappeared. 
+ */ +- fq->q.fragments->dev = dev; +- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); +-out_rcu_unlock: +- rcu_read_unlock(); ++ head->dev = dev; ++ skb_get(head); ++ spin_unlock(&fq->q.lock); ++ ++ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); ++ kfree_skb(head); ++ goto out_rcu_unlock; ++ + out: + spin_unlock(&fq->q.lock); +- inet_frag_put(&fq->q, frags); ++out_rcu_unlock: ++ rcu_read_unlock(); ++ inet_frag_put(&fq->q); + } + EXPORT_SYMBOL(ip6_expire_frag_queue); + +@@ -178,31 +142,29 @@ static void ip6_frag_expire(unsigned long data) + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + +- ip6_expire_frag_queue(net, fq, &ip6_frags); ++ ip6_expire_frag_queue(net, fq); + } + + static struct frag_queue * +-fq_find(struct net *net, __be32 id, const struct in6_addr *src, +- const struct in6_addr *dst, int iif, u8 ecn) ++fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) + { ++ struct frag_v6_compare_key key = { ++ .id = id, ++ .saddr = hdr->saddr, ++ .daddr = hdr->daddr, ++ .user = IP6_DEFRAG_LOCAL_DELIVER, ++ .iif = iif, ++ }; + struct inet_frag_queue *q; +- struct ip6_create_arg arg; +- unsigned int hash; + +- arg.id = id; +- arg.user = IP6_DEFRAG_LOCAL_DELIVER; +- arg.src = src; +- arg.dst = dst; +- arg.iif = iif; +- arg.ecn = ecn; ++ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | ++ IPV6_ADDR_LINKLOCAL))) ++ key.iif = 0; + +- hash = inet6_hash_frag(id, src, dst); +- +- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); +- if (IS_ERR_OR_NULL(q)) { +- inet_frag_maybe_warn_overflow(q, pr_fmt()); ++ q = inet_frag_find(&net->ipv6.frags, &key); ++ if (!q) + return NULL; +- } ++ + return container_of(q, struct frag_queue, q); + } + +@@ -359,7 +321,7 @@ found: + return -1; + + discard_fq: +- inet_frag_kill(&fq->q, &ip6_frags); ++ inet_frag_kill(&fq->q); + err: + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); +@@ -386,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + int sum_truesize; + u8 ecn; + +- inet_frag_kill(&fq->q, &ip6_frags); ++ inet_frag_kill(&fq->q); + + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) +@@ -503,6 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + rcu_read_unlock(); + fq->q.fragments = NULL; ++ fq->q.rb_fragments = RB_ROOT; + fq->q.fragments_tail = NULL; + return 1; + +@@ -524,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) + struct frag_queue *fq; + const struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net = dev_net(skb_dst(skb)->dev); ++ int iif; + + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; +@@ -552,17 +516,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb) + return 1; + } + +- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, +- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); ++ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && ++ fhdr->frag_off & htons(IP6_MF)) ++ goto fail_hdr; ++ ++ iif = skb->dev ? 
skb->dev->ifindex : 0; ++ fq = fq_find(net, fhdr->identification, hdr, iif); + if (fq) { + int ret; + + spin_lock(&fq->q.lock); + ++ fq->iif = iif; + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + + spin_unlock(&fq->q.lock); +- inet_frag_put(&fq->q, &ip6_frags); ++ inet_frag_put(&fq->q); + return ret; + } + +@@ -583,24 +552,22 @@ static const struct inet6_protocol frag_protocol = { + }; + + #ifdef CONFIG_SYSCTL +-static int zero; + + static struct ctl_table ip6_frags_ns_ctl_table[] = { + { + .procname = "ip6frag_high_thresh", + .data = &init_net.ipv6.frags.high_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + .extra1 = &init_net.ipv6.frags.low_thresh + }, + { + .procname = "ip6frag_low_thresh", + .data = &init_net.ipv6.frags.low_thresh, +- .maxlen = sizeof(int), ++ .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = proc_dointvec_minmax, +- .extra1 = &zero, ++ .proc_handler = proc_doulongvec_minmax, + .extra2 = &init_net.ipv6.frags.high_thresh + }, + { +@@ -708,19 +675,27 @@ static void ip6_frags_sysctl_unregister(void) + + static int __net_init ipv6_frags_init_net(struct net *net) + { ++ int res; ++ + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; ++ net->ipv6.frags.f = &ip6_frags; + +- inet_frags_init_net(&net->ipv6.frags); ++ res = inet_frags_init_net(&net->ipv6.frags); ++ if (res < 0) ++ return res; + +- return ip6_frags_ns_sysctl_register(net); ++ res = ip6_frags_ns_sysctl_register(net); ++ if (res < 0) ++ inet_frags_exit_net(&net->ipv6.frags); ++ return res; + } + + static void __net_exit ipv6_frags_exit_net(struct net *net) + { + ip6_frags_ns_sysctl_unregister(net); +- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); ++ inet_frags_exit_net(&net->ipv6.frags); + } + + static struct pernet_operations ip6_frags_ops = { +@@ -728,14 +703,55 @@ static struct pernet_operations ip6_frags_ops = { + .exit = ipv6_frags_exit_net, + }; + ++static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) ++{ ++ return jhash2(data, ++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); ++} ++ ++static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) ++{ ++ const struct inet_frag_queue *fq = data; ++ ++ return jhash2((const u32 *)&fq->key.v6, ++ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); ++} ++ ++static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) ++{ ++ const struct frag_v6_compare_key *key = arg->key; ++ const struct inet_frag_queue *fq = ptr; ++ ++ return !!memcmp(&fq->key, key, sizeof(*key)); ++} ++ ++const struct rhashtable_params ip6_rhash_params = { ++ .head_offset = offsetof(struct inet_frag_queue, node), ++ .hashfn = ip6_key_hashfn, ++ .obj_hashfn = ip6_obj_hashfn, ++ .obj_cmpfn = ip6_obj_cmpfn, ++ .automatic_shrinking = true, ++}; ++EXPORT_SYMBOL(ip6_rhash_params); ++ + int __init ipv6_frag_init(void) + { + int ret; + +- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); ++ ip6_frags.constructor = ip6_frag_init; ++ ip6_frags.destructor = NULL; ++ ip6_frags.qsize = sizeof(struct frag_queue); ++ ip6_frags.frag_expire = ip6_frag_expire; ++ ip6_frags.frags_cache_name = ip6_frag_cache_name; ++ ip6_frags.rhash_params = ip6_rhash_params; ++ ret = inet_frags_init(&ip6_frags); + if (ret) + goto out; + ++ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); ++ if (ret) ++ goto 
err_protocol; ++ + ret = ip6_frags_sysctl_register(); + if (ret) + goto err_sysctl; +@@ -744,17 +760,6 @@ int __init ipv6_frag_init(void) + if (ret) + goto err_pernet; + +- ip6_frags.hashfn = ip6_hashfn; +- ip6_frags.constructor = ip6_frag_init; +- ip6_frags.destructor = NULL; +- ip6_frags.skb_free = NULL; +- ip6_frags.qsize = sizeof(struct frag_queue); +- ip6_frags.match = ip6_frag_match; +- ip6_frags.frag_expire = ip6_frag_expire; +- ip6_frags.frags_cache_name = ip6_frag_cache_name; +- ret = inet_frags_init(&ip6_frags); +- if (ret) +- goto err_pernet; + out: + return ret; + +@@ -762,6 +767,8 @@ err_pernet: + ip6_frags_sysctl_unregister(); + err_sysctl: + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); ++err_protocol: ++ inet_frags_fini(&ip6_frags); + goto out; + } +
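Editor's note: the core of the inner patch above is the rework of the IPv4/IPv6 fragment queues — an rhashtable keyed by a compare key replaces the old hash buckets, and fragments are held in an rbtree of "runs" (consecutive fragments chained via next_frag under one rbnode) with a strict drop-on-overlap policy, per the RFC5722 rationale quoted in ip_frag_queue(). The standalone C program below is a minimal sketch of that insertion policy only: toy_queue, toy_run, and toy_insert are illustrative names, a fixed-size sorted array with linear search stands in for the kernel's rbtree binary search over runs, and skb memory accounting, ECN, and timers are not modeled.

#include <stdio.h>

enum toy_result { TOY_INSERTED, TOY_DUPLICATE, TOY_OVERLAP };

struct toy_run { unsigned int start, end; };        /* bytes [start, end) */

struct toy_queue {
	struct toy_run runs[64];                    /* sorted, disjoint runs */
	int nr;
};

static enum toy_result toy_insert(struct toy_queue *q,
				  unsigned int start, unsigned int end)
{
	int i = q->nr;                      /* default slot: after the tail */

	if (q->nr == 64)                    /* toy capacity cap; the kernel */
		return TOY_OVERLAP;         /* uses frag memory limits      */

	if (q->nr && start < q->runs[q->nr - 1].end) {
		/* Not past the tail: find the slot, as the kernel does with
		 * a binary search over the rbtree of runs. */
		for (i = 0; i < q->nr; i++) {
			if (end <= q->runs[i].start)
				break;  /* fits entirely before run i */
			if (start >= q->runs[i].start && end <= q->runs[i].end)
				return TOY_DUPLICATE; /* no new data: drop only this fragment */
			if (start < q->runs[i].end)
				return TOY_OVERLAP;   /* partial overlap: caller kills the queue */
		}
	} else if (q->nr && start == q->runs[q->nr - 1].end) {
		q->runs[q->nr - 1].end = end;  /* common case: extend the tail run */
		return TOY_INSERTED;
	}

	for (int j = q->nr; j > i; j--)     /* open slot i, keep runs sorted */
		q->runs[j] = q->runs[j - 1];
	q->runs[i].start = start;
	q->runs[i].end = end;
	q->nr++;
	return TOY_INSERTED;
}

int main(void)
{
	static const char * const verdict[] = { "inserted", "duplicate", "overlap" };
	struct toy_queue q = { .nr = 0 };

	printf("[0,1480):    %s\n", verdict[toy_insert(&q, 0, 1480)]);
	printf("[1480,2960): %s\n", verdict[toy_insert(&q, 1480, 2960)]); /* extends tail run */
	printf("[4440,5920): %s\n", verdict[toy_insert(&q, 4440, 5920)]); /* new run after a gap */
	printf("[2960,4441): %s\n", verdict[toy_insert(&q, 2960, 4441)]); /* 1-byte overlap */
	return 0;
}

As in the patched ip_frag_queue(), a fragment that is an exact subset of already-queued data is treated as a harmless duplicate (only that fragment is dropped), while any partial overlap kills the whole queue; the latter case is what the new IPSTATS_MIB_REASM_OVERLAPS counter added above (reported as "ReasmOverlaps" in /proc/net/netstat) counts.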