summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Oskolkov <posk@google.com>2018-10-10 12:30:14 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-10-18 09:13:26 +0200
commite9e4ac488c017739b2832177550ba2569fffc709 (patch)
treebd224020d88ed6cc339b92762527f2cde06136ff
parent10043954eadac2d8f8c1886190f7a7ee584ff939 (diff)
downloadlinux-stable-e9e4ac488c017739b2832177550ba2569fffc709.tar.gz
linux-stable-e9e4ac488c017739b2832177550ba2569fffc709.tar.bz2
linux-stable-e9e4ac488c017739b2832177550ba2569fffc709.zip
ip: add helpers to process in-order fragments faster.
This patch introduces several helper functions/macros that will be used in the follow-up patch. No runtime changes yet. The new logic (fully implemented in the second patch) is as follows: * Nodes in the rb-tree will now contain not single fragments, but lists of consecutive fragments ("runs"). * At each point in time, the current "active" run at the tail is maintained/tracked. Fragments that arrive in-order, adjacent to the previous tail fragment, are added to this tail run without triggering the re-balancing of the rb-tree. * If a fragment arrives out of order with the offset _before_ the tail run, it is inserted into the rb-tree as a single fragment. * If a fragment arrives after the current tail fragment (with a gap), it starts a new "tail" run, as is inserted into the rb-tree at the end as the head of the new run. skb->cb is used to store additional information needed here (suggested by Eric Dumazet). Reported-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Peter Oskolkov <posk@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net> (cherry picked from commit 353c9cb360874e737fb000545f783df756c06f9a) Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/net/inet_frag.h6
-rw-r--r--net/ipv4/ip_fragment.c73
2 files changed, 79 insertions, 0 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 1ff0433d94a7..a3812e9c8fee 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -56,7 +56,9 @@ struct frag_v6_compare_key {
* @lock: spinlock protecting this frag
* @refcnt: reference count of the queue
* @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
* @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
@@ -77,6 +79,7 @@ struct inet_frag_queue {
struct sk_buff *fragments; /* Used in IPv6. */
struct rb_root rb_fragments; /* Used in IPv4. */
struct sk_buff *fragments_tail;
+ struct sk_buff *last_run_head;
ktime_t stamp;
int len;
int meat;
@@ -112,6 +115,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
static inline void inet_frag_put(struct inet_frag_queue *q)
{
if (atomic_dec_and_test(&q->refcnt))
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 11d3dc649ef0..5e2121b82588 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -56,6 +56,57 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+ struct inet_skb_parm h;
+ struct sk_buff *next_frag;
+ int frag_run_len;
+};
+
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ ip4_frag_init_run(skb);
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
+
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
@@ -652,6 +703,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
#ifdef CONFIG_SYSCTL
static int dist_min;