Diffstat (limited to 'net/core/dev.c')
-rw-r--r--  net/core/dev.c  111
1 file changed, 91 insertions(+), 20 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
* rcu_read_lock must be held on entry.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
{
struct ipv6hdr *ip6;
struct iphdr *ip;
struct netdev_rx_queue *rxqueue;
struct rps_map *map;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
u8 ip_proto;
+ u16 tcpu;
u32 addr1, addr2, ports, ihl;
if (skb_rx_queue_recorded(skb)) {
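The new global rps_sock_flow_table is the half of Receive Flow Steering that the socket code fills in: the recvmsg path records, per flow hash, which CPU the consuming application last ran on, and get_rps_cpu consults that below. Only the lookup shape is visible in this file (a power-of-two mask plus an array of u16 CPU entries indexed by skb->rxhash), so the following is a minimal userspace sketch of that shape; the names sock_flow_table_sketch, alloc_table and record_flow are illustrative stand-ins, not the kernel's definitions, and the real recording helper lives in the protocol side of the patch, not in dev.c.

#include <stdint.h>
#include <stdlib.h>

#define SKETCH_NO_CPU 0xffff                /* stand-in for RPS_NO_CPU */

/* Shape implied by "ents[skb->rxhash & mask]" in the lookup below. */
struct sock_flow_table_sketch {
	uint32_t mask;                      /* entry count minus one, power of two */
	uint16_t ents[];                    /* desired CPU per hashed flow */
};

static struct sock_flow_table_sketch *alloc_table(uint32_t entries)
{
	struct sock_flow_table_sketch *t =
		malloc(sizeof(*t) + entries * sizeof(t->ents[0]));

	t->mask = entries - 1;
	for (uint32_t i = 0; i <= t->mask; i++)
		t->ents[i] = SKETCH_NO_CPU;
	return t;
}

/* The recvmsg-side half of RFS: remember which CPU the consuming
 * application last ran on for this flow hash. */
static void record_flow(struct sock_flow_table_sketch *t,
			uint32_t rxhash, uint16_t this_cpu)
{
	if (t && rxhash)
		t->ents[rxhash & t->mask] = this_cpu;
}

int main(void)
{
	struct sock_flow_table_sketch *t = alloc_table(256);

	record_flow(t, 0x9e3779b9u, 3);     /* this flow's reader last ran on CPU 3 */
	free(t);
	return 0;
}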
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
} else
rxqueue = dev->_rx;
- if (!rxqueue->rps_map)
+ if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
goto done;
if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
skb->rxhash = 1;
got_hash:
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
+ sock_flow_table = rcu_dereference(rps_sock_flow_table);
+ if (flow_table && sock_flow_table) {
+ u16 next_cpu;
+ struct rps_dev_flow *rflow;
+
+ rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+ tcpu = rflow->cpu;
+
+ next_cpu = sock_flow_table->ents[skb->rxhash &
+ sock_flow_table->mask];
+
+ /*
+ * If the desired CPU (where last recvmsg was done) is
+ * different from current CPU (one in the rx-queue flow
+ * table entry), switch if one of the following holds:
+ * - Current CPU is unset (equal to RPS_NO_CPU).
+ * - Current CPU is offline.
+ * - The current CPU's queue tail has advanced beyond the
+ * last packet that was enqueued using this table entry.
+ * This guarantees that all previous packets for the flow
+ * have been dequeued, thus preserving in order delivery.
+ */
+ if (unlikely(tcpu != next_cpu) &&
+ (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+ ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+ rflow->last_qtail)) >= 0)) {
+ tcpu = rflow->cpu = next_cpu;
+ if (tcpu != RPS_NO_CPU)
+ rflow->last_qtail = per_cpu(softnet_data,
+ tcpu).input_queue_head;
+ }
+ if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+ *rflowp = rflow;
+ cpu = tcpu;
+ goto done;
+ }
+ }
+
map = rcu_dereference(rxqueue->rps_map);
if (map) {
- u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+ tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
if (cpu_online(tcpu)) {
cpu = tcpu;
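The block above is the heart of the change: rflow->cpu is where packets of this flow are currently steered, next_cpu is where the application last ran, and the switch is only taken once the old CPU's backlog has consumed everything previously queued for the flow (or the old CPU is unset or offline), which is what preserves in-order delivery. Below is a compact userspace model of that decision with the three conditions spelled out; the plain arrays standing in for per-CPU softnet data and the helper names are assumptions of the sketch, not kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NO_CPU 0xffff

struct dev_flow_sketch {            /* models one rps_dev_flow entry */
	uint16_t cpu;               /* CPU the flow is currently steered to */
	unsigned int last_qtail;    /* backlog position of our last enqueue */
};

/* Per-CPU stand-ins: how many packets each backlog has dequeued so far,
 * and whether the CPU is online. */
static unsigned int input_queue_head[4];
static bool cpu_is_online[4] = { true, true, true, true };

static uint16_t steer(struct dev_flow_sketch *rflow, uint16_t next_cpu)
{
	uint16_t tcpu = rflow->cpu;

	/* Move the flow to the application's CPU only if doing so cannot
	 * reorder packets: old CPU unset, offline, or already drained past
	 * the last packet queued for this flow (the signed difference copes
	 * with counter wraparound, exactly as in the hunk above). */
	if (tcpu != next_cpu &&
	    (tcpu == NO_CPU || !cpu_is_online[tcpu] ||
	     (int)(input_queue_head[tcpu] - rflow->last_qtail) >= 0)) {
		rflow->cpu = tcpu = next_cpu;
		if (tcpu != NO_CPU)
			rflow->last_qtail = input_queue_head[tcpu];
	}
	return (tcpu != NO_CPU && cpu_is_online[tcpu]) ? tcpu : NO_CPU;
}

int main(void)
{
	struct dev_flow_sketch f = { .cpu = NO_CPU, .last_qtail = 0 };

	printf("first packet -> cpu %d\n", steer(&f, 2)); /* unset: take CPU 2 */
	input_queue_head[2] = 10;                         /* CPU 2 drains its backlog */
	f.last_qtail = 7;                                 /* our last enqueue is behind head */
	printf("after drain  -> cpu %d\n", steer(&f, 1)); /* now safe to follow the app */
	return 0;
}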
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
__napi_schedule(&queue->backlog);
__get_cpu_var(netdev_rx_stat).received_rps++;
}
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
/*
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
*/
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ unsigned int *qtail)
{
struct softnet_data *queue;
unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
if (queue->input_pkt_queue.qlen) {
enqueue:
__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+ *qtail = queue->input_queue_head +
+ queue->input_pkt_queue.qlen;
+#endif
rps_unlock(queue);
local_irq_restore(flags);
return NET_RX_SUCCESS;
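The qtail recorded here is the position this skb occupies in the backlog, expressed in the same units as input_queue_head: packets ever enqueued on that CPU. Head and tail are free-running unsigned counters (the head is advanced by incr_input_queue_head() as packets leave the queue), so the "has the flow drained" test in get_rps_cpu is a signed difference rather than a plain compare, which stays correct across wraparound. A tiny self-contained illustration of that comparison; the names are local to the example.

#include <limits.h>
#include <stdio.h>

/* Free-running counters: total packets dequeued (head) vs. the position
 * recorded for the last packet of some flow (last_qtail). The flow has
 * drained once head has caught up with last_qtail, even if one or both
 * counters have wrapped around UINT_MAX. */
static int flow_drained(unsigned int head, unsigned int last_qtail)
{
	return (int)(head - last_qtail) >= 0;
}

int main(void)
{
	printf("%d\n", flow_drained(100, 90));         /* 1: drained */
	printf("%d\n", flow_drained(90, 100));         /* 0: packets still queued */
	printf("%d\n", flow_drained(5, UINT_MAX - 3)); /* 1: wrapped, but drained */
	return 0;
}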
@@ -2355,11 +2408,10 @@ enqueue:
cpu_set(cpu, rcpus->mask[rcpus->select]);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
- } else
- __napi_schedule(&queue->backlog);
-#else
- __napi_schedule(&queue->backlog);
+ goto enqueue;
+ }
#endif
+ __napi_schedule(&queue->backlog);
}
goto enqueue;
}
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
#ifdef CONFIG_RPS
{
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
rcu_read_lock();
- cpu = get_rps_cpu(skb->dev, skb);
+
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu < 0)
cpu = smp_processor_id();
- ret = enqueue_to_backlog(skb, cpu);
+
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
rcu_read_unlock();
}
#else
- ret = enqueue_to_backlog(skb, get_cpu());
- put_cpu();
+ {
+ unsigned int qtail;
+ ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+ put_cpu();
+ }
#endif
return ret;
}
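The voidflow idiom above is worth a note: when the flow tables are not in play, get_rps_cpu never touches *rflowp, so rflow keeps pointing at the throwaway stack entry and enqueue_to_backlog can write through &rflow->last_qtail unconditionally, with no NULL checks on the fast path. A stripped-down sketch of the same pattern; the function and struct names here are placeholders, not the kernel's.

#include <stdio.h>

struct flow_ref {
	unsigned int last_qtail;
};

/* The callee may or may not redirect *out at a "real" table entry. */
static void maybe_pick_flow(struct flow_ref **out, struct flow_ref *real,
			    int use_table)
{
	if (use_table)
		*out = real;            /* steer via the real flow entry */
	/* else: leave *out aimed at the caller's dummy */
}

static void record_tail(unsigned int *qtail, unsigned int value)
{
	*qtail = value;                 /* never needs a NULL check */
}

int main(void)
{
	struct flow_ref table_entry = { 0 };
	struct flow_ref voidflow, *rflow = &voidflow;   /* same idiom as netif_rx */

	maybe_pick_flow(&rflow, &table_entry, 0);
	record_tail(&rflow->last_qtail, 41);            /* lands in the dummy */

	maybe_pick_flow(&rflow, &table_entry, 1);
	record_tail(&rflow->last_qtail, 42);            /* lands in the table entry */

	printf("table_entry.last_qtail = %u\n", table_entry.last_qtail);
	return 0;
}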
@@ -2830,14 +2889,22 @@ out:
int netif_receive_skb(struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- int cpu;
+ struct rps_dev_flow voidflow, *rflow = &voidflow;
+ int cpu, ret;
+
+ rcu_read_lock();
- cpu = get_rps_cpu(skb->dev, skb);
+ cpu = get_rps_cpu(skb->dev, skb, &rflow);
- if (cpu < 0)
- return __netif_receive_skb(skb);
- else
- return enqueue_to_backlog(skb, cpu);
+ if (cpu >= 0) {
+ ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ rcu_read_unlock();
+ } else {
+ rcu_read_unlock();
+ ret = __netif_receive_skb(skb);
+ }
+
+ return ret;
#else
return __netif_receive_skb(skb);
#endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
if (skb->dev == dev) {
__skb_unlink(skb, &queue->input_pkt_queue);
kfree_skb(skb);
+ incr_input_queue_head(queue);
}
rps_unlock(queue);
}
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
local_irq_enable();
break;
}
+ incr_input_queue_head(queue);
rps_unlock(queue);
local_irq_enable();
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx(skb);
+ incr_input_queue_head(oldsd);
+ }
return NOTIFY_OK;
}
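incr_input_queue_head() is not defined in this file; it presumably comes from the softnet_data/header side of the same patch. Judging purely by how it is used in the three hunks above, with RPS configured it bumps the per-queue count of packets that have left input_pkt_queue (dequeued, flushed, or replayed from an offline CPU), and it would compile away otherwise. A userspace sketch of that presumed shape; the struct and names are local stand-ins, not the kernel's definitions.

#include <stdio.h>

struct softnet_sketch {
	unsigned int input_queue_head;   /* packets that have left the backlog */
};

static void incr_input_queue_head_sketch(struct softnet_sketch *queue)
{
	/* With CONFIG_RPS: one more packet has left input_pkt_queue, so
	 * get_rps_cpu can compare this head against rps_dev_flow.last_qtail.
	 * Without CONFIG_RPS the real helper would likely be a no-op. */
	queue->input_queue_head++;
}

int main(void)
{
	struct softnet_sketch sd = { 0 };

	incr_input_queue_head_sketch(&sd);   /* call once per skb leaving the queue */
	printf("input_queue_head = %u\n", sd.input_queue_head);
	return 0;
}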