diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 111 |
1 file changed, 91 insertions, 20 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64; /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
 {
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
 	struct netdev_rx_queue *rxqueue;
 	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u8 ip_proto;
+	u16 tcpu;
 	u32 addr1, addr2, ports, ihl;
 
 	if (skb_rx_queue_recorded(skb)) {
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	} else
 		rxqueue = dev->_rx;
 
-	if (!rxqueue->rps_map)
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
 		goto done;
 
 	if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		skb->rxhash = 1;
 
 got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
 {
 	struct softnet_data *queue;
 	unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = queue->input_queue_head +
+			    queue->input_pkt_queue.qlen;
+#endif
 			rps_unlock(queue);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
@@ -2355,11 +2408,10 @@ enqueue:
 				cpu_set(cpu, rcpus->mask[rcpus->select]);
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-			} else
-				__napi_schedule(&queue->backlog);
-#else
-			__napi_schedule(&queue->backlog);
+				goto enqueue;
+			}
 #endif
+			__napi_schedule(&queue->backlog);
 		}
 		goto enqueue;
 	}
 
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
 	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
 		rcu_read_lock();
-		cpu = get_rps_cpu(skb->dev, skb);
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 		if (cpu < 0)
 			cpu = smp_processor_id();
-		ret = enqueue_to_backlog(skb, cpu);
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
 		rcu_read_unlock();
 	}
 #else
-	ret = enqueue_to_backlog(skb, get_cpu());
-	put_cpu();
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
 #endif
 	return ret;
 }
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-	int cpu;
+	struct rps_dev_flow voidflow, *rflow = &voidflow;
+	int cpu, ret;
+
+	rcu_read_lock();
 
-	cpu = get_rps_cpu(skb->dev, skb);
+	cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	if (cpu < 0)
-		return __netif_receive_skb(skb);
-	else
-		return enqueue_to_backlog(skb, cpu);
+	if (cpu >= 0) {
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
+		ret = __netif_receive_skb(skb);
+	}
+
+	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
+			incr_input_queue_head(queue);
 		}
 	rps_unlock(queue);
 }
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
+		incr_input_queue_head(queue);
 		rps_unlock(queue);
 		local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		incr_input_queue_head(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
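The ordering check in the get_rps_cpu hunk above hinges on the signed comparison
(int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail) >= 0:
both counters are free-running unsigned values, so casting their difference to a
signed int reveals whether the queue head has advanced past the recorded tail
even after the 32-bit counters wrap. The standalone userspace sketch below is not
part of the patch; the helper name flow_drained is made up purely to illustrate
the same idiom.

/*
 * Sketch (not kernel code) of the wrap-safe counter comparison the patch
 * uses to decide whether a flow may be steered to a new CPU: steering is
 * allowed once the per-CPU queue head counter has advanced past the tail
 * recorded when the flow's last packet was enqueued.
 */
#include <stdio.h>

/* Nonzero when every packet enqueued up to last_qtail has been dequeued. */
static int flow_drained(unsigned int input_queue_head, unsigned int last_qtail)
{
	/* Signed difference handles wrap-around of the unsigned counters. */
	return (int)(input_queue_head - last_qtail) >= 0;
}

int main(void)
{
	/* Head has passed the recorded tail: safe to move the flow. */
	printf("%d\n", flow_drained(1000, 900));        /* prints 1 */
	/* Packets recorded at tail 1100 are still queued: keep the old CPU. */
	printf("%d\n", flow_drained(1000, 1100));       /* prints 0 */
	/* The same comparison still works across 32-bit wrap-around. */
	printf("%d\n", flow_drained(5, 0xfffffff0u));   /* prints 1 */
	return 0;
}

Only when this check reports that every packet enqueued through the old
rps_dev_flow entry has been drained does the patch let rflow->cpu switch to the
CPU recorded at the socket's last recvmsg, which is what preserves per-flow
in-order delivery.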