diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-01-10 20:11:38 +0100 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2011-01-10 20:11:38 +0100 |
commit | 83723d60717f8da0f53f91cf42a845ed56c09662 (patch) | |
tree | 9d32edc2c6dc4849e63d422f8dad42606b2f984f /net | |
parent | 45b9f509b7f5d2d792b3c03b78ddc8ec543e921b (diff) | |
download | linux-stable-83723d60717f8da0f53f91cf42a845ed56c09662.tar.gz linux-stable-83723d60717f8da0f53f91cf42a845ed56c09662.tar.bz2 linux-stable-83723d60717f8da0f53f91cf42a845ed56c09662.zip |
netfilter: x_tables: dont block BH while reading counters
Using "iptables -L" with a lot of rules have a too big BH latency.
Jesper mentioned ~6 ms and worried of frame drops.
Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesnt need to block BH (for this cpu, but also other cpus).
This adds two increments on seqlock sequence per ipt_do_table() call,
its a reasonable cost for allowing "iptables -L" not block BH
processing.
Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 45 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 45 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 45 | ||||
-rw-r--r-- | net/netfilter/x_tables.c | 3 |
4 files changed, 44 insertions, 94 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340a28d5..e855fffaed95 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d633b3b6..652efea013dc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t, struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU. - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ipt_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 455582384ece..7d227c644f72 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t, struct ip6t_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu = get_cpu(); - - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * - * Bottom half has to be disabled to prevent deadlock - * if new softirq were to run and call ipt_do_table - */ - local_bh_disable(); - i = 0; - xt_entry_foreach(iter, t->entries[curcpu], t->size) { - SET_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); - ++i; - } - local_bh_enable(); - /* Processing counters from other cpus, we can let bottom half enabled, - * (preemption is disabled) - */ for_each_possible_cpu(cpu) { - if (cpu == curcpu) - continue; + seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock; + i = 0; - local_bh_disable(); - xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { - ADD_COUNTER(counters[i], iter->counters.bcnt, - iter->counters.pcnt); + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqbegin(lock); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqretry(lock, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); ++i; } - xt_info_wrunlock(cpu); - local_bh_enable(); } - put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vzalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vmalloc(num_counters * sizeof(struct xt_counters)); + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 80463507420e..c94237631077 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1325,7 +1325,8 @@ static int __init xt_init(void) for_each_possible_cpu(i) { struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); - spin_lock_init(&lock->lock); + + seqlock_init(&lock->lock); lock->readers = 0; } |