summaryrefslogtreecommitdiffstats
path: root/fs/eventpoll.c
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2021-09-07 20:00:00 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-08 11:50:27 -0700
commit1e1c15839df084f4011825fee922aa976c9159dc (patch)
treeeba0d3e4771362a9032bb3637607cd41f52b2ae6 /fs/eventpoll.c
parent4ce9f970457899defdf68e26e0502c7245002eb3 (diff)
downloadlinux-1e1c15839df084f4011825fee922aa976c9159dc.tar.gz
linux-1e1c15839df084f4011825fee922aa976c9159dc.tar.bz2
linux-1e1c15839df084f4011825fee922aa976c9159dc.zip
fs/epoll: use a per-cpu counter for user's watches count
This counter tracks the number of watches a user has, to compare against the 'max_user_watches' limit. This causes a scalability bottleneck on SPECjbb2015 on large systems as there is only one user. Changing to a per-cpu counter increases throughput of the benchmark by about 30% on a 16-socket, > 1000 thread system. [rdunlap@infradead.org: fix build errors in kernel/user.c when CONFIG_EPOLL=n] [npiggin@gmail.com: move ifdefs into wrapper functions, slightly improve panic message] Link: https://lkml.kernel.org/r/1628051945.fens3r99ox.astroid@bobo.none [akpm@linux-foundation.org: tweak user_epoll_alloc(), per Guenter] Link: https://lkml.kernel.org/r/20210804191421.GA1900577@roeck-us.net Link: https://lkml.kernel.org/r/20210802032013.2751916-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reported-by: Anton Blanchard <anton@ozlabs.org> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/eventpoll.c')
-rw-r--r--fs/eventpoll.c18
1 files changed, 10 insertions, 8 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e596e1d0bba..648ed77f4164 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -723,7 +723,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
*/
call_rcu(&epi->rcu, epi_rcu_free);
- atomic_long_dec(&ep->user->epoll_watches);
+ percpu_counter_dec(&ep->user->epoll_watches);
return 0;
}
@@ -1439,7 +1439,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
{
int error, pwake = 0;
__poll_t revents;
- long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
struct eventpoll *tep = NULL;
@@ -1449,11 +1448,15 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
lockdep_assert_irqs_enabled();
- user_watches = atomic_long_read(&ep->user->epoll_watches);
- if (unlikely(user_watches >= max_user_watches))
+ if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
+ max_user_watches) >= 0))
return -ENOSPC;
- if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL)))
+ percpu_counter_inc(&ep->user->epoll_watches);
+
+ if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) {
+ percpu_counter_dec(&ep->user->epoll_watches);
return -ENOMEM;
+ }
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
@@ -1466,17 +1469,16 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
mutex_lock_nested(&tep->mtx, 1);
/* Add the current item to the list of active epoll hook for this file */
if (unlikely(attach_epitem(tfile, epi) < 0)) {
- kmem_cache_free(epi_cache, epi);
if (tep)
mutex_unlock(&tep->mtx);
+ kmem_cache_free(epi_cache, epi);
+ percpu_counter_dec(&ep->user->epoll_watches);
return -ENOMEM;
}
if (full_check && !tep)
list_file(tfile);
- atomic_long_inc(&ep->user->epoll_watches);
-
/*
* Add the current item to the RB tree. All RB tree operations are
* protected by "mtx", and ep_insert() is called with "mtx" held.