author     Kent Overstreet <kent.overstreet@gmail.com>   2021-03-31 21:44:55 -0400
committer  Kent Overstreet <kent.overstreet@linux.dev>   2023-10-22 17:08:59 -0400
commit     241e26369e1267be376490152ee2c52021b4321a (patch)
tree       3959554638d2d03f6d25d8433d1964b1b3532424
parent     9d8022db1ccfff6aaf1de6158c2a26b667c70a15 (diff)
bcachefs: Don't flush btree writes more aggressively because of btree key cache
We need to flush the btree key cache when it's too dirty, because otherwise
the shrinker won't be able to reclaim memory - this is done by journal
reclaim. But journal reclaim also kicks btree node writes: this meant that
btree node writes were getting kicked much too often just because we needed
to flush btree key cache keys.

This patch splits journal pins into two different lists, and teaches journal
reclaim to not flush btree node writes when it only needs to flush key cache
keys.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
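To make the new selection rule concrete, here is a small userspace sketch
(a toy model, not the kernel code: the pin struct, the PIN_* tags, and
pin_flushable() are invented for illustration). Pins at or below the flush
target are always eligible; past it, a pin is eligible only if the caller
asked for its class, so a pass that only needs key cache keys never touches
btree node write pins:

	#include <stdio.h>
	#include <stdbool.h>

	/* Toy stand-ins for the two pin classes this patch separates. */
	enum pin_type { PIN_BTREE_NODE, PIN_KEY_CACHE };

	struct pin {
		enum pin_type	type;
		unsigned long	seq;	/* journal sequence number pinned */
	};

	/*
	 * Toy version of the eligibility test in journal_get_next_pin():
	 * within seq_to_flush anything may be flushed; past it, only the
	 * classes the caller explicitly requested.
	 */
	static bool pin_flushable(const struct pin *p, unsigned long seq_to_flush,
				  bool get_any, bool get_key_cache)
	{
		if (p->seq <= seq_to_flush || get_any)
			return true;
		return get_key_cache && p->type == PIN_KEY_CACHE;
	}

	int main(void)
	{
		const struct pin pins[] = {
			{ PIN_BTREE_NODE, 10 },
			{ PIN_KEY_CACHE,  11 },
			{ PIN_BTREE_NODE, 12 },
		};
		unsigned long seq_to_flush = 10;
		unsigned i;

		/* A reclaim pass that only needs to flush key cache keys: */
		for (i = 0; i < sizeof(pins) / sizeof(pins[0]); i++)
			printf("seq %lu (%s): %s\n", pins[i].seq,
			       pins[i].type == PIN_KEY_CACHE ? "key cache" : "btree node",
			       pin_flushable(&pins[i], seq_to_flush, false, true)
			       ? "flush" : "skip");
		return 0;
	}

In the patch itself this decision is made by journal_get_next_pin() walking
pin_list->list and pin_list->key_cache_list separately, as the
journal_reclaim.c hunks below show.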
-rw-r--r--   fs/bcachefs/btree_iter.c             |  4
-rw-r--r--   fs/bcachefs/btree_key_cache.c        |  7
-rw-r--r--   fs/bcachefs/btree_key_cache.h        | 12
-rw-r--r--   fs/bcachefs/btree_update_interior.c  | 11
-rw-r--r--   fs/bcachefs/journal.c                | 30
-rw-r--r--   fs/bcachefs/journal_reclaim.c        | 68
-rw-r--r--   fs/bcachefs/journal_types.h          |  1
-rw-r--r--   fs/bcachefs/trace.h                  |  5
8 files changed, 81 insertions(+), 57 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 203c9adb0623..8f5318a38d9b 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -473,8 +473,10 @@ bool bch2_trans_relock(struct btree_trans *trans)
trans_for_each_iter(trans, iter)
if (btree_iter_keep(trans, iter) &&
- !bch2_btree_iter_relock(iter, true))
+ !bch2_btree_iter_relock(iter, true)) {
+ trace_trans_restart_relock(trans->ip);
return false;
+ }
return true;
}
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 0af46335bd00..ac844f47b8dd 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -445,9 +445,8 @@ out:
return ret;
}
-static int btree_key_cache_journal_flush(struct journal *j,
- struct journal_entry_pin *pin,
- u64 seq)
+int bch2_btree_key_cache_journal_flush(struct journal *j,
+ struct journal_entry_pin *pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bkey_cached *ck =
@@ -528,7 +527,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
}
bch2_journal_pin_update(&c->journal, trans->journal_res.seq,
- &ck->journal, btree_key_cache_journal_flush);
+ &ck->journal, bch2_btree_key_cache_journal_flush);
if (kick_reclaim)
journal_reclaim_kick(&c->journal);
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index 4e1e5a9c7656..7e2b0a08f745 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -1,15 +1,6 @@
#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
#define _BCACHEFS_BTREE_KEY_CACHE_H
-static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
-{
- size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
- size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
- size_t max_dirty = nr_keys / 4;
-
- return max_t(ssize_t, 0, nr_dirty - max_dirty);
-}
-
static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
@@ -29,6 +20,9 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
}
+int bch2_btree_key_cache_journal_flush(struct journal *,
+ struct journal_entry_pin *, u64);
+
struct bkey_cached *
bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index e965c8bbddce..b3137525f9c1 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -974,20 +974,25 @@ retry:
* closure argument
*/
if (flags & BTREE_INSERT_NOUNLOCK) {
+ trace_trans_restart_journal_preres_get(trans->ip);
ret = -EINTR;
goto err;
}
bch2_trans_unlock(trans);
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
- goto err;
+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
+ bch2_btree_update_free(as);
+ return ERR_PTR(ret);
+ }
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
- if (ret)
+ if (ret) {
+ trace_trans_restart_journal_preres_get(trans->ip);
goto err;
+ }
if (!bch2_trans_relock(trans)) {
ret = -EINTR;
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 35a48629b63b..af2f8528ac65 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -59,21 +59,23 @@ journal_seq_to_buf(struct journal *j, u64 seq)
return buf;
}
-static void journal_pin_new_entry(struct journal *j, int count)
+static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
- struct journal_entry_pin_list *p;
+ INIT_LIST_HEAD(&p->list);
+ INIT_LIST_HEAD(&p->key_cache_list);
+ INIT_LIST_HEAD(&p->flushed);
+ atomic_set(&p->count, count);
+ p->devs.nr = 0;
+}
+static void journal_pin_new_entry(struct journal *j)
+{
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for journal_last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
- p = fifo_push_ref(&j->pin);
-
- INIT_LIST_HEAD(&p->list);
- INIT_LIST_HEAD(&p->flushed);
- atomic_set(&p->count, count);
- p->devs.nr = 0;
+ journal_pin_list_init(fifo_push_ref(&j->pin), 1);
}
static void bch2_journal_buf_init(struct journal *j)
@@ -192,7 +194,7 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
/* Initialize new buffer: */
- journal_pin_new_entry(j, 1);
+ journal_pin_new_entry(j);
bch2_journal_buf_init(j);
@@ -1030,12 +1032,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
- fifo_for_each_entry_ptr(p, &j->pin, seq) {
- INIT_LIST_HEAD(&p->list);
- INIT_LIST_HEAD(&p->flushed);
- atomic_set(&p->count, 1);
- p->devs.nr = 0;
- }
+ fifo_for_each_entry_ptr(p, &j->pin, seq)
+ journal_pin_list_init(p, 1);
list_for_each_entry(i, journal_entries, list) {
unsigned ptr;
@@ -1058,7 +1056,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
set_bit(JOURNAL_STARTED, &j->flags);
j->last_flush_write = jiffies;
- journal_pin_new_entry(j, 1);
+ journal_pin_new_entry(j);
j->reservations.idx = j->reservations.unwritten_idx = journal_cur_seq(j);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 42ed7a3525b1..0d7fe1f99dbf 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -407,7 +407,12 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
pin->seq = seq;
pin->flush = flush_fn;
- list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
+ if (flush_fn == bch2_btree_key_cache_journal_flush)
+ list_add(&pin->list, &pin_list->key_cache_list);
+ else if (flush_fn)
+ list_add(&pin->list, &pin_list->list);
+ else
+ list_add(&pin->list, &pin_list->flushed);
spin_unlock(&j->lock);
/*
@@ -437,23 +442,40 @@ void bch2_journal_pin_flush(struct journal *j, struct journal_entry_pin *pin)
*/
static struct journal_entry_pin *
-journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
+journal_get_next_pin(struct journal *j,
+ bool get_any,
+ bool get_key_cache,
+ u64 max_seq, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
- fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
- if (*seq > max_seq ||
- (ret = list_first_entry_or_null(&pin_list->list,
- struct journal_entry_pin, list)))
+ fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
+ if (*seq > max_seq && !get_any && !get_key_cache)
break;
- return ret;
+ if (*seq <= max_seq || get_any) {
+ ret = list_first_entry_or_null(&pin_list->list,
+ struct journal_entry_pin, list);
+ if (ret)
+ return ret;
+ }
+
+ if (*seq <= max_seq || get_any || get_key_cache) {
+ ret = list_first_entry_or_null(&pin_list->key_cache_list,
+ struct journal_entry_pin, list);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return NULL;
}
/* returns true if we did work */
static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
- unsigned min_nr)
+ unsigned min_any,
+ unsigned min_key_cache)
{
struct journal_entry_pin *pin;
size_t nr_flushed = 0;
@@ -472,8 +494,10 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
j->last_flushed = jiffies;
spin_lock(&j->lock);
- pin = journal_get_next_pin(j, min_nr
- ? U64_MAX : seq_to_flush, &seq);
+ pin = journal_get_next_pin(j,
+ min_any != 0,
+ min_key_cache != 0,
+ seq_to_flush, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
@@ -485,8 +509,11 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
if (!pin)
break;
- if (min_nr)
- min_nr--;
+ if (min_key_cache && pin->flush == bch2_btree_key_cache_journal_flush)
+ min_key_cache--;
+
+ if (min_any)
+ min_any--;
err = flush_fn(j, pin, seq);
@@ -610,18 +637,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (j->prereserved.reserved * 2 > j->prereserved.remaining)
min_nr = 1;
- if (atomic_read(&c->btree_cache.dirty) * 4 >
- c->btree_cache.used * 3)
- min_nr = 1;
-
if (fifo_free(&j->pin) <= 32)
min_nr = 1;
- min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
-
- /* Don't do too many without delivering wakeup: */
- min_nr = min(min_nr, 128UL);
-
trace_journal_reclaim_start(c,
min_nr,
j->prereserved.reserved,
@@ -631,7 +649,9 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys));
- nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
+ nr_flushed = journal_flush_pins(j, seq_to_flush,
+ min_nr,
+ min(bch2_nr_btree_keys_need_flush(c), 128UL));
if (direct)
j->nr_direct_reclaim += nr_flushed;
@@ -641,7 +661,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (nr_flushed)
wake_up(&j->reclaim_wait);
- } while (min_nr && nr_flushed);
+ } while (min_nr && nr_flushed && !direct);
memalloc_noreclaim_restore(flags);
@@ -734,7 +754,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
- *did_work = journal_flush_pins(j, seq_to_flush, 0) != 0;
+ *did_work = journal_flush_pins(j, seq_to_flush, 0, 0) != 0;
spin_lock(&j->lock);
/*
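The other half of the change is the flush budget: journal_flush_pins() now
takes min_any and min_key_cache instead of a single min_nr. A hedged
userspace model of that accounting (flush_pins() and the pin struct are
invented for illustration; the real loop pulls pins one at a time via
journal_get_next_pin() and stops when none is found, but the decrement
logic mirrors the hunk above):

	#include <stdio.h>
	#include <stddef.h>

	enum pin_type { PIN_BTREE_NODE, PIN_KEY_CACHE };

	struct pin {
		enum pin_type	type;
		unsigned long	seq;
	};

	/*
	 * Models journal_flush_pins() after this patch: every flushed pin
	 * counts against min_any, but only key cache pins count against
	 * min_key_cache; the budgets also decide whether pins past
	 * seq_to_flush are eligible at all.
	 */
	static size_t flush_pins(const struct pin *pins, size_t nr,
				 unsigned long seq_to_flush,
				 unsigned min_any, unsigned min_key_cache)
	{
		size_t nr_flushed = 0;
		size_t i;

		for (i = 0; i < nr; i++) {
			const struct pin *p = &pins[i];
			int eligible = p->seq <= seq_to_flush ||
				min_any ||
				(min_key_cache && p->type == PIN_KEY_CACHE);

			if (!eligible)
				continue;

			if (min_key_cache && p->type == PIN_KEY_CACHE)
				min_key_cache--;
			if (min_any)
				min_any--;
			nr_flushed++;	/* flush_fn(j, pin, seq) would run here */
		}

		return nr_flushed;
	}

	int main(void)
	{
		const struct pin pins[] = {
			{ PIN_BTREE_NODE, 10 },
			{ PIN_KEY_CACHE,  11 },
			{ PIN_BTREE_NODE, 12 },
			{ PIN_KEY_CACHE,  13 },
		};

		/* Key cache pressure only: budget of 2 key cache flushes. */
		printf("flushed %zu pins\n", flush_pins(pins, 4, 9UL, 0, 2));
		return 0;
	}

With min_any == 0 and a nonzero key cache budget, this pass flushes only
the two key cache pins and skips both btree node pins - exactly the
behavior the commit message describes.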
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 97d764370b89..f597eb78e66e 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -43,6 +43,7 @@ struct journal_buf {
struct journal_entry_pin_list {
struct list_head list;
+ struct list_head key_cache_list;
struct list_head flushed;
atomic_t count;
struct bch_devs_list devs;
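For reference, the resulting pin list structure from journal_types.h, with
each list's role annotated (the field comments are mine, not in the patch):

	struct journal_entry_pin_list {
		struct list_head	list;		/* pins whose flush may kick btree node writes */
		struct list_head	key_cache_list;	/* pins that flush only btree key cache keys */
		struct list_head	flushed;	/* pins with no flush callback */
		atomic_t		count;
		struct bch_devs_list	devs;
	};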
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index babb07e3acc4..387c1c49f696 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -716,6 +716,11 @@ DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade,
TP_ARGS(ip)
);
+DEFINE_EVENT(transaction_restart, trans_restart_relock,
+ TP_PROTO(unsigned long ip),
+ TP_ARGS(ip)
+);
+
DEFINE_EVENT(transaction_restart, trans_restart_traverse,
TP_PROTO(unsigned long ip),
TP_ARGS(ip)