summaryrefslogtreecommitdiffstats
path: root/fs/bcachefs/btree_trans_commit.c
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-11-02 18:57:19 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2024-01-01 11:47:41 -0500
commit09caeabe1a5daa3b667b797c401d43206d583f23 (patch)
tree38285367be9d45f8ecbbe12f233744548d2ec455 /fs/bcachefs/btree_trans_commit.c
parentb05c0e9370bec71c62df690250f451e58e8ed2a4 (diff)
downloadlinux-09caeabe1a5daa3b667b797c401d43206d583f23.tar.gz
linux-09caeabe1a5daa3b667b797c401d43206d583f23.tar.bz2
linux-09caeabe1a5daa3b667b797c401d43206d583f23.zip
bcachefs: btree write buffer now slurps keys from journal
Previosuly, the transaction commit path would have to add keys to the btree write buffer as a separate operation, requiring additional global synchronization. This patch introduces a new journal entry type, which indicates that the keys need to be copied into the btree write buffer prior to being written out. We switch the journal entry type back to JSET_ENTRY_btree_keys prior to write, so this is not an on disk format change. Flushing the btree write buffer may require pulling keys out of journal entries yet to be written, and quiescing outstanding journal reservations; we previously added journal->buf_lock for synchronization with the journal write path. We also can't put strict bounds on the number of keys in the journal destined for the write buffer, which means we might overflow the size of the preallocated buffer and have to reallocate - this introduces a potentially fatal memory allocation failure. This is something we'll have to watch for, if it becomes an issue in practice we can do additional mitigation. The transaction commit path no longer has to explicitly check if the write buffer is full and wait on flushing; this is another performance optimization. Instead, when the btree write buffer is close to full we change the journal watermark, so that only reservations for journal reclaim are allowed. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/btree_trans_commit.c')
-rw-r--r--fs/bcachefs/btree_trans_commit.c52
1 files changed, 1 insertions, 51 deletions
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index abdf4fd10b6a..e52386fdc7ec 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -659,10 +659,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
i->k->k.needs_whiteout = false;
}
- if (trans->nr_wb_updates &&
- trans->nr_wb_updates + c->btree_write_buffer.state.nr > c->btree_write_buffer.size)
- return -BCH_ERR_btree_insert_need_flush_buffer;
-
/*
* Don't get journal reservation until after we know insert will
* succeed:
@@ -697,14 +693,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
return -BCH_ERR_btree_insert_need_mark_replicas;
- if (trans->nr_wb_updates) {
- EBUG_ON(flags & BCH_TRANS_COMMIT_no_journal_res);
-
- ret = bch2_btree_insert_keys_write_buffer(trans);
- if (ret)
- goto revert_fs_usage;
- }
-
h = trans->hooks;
while (h) {
ret = h->fn(trans, h);
@@ -757,7 +745,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
trans_for_each_wb_update(trans, wb) {
entry = bch2_journal_add_entry(j, &trans->journal_res,
- BCH_JSET_ENTRY_btree_keys,
+ BCH_JSET_ENTRY_write_buffer_keys,
wb->btree, 0,
wb->k.k.u64s);
bkey_copy((struct bkey_i *) entry->start, &wb->k);
@@ -948,30 +936,6 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
ret = bch2_trans_relock(trans);
break;
- case -BCH_ERR_btree_insert_need_flush_buffer: {
- struct btree_write_buffer *wb = &c->btree_write_buffer;
-
- ret = 0;
-
- if (wb->state.nr > wb->size * 3 / 4) {
- bch2_trans_unlock(trans);
- mutex_lock(&wb->flush_lock);
-
- if (wb->state.nr > wb->size * 3 / 4) {
- bch2_trans_begin(trans);
- ret = bch2_btree_write_buffer_flush_locked(trans);
- mutex_unlock(&wb->flush_lock);
- if (!ret) {
- trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
- }
- } else {
- mutex_unlock(&wb->flush_lock);
- ret = bch2_trans_relock(trans);
- }
- }
- break;
- }
default:
BUG_ON(ret >= 0);
break;
@@ -1070,20 +1034,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
goto out_reset;
}
- if (c->btree_write_buffer.state.nr > c->btree_write_buffer.size / 2 &&
- mutex_trylock(&c->btree_write_buffer.flush_lock)) {
- bch2_trans_begin(trans);
- bch2_trans_unlock(trans);
-
- ret = bch2_btree_write_buffer_flush_locked(trans);
- mutex_unlock(&c->btree_write_buffer.flush_lock);
- if (!ret) {
- trace_and_count(c, trans_restart_write_buffer_flush, trans, _THIS_IP_);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_write_buffer_flush);
- }
- goto out;
- }
-
EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
trans->journal_u64s = trans->journal_entries_u64s;