Diffstat (limited to 'fs/erofs')
-rw-r--r-- | fs/erofs/Kconfig | 16
-rw-r--r-- | fs/erofs/compress.h | 2
-rw-r--r-- | fs/erofs/data.c | 148
-rw-r--r-- | fs/erofs/decompressor.c | 95
-rw-r--r-- | fs/erofs/decompressor_deflate.c | 8
-rw-r--r-- | fs/erofs/decompressor_lzma.c | 8
-rw-r--r-- | fs/erofs/decompressor_zstd.c | 8
-rw-r--r-- | fs/erofs/dir.c | 9
-rw-r--r-- | fs/erofs/erofs_fs.h | 191
-rw-r--r-- | fs/erofs/fileio.c | 4
-rw-r--r-- | fs/erofs/fscache.c | 2
-rw-r--r-- | fs/erofs/inode.c | 125
-rw-r--r-- | fs/erofs/internal.h | 47
-rw-r--r-- | fs/erofs/namei.c | 2
-rw-r--r-- | fs/erofs/super.c | 85
-rw-r--r-- | fs/erofs/sysfs.c | 2
-rw-r--r-- | fs/erofs/xattr.c | 12
-rw-r--r-- | fs/erofs/zdata.c | 287
19 files changed, 581 insertions, 571 deletions
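A recurring pattern in the hunks below is the split of on-disk block addresses into a 32-bit "_lo" word plus a 16-bit "_hi" word (startblk_lo/startblk_hi, blocks_lo/blocks_hi, uniaddr_lo/uniaddr_hi), masked to 48 or 32 bits depending on whether the new 48BIT feature is in effect. The standalone sketch below only mirrors that decode logic outside the kernel for illustration; erofs_decode_blk and has_48bit are hypothetical names, not part of the patch.

#include <stdbool.h>
#include <stdint.h>

#define EROFS_NULL_ADDR ((uint64_t)-1)  /* all-ones within the mask: unmapped */

/* Hypothetical userspace mirror of the in-tree pattern, e.g. in data.c:
 *   startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) |
 *               le32_to_cpu(idx->startblk_lo)) & addrmask;
 */
static uint64_t erofs_decode_blk(uint32_t lo, uint16_t hi, bool has_48bit)
{
	uint64_t addrmask = has_48bit ? (1ULL << 48) - 1 : (1ULL << 32) - 1;
	uint64_t blk = (((uint64_t)hi << 32) | lo) & addrmask;

	/* same NULL check as `(startblk ^ EROFS_NULL_ADDR) & addrmask` */
	if (!((blk ^ EROFS_NULL_ADDR) & addrmask))
		return EROFS_NULL_ADDR;
	return blk;
}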
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 6ea60661fa55..8f68ec49ad89 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,8 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CRC32 select FS_IOMAP - select LIBCRC32C help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline @@ -13,12 +13,12 @@ config EROFS_FS smartphones with Android OS, LiveCDs and high-density hosts with numerous containers; - It also provides fixed-sized output compression support in order to - improve storage density as well as keep relatively higher compression - ratios and implements in-place decompression to reuse the file page - for compressed data temporarily with proper strategies, which is - quite useful to ensure guaranteed end-to-end runtime decompression - performance under extremely memory pressure without extra cost. + It also provides transparent compression and deduplication support to + improve storage density and maintain relatively high compression + ratios, and it implements in-place decompression to temporarily reuse + page cache for compressed data using proper strategies, which is + quite useful for ensuring guaranteed end-to-end runtime decompression + performance under extreme memory pressure without extra cost. See the documentation at <file:Documentation/filesystems/erofs.rst> and the web pages at <https://erofs.docs.kernel.org> for more details. @@ -97,7 +97,7 @@ config EROFS_FS_ZIP select LZ4_DECOMPRESS default y help - Enable fixed-sized output compression for EROFS. + Enable transparent compression support for EROFS file systems. If you don't want to enable compression feature, say N. diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 65ff39401020..2704d7a592a5 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -11,6 +11,7 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; + unsigned int inpages, outpages; unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; @@ -59,7 +60,6 @@ extern const struct z_erofs_decompressor *z_erofs_decomp[]; struct z_erofs_stream_dctx { struct z_erofs_decompress_req *rq; - unsigned int inpages, outpages; /* # of {en,de}coded pages */ int no, ni; /* the current {en,de}coded page # */ unsigned int avail_out; /* remaining bytes in the decoded buffer */ diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 0cd6b5c4df98..2409d2ab0c28 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -25,8 +25,7 @@ void erofs_put_metabuf(struct erofs_buf *buf) buf->page = NULL; } -void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, - enum erofs_kmap_type type) +void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap) { pgoff_t index = offset >> PAGE_SHIFT; struct folio *folio = NULL; @@ -43,10 +42,10 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, return folio; } buf->page = folio_file_page(folio, index); - if (!buf->base && type == EROFS_KMAP) - buf->base = kmap_local_page(buf->page); - if (type == EROFS_NO_KMAP) + if (!need_kmap) return NULL; + if (!buf->base) + buf->base = kmap_local_page(buf->page); return buf->base + (offset & ~PAGE_MASK); } @@ -65,64 +64,47 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb) } void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, enum erofs_kmap_type type) + erofs_off_t offset, bool need_kmap) { erofs_init_metabuf(buf, sb); - return 
erofs_bread(buf, offset, type); -} - -static int erofs_map_blocks_flatmode(struct inode *inode, - struct erofs_map_blocks *map) -{ - struct erofs_inode *vi = EROFS_I(inode); - struct super_block *sb = inode->i_sb; - bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); - erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking; - - map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */ - if (map->m_la < erofs_pos(sb, lastblk)) { - map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la; - map->m_plen = erofs_pos(sb, lastblk) - map->m_la; - } else { - DBG_BUGON(!tailendpacking); - map->m_pa = erofs_iloc(inode) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(sb, map->m_la); - map->m_plen = inode->i_size - map->m_la; - - /* inline data should be located in the same meta block */ - if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { - erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } - map->m_flags |= EROFS_MAP_META; - } - return 0; + return erofs_bread(buf, offset, need_kmap); } int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) { + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = inode->i_sb; + unsigned int unit, blksz = sb->s_blocksize; struct erofs_inode *vi = EROFS_I(inode); struct erofs_inode_chunk_index *idx; - struct erofs_buf buf = __EROFS_BUF_INITIALIZER; - u64 chunknr; - unsigned int unit; + erofs_blk_t startblk, addrmask; + bool tailpacking; erofs_off_t pos; - void *kaddr; + u64 chunknr; int err = 0; trace_erofs_map_blocks_enter(inode, map, 0); map->m_deviceid = 0; - if (map->m_la >= inode->i_size) { - /* leave out-of-bound access unmapped */ - map->m_flags = 0; - map->m_plen = map->m_llen; + map->m_flags = 0; + if (map->m_la >= inode->i_size) goto out; - } if (vi->datalayout != EROFS_INODE_CHUNK_BASED) { - err = erofs_map_blocks_flatmode(inode, map); + tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE); + if (!tailpacking && vi->startblk == EROFS_NULL_ADDR) + goto out; + pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking); + + map->m_flags = EROFS_MAP_MAPPED; + if (map->m_la < pos) { + map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la; + map->m_llen = pos - map->m_la; + } else { + map->m_pa = erofs_iloc(inode) + vi->inode_isize + + vi->xattr_isize + erofs_blkoff(sb, map->m_la); + map->m_llen = inode->i_size - map->m_la; + map->m_flags |= EROFS_MAP_META; + } goto out; } @@ -135,45 +117,44 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map) pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, unit) + unit * chunknr; - kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); - if (IS_ERR(kaddr)) { - err = PTR_ERR(kaddr); + idx = erofs_read_metabuf(&buf, sb, pos, true); + if (IS_ERR(idx)) { + err = PTR_ERR(idx); goto out; } map->m_la = chunknr << vi->chunkbits; - map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, - round_up(inode->i_size - map->m_la, sb->s_blocksize)); - - /* handle block map */ - if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { - __le32 *blkaddr = kaddr; - - if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { - map->m_flags = 0; - } else { - map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr)); + map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits, + round_up(inode->i_size - map->m_la, blksz)); + if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) { + addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ? 
+ BIT_ULL(48) - 1 : BIT_ULL(32) - 1; + startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) | + le32_to_cpu(idx->startblk_lo)) & addrmask; + if ((startblk ^ EROFS_NULL_ADDR) & addrmask) { + map->m_deviceid = le16_to_cpu(idx->device_id) & + EROFS_SB(sb)->device_id_mask; + map->m_pa = erofs_pos(sb, startblk); + map->m_flags = EROFS_MAP_MAPPED; + } + } else { + startblk = le32_to_cpu(*(__le32 *)idx); + if (startblk != (u32)EROFS_NULL_ADDR) { + map->m_pa = erofs_pos(sb, startblk); map->m_flags = EROFS_MAP_MAPPED; } - goto out_unlock; - } - /* parse chunk indexes */ - idx = kaddr; - switch (le32_to_cpu(idx->blkaddr)) { - case EROFS_NULL_ADDR: - map->m_flags = 0; - break; - default: - map->m_deviceid = le16_to_cpu(idx->device_id) & - EROFS_SB(sb)->device_id_mask; - map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr)); - map->m_flags = EROFS_MAP_MAPPED; - break; } -out_unlock: erofs_put_metabuf(&buf); out: - if (!err) - map->m_llen = map->m_plen; + if (!err) { + map->m_plen = map->m_llen; + /* inline data should be located in the same meta block */ + if ((map->m_flags & EROFS_MAP_META) && + erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) { + erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + } trace_erofs_map_blocks_exit(inode, map, 0, err); return err; } @@ -192,7 +173,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) { struct erofs_dev_context *devs = EROFS_SB(sb)->devs; struct erofs_device_info *dif; - erofs_off_t startoff, length; + erofs_off_t startoff; int id; erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0); @@ -205,7 +186,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) return -ENODEV; } if (devs->flatdev) { - map->m_pa += erofs_pos(sb, dif->mapped_blkaddr); + map->m_pa += erofs_pos(sb, dif->uniaddr); up_read(&devs->rwsem); return 0; } @@ -214,13 +195,12 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) } else if (devs->extra_devices && !devs->flatdev) { down_read(&devs->rwsem); idr_for_each_entry(&devs->tree, dif, id) { - if (!dif->mapped_blkaddr) + if (!dif->uniaddr) continue; - startoff = erofs_pos(sb, dif->mapped_blkaddr); - length = erofs_pos(sb, dif->blocks); + startoff = erofs_pos(sb, dif->uniaddr); if (map->m_pa >= startoff && - map->m_pa < startoff + length) { + map->m_pa < startoff + erofs_pos(sb, dif->blocks)) { map->m_pa -= startoff; erofs_fill_from_devinfo(map, sb, dif); break; @@ -312,7 +292,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, struct erofs_buf buf = __EROFS_BUF_INITIALIZER; iomap->type = IOMAP_INLINE; - ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP); + ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true); if (IS_ERR(ptr)) return PTR_ERR(ptr); iomap->inline_data = ptr; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 2b123b070a42..bf62e2836b60 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -9,14 +9,6 @@ #define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) -struct z_erofs_lz4_decompress_ctx { - struct z_erofs_decompress_req *rq; - /* # of encoded, decoded pages */ - unsigned int inpages, outpages; - /* decoded block total length (used for in-place decompression) */ - unsigned int oend; -}; - static int z_erofs_load_lz4_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { @@ -55,10 +47,9 @@ static int z_erofs_load_lz4_config(struct super_block *sb, * Fill all gaps with bounce pages 
if it's a sparse page list. Also check if * all physical pages are consecutive, which can be seen for moderate CR. */ -static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, +static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, struct page **pagepool) { - struct z_erofs_decompress_req *rq = ctx->rq; struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, BITS_PER_LONG)] = { 0 }; @@ -68,7 +59,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, unsigned int i, j, top; top = 0; - for (i = j = 0; i < ctx->outpages; ++i, ++j) { + for (i = j = 0; i < rq->outpages; ++i, ++j) { struct page *const page = rq->out[i]; struct page *victim; @@ -114,36 +105,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, return kaddr ? 1 : 0; } -static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, +static void *z_erofs_lz4_handle_overlap(struct z_erofs_decompress_req *rq, void *inpage, void *out, unsigned int *inputmargin, int *maptype, bool may_inplace) { - struct z_erofs_decompress_req *rq = ctx->rq; - unsigned int omargin, total, i; + unsigned int oend, omargin, total, i; struct page **in; void *src, *tmp; if (rq->inplace_io) { - omargin = PAGE_ALIGN(ctx->oend) - ctx->oend; + oend = rq->pageofs_out + rq->outputsize; + omargin = PAGE_ALIGN(oend) - oend; if (rq->partial_decoding || !may_inplace || omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize)) goto docopy; - for (i = 0; i < ctx->inpages; ++i) - if (rq->out[ctx->outpages - ctx->inpages + i] != + for (i = 0; i < rq->inpages; ++i) + if (rq->out[rq->outpages - rq->inpages + i] != rq->in[i]) goto docopy; kunmap_local(inpage); *maptype = 3; - return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT); + return out + ((rq->outpages - rq->inpages) << PAGE_SHIFT); } - if (ctx->inpages <= 1) { + if (rq->inpages <= 1) { *maptype = 0; return inpage; } kunmap_local(inpage); - src = erofs_vm_map_ram(rq->in, ctx->inpages); + src = erofs_vm_map_ram(rq->in, rq->inpages); if (!src) return ERR_PTR(-ENOMEM); *maptype = 1; @@ -152,7 +143,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx, docopy: /* Or copy compressed data which can be overlapped to per-CPU buffer */ in = rq->in; - src = z_erofs_get_gbuf(ctx->inpages); + src = z_erofs_get_gbuf(rq->inpages); if (!src) { DBG_BUGON(1); kunmap_local(inpage); @@ -197,10 +188,8 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, return 0; } -static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, - u8 *dst) +static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, u8 *dst) { - struct z_erofs_decompress_req *rq = ctx->rq; bool support_0padding = false, may_inplace = false; unsigned int inputmargin; u8 *out, *headpage, *src; @@ -224,7 +213,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, } inputmargin = rq->pageofs_in; - src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin, + src = z_erofs_lz4_handle_overlap(rq, headpage, dst, &inputmargin, &maptype, may_inplace); if (IS_ERR(src)) return PTR_ERR(src); @@ -251,7 +240,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, if (maptype == 0) { kunmap_local(headpage); } else if (maptype == 1) { - vm_unmap_ram(src, ctx->inpages); + vm_unmap_ram(src, rq->inpages); } else if (maptype == 2) { z_erofs_put_gbuf(src); } 
else if (maptype != 3) { @@ -264,54 +253,42 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx, static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, struct page **pagepool) { - struct z_erofs_lz4_decompress_ctx ctx; unsigned int dst_maptype; void *dst; int ret; - ctx.rq = rq; - ctx.oend = rq->pageofs_out + rq->outputsize; - ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT; - ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; - /* one optimized fast path only for non bigpcluster cases yet */ - if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) { + if (rq->inpages == 1 && rq->outpages == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); dst = kmap_local_page(*rq->out); dst_maptype = 0; - goto dstmap_out; - } - - /* general decoding path which can be used for all cases */ - ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool); - if (ret < 0) { - return ret; - } else if (ret > 0) { - dst = page_address(*rq->out); - dst_maptype = 1; } else { - dst = erofs_vm_map_ram(rq->out, ctx.outpages); - if (!dst) - return -ENOMEM; - dst_maptype = 2; + /* general decoding path which can be used for all cases */ + ret = z_erofs_lz4_prepare_dstpages(rq, pagepool); + if (ret < 0) + return ret; + if (ret > 0) { + dst = page_address(*rq->out); + dst_maptype = 1; + } else { + dst = erofs_vm_map_ram(rq->out, rq->outpages); + if (!dst) + return -ENOMEM; + dst_maptype = 2; + } } - -dstmap_out: - ret = z_erofs_lz4_decompress_mem(&ctx, dst); + ret = z_erofs_lz4_decompress_mem(rq, dst); if (!dst_maptype) kunmap_local(dst); else if (dst_maptype == 2) - vm_unmap_ram(dst, ctx.outpages); + vm_unmap_ram(dst, rq->outpages); return ret; } static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, struct page **pagepool) { - const unsigned int nrpages_in = - PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT; - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const unsigned int nrpages_in = rq->inpages, nrpages_out = rq->outpages; const unsigned int bs = rq->sb->s_blocksize; unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt; u8 *kin; @@ -336,7 +313,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, rq->outputsize -= cur; } - for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) { + for (; rq->outputsize; rq->pageofs_in = 0, cur += insz, ni++) { insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize); rq->outputsize -= insz; if (!rq->in[ni]) @@ -373,7 +350,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst, unsigned int j; if (!dctx->avail_out) { - if (++dctx->no >= dctx->outpages || !rq->outputsize) { + if (++dctx->no >= rq->outpages || !rq->outputsize) { erofs_err(sb, "insufficient space for decompressed data"); return -EFSCORRUPTED; } @@ -401,7 +378,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst, } if (dctx->inbuf_pos == dctx->inbuf_sz && rq->inputsize) { - if (++dctx->ni >= dctx->inpages) { + if (++dctx->ni >= rq->inpages) { erofs_err(sb, "invalid compressed data"); return -EFSCORRUPTED; } @@ -434,7 +411,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst, dctx->bounced = true; } - for (j = dctx->ni + 1; j < dctx->inpages; ++j) { + for (j = dctx->ni + 1; j < rq->inpages; ++j) { if (rq->out[dctx->no] != rq->in[j]) continue; tmppage = erofs_allocpage(pgpl, rq->gfp); diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 5070d2fcc737..c6908a487054 
100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -101,13 +101,7 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, struct page **pgpl) { struct super_block *sb = rq->sb; - struct z_erofs_stream_dctx dctx = { - .rq = rq, - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) - >> PAGE_SHIFT, - .no = -1, .ni = 0, - }; + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; struct z_erofs_deflate *strm; int zerr, err; diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 40666815046f..832cffb83a66 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -150,13 +150,7 @@ static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, struct page **pgpl) { struct super_block *sb = rq->sb; - struct z_erofs_stream_dctx dctx = { - .rq = rq, - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) - >> PAGE_SHIFT, - .no = -1, .ni = 0, - }; + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; struct xz_buf buf = {}; struct z_erofs_lzma *strm; enum xz_ret xz_err; diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c index 7e177304967e..b4bfe14229f9 100644 --- a/fs/erofs/decompressor_zstd.c +++ b/fs/erofs/decompressor_zstd.c @@ -139,13 +139,7 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, struct page **pgpl) { struct super_block *sb = rq->sb; - struct z_erofs_stream_dctx dctx = { - .rq = rq, - .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT, - .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) - >> PAGE_SHIFT, - .no = -1, .ni = 0, - }; + struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 }; zstd_in_buffer in_buf = { NULL, 0, 0 }; zstd_out_buffer out_buf = { NULL, 0, 0 }; struct z_erofs_zstd *strm; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index c3b90abdee37..2fae209d0274 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -58,9 +58,9 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, dbstart, EROFS_KMAP); + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { - erofs_err(sb, "fail to readdir of logical block %u of nid %llu", + erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; @@ -90,6 +90,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) ofs = 0; } erofs_put_metabuf(&buf); + if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { + if (!dir_emit_dot(f, ctx)) + return 0; + ++ctx->pos; + } return err < 0 ? 
err : 0; } diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 199395ed1c1f..767fb4acdc93 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -30,25 +30,19 @@ #define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020 #define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020 #define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040 +#define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080 #define EROFS_ALL_FEATURE_INCOMPAT \ - (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \ - EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ - EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ - EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ - EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ - EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \ - EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \ - EROFS_FEATURE_INCOMPAT_FRAGMENTS | \ - EROFS_FEATURE_INCOMPAT_DEDUPE | \ - EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES) + ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1) #define EROFS_SB_EXTSLOT_SIZE 16 struct erofs_deviceslot { u8 tag[64]; /* digest(sha256), etc. */ - __le32 blocks; /* total fs blocks of this device */ - __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ - u8 reserved[56]; + __le32 blocks_lo; /* total blocks count of this device */ + __le32 uniaddr_lo; /* unified starting block of this device */ + __le32 blocks_hi; /* total blocks count MSB */ + __le16 uniaddr_hi; /* unified starting block MSB */ + u8 reserved[50]; }; #define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) @@ -59,13 +53,14 @@ struct erofs_super_block { __le32 feature_compat; __u8 blkszbits; /* filesystem block size in bit shift */ __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ - - __le16 root_nid; /* nid of root directory */ + union { + __le16 rootnid_2b; /* nid of root directory */ + __le16 blocks_hi; /* (48BIT on) blocks count MSB */ + } __packed rb; __le64 inos; /* total valid ino # (== f_files - f_favail) */ - - __le64 build_time; /* compact inode time derivation */ - __le32 build_time_nsec; /* compact inode time derivation in ns scale */ - __le32 blocks; /* used for statfs */ + __le64 epoch; /* base seconds used for compact inodes */ + __le32 fixed_nsec; /* fixed nanoseconds for compact inodes */ + __le32 blocks_lo; /* blocks count LSB */ __le32 meta_blkaddr; /* start block address of metadata area */ __le32 xattr_blkaddr; /* start block address of shared xattr area */ __u8 uuid[16]; /* 128-bit uuid for volume */ @@ -84,7 +79,10 @@ struct erofs_super_block { __le32 xattr_prefix_start; /* start of long xattr prefixes */ __le64 packed_nid; /* nid of the special packed inode */ __u8 xattr_filter_reserved; /* reserved for xattr name filter */ - __u8 reserved2[23]; + __u8 reserved[3]; + __le32 build_time; /* seconds added to epoch for mkfs time */ + __le64 rootnid_8b; /* (48BIT on) nid of root directory */ + __u8 reserved2[8]; }; /* @@ -115,19 +113,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) #define EROFS_I_VERSION_MASK 0x01 #define EROFS_I_DATALAYOUT_MASK 0x07 -#define EROFS_I_VERSION_BIT 0 -#define EROFS_I_DATALAYOUT_BIT 1 -#define EROFS_I_ALL_BIT 4 - -#define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1) +#define EROFS_I_VERSION_BIT 0 +#define EROFS_I_DATALAYOUT_BIT 1 +#define EROFS_I_NLINK_1_BIT 4 /* non-directory compact inodes only */ +#define EROFS_I_DOT_OMITTED_BIT 4 /* (directories) omit the `.` dirent */ +#define EROFS_I_ALL ((1 << (EROFS_I_NLINK_1_BIT + 1)) - 1) /* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ #define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F -/* with chunk indexes or just a 4-byte blkaddr array */ +/* 
with chunk indexes or just a 4-byte block array */ #define EROFS_CHUNK_FORMAT_INDEXES 0x0020 +#define EROFS_CHUNK_FORMAT_48BIT 0x0040 -#define EROFS_CHUNK_FORMAT_ALL \ - (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) +#define EROFS_CHUNK_FORMAT_ALL ((EROFS_CHUNK_FORMAT_48BIT << 1) - 1) /* 32-byte on-disk inode */ #define EROFS_INODE_LAYOUT_COMPACT 0 @@ -140,45 +138,40 @@ struct erofs_inode_chunk_info { }; union erofs_inode_i_u { - /* total compressed blocks for compressed inodes */ - __le32 compressed_blocks; - - /* block address for uncompressed flat inodes */ - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - - /* for chunk-based files, it contains the summary info */ + __le32 blocks_lo; /* total blocks count (if compressed inodes) */ + __le32 startblk_lo; /* starting block number (if flat inodes) */ + __le32 rdev; /* device ID (if special inodes) */ struct erofs_inode_chunk_info c; }; +union erofs_inode_i_nb { + __le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */ + __le16 blocks_hi; /* total blocks count MSB */ + __le16 startblk_hi; /* starting block number MSB */ +} __packed; + /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { __le16 i_format; /* inode format hints */ - -/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ __le16 i_xattr_icount; __le16 i_mode; - __le16 i_nlink; + union erofs_inode_i_nb i_nb; __le32 i_size; - __le32 i_reserved; + __le32 i_mtime; union erofs_inode_i_u i_u; __le32 i_ino; /* only used for 32-bit stat compatibility */ __le16 i_uid; __le16 i_gid; - __le32 i_reserved2; + __le32 i_reserved; }; /* 64-byte complete form of an ondisk inode */ struct erofs_inode_extended { __le16 i_format; /* inode format hints */ - -/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ __le16 i_xattr_icount; __le16 i_mode; - __le16 i_reserved; + union erofs_inode_i_nb i_nb; __le64 i_size; union erofs_inode_i_u i_u; @@ -248,6 +241,7 @@ static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount) if (!i_xattr_icount) return 0; + /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ return sizeof(struct erofs_xattr_ibody_header) + sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1); } @@ -266,11 +260,11 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) /* 4-byte block address array */ #define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) -/* 8-byte inode chunk indexes */ +/* 8-byte inode chunk index */ struct erofs_inode_chunk_index { - __le16 advise; /* always 0, don't care for now */ + __le16 startblk_hi; /* starting block number MSB */ __le16 device_id; /* back-end storage id (with bits masked) */ - __le32 blkaddr; /* start block address of this inode chunk */ + __le32 startblk_lo; /* starting block number of this chunk */ }; /* dirent sorts in alphabet order, thus we can do binary search */ @@ -337,21 +331,20 @@ struct z_erofs_zstd_cfgs { #define Z_EROFS_ZSTD_MAX_DICT_SIZE Z_EROFS_PCLUSTER_MAX_SIZE /* - * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) - * e.g. for 4k logical cluster size, 4B if compacted 2B is off; - * (4B) + 2B + (4B) if compacted 2B is on. 
- * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) - * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) - * bit 3 : tailpacking inline pcluster (0 - off; 1 - on) - * bit 4 : interlaced plain pcluster (0 - off; 1 - on) - * bit 5 : fragment pcluster (0 - off; 1 - on) + * Enable COMPACTED_2B for EROFS_INODE_COMPRESSED_COMPACT inodes: + * 4B (disabled) vs 4B+2B+4B (enabled) */ #define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 +/* Enable extent metadata for EROFS_INODE_COMPRESSED_FULL inodes */ +#define Z_EROFS_ADVISE_EXTENTS 0x0001 #define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 #define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 #define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008 #define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010 #define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020 +/* Indicate the record size for each extent if extent metadata is used */ +#define Z_EROFS_ADVISE_EXTRECSZ_BIT 1 +#define Z_EROFS_ADVISE_EXTRECSZ_MASK 0x3 #define Z_EROFS_FRAGMENT_INODE_BIT 7 struct z_erofs_map_header { @@ -363,45 +356,24 @@ struct z_erofs_map_header { /* indicates the encoded size of tailpacking data */ __le16 h_idata_size; }; + __le32 h_extents_lo; /* extent count LSB */ }; __le16 h_advise; - /* - * bit 0-3 : algorithm type of head 1 (logical cluster type 01); - * bit 4-7 : algorithm type of head 2 (logical cluster type 11). - */ - __u8 h_algorithmtype; - /* - * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; - * bit 3-6 : reserved; - * bit 7 : move the whole file into packed inode or not. - */ - __u8 h_clusterbits; + union { + struct { + /* algorithm type (bit 0-3: HEAD1; bit 4-7: HEAD2) */ + __u8 h_algorithmtype; + /* + * bit 0-3 : logical cluster bits - blkszbits + * bit 4-6 : reserved + * bit 7 : pack the whole file into packed inode + */ + __u8 h_clusterbits; + } __packed; + __le16 h_extents_hi; /* extent count MSB */ + } __packed; }; -/* - * On-disk logical cluster type: - * 0 - literal (uncompressed) lcluster - * 1,3 - compressed lcluster (for HEAD lclusters) - * 2 - compressed lcluster (for NONHEAD lclusters) - * - * In detail, - * 0 - literal (uncompressed) lcluster, - * di_advise = 0 - * di_clusterofs = the literal data offset of the lcluster - * di_blkaddr = the blkaddr of the literal pcluster - * - * 1,3 - compressed lcluster (for HEAD lclusters) - * di_advise = 1 or 3 - * di_clusterofs = the decompressed data offset of the lcluster - * di_blkaddr = the blkaddr of the compressed pcluster - * - * 2 - compressed lcluster (for NONHEAD lclusters) - * di_advise = 2 - * di_clusterofs = - * the decompressed data offset in its own HEAD lcluster - * di_u.delta[0] = distance to this HEAD lcluster - * di_u.delta[1] = distance to the next HEAD lcluster - */ enum { Z_EROFS_LCLUSTER_TYPE_PLAIN = 0, Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1, @@ -415,11 +387,7 @@ enum { /* (noncompact only, HEAD) This pcluster refers to partial decompressed data */ #define Z_EROFS_LI_PARTIAL_REF (1 << 15) -/* - * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the - * compressed block count of a compressed extent (in logical clusters, aka. - * block count of a pcluster). 
- */ +/* Set on 1st non-head lcluster to store compressed block counti (in blocks) */ #define Z_EROFS_LI_D0_CBLKCNT (1 << 11) struct z_erofs_lcluster_index { @@ -428,19 +396,36 @@ struct z_erofs_lcluster_index { __le16 di_clusterofs; union { - /* for the HEAD lclusters */ - __le32 blkaddr; + __le32 blkaddr; /* for the HEAD lclusters */ /* - * for the NONHEAD lclusters * [0] - distance to its HEAD lcluster * [1] - distance to the next HEAD lcluster */ - __le16 delta[2]; + __le16 delta[2]; /* for the NONHEAD lclusters */ } di_u; }; -#define Z_EROFS_FULL_INDEX_ALIGN(end) \ - (ALIGN(end, 8) + sizeof(struct z_erofs_map_header) + 8) +#define Z_EROFS_MAP_HEADER_END(end) \ + (ALIGN(end, 8) + sizeof(struct z_erofs_map_header)) +#define Z_EROFS_FULL_INDEX_START(end) (Z_EROFS_MAP_HEADER_END(end) + 8) + +#define Z_EROFS_EXTENT_PLEN_PARTIAL BIT(27) +#define Z_EROFS_EXTENT_PLEN_FMT_BIT 28 +#define Z_EROFS_EXTENT_PLEN_MASK ((Z_EROFS_PCLUSTER_MAX_SIZE << 1) - 1) +struct z_erofs_extent { + __le32 plen; /* encoded length */ + __le32 pstart_lo; /* physical offset */ + __le32 pstart_hi; /* physical offset MSB */ + __le32 lstart_lo; /* logical offset */ + __le32 lstart_hi; /* logical offset MSB (>= 4GiB inodes) */ + __u8 reserved[12]; /* for future use */ +}; + +static inline int z_erofs_extent_recsize(unsigned int advise) +{ + return 4 << ((advise >> Z_EROFS_ADVISE_EXTRECSZ_BIT) & + Z_EROFS_ADVISE_EXTRECSZ_MASK); +} /* check the EROFS on-disk layout strictly at compile time */ static inline void erofs_check_ondisk_layout_definitions(void) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 0ffd1c63beeb..4fa0a0121288 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -32,6 +32,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) ret = 0; } if (rq->bio.bi_end_io) { + if (ret < 0 && !rq->bio.bi_status) + rq->bio.bi_status = errno_to_blk_status(ret); rq->bio.bi_end_io(&rq->bio); } else { bio_for_each_folio_all(fi, &rq->bio) { @@ -112,7 +114,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) void *src; src = erofs_read_metabuf(&buf, inode->i_sb, - map->m_pa + ofs, EROFS_KMAP); + map->m_pa + ofs, true); if (IS_ERR(src)) { err = PTR_ERR(src); break; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index ce3d8737df85..9c9129bca346 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -276,7 +276,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req) size_t size = map.m_llen; void *src; - src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP); + src = erofs_read_metabuf(&buf, sb, map.m_pa, true); if (IS_ERR(src)) return PTR_ERR(src); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index d4b89407822a..a0ae0b4f7b01 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -27,29 +27,27 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr, static int erofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; + erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode)); + unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode)); + struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_sb_info *sbi = EROFS_SB(sb); + erofs_blk_t addrmask = BIT_ULL(48) - 1; struct erofs_inode *vi = EROFS_I(inode); - const erofs_off_t inode_loc = erofs_iloc(inode); - erofs_blk_t blkaddr, nblks = 0; - void *kaddr; + struct erofs_inode_extended *die, copied; struct erofs_inode_compact *dic; - struct erofs_inode_extended *die, *copied = NULL; - union erofs_inode_i_u iu; - struct erofs_buf buf = 
__EROFS_BUF_INITIALIZER; - unsigned int ifmt, ofs; + unsigned int ifmt; + void *ptr; int err = 0; - blkaddr = erofs_blknr(sb, inode_loc); - ofs = erofs_blkoff(sb, inode_loc); - - kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), EROFS_KMAP); - if (IS_ERR(kaddr)) { - erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(kaddr)); - return PTR_ERR(kaddr); + ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + erofs_err(sb, "failed to get inode (nid: %llu) page, err %d", + vi->nid, err); + goto err_out; } - dic = kaddr + ofs; + dic = ptr + ofs; ifmt = le16_to_cpu(dic->i_format); if (ifmt & ~EROFS_I_ALL) { erofs_err(sb, "unsupported i_format %u of nid %llu", @@ -73,40 +71,34 @@ static int erofs_read_inode(struct inode *inode) if (ofs + vi->inode_isize <= sb->s_blocksize) { ofs += vi->inode_isize; die = (struct erofs_inode_extended *)dic; + copied.i_u = die->i_u; + copied.i_nb = die->i_nb; } else { const unsigned int gotten = sb->s_blocksize - ofs; - copied = kmalloc(vi->inode_isize, GFP_KERNEL); - if (!copied) { - err = -ENOMEM; + memcpy(&copied, dic, gotten); + ptr = erofs_read_metabuf(&buf, sb, + erofs_pos(sb, blkaddr + 1), true); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d", + vi->nid, err); goto err_out; } - memcpy(copied, dic, gotten); - kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr + 1), - EROFS_KMAP); - if (IS_ERR(kaddr)) { - erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld", - vi->nid, PTR_ERR(kaddr)); - kfree(copied); - return PTR_ERR(kaddr); - } ofs = vi->inode_isize - gotten; - memcpy((u8 *)copied + gotten, kaddr, ofs); - die = copied; + memcpy((u8 *)&copied + gotten, ptr, ofs); + die = &copied; } vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount); inode->i_mode = le16_to_cpu(die->i_mode); - iu = die->i_u; i_uid_write(inode, le32_to_cpu(die->i_uid)); i_gid_write(inode, le32_to_cpu(die->i_gid)); set_nlink(inode, le32_to_cpu(die->i_nlink)); - /* each extended inode has its own timestamp */ - inode_set_ctime(inode, le64_to_cpu(die->i_mtime), + inode_set_mtime(inode, le64_to_cpu(die->i_mtime), le32_to_cpu(die->i_mtime_nsec)); inode->i_size = le64_to_cpu(die->i_size); - kfree(copied); break; case EROFS_INODE_LAYOUT_COMPACT: vi->inode_isize = sizeof(struct erofs_inode_compact); @@ -114,12 +106,20 @@ static int erofs_read_inode(struct inode *inode) vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount); inode->i_mode = le16_to_cpu(dic->i_mode); - iu = dic->i_u; + copied.i_u = dic->i_u; i_uid_write(inode, le16_to_cpu(dic->i_uid)); i_gid_write(inode, le16_to_cpu(dic->i_gid)); - set_nlink(inode, le16_to_cpu(dic->i_nlink)); - /* use build time for compact inodes */ - inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec); + if (!S_ISDIR(inode->i_mode) && + ((ifmt >> EROFS_I_NLINK_1_BIT) & 1)) { + set_nlink(inode, 1); + copied.i_nb = dic->i_nb; + } else { + set_nlink(inode, le16_to_cpu(dic->i_nb.nlink)); + copied.i_nb.startblk_hi = 0; + addrmask = BIT_ULL(32) - 1; + } + inode_set_mtime(inode, sbi->epoch + le32_to_cpu(dic->i_mtime), + sbi->fixed_nsec); inode->i_size = le32_to_cpu(dic->i_size); break; @@ -136,19 +136,26 @@ static int erofs_read_inode(struct inode *inode) goto err_out; } switch (inode->i_mode & S_IFMT) { - case S_IFREG: case S_IFDIR: + vi->dot_omitted = (ifmt >> EROFS_I_DOT_OMITTED_BIT) & 1; + fallthrough; + case S_IFREG: case S_IFLNK: - vi->raw_blkaddr = 
le32_to_cpu(iu.raw_blkaddr); + vi->startblk = le32_to_cpu(copied.i_u.startblk_lo) | + ((u64)le16_to_cpu(copied.i_nb.startblk_hi) << 32); + if (vi->datalayout == EROFS_INODE_FLAT_PLAIN && + !((vi->startblk ^ EROFS_NULL_ADDR) & addrmask)) + vi->startblk = EROFS_NULL_ADDR; + if(S_ISLNK(inode->i_mode)) { - err = erofs_fill_symlink(inode, kaddr, ofs); + err = erofs_fill_symlink(inode, ptr, ofs); if (err) goto err_out; } break; case S_IFCHR: case S_IFBLK: - inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev)); + inode->i_rdev = new_decode_dev(le32_to_cpu(copied.i_u.rdev)); break; case S_IFIFO: case S_IFSOCK: @@ -161,12 +168,15 @@ static int erofs_read_inode(struct inode *inode) goto err_out; } - /* total blocks for compressed files */ - if (erofs_inode_is_data_compressed(vi->datalayout)) { - nblks = le32_to_cpu(iu.compressed_blocks); - } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + if (erofs_inode_is_data_compressed(vi->datalayout)) + inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) << + (sb->s_blocksize_bits - 9); + else + inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; + + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { /* fill chunked inode summary info */ - vi->chunkformat = le16_to_cpu(iu.c.format); + vi->chunkformat = le16_to_cpu(copied.i_u.c.format); if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) { erofs_err(sb, "unsupported chunk format %x of nid %llu", vi->chunkformat, vi->nid); @@ -176,22 +186,15 @@ static int erofs_read_inode(struct inode *inode) vi->chunkbits = sb->s_blocksize_bits + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); } - inode_set_mtime_to_ts(inode, - inode_set_atime_to_ts(inode, inode_get_ctime(inode))); + inode_set_atime_to_ts(inode, + inode_set_ctime_to_ts(inode, inode_get_mtime(inode))); inode->i_flags &= ~S_DAX; if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && (vi->datalayout == EROFS_INODE_FLAT_PLAIN || vi->datalayout == EROFS_INODE_CHUNK_BASED)) inode->i_flags |= S_DAX; - - if (!nblks) - /* measure inode.i_blocks as generic filesystems */ - inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9; - else - inode->i_blocks = nblks << (sb->s_blocksize_bits - 9); err_out: - DBG_BUGON(err); erofs_put_metabuf(&buf); return err; } @@ -202,13 +205,10 @@ static int erofs_fill_inode(struct inode *inode) int err; trace_erofs_fill_inode(inode); - - /* read inode base data from disk */ err = erofs_read_inode(inode); if (err) return err; - /* setup the new inode */ switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &erofs_generic_iops; @@ -229,15 +229,10 @@ static int erofs_fill_inode(struct inode *inode) inode->i_op = &erofs_symlink_iops; inode_nohighmem(inode); break; - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: + default: inode->i_op = &erofs_generic_iops; init_special_inode(inode, inode->i_mode, inode->i_rdev); return 0; - default: - return -EFSCORRUPTED; } mapping_set_large_folios(inode->i_mapping); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 686d835eb533..4ac188d5d894 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -37,8 +37,7 @@ __printf(2, 3) void _erofs_printk(struct super_block *sb, const char *fmt, ...); typedef u64 erofs_nid_t; typedef u64 erofs_off_t; -/* data type for filesystem-wide blocks number */ -typedef u32 erofs_blk_t; +typedef u64 erofs_blk_t; struct erofs_device_info { char *path; @@ -47,8 +46,8 @@ struct erofs_device_info { struct dax_device *dax_dev; u64 dax_part_off; - u32 blocks; - u32 mapped_blkaddr; + erofs_blk_t blocks; + 
erofs_blk_t uniaddr; }; enum { @@ -143,8 +142,8 @@ struct erofs_sb_info { unsigned char blkszbits; /* filesystem block size in bit shift */ u32 sb_size; /* total superblock size */ - u32 build_time_nsec; - u64 build_time; + u32 fixed_nsec; + s64 epoch; /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; @@ -152,8 +151,6 @@ struct erofs_sb_info { /* used for statfs, f_files - f_favail */ u64 inos; - u8 uuid[16]; /* 128-bit uuid for volume */ - u8 volume_name[16]; /* volume name */ u32 feature_compat; u32 feature_incompat; @@ -199,11 +196,6 @@ enum { EROFS_ZIP_CACHE_READAROUND }; -enum erofs_kmap_type { - EROFS_NO_KMAP, /* don't map the buffer */ - EROFS_KMAP, /* use kmap_local_page() to map the buffer */ -}; - struct erofs_buf { struct address_space *mapping; struct file *file; @@ -212,8 +204,8 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) -#define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) +#define erofs_blknr(sb, pos) ((erofs_blk_t)((pos) >> (sb)->s_blocksize_bits)) +#define erofs_blkoff(sb, pos) ((pos) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) @@ -233,6 +225,7 @@ EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) +EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) @@ -252,6 +245,7 @@ struct erofs_inode { unsigned char datalayout; unsigned char inode_isize; + bool dot_omitted; unsigned int xattr_isize; unsigned int xattr_name_filter; @@ -259,7 +253,7 @@ struct erofs_inode { unsigned int *xattr_shared_xattrs; union { - erofs_blk_t raw_blkaddr; + erofs_blk_t startblk; struct { unsigned short chunkformat; unsigned char chunkbits; @@ -268,15 +262,13 @@ struct erofs_inode { struct { unsigned short z_advise; unsigned char z_algorithmtype[2]; - unsigned char z_logical_clusterbits; - unsigned long z_tailextent_headlcn; + unsigned char z_lclusterbits; union { - struct { - erofs_off_t z_idataoff; - unsigned short z_idata_size; - }; - erofs_off_t z_fragmentoff; + u64 z_tailextent_headlcn; + u64 z_extents; }; + erofs_off_t z_fragmentoff; + unsigned short z_idata_size; }; #endif /* CONFIG_EROFS_FS_ZIP */ }; @@ -387,11 +379,10 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); -void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, - enum erofs_kmap_type type); +void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap); void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, - erofs_off_t offset, enum erofs_kmap_type type); + erofs_off_t offset, bool need_kmap); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); @@ -448,6 +439,7 @@ int __init erofs_init_shrinker(void); void erofs_exit_shrinker(void); int 
__init z_erofs_init_subsystem(void); void z_erofs_exit_subsystem(void); +int z_erofs_init_super(struct super_block *sb); unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr_shrink); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, @@ -457,7 +449,6 @@ void z_erofs_put_gbuf(void *ptr); int z_erofs_gbuf_growsize(unsigned int nrpages); int __init z_erofs_gbuf_init(void); void z_erofs_gbuf_exit(void); -int erofs_init_managed_cache(struct super_block *sb); int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} @@ -466,7 +457,7 @@ static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_subsystem(void) { return 0; } static inline void z_erofs_exit_subsystem(void) {} -static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; } +static inline int z_erofs_init_super(struct super_block *sb) { return 0; } #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index c94d0c1608a8..f7cf4f41af28 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -100,7 +100,7 @@ static void *erofs_find_target_block(struct erofs_buf *target, struct erofs_dirent *de; buf.mapping = dir->i_mapping; - de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), EROFS_KMAP); + de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), true); if (!IS_ERR(de)) { const int nameoff = nameoff_from_disk(de->nameoff, bsz); const int ndirents = nameoff / sizeof(*de); diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 827b62665649..cadec6b1b554 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -94,7 +94,7 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, int len, i, cnt; *offset = round_up(*offset, 4); - ptr = erofs_bread(buf, *offset, EROFS_KMAP); + ptr = erofs_bread(buf, *offset, true); if (IS_ERR(ptr)) return ptr; @@ -110,7 +110,7 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, for (i = 0; i < len; i += cnt) { cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset), len - i); - ptr = erofs_bread(buf, *offset, EROFS_KMAP); + ptr = erofs_bread(buf, *offset, true); if (IS_ERR(ptr)) { kfree(buffer); return ptr; @@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, struct erofs_deviceslot *dis; struct file *file; - dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP); + dis = erofs_read_metabuf(buf, sb, *pos, true); if (IS_ERR(dis)) return PTR_ERR(dis); @@ -178,8 +178,8 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, dif->file = file; } - dif->blocks = le32_to_cpu(dis->blocks); - dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); + dif->blocks = le32_to_cpu(dis->blocks_lo); + dif->uniaddr = le32_to_cpu(dis->uniaddr_lo); sbi->total_blocks += dif->blocks; *pos += EROFS_DEVT_SLOT_SIZE; return 0; @@ -255,7 +255,7 @@ static int erofs_read_superblock(struct super_block *sb) void *data; int ret; - data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); + data = erofs_read_metabuf(&buf, sb, 0, true); if (IS_ERR(data)) { erofs_err(sb, "cannot read erofs superblock"); return PTR_ERR(data); @@ -268,7 +268,7 @@ static int erofs_read_superblock(struct super_block *sb) goto out; } - sbi->blkszbits = dsb->blkszbits; + sbi->blkszbits = dsb->blkszbits; if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) { 
erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits); goto out; @@ -299,7 +299,7 @@ static int erofs_read_superblock(struct super_block *sb) sbi->sb_size); goto out; } - sbi->dif0.blocks = le32_to_cpu(dsb->blocks); + sbi->dif0.blocks = le32_to_cpu(dsb->blocks_lo); sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); #ifdef CONFIG_EROFS_FS_XATTR sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); @@ -308,23 +308,20 @@ static int erofs_read_superblock(struct super_block *sb) sbi->xattr_filter_reserved = dsb->xattr_filter_reserved; #endif sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); - sbi->root_nid = le16_to_cpu(dsb->root_nid); + if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) { + sbi->root_nid = le64_to_cpu(dsb->rootnid_8b); + sbi->dif0.blocks = (sbi->dif0.blocks << 32) | + le16_to_cpu(dsb->rb.blocks_hi); + } else { + sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b); + } sbi->packed_nid = le64_to_cpu(dsb->packed_nid); sbi->inos = le64_to_cpu(dsb->inos); - sbi->build_time = le64_to_cpu(dsb->build_time); - sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); - + sbi->epoch = (s64)le64_to_cpu(dsb->epoch); + sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec); super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid)); - ret = strscpy(sbi->volume_name, dsb->volume_name, - sizeof(dsb->volume_name)); - if (ret < 0) { /* -E2BIG */ - erofs_err(sb, "bad volume name without NIL terminator"); - ret = -EFSCORRUPTED; - goto out; - } - /* parse on-disk compression configurations */ ret = z_erofs_parse_cfgs(sb, dsb); if (ret < 0) @@ -333,6 +330,8 @@ static int erofs_read_superblock(struct super_block *sb) /* handle multiple devices */ ret = erofs_scan_devices(sb, dsb); + if (erofs_sb_has_48bit(sbi)) + erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!"); if (erofs_is_fscache_mode(sb)) erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. 
Use at your own risk!"); out: @@ -639,9 +638,16 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) else sb->s_flags &= ~SB_POSIXACL; -#ifdef CONFIG_EROFS_FS_ZIP - xa_init(&sbi->managed_pslots); -#endif + err = z_erofs_init_super(sb); + if (err) + return err; + + if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) { + inode = erofs_iget(sb, sbi->packed_nid); + if (IS_ERR(inode)) + return PTR_ERR(inode); + sbi->packed_inode = inode; + } inode = erofs_iget(sb, sbi->root_nid); if (IS_ERR(inode)) @@ -653,24 +659,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) iput(inode); return -EINVAL; } - sb->s_root = d_make_root(inode); if (!sb->s_root) return -ENOMEM; erofs_shrinker_register(sb); - if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) { - sbi->packed_inode = erofs_iget(sb, sbi->packed_nid); - if (IS_ERR(sbi->packed_inode)) { - err = PTR_ERR(sbi->packed_inode); - sbi->packed_inode = NULL; - return err; - } - } - err = erofs_init_managed_cache(sb); - if (err) - return err; - err = erofs_xattr_prefixes_init(sb); if (err) return err; @@ -806,6 +799,16 @@ static int erofs_init_fs_context(struct fs_context *fc) return 0; } +static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi) +{ + iput(sbi->packed_inode); + sbi->packed_inode = NULL; +#ifdef CONFIG_EROFS_FS_ZIP + iput(sbi->managed_cache); + sbi->managed_cache = NULL; +#endif +} + static void erofs_kill_sb(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); @@ -815,6 +818,7 @@ static void erofs_kill_sb(struct super_block *sb) kill_anon_super(sb); else kill_block_super(sb); + erofs_drop_internal_inodes(sbi); fs_put_dax(sbi->dif0.dax_dev, NULL); erofs_fscache_unregister_fs(sb); erofs_sb_free(sbi); @@ -825,17 +829,10 @@ static void erofs_put_super(struct super_block *sb) { struct erofs_sb_info *const sbi = EROFS_SB(sb); - DBG_BUGON(!sbi); - erofs_unregister_sysfs(sb); erofs_shrinker_unregister(sb); erofs_xattr_prefixes_cleanup(sb); -#ifdef CONFIG_EROFS_FS_ZIP - iput(sbi->managed_cache); - sbi->managed_cache = NULL; -#endif - iput(sbi->packed_inode); - sbi->packed_inode = NULL; + erofs_drop_internal_inodes(sbi); erofs_free_dev_context(sbi->devs); sbi->devs = NULL; erofs_fscache_unregister_fs(sb); diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index 19d586273b70..dad4e6c6c155 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -81,6 +81,7 @@ EROFS_ATTR_FEATURE(sb_chksum); EROFS_ATTR_FEATURE(ztailpacking); EROFS_ATTR_FEATURE(fragments); EROFS_ATTR_FEATURE(dedupe); +EROFS_ATTR_FEATURE(48bit); static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(zero_padding), @@ -93,6 +94,7 @@ static struct attribute *erofs_feat_attrs[] = { ATTR_LIST(ztailpacking), ATTR_LIST(fragments), ATTR_LIST(dedupe), + ATTR_LIST(48bit), NULL, }; ATTRIBUTE_GROUPS(erofs_feat); diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index df2777e05661..9cf84717a92e 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -81,7 +81,7 @@ static int erofs_init_inode_xattrs(struct inode *inode) it.pos = erofs_iloc(inode) + vi->inode_isize; /* read in shared xattr array (non-atomic, see kmalloc below) */ - it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP); + it.kaddr = erofs_bread(&it.buf, it.pos, true); if (IS_ERR(it.kaddr)) { ret = PTR_ERR(it.kaddr); goto out_unlock; @@ -102,7 +102,7 @@ static int erofs_init_inode_xattrs(struct inode *inode) it.pos += sizeof(struct erofs_xattr_ibody_header); for (i = 0; i < vi->xattr_shared_count; ++i) { - it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP); 
+ it.kaddr = erofs_bread(&it.buf, it.pos, true); if (IS_ERR(it.kaddr)) { kfree(vi->xattr_shared_xattrs); vi->xattr_shared_xattrs = NULL; @@ -183,7 +183,7 @@ static int erofs_xattr_copy_to_buffer(struct erofs_xattr_iter *it, void *src; for (processed = 0; processed < len; processed += slice) { - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); + it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); @@ -286,7 +286,7 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it) /* 2. handle xattr name */ for (processed = 0; processed < entry.e_name_len; processed += slice) { - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); + it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); @@ -330,7 +330,7 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it, it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz; while (remaining) { - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); + it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); @@ -367,7 +367,7 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it, for (i = 0; i < vi->xattr_shared_count; ++i) { it->pos = erofs_pos(sb, sbi->xattr_blkaddr) + vi->xattr_shared_xattrs[i] * sizeof(__le32); - it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP); + it->kaddr = erofs_bread(&it->buf, it->pos, true); if (IS_ERR(it->kaddr)) return PTR_ERR(it->kaddr); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index d771e06db738..5c061aaeeb45 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -44,8 +44,8 @@ struct z_erofs_pcluster { /* A: point to next chained pcluster or TAILs */ struct z_erofs_pcluster *next; - /* I: start block address of this pcluster */ - erofs_off_t index; + /* I: start physical position of this pcluster */ + erofs_off_t pos; /* L: the maximum decompression size of this round */ unsigned int length; @@ -73,6 +73,9 @@ struct z_erofs_pcluster { /* I: compression algorithm format */ unsigned char algorithmformat; + /* I: whether compressed data is in-lined or not */ + bool from_meta; + /* L: whether partial decompression or not */ bool partial; @@ -102,14 +105,9 @@ struct z_erofs_decompressqueue { bool eio, sync; }; -static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl) -{ - return !pcl->index; -} - static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) { - return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; + return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT; } static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo) @@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab { static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), - _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) + _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1) }; struct z_erofs_bvec_iter { @@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); if (!pcl) return ERR_PTR(-ENOMEM); - pcl->pclustersize = size; return pcl; } return ERR_PTR(-EINVAL); @@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe) struct z_erofs_pcluster *pcl = fe->pcl; unsigned int pclusterpages = z_erofs_pclusterpages(pcl); bool shouldalloc = z_erofs_should_alloc_cache(fe); + pgoff_t poff = pcl->pos >> PAGE_SHIFT; bool may_bypass = true; /* Optimistic allocation, as in-place 
I/O can be used as a fallback */ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | @@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe) if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; - folio = filemap_get_folio(mc, pcl->index + i); + folio = filemap_get_folio(mc, poff + i); if (IS_ERR(folio)) { may_bypass = false; if (!shouldalloc) @@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, struct folio *folio; int i; - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); + DBG_BUGON(pcl->from_meta); /* Each cached folio contains one page unless bs > ps is supported */ for (i = 0; i < pclusterpages; ++i) { if (pcl->compressed_bvecs[i].page) { @@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) ret = false; spin_lock(&pcl->lockref.lock); if (pcl->lockref.count <= 0) { - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); + DBG_BUGON(pcl->from_meta); for (; bvec < end; ++bvec) { if (bvec->page && page_folio(bvec->page) == folio) { bvec->page = NULL; @@ -644,18 +642,18 @@ static const struct address_space_operations z_erofs_cache_aops = { .invalidate_folio = z_erofs_cache_invalidate_folio, }; -int erofs_init_managed_cache(struct super_block *sb) +int z_erofs_init_super(struct super_block *sb) { struct inode *const inode = new_inode(sb); if (!inode) return -ENOMEM; - set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &z_erofs_cache_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); EROFS_SB(sb)->managed_cache = inode; + xa_init(&EROFS_SB(sb)->managed_pslots); return 0; } @@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe, int ret; if (exclusive) { - /* give priority for inplaceio to use file pages first */ - spin_lock(&pcl->lockref.lock); - while (fe->icur > 0) { - if (pcl->compressed_bvecs[--fe->icur].page) - continue; - pcl->compressed_bvecs[fe->icur] = *bvec; + /* Inplace I/O is limited to one page for uncompressed data */ + if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX || + fe->icur <= 1) { + /* Try to prioritize inplace I/O here */ + spin_lock(&pcl->lockref.lock); + while (fe->icur > 0) { + if (pcl->compressed_bvecs[--fe->icur].page) + continue; + pcl->compressed_bvecs[fe->icur] = *bvec; + spin_unlock(&pcl->lockref.lock); + return 0; + } spin_unlock(&pcl->lockref.lock); - return 0; } - spin_unlock(&pcl->lockref.lock); /* otherwise, check if it can be used as a bvpage */ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && @@ -711,27 +713,25 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - bool ztailpacking = map->m_flags & EROFS_MAP_META; struct z_erofs_pcluster *pcl, *pre; + unsigned int pageofs_in; int err; - if (!(map->m_flags & EROFS_MAP_ENCODED) || - (!ztailpacking && !erofs_blknr(sb, map->m_pa))) { - DBG_BUGON(1); - return -EFSCORRUPTED; - } - - /* no available pcluster, let's allocate one */ - pcl = z_erofs_alloc_pcluster(map->m_plen); + pageofs_in = erofs_blkoff(sb, map->m_pa); + pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen); if (IS_ERR(pcl)) return PTR_ERR(pcl); lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; + pcl->pclustersize = map->m_plen; pcl->length = 0; pcl->partial = true; pcl->next = fe->head; + pcl->pos = map->m_pa; + pcl->pageofs_in = pageofs_in; pcl->pageofs_out = map->m_la & ~PAGE_MASK; + 
pcl->from_meta = map->m_flags & EROFS_MAP_META; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; /* @@ -741,13 +741,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) mutex_init(&pcl->lock); DBG_BUGON(!mutex_trylock(&pcl->lock)); - if (ztailpacking) { - pcl->index = 0; /* which indicates ztailpacking */ - } else { - pcl->index = erofs_blknr(sb, map->m_pa); + if (!pcl->from_meta) { while (1) { xa_lock(&sbi->managed_pslots); - pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index, + pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos, NULL, pcl, GFP_KERNEL); if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) { xa_unlock(&sbi->managed_pslots); @@ -779,7 +776,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) { struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; - erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); struct z_erofs_pcluster *pcl = NULL; int ret; @@ -790,9 +786,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) if (!(map->m_flags & EROFS_MAP_META)) { while (1) { rcu_read_lock(); - pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr); + pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa); if (!pcl || z_erofs_get_pcluster(pcl)) { - DBG_BUGON(pcl && blknr != pcl->index); + DBG_BUGON(pcl && map->m_pa != pcl->pos); rcu_read_unlock(); break; } @@ -826,13 +822,13 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset, Z_EROFS_INLINE_BVECS, fe->pcl->vcnt); - if (!z_erofs_is_inline_pcluster(fe->pcl)) { + if (!fe->pcl->from_meta) { /* bind cache first when cached decompression is preferred */ z_erofs_bind_cache(fe); } else { void *mptr; - mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, EROFS_NO_KMAP); + mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false); if (IS_ERR(mptr)) { ret = PTR_ERR(mptr); erofs_err(sb, "failed to get inline data %d", ret); @@ -871,7 +867,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi, * It's impossible to fail after the pcluster is frozen, but in order * to avoid some race conditions, add a DBG_BUGON to observe this.
*/ - DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl); + DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl); lockref_mark_dead(&pcl->lockref); return true; @@ -967,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio, buf.mapping = packed_inode->i_mapping; for (; cur < end; cur += cnt, pos += cnt) { cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos)); - src = erofs_bread(&buf, pos, EROFS_KMAP); + src = erofs_bread(&buf, pos, true); if (IS_ERR(src)) { erofs_put_metabuf(&buf); return PTR_ERR(src); @@ -1221,7 +1217,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped) } be->compressed_pages[i] = page; - if (z_erofs_is_inline_pcluster(pcl) || + if (pcl->from_meta || erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) { if (!PageUptodate(page)) err = -EIO; @@ -1284,6 +1280,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .sb = be->sb, .in = be->compressed_pages, .out = be->decompressed_pages, + .inpages = pclusterpages, + .outpages = be->nr_pages, .pageofs_in = pcl->pageofs_in, .pageofs_out = pcl->pageofs_out, .inputsize = pcl->pclustersize, @@ -1297,7 +1295,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) }, be->pagepool); /* must handle all compressed pages before actual file pages */ - if (z_erofs_is_inline_pcluster(pcl)) { + if (pcl->from_meta) { page = pcl->compressed_bvecs[0].page; WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL); put_page(page); @@ -1357,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) WRITE_ONCE(pcl->next, NULL); mutex_unlock(&pcl->lock); - if (z_erofs_is_inline_pcluster(pcl)) + if (pcl->from_meta) z_erofs_free_pcluster(pcl); else z_erofs_put_pcluster(sbi, pcl, try_free); @@ -1538,7 +1536,7 @@ out_allocfolio: folio = page_folio(page); out_tocache: if (!tocache || bs != PAGE_SIZE || - filemap_add_folio(mc, folio, pcl->index + nr, gfp)) { + filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) { /* turn into a temporary shortlived folio (1 ref) */ folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; return; @@ -1655,19 +1653,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f, pcl = next; next = READ_ONCE(pcl->next); - if (z_erofs_is_inline_pcluster(pcl)) { + if (pcl->from_meta) { z_erofs_move_to_bypass_queue(pcl, next, qtail); continue; } /* no device id here, thus it will always succeed */ mdev = (struct erofs_map_dev) { - .m_pa = erofs_pos(sb, pcl->index), + .m_pa = round_down(pcl->pos, sb->s_blocksize), }; (void)erofs_map_dev(sb, &mdev); cur = mdev.m_pa; - end = cur + pcl->pclustersize; + end = round_up(cur + pcl->pageofs_in + pcl->pclustersize, + sb->s_blocksize); do { bvec.bv_page = NULL; if (bio && (cur != last_pa || diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 689437e99a5a..14ea47f954f5 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -25,13 +25,13 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); - const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) + + const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize) + lcn * sizeof(struct z_erofs_lcluster_index); struct z_erofs_lcluster_index *di; unsigned int advise; - di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, EROFS_KMAP); + di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true); if 
(IS_ERR(di)) return PTR_ERR(di); m->lcn = lcn; @@ -40,7 +40,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, advise = le16_to_cpu(di->di_advise); m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { - m->clusterofs = 1 << vi->z_logical_clusterbits; + m->clusterofs = 1 << vi->z_lclusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) { if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | @@ -55,7 +55,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); - if (m->clusterofs >= 1 << vi->z_logical_clusterbits) { + if (m->clusterofs >= 1 << vi->z_lclusterbits) { DBG_BUGON(1); return -EFSCORRUPTED; } @@ -102,9 +102,9 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); - const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + - ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); - const unsigned int lclusterbits = vi->z_logical_clusterbits; + const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + + vi->inode_isize + vi->xattr_isize); + const unsigned int lclusterbits = vi->z_lclusterbits; const unsigned int totalidx = erofs_iblks(inode); unsigned int compacted_4b_initial, compacted_2b, amortizedshift; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; @@ -146,7 +146,7 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; - in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, EROFS_KMAP); + in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true); if (IS_ERR(in)) return PTR_ERR(in); @@ -255,7 +255,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, { struct super_block *sb = m->inode->i_sb; struct erofs_inode *const vi = EROFS_I(m->inode); - const unsigned int lclusterbits = vi->z_logical_clusterbits; + const unsigned int lclusterbits = vi->z_lclusterbits; while (m->lcn >= lookback_distance) { unsigned long lcn = m->lcn - lookback_distance; @@ -265,26 +265,22 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, if (err) return err; - switch (m->type) { - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { + erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", + m->type, lcn, vi->nid); + DBG_BUGON(1); + return -EOPNOTSUPP; + } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; if (!lookback_distance) - goto err_bogus; + break; continue; - case Z_EROFS_LCLUSTER_TYPE_PLAIN: - case Z_EROFS_LCLUSTER_TYPE_HEAD1: - case Z_EROFS_LCLUSTER_TYPE_HEAD2: + } else { m->headtype = m->type; m->map->m_la = (lcn << lclusterbits) | m->clusterofs; return 0; - default: - erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu", - m->type, lcn, vi->nid); - DBG_BUGON(1); - return -EOPNOTSUPP; } } -err_bogus: erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); DBG_BUGON(1); @@ -308,7 +304,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || - (lcn << vi->z_logical_clusterbits) >= inode->i_size) + (lcn << vi->z_lclusterbits) >= 
inode->i_size) m->compressedblks = 1; if (m->compressedblks) @@ -329,35 +325,28 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, DBG_BUGON(lcn == initial_lcn && m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); - switch (m->type) { - case Z_EROFS_LCLUSTER_TYPE_PLAIN: - case Z_EROFS_LCLUSTER_TYPE_HEAD1: - case Z_EROFS_LCLUSTER_TYPE_HEAD2: + if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { + if (m->delta[0] != 1) { + erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + if (m->compressedblks) + goto out; + } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type * rather than CBLKCNT, it's a 1 block-sized pcluster. */ m->compressedblks = 1; - break; - case Z_EROFS_LCLUSTER_TYPE_NONHEAD: - if (m->delta[0] != 1) - goto err_bonus_cblkcnt; - if (m->compressedblks) - break; - fallthrough; - default: - erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, - vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; + goto out; } + erofs_err(sb, "cannot find CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; out: m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; -err_bonus_cblkcnt: - erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; } static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) { struct inode *inode = m->inode; struct erofs_inode *vi = EROFS_I(inode); struct erofs_map_blocks *map = m->map; - unsigned int lclusterbits = vi->z_logical_clusterbits; + unsigned int lclusterbits = vi->z_lclusterbits; u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits; int err; @@ -386,9 +375,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) m->delta[1] = 1; DBG_BUGON(1); } - } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN || - m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 || - m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) { + } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) { if (lcn != headlcn) break; /* ends at the next HEAD lcluster */ m->delta[1] = 1; @@ -404,23 +391,32 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) return 0; } -static int z_erofs_do_map_blocks(struct inode *inode, +static int z_erofs_map_blocks_fo(struct inode *inode, struct erofs_map_blocks *map, int flags) { - struct erofs_inode *const vi = EROFS_I(inode); - bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER; + struct erofs_inode *vi = EROFS_I(inode); + struct super_block *sb = inode->i_sb; bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER; + bool ztailpacking = vi->z_idata_size; + unsigned int lclusterbits = vi->z_lclusterbits; struct z_erofs_maprecorder m = { .inode = inode, .map = map, }; int err = 0; - unsigned int lclusterbits, endoff, afmt; + unsigned int endoff, afmt; unsigned long initial_lcn; unsigned long long ofs, end; - lclusterbits = vi->z_logical_clusterbits; ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ?
inode->i_size - 1 : map->m_la; + if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) && + !vi->z_tailextent_headlcn) { + map->m_la = 0; + map->m_llen = inode->i_size; + map->m_flags = EROFS_MAP_MAPPED | + EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + return 0; + } initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); @@ -428,9 +424,8 @@ static int z_erofs_do_map_blocks(struct inode *inode, if (err) goto unmap_out; - if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL)) - vi->z_idataoff = m.nextpackoff; - + if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking) + vi->z_fragmentoff = m.nextpackoff; map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; @@ -452,8 +447,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, } /* m.lcn should be >= 1 if endoff < m.clusterofs */ if (!m.lcn) { - erofs_err(inode->i_sb, - "invalid logical cluster 0 at nid %llu", + erofs_err(sb, "invalid logical cluster 0 at nid %llu", vi->nid); err = -EFSCORRUPTED; goto unmap_out; @@ -469,8 +463,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, goto unmap_out; break; default: - erofs_err(inode->i_sb, - "unknown type %u @ offset %llu of nid %llu", + erofs_err(sb, "unknown type %u @ offset %llu of nid %llu", m.type, ofs, vi->nid); err = -EOPNOTSUPP; goto unmap_out; @@ -487,12 +480,18 @@ static int z_erofs_do_map_blocks(struct inode *inode, } if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_META; - map->m_pa = vi->z_idataoff; + map->m_pa = vi->z_fragmentoff; map->m_plen = vi->z_idata_size; + if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) { + erofs_err(sb, "invalid tail-packing pclustersize %llu", + map->m_plen); + err = -EFSCORRUPTED; + goto unmap_out; + } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { map->m_flags |= EROFS_MAP_FRAGMENT; } else { - map->m_pa = erofs_pos(inode->i_sb, m.pblk); + map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); if (err) goto unmap_out; @@ -511,7 +510,7 @@ static int z_erofs_do_map_blocks(struct inode *inode, afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ? 
vi->z_algorithmtype[1] : vi->z_algorithmtype[0]; if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) { - erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu", + erofs_err(sb, "inconsistent algorithmtype %u for nid %llu", afmt, vi->nid); err = -EFSCORRUPTED; goto unmap_out; @@ -535,6 +534,116 @@ unmap_out: return err; } +static int z_erofs_map_blocks_ext(struct inode *inode, + struct erofs_map_blocks *map, int flags) +{ + struct erofs_inode *vi = EROFS_I(inode); + struct super_block *sb = inode->i_sb; + bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER; + unsigned int recsz = z_erofs_extent_recsize(vi->z_advise); + erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) + + vi->inode_isize + vi->xattr_isize), recsz); + erofs_off_t lend = inode->i_size; + erofs_off_t l, r, mid, pa, la, lstart; + struct z_erofs_extent *ext; + unsigned int fmt; + bool last; + + map->m_flags = 0; + if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) { + if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) { + ext = erofs_read_metabuf(&map->buf, sb, pos, true); + if (IS_ERR(ext)) + return PTR_ERR(ext); + pa = le64_to_cpu(*(__le64 *)ext); + pos += sizeof(__le64); + lstart = 0; + } else { + lstart = round_down(map->m_la, 1 << vi->z_lclusterbits); + pos += (lstart >> vi->z_lclusterbits) * recsz; + pa = EROFS_NULL_ADDR; + } + + for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) { + ext = erofs_read_metabuf(&map->buf, sb, pos, true); + if (IS_ERR(ext)) + return PTR_ERR(ext); + map->m_plen = le32_to_cpu(ext->plen); + if (pa != EROFS_NULL_ADDR) { + map->m_pa = pa; + pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK; + } else { + map->m_pa = le32_to_cpu(ext->pstart_lo); + } + pos += recsz; + } + last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits)); + lend = min(lstart, lend); + lstart -= 1 << vi->z_lclusterbits; + } else { + lstart = lend; + for (l = 0, r = vi->z_extents; l < r; ) { + mid = l + (r - l) / 2; + ext = erofs_read_metabuf(&map->buf, sb, + pos + mid * recsz, true); + if (IS_ERR(ext)) + return PTR_ERR(ext); + + la = le32_to_cpu(ext->lstart_lo); + pa = le32_to_cpu(ext->pstart_lo) | + (u64)le32_to_cpu(ext->pstart_hi) << 32; + if (recsz > offsetof(struct z_erofs_extent, lstart_hi)) + la |= (u64)le32_to_cpu(ext->lstart_hi) << 32; + + if (la > map->m_la) { + r = mid; + lend = la; + } else { + l = mid + 1; + if (map->m_la == la) + r = min(l + 1, r); + lstart = la; + map->m_plen = le32_to_cpu(ext->plen); + map->m_pa = pa; + } + } + last = (l >= vi->z_extents); + } + + if (lstart < lend) { + map->m_la = lstart; + if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { + map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + vi->z_fragmentoff = map->m_plen; + if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) + vi->z_fragmentoff |= map->m_pa << 32; + } else if (map->m_plen) { + map->m_flags |= EROFS_MAP_MAPPED | + EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; + fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; + if (fmt) + map->m_algorithmformat = fmt - 1; + else if (interlaced && !erofs_blkoff(sb, map->m_pa)) + map->m_algorithmformat = + Z_EROFS_COMPRESSION_INTERLACED; + else + map->m_algorithmformat = + Z_EROFS_COMPRESSION_SHIFTED; + if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL) + map->m_flags |= EROFS_MAP_PARTIAL_REF; + map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK; + } + } + map->m_llen = lend - map->m_la; + if (!last && map->m_llen < sb->s_blocksize) { + erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", + 
map->m_llen, map->m_la, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } + return 0; +} + static int z_erofs_fill_inode_lazy(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); @@ -561,7 +670,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_unlock; pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); - h = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP); + h = erofs_read_metabuf(&buf, sb, pos, true); if (IS_ERR(h)) { err = PTR_ERR(h); goto out_unlock; @@ -578,8 +687,20 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto done; } vi->z_advise = le16_to_cpu(h->h_advise); + vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15); + if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && + (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) { + vi->z_extents = le32_to_cpu(h->h_extents_lo) | + ((u64)le16_to_cpu(h->h_extents_hi) << 32); + goto done; + } + vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; + if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) + vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); + else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) + vi->z_idata_size = le16_to_cpu(h->h_idata_size); headnr = 0; if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX || @@ -590,7 +711,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_put_metabuf; } - vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7); if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { @@ -608,34 +728,13 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) goto out_put_metabuf; } - if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) { + if (vi->z_idata_size || + (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { struct erofs_map_blocks map = { .buf = __EROFS_BUF_INITIALIZER }; - vi->z_idata_size = le16_to_cpu(h->h_idata_size); - err = z_erofs_do_map_blocks(inode, &map, - EROFS_GET_BLOCKS_FINDTAIL); - erofs_put_metabuf(&map.buf); - - if (!map.m_plen || - erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) { - erofs_err(sb, "invalid tail-packing pclustersize %llu", - map.m_plen); - err = -EFSCORRUPTED; - } - if (err < 0) - goto out_put_metabuf; - } - - if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER && - !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) { - struct erofs_map_blocks map = { - .buf = __EROFS_BUF_INITIALIZER - }; - - vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff); - err = z_erofs_do_map_blocks(inode, &map, + err = z_erofs_map_blocks_fo(inode, &map, EROFS_GET_BLOCKS_FINDTAIL); erofs_put_metabuf(&map.buf); if (err < 0) @@ -666,15 +765,11 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, } else { err = z_erofs_fill_inode_lazy(inode); if (!err) { - if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) && - !vi->z_tailextent_headlcn) { - map->m_la = 0; - map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; - } else { - err = z_erofs_do_map_blocks(inode, map, flags); - } + if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL && + (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) + err = z_erofs_map_blocks_ext(inode, map, flags); + else + err = z_erofs_map_blocks_fo(inode, map, flags); } if (!err && (map->m_flags & EROFS_MAP_ENCODED) && unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || |
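
The large-extent path added in z_erofs_map_blocks_ext() above binary-searches vi->z_extents fixed-size on-disk records sorted by their logical start offset. As a rough illustration, here is a minimal userspace sketch of that lookup, assuming hypothetical in-memory records in place of the metabuf reads and omitting the lend/last bookkeeping the real code keeps to compute m_llen; the search reduces to finding the last record whose lstart does not exceed the queried offset:

	#include <stdint.h>
	#include <stdio.h>

	/* hypothetical in-memory stand-ins for on-disk struct z_erofs_extent */
	struct ext_rec {
		uint64_t lstart;	/* logical start offset */
		uint64_t pstart;	/* physical start offset */
	};

	/*
	 * Find the last record with lstart <= la, mirroring the l/r/mid loop
	 * in z_erofs_map_blocks_ext(); returns NULL if la precedes recs[0].
	 */
	static const struct ext_rec *find_extent(const struct ext_rec *recs,
						 unsigned int nr, uint64_t la)
	{
		const struct ext_rec *hit = NULL;
		unsigned int l = 0, r = nr, mid;

		while (l < r) {
			mid = l + (r - l) / 2;
			if (recs[mid].lstart > la) {
				r = mid;		/* recs[mid] starts past la; go left */
			} else {
				hit = &recs[mid];	/* candidate; keep searching right */
				l = mid + 1;
			}
		}
		return hit;
	}

	int main(void)
	{
		const struct ext_rec recs[] = {
			{ 0, 4096 }, { 16384, 8192 }, { 65536, 12288 },
		};
		const struct ext_rec *e = find_extent(recs, 3, 20000);

		printf("lstart=%llu pstart=%llu\n",
		       (unsigned long long)e->lstart, (unsigned long long)e->pstart);
		return 0;	/* prints lstart=16384 pstart=8192 */
	}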
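The encoded plen field that z_erofs_map_blocks_ext() unpacks carries three things in one 32-bit value: the algorithm format in the top bits (non-zero stores algorithmformat + 1, while zero means the extent is stored uncompressed, interlaced or shifted depending on Z_EROFS_ADVISE_INTERLACED_PCLUSTER and the block offset), a partial-reference flag, and the physical length in the masked low bits. Below is a minimal standalone sketch of that decoding; the numeric values of the Z_EROFS_EXTENT_PLEN_* constants are placeholders assumed for illustration, since their authoritative definitions live in erofs_fs.h outside this excerpt:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* placeholder bit layout, assumed for illustration only */
	#define Z_EROFS_EXTENT_PLEN_PARTIAL	(1u << 27)
	#define Z_EROFS_EXTENT_PLEN_FMT_BIT	28
	#define Z_EROFS_EXTENT_PLEN_MASK	((1u << 21) - 1)

	struct ext_plen {
		uint32_t plen;		/* physical length in bytes */
		unsigned int fmt;	/* 0 = stored uncompressed */
		bool partial_ref;	/* partially-referenced pcluster */
	};

	static struct ext_plen decode_ext_plen(uint32_t raw)
	{
		struct ext_plen d;

		/* a non-zero fmt encodes (algorithmformat + 1), as in the hunk above */
		d.fmt = raw >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
		d.partial_ref = raw & Z_EROFS_EXTENT_PLEN_PARTIAL;
		d.plen = raw & Z_EROFS_EXTENT_PLEN_MASK;
		return d;
	}

	int main(void)
	{
		/* e.g. a 4096-byte pcluster compressed with algorithm 2 (fmt = 3) */
		struct ext_plen d =
			decode_ext_plen((3u << Z_EROFS_EXTENT_PLEN_FMT_BIT) | 4096);

		printf("fmt=%u partial=%d plen=%u\n", d.fmt, d.partial_ref, d.plen);
		return 0;
	}

Running this prints fmt=3 partial=0 plen=4096; under these assumptions the extent would map back to map->m_algorithmformat = 2 with a 4096-byte pcluster, matching the fmt ? fmt - 1 : ... branch in the hunk above.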