diff options
Diffstat (limited to 'fs')
106 files changed, 927 insertions, 607 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index c2a377cdda2b..83eab52fb3f6 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -38,6 +38,7 @@ config FS_DAX bool "Direct Access (DAX) support" depends on MMU depends on !(ARM || MIPS || SPARC) + select FS_IOMAP help Direct Access (DAX) can be used on memory-backed block devices. If the block device supports DAX and the filesystem supports DAX, @@ -1085,7 +1085,8 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2) * Tell lockdep we inherited freeze protection from submission * thread. */ - __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); + if (S_ISREG(file_inode(file)->i_mode)) + __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); file_end_write(file); } @@ -1525,7 +1526,8 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, * by telling it the lock got released so that it doesn't * complain about held lock when we return to userspace. */ - __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); + if (S_ISREG(file_inode(file)->i_mode)) + __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } kfree(iovec); return ret; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 29a02daf08a9..e7bf01373bc4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1428,17 +1428,18 @@ static void fill_prstatus(struct elf_prstatus *prstatus, * group-wide total, not its individual thread total. */ thread_group_cputime(p, &cputime); - cputime_to_timeval(cputime.utime, &prstatus->pr_utime); - cputime_to_timeval(cputime.stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_timeval(cputime.utime); + prstatus->pr_stime = ns_to_timeval(cputime.stime); } else { - cputime_t utime, stime; + u64 utime, stime; task_cputime(p, &utime, &stime); - cputime_to_timeval(utime, &prstatus->pr_utime); - cputime_to_timeval(stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_timeval(utime); + prstatus->pr_stime = ns_to_timeval(stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + + prstatus->pr_cutime = ns_to_timeval(p->signal->cutime); + prstatus->pr_cstime = ns_to_timeval(p->signal->cstime); } static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, @@ -2298,6 +2299,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } } + dump_truncate(cprm); if (!elf_core_write_extra_data(cprm)) goto end_coredump; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d2e36f82c35d..ffca4bbc3d63 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1349,17 +1349,17 @@ static void fill_prstatus(struct elf_prstatus *prstatus, * group-wide total, not its individual thread total. */ thread_group_cputime(p, &cputime); - cputime_to_timeval(cputime.utime, &prstatus->pr_utime); - cputime_to_timeval(cputime.stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_timeval(cputime.utime); + prstatus->pr_stime = ns_to_timeval(cputime.stime); } else { - cputime_t utime, stime; + u64 utime, stime; task_cputime(p, &utime, &stime); - cputime_to_timeval(utime, &prstatus->pr_utime); - cputime_to_timeval(stime, &prstatus->pr_stime); + prstatus->pr_utime = ns_to_timeval(utime); + prstatus->pr_stime = ns_to_timeval(stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + prstatus->pr_cutime = ns_to_timeval(p->signal->cutime); + prstatus->pr_cstime = ns_to_timeval(p->signal->cstime); prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap; prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap; diff --git a/fs/block_dev.c b/fs/block_dev.c index 5db5d1340d69..3c47614a4b32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,7 +331,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; - bool is_read = (iov_iter_rw(iter) == READ); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; int ret; @@ -344,7 +344,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); - dio->is_sync = is_sync_kiocb(iocb); + dio->is_sync = is_sync = is_sync_kiocb(iocb); if (dio->is_sync) dio->waiter = current; else @@ -398,7 +398,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } blk_finish_plug(&plug); - if (!dio->is_sync) + if (!is_sync) return -EIOCBQUEUED; for (;;) { diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 63d197724519..ff0b0be92d61 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) unsigned long flags; while (1) { + void *wtag; + spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) spin_unlock_irqrestore(lock, flags); /* - * we don't want to call the ordered free functions - * with the lock held though + * We don't want to call the ordered free functions with the + * lock held though. Save the work as tag for the trace event, + * because the callback could free the structure. */ + wtag = work; work->ordered_free(work); - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } spin_unlock_irqrestore(lock, flags); } @@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) static void normal_work_helper(struct btrfs_work *work) { struct __btrfs_workqueue *wq; + void *wtag; int need_order = 0; /* @@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work) if (work->ordered_func) need_order = 1; wq = work->wq; + /* Safe for tracepoints in case work gets freed by the callback */ + wtag = work; trace_btrfs_work_sched(work); thresh_exec_hook(wq); @@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work) run_ordered_work(wq); } if (!need_order) - trace_btrfs_all_work_done(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); } void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7f390849343b..c4444d6f439f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1024,6 +1024,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long buf_offset; unsigned long current_buf_start; unsigned long start_byte; + unsigned long prev_start_byte; unsigned long working_bytes = total_out - buf_start; unsigned long bytes; char *kaddr; @@ -1071,26 +1072,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, if (!bio->bi_iter.bi_size) return 0; bvec = bio_iter_iovec(bio, bio->bi_iter); - + prev_start_byte = start_byte; start_byte = page_offset(bvec.bv_page) - disk_start; /* - * make sure our new page is covered by this - * working buffer + * We need to make sure we're only adjusting + * our offset into compression working buffer when + * we're switching pages. Otherwise we can incorrectly + * keep copying when we were actually done. */ - if (total_out <= start_byte) - return 1; + if (start_byte != prev_start_byte) { + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; - /* - * the next page in the biovec might not be adjacent - * to the last page, but it might still be found - * inside this working buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + buf_offset; + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; + } } } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e97302f437a1..dcd2e798767e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2522,11 +2522,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - btrfs_delayed_ref_unlock(locked_ref); spin_lock(&delayed_refs->lock); locked_ref->processing = 0; delayed_refs->num_heads_ready++; spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; cond_resched(); count++; @@ -2572,7 +2572,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ if (must_insert_reserved) locked_ref->must_insert_reserved = 1; + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); @@ -7384,7 +7387,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, spin_unlock(&cluster->refill_lock); - down_read(&used_bg->data_rwsem); + /* We should only have one-level nested. */ + down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING); spin_lock(&cluster->refill_lock); if (used_bg == cluster->block_group) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f2b281ad7af6..1e861a063721 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3835,10 +3835,7 @@ cache_acl: break; case S_IFDIR: inode->i_fop = &btrfs_dir_file_operations; - if (root == fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; @@ -4505,8 +4502,19 @@ search_again: if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) + if (item_end < new_size) { + /* + * With NO_HOLES mode, for the following mapping + * + * [0-4k][hole][8k-12k] + * + * if truncating isize down to 6k, it ends up + * isize being 8k. + */ + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) + last_size = new_size; break; + } if (found_key.offset >= new_size) del_item = 1; else @@ -5710,6 +5718,7 @@ static struct inode *new_simple_dir(struct super_block *s, inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; + inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = current_time(inode); @@ -7059,7 +7068,7 @@ insert: write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, em); + trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); if (trans) { @@ -7215,7 +7224,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, struct extent_map *em = NULL; int ret; - down_read(&BTRFS_I(inode)->dio_sem); if (type != BTRFS_ORDERED_NOCOW) { em = create_pinned_em(inode, start, len, orig_start, block_start, block_len, orig_block_len, @@ -7234,7 +7242,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, em = ERR_PTR(ret); } out: - up_read(&BTRFS_I(inode)->dio_sem); return em; } @@ -7623,11 +7630,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode, * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (dio_data->outstanding_extents) { + if (dio_data->outstanding_extents >= num_extents) { dio_data->outstanding_extents -= num_extents; } else { + /* + * If dio write length has been split due to no large enough + * contiguous space, we need to compensate our inode counter + * appropriately. + */ + u64 num_needed = num_extents - dio_data->outstanding_extents; + spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents += num_extents; + BTRFS_I(inode)->outstanding_extents += num_needed; spin_unlock(&BTRFS_I(inode)->lock); } } @@ -8685,6 +8699,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.unsubmitted_oe_range_start = (u64)offset; dio_data.unsubmitted_oe_range_end = (u64)offset; current->journal_info = &dio_data; + down_read(&BTRFS_I(inode)->dio_sem); } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8697,6 +8712,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) iter, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { + up_read(&BTRFS_I(inode)->dio_sem); current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { if (dio_data.reserve) @@ -9205,6 +9221,7 @@ static int btrfs_truncate(struct inode *inode) break; } + btrfs_block_rsv_release(fs_info, rsv, -1); ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv, min_size, 0); BUG_ON(ret); /* shouldn't happen */ @@ -10572,8 +10589,6 @@ static const struct inode_operations btrfs_dir_inode_operations = { static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .get_acl = btrfs_get_acl, - .set_acl = btrfs_set_acl, .update_time = btrfs_update_time, }; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 33f967d30b2a..21e51b0ba188 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5653,6 +5653,10 @@ long btrfs_ioctl(struct file *file, unsigned int #ifdef CONFIG_COMPAT long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + /* + * These all access 32-bit values anyway so no further + * handling is necessary. + */ switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; @@ -5663,8 +5667,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; - default: - return -ENOIOCTLCMD; } return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f10bf5213ed8..eeffff84f280 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -37,6 +37,7 @@ */ #define LOG_INODE_ALL 0 #define LOG_INODE_EXISTS 1 +#define LOG_OTHER_INODE 2 /* * directory trouble cases @@ -4641,7 +4642,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (S_ISDIR(inode->i_mode) || (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags) && - inode_only == LOG_INODE_EXISTS)) + inode_only >= LOG_INODE_EXISTS)) max_key.type = BTRFS_XATTR_ITEM_KEY; else max_key.type = (u8)-1; @@ -4665,7 +4666,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return ret; } - mutex_lock(&BTRFS_I(inode)->log_mutex); + if (inode_only == LOG_OTHER_INODE) { + inode_only = LOG_INODE_EXISTS; + mutex_lock_nested(&BTRFS_I(inode)->log_mutex, + SINGLE_DEPTH_NESTING); + } else { + mutex_lock(&BTRFS_I(inode)->log_mutex); + } /* * a brute force approach to making sure we get the most uptodate @@ -4817,7 +4824,7 @@ again: * unpin it. */ err = btrfs_log_inode(trans, root, other_inode, - LOG_INODE_EXISTS, + LOG_OTHER_INODE, 0, LLONG_MAX, ctx); iput(other_inode); if (err) diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 161342b73ce5..726f928238d0 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -352,7 +352,5 @@ skip: out: btrfs_free_path(path); - if (ret) - btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret); - return 0; + return ret; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9cd0c0ea7cdb..e4b066cd912a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -502,9 +502,9 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, dout(" head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); if (truncate_size) - *truncate_size = capsnap->truncate_size; + *truncate_size = ci->i_truncate_size; if (truncate_seq) - *truncate_seq = capsnap->truncate_seq; + *truncate_seq = ci->i_truncate_seq; } spin_unlock(&ci->i_ceph_lock); return snapc; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index baea866a6751..94fd76d04683 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2591,8 +2591,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, add_wait_queue(&ci->i_cap_wq, &wait); while (!try_get_cap_refs(ci, need, want, endoff, - true, &_got, &err)) + true, &_got, &err)) { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } remove_wait_queue(&ci->i_cap_wq, &wait); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d7a93696663b..8ab1fdf0bd49 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1230,7 +1230,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) struct ceph_mds_client *mdsc = ceph_sb_to_client(dir->i_sb)->mdsc; struct ceph_mds_request *req; - int op, mask, err; + int op, err; + u32 mask; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1245,7 +1246,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) mask |= CEPH_CAP_XATTR_SHARED; - req->r_args.getattr.mask = mask; + req->r_args.getattr.mask = cpu_to_le32(mask); err = ceph_mdsc_do_request(mdsc, NULL, req); switch (err) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 398e5328b309..5e659d054b40 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -305,7 +305,8 @@ static int frag_tree_split_cmp(const void *l, const void *r) { struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l; struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r; - return ceph_frag_compare(ls->frag, rs->frag); + return ceph_frag_compare(le32_to_cpu(ls->frag), + le32_to_cpu(rs->frag)); } static bool is_frag_child(u32 f, struct ceph_inode_frag *frag) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4f49253387a0..c9d2e553a6c4 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -288,12 +288,13 @@ static int parse_reply_info_extra(void **p, void *end, struct ceph_mds_reply_info_parsed *info, u64 features) { - if (info->head->op == CEPH_MDS_OP_GETFILELOCK) + u32 op = le32_to_cpu(info->head->op); + + if (op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_READDIR || - info->head->op == CEPH_MDS_OP_LSSNAP) + else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) return parse_reply_info_dir(p, end, info, features); - else if (info->head->op == CEPH_MDS_OP_CREATE) + else if (op == CEPH_MDS_OP_CREATE) return parse_reply_info_create(p, end, info, features); else return -EIO; @@ -2106,6 +2107,11 @@ static int __do_request(struct ceph_mds_client *mdsc, dout("do_request mdsmap err %d\n", err); goto finish; } + if (mdsc->mdsmap->m_epoch == 0) { + dout("do_request no mdsmap, waiting for map\n"); + list_add(&req->r_wait, &mdsc->waiting_for_map); + goto finish; + } if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 8f6a2a5863b9..a27fc8791551 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -285,6 +285,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file) rc = -ENOMEM; goto error_exit; } + spin_lock_init(&cifsFile->file_info_lock); file->private_data = cifsFile; cifsFile->tlink = cifs_get_tlink(tlink); tcon = tlink_tcon(tlink); diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 4d24d17bcfc1..504b3c3539dc 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -51,22 +51,8 @@ #define elf_prstatus compat_elf_prstatus #define elf_prpsinfo compat_elf_prpsinfo -/* - * Compat version of cputime_to_compat_timeval, perhaps this - * should be an inline in <linux/compat.h>. - */ -static void cputime_to_compat_timeval(const cputime_t cputime, - struct compat_timeval *value) -{ - struct timeval tv; - cputime_to_timeval(cputime, &tv); - value->tv_sec = tv.tv_sec; - value->tv_usec = tv.tv_usec; -} - -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval - +#undef ns_to_timeval +#define ns_to_timeval ns_to_compat_timeval /* * To use this file, asm/elf.h must define compat_elf_check_arch. diff --git a/fs/coredump.c b/fs/coredump.c index e525b6017cdf..ae6b05629ca1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -833,3 +833,21 @@ int dump_align(struct coredump_params *cprm, int align) return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); + +/* + * Ensures that file size is big enough to contain the current file + * postion. This prevents gdb from complaining about a truncated file + * if the last "write" to the file was dump_skip. + */ +void dump_truncate(struct coredump_params *cprm) +{ + struct file *file = cprm->file; + loff_t offset; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + offset = file->f_op->llseek(file, 0, SEEK_CUR); + if (i_size_read(file->f_mapping->host) < offset) + do_truncate(file->f_path.dentry, offset, 0, file); + } +} +EXPORT_SYMBOL(dump_truncate); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7bff7b4c7498..fdbb8af32eaf 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -11,7 +11,7 @@ #ifndef _FSCRYPT_PRIVATE_H #define _FSCRYPT_PRIVATE_H -#include <linux/fscrypto.h> +#include <linux/fscrypt_supp.h> #define FS_FNAME_CRYPTO_DIGEST_SIZE 32 @@ -86,7 +86,7 @@ struct fscrypt_completion_result { #define DECLARE_FS_COMPLETION_RESULT(ecr) \ struct fscrypt_completion_result ecr = { \ - COMPLETION_INITIALIZER((ecr).completion), 0 } + COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 } /* crypto.c */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index eeb6fd67ea17..02eb6b9e4438 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -211,7 +211,6 @@ retry: ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS; ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS; memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE); - res = sizeof(ctx); } else if (res != sizeof(ctx)) { return -EINVAL; } @@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pgoff_t index, unsigned long pfn) { struct vm_area_struct *vma; - pte_t *ptep; - pte_t pte; + pte_t pte, *ptep = NULL; + pmd_t *pmdp = NULL; spinlock_t *ptl; bool changed; @@ -707,21 +707,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, address = pgoff_address(index, vma); changed = false; - if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) + if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) continue; - if (pfn != pte_pfn(*ptep)) - goto unlock; - if (!pte_dirty(*ptep) && !pte_write(*ptep)) - goto unlock; - flush_cache_page(vma, address, pfn); - pte = ptep_clear_flush(vma, address, ptep); - pte = pte_wrprotect(pte); - pte = pte_mkclean(pte); - set_pte_at(vma->vm_mm, address, ptep, pte); - changed = true; -unlock: - pte_unmap_unlock(ptep, ptl); + if (pmdp) { +#ifdef CONFIG_FS_DAX_PMD + pmd_t pmd; + + if (pfn != pmd_pfn(*pmdp)) + goto unlock_pmd; + if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) + goto unlock_pmd; + + flush_cache_page(vma, address, pfn); + pmd = pmdp_huge_clear_flush(vma, address, pmdp); + pmd = pmd_wrprotect(pmd); + pmd = pmd_mkclean(pmd); + set_pmd_at(vma->vm_mm, address, pmdp, pmd); + changed = true; +unlock_pmd: + spin_unlock(ptl); +#endif + } else { + if (pfn != pte_pfn(*ptep)) + goto unlock_pte; + if (!pte_dirty(*ptep) && !pte_write(*ptep)) + goto unlock_pte; + + flush_cache_page(vma, address, pfn); + pte = ptep_clear_flush(vma, address, ptep); + pte = pte_wrprotect(pte); + pte = pte_mkclean(pte); + set_pte_at(vma->vm_mm, address, ptep, pte); + changed = true; +unlock_pte: + pte_unmap_unlock(ptep, ptl); + } if (changed) mmu_notifier_invalidate_page(vma->vm_mm, address); @@ -969,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL_GPL(__dax_zero_page_range); -#ifdef CONFIG_FS_IOMAP static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) { return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9); @@ -1011,6 +1031,11 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct blk_dax_ctl dax = { 0 }; ssize_t map_len; + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + dax.sector = dax_iomap_sector(iomap, pos); dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK; map_len = dax_map_atomic(iomap->bdev, &dax); @@ -1411,4 +1436,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); #endif /* CONFIG_FS_DAX_PMD */ -#endif /* CONFIG_FS_IOMAP */ diff --git a/fs/dcache.c b/fs/dcache.c index 769903dbc19d..95d71eda8142 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1336,8 +1336,11 @@ int d_set_mounted(struct dentry *dentry) } spin_lock(&dentry->d_lock); if (!d_unlinked(dentry)) { - dentry->d_flags |= DCACHE_MOUNTED; - ret = 0; + ret = -EBUSY; + if (!d_mountpoint(dentry)) { + dentry->d_flags |= DCACHE_MOUNTED; + ret = 0; + } } spin_unlock(&dentry->d_lock); out: diff --git a/fs/direct-io.c b/fs/direct-io.c index aeae8c063451..c87bae4376b8 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -906,6 +906,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { const unsigned blkbits = sdio->blkbits; + const unsigned i_blkbits = blkbits + sdio->blkfactor; int ret = 0; while (sdio->block_in_file < sdio->final_block_in_request) { @@ -949,7 +950,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, clean_bdev_aliases( map_bh->b_bdev, map_bh->b_blocknr, - map_bh->b_size >> blkbits); + map_bh->b_size >> i_blkbits); } if (!sdio->blkfactor) diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index 5e6a2c0a1f0b..1f7d5e46cdda 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -122,7 +122,7 @@ void exofs_sysfs_dbg_print(void) list_for_each_entry_safe(k_name, k_tmp, &exofs_kset->list, entry) { printk(KERN_INFO "%s: name %s ref %d\n", __func__, kobject_name(k_name), - (int)atomic_read(&k_name->kref.refcount)); + (int)kref_read(&k_name->kref)); } #endif } diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index 36bea5adcaba..c634874e12d9 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig @@ -1,6 +1,5 @@ config EXT2_FS tristate "Second extended fs support" - select FS_IOMAP if FS_DAX help Ext2 is a standard Linux file system for hard disks. diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 7b90691e98c4..e38039fd96ff 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,7 +37,6 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C - select FS_IOMAP if FS_DAX help This is the next generation of the ext3 filesystem. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3a87378b9563..01d52b98f9a7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -32,7 +32,11 @@ #include <linux/percpu_counter.h> #include <linux/ratelimit.h> #include <crypto/hash.h> -#include <linux/fscrypto.h> +#ifdef CONFIG_EXT4_FS_ENCRYPTION +#include <linux/fscrypt_supp.h> +#else +#include <linux/fscrypt_notsupp.h> +#endif #include <linux/falloc.h> #include <linux/percpu-rwsem.h> #ifdef __KERNEL__ @@ -2330,28 +2334,6 @@ static inline int ext4_fname_setup_filename(struct inode *dir, } static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } -#define fscrypt_set_d_op(i) -#define fscrypt_get_ctx fscrypt_notsupp_get_ctx -#define fscrypt_release_ctx fscrypt_notsupp_release_ctx -#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page -#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page -#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages -#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page -#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page -#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range -#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy -#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy -#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context -#define fscrypt_inherit_context fscrypt_notsupp_inherit_context -#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info -#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info -#define fscrypt_setup_filename fscrypt_notsupp_setup_filename -#define fscrypt_free_filename fscrypt_notsupp_free_filename -#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size -#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer -#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer -#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr -#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk #endif /* dir.c */ diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index f8808835a28b..208241b06662 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -24,7 +24,6 @@ #include <linux/slab.h> #include <linux/mm.h> #include <linux/backing-dev.h> -#include <linux/fscrypto.h> #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a673558fe5f8..2e03a0a88d92 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1193,7 +1193,7 @@ static unsigned ext4_max_namelen(struct inode *inode) EXT4_NAME_LEN; } -static struct fscrypt_operations ext4_cryptops = { +static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, .prepare_context = ext4_prepare_context, @@ -1204,7 +1204,7 @@ static struct fscrypt_operations ext4_cryptops = { .max_namelen = ext4_max_namelen, }; #else -static struct fscrypt_operations ext4_cryptops = { +static const struct fscrypt_operations ext4_cryptops = { .is_encrypted = ext4_encrypted_inode, }; #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 93d38d854a41..069fc7277d8d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -22,7 +22,11 @@ #include <linux/vmalloc.h> #include <linux/bio.h> #include <linux/blkdev.h> -#include <linux/fscrypto.h> +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#include <linux/fscrypt_supp.h> +#else +#include <linux/fscrypt_notsupp.h> +#endif #include <crypto/hash.h> #ifdef CONFIG_F2FS_CHECK_FS @@ -2501,28 +2505,4 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -#ifndef CONFIG_F2FS_FS_ENCRYPTION -#define fscrypt_set_d_op(i) -#define fscrypt_get_ctx fscrypt_notsupp_get_ctx -#define fscrypt_release_ctx fscrypt_notsupp_release_ctx -#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page -#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page -#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages -#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page -#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page -#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range -#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy -#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy -#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context -#define fscrypt_inherit_context fscrypt_notsupp_inherit_context -#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info -#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info -#define fscrypt_setup_filename fscrypt_notsupp_setup_filename -#define fscrypt_free_filename fscrypt_notsupp_free_filename -#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size -#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer -#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer -#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr -#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk -#endif #endif diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0738f48293cc..0d8802453758 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -713,8 +713,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } sector = SECTOR_FROM_BLOCK(blkstart); - if (sector & (bdev_zone_size(bdev) - 1) || - nr_sects != bdev_zone_size(bdev)) { + if (sector & (bdev_zone_sectors(bdev) - 1) || + nr_sects != bdev_zone_sectors(bdev)) { f2fs_msg(sbi->sb, KERN_INFO, "(%d) %s: Unaligned discard attempted (block %x + %x)", devi, sbi->s_ndevs ? FDEV(devi).path: "", diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 739192d95e71..a831303bb777 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1170,7 +1170,7 @@ static unsigned f2fs_max_namelen(struct inode *inode) inode->i_sb->s_blocksize : F2FS_NAME_LEN; } -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, @@ -1179,7 +1179,7 @@ static struct fscrypt_operations f2fs_cryptops = { .max_namelen = f2fs_max_namelen, }; #else -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { .is_encrypted = f2fs_encrypted_inode, }; #endif @@ -1541,16 +1541,16 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != - SECTOR_TO_BLOCK(bdev_zone_size(bdev))) + SECTOR_TO_BLOCK(bdev_zone_sectors(bdev))) return -EINVAL; - sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev)); + sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)); if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz != __ilog2_u32(sbi->blocks_per_blkz)) return -EINVAL; sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz); FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >> sbi->log_blocks_per_blkz; - if (nr_sectors & (bdev_zone_size(bdev) - 1)) + if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 4304072161aa..40d61077bead 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (invalidate) set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); } } else { @@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, TASK_UNINTERRUPTIBLE); + /* Make sure any pending writes are cancelled. */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + fscache_invalidate_writes(cookie); + /* Reset the cookie state if it wasn't relinquished */ if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { atomic_inc(&cookie->n_active); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 9b28649df3a1..a8aa00be4444 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 9e792e30f4db..7a182c87f378 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object static const struct fscache_state *fscache_object_available(struct fscache_object *, int); static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); static const struct fscache_state *fscache_update_object(struct fscache_object *, int); +static const struct fscache_state *fscache_object_dead(struct fscache_object *, int); #define __STATE_NAME(n) fscache_osm_##n #define STATE(n) (&__STATE_NAME(n)) @@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); -static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); +static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead); static WAIT_STATE(WAIT_FOR_INIT, "?INI", TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); @@ -229,6 +230,10 @@ execute_work_state: event = -1; if (new_state == NO_TRANSIT) { _debug("{OBJ%x} %s notrans", object->debug_id, state->name); + if (unlikely(state == STATE(OBJECT_DEAD))) { + _leave(" [dead]"); + return; + } fscache_enqueue_object(object); event_mask = object->oob_event_mask; goto unmask_events; @@ -239,7 +244,7 @@ execute_work_state: object->state = state = new_state; if (state->work) { - if (unlikely(state->work == ((void *)2UL))) { + if (unlikely(state == STATE(OBJECT_DEAD))) { _leave(" [dead]"); return; } @@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob fscache_mark_object_dead(object); object->oob_event_mask = 0; + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) { + /* Reject any new read/write ops and abort any that are pending. */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + } + if (list_empty(&object->dependents) && object->n_ops == 0 && object->n_children == 0) @@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object, } } EXPORT_SYMBOL(fscache_object_mark_killed); + +/* + * The object is dead. We can get here if an object gets queued by an event + * that would lead to its death (such as EV_KILL) when the dispatcher is + * already running (and so can be requeued) but hasn't yet cleared the event + * mask. + */ +static const struct fscache_state *fscache_object_dead(struct fscache_object *object, + int event) +{ + if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD, + &object->flags)) + return NO_TRANSIT; + + WARN(true, "FS-Cache object redispatched after death"); + return NO_TRANSIT; +} diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 70ea57c7b6bb..f11792672977 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -399,6 +399,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); + if (test_bit(FR_FINISHED, &req->flags)) { + spin_unlock(&fiq->waitq.lock); + return; + } if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); wake_up_locked(&fiq->waitq); @@ -1372,6 +1376,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, * code can Oops if the buffer persists after module unload. */ bufs[page_nr].ops = &nosteal_pipe_buf_ops; + bufs[page_nr].flags = 0; ret = add_to_pipe(pipe, &bufs[page_nr++]); if (unlikely(ret < 0)) break; @@ -2025,7 +2030,6 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); req->out.h.error = -ECONNABORTED; - clear_bit(FR_PENDING, &req->flags); clear_bit(FR_SENT, &req->flags); list_del_init(&req->list); request_end(fc, req); @@ -2103,6 +2107,8 @@ void fuse_abort_conn(struct fuse_conn *fc) spin_lock(&fiq->waitq.lock); fiq->connected = 0; list_splice_init(&fiq->pending, &to_end2); + list_for_each_entry(req, &to_end2, list) + clear_bit(FR_PENDING, &req->flags); while (forget_pending(fiq)) kfree(dequeue_forget(fiq, 1, NULL)); wake_up_all_locked(&fiq->waitq); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1f7c732f32b0..811fd8929a18 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -68,7 +68,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec) if (sec || nsec) { struct timespec64 ts = { sec, - max_t(u32, nsec, NSEC_PER_SEC - 1) + min_t(u32, nsec, NSEC_PER_SEC - 1) }; return get_jiffies_64() + timespec64_to_jiffies(&ts); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac..052f8d3c41cb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -256,7 +256,7 @@ struct fuse_io_priv { #define FUSE_IO_PRIV_SYNC(f) \ { \ - .refcnt = { ATOMIC_INIT(1) }, \ + .refcnt = KREF_INIT(1), \ .async = 0, \ .file = f, \ } diff --git a/fs/iomap.c b/fs/iomap.c index 354a123f170e..a51cb4c07d4d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -114,6 +114,9 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, BUG_ON(pos + len > iomap->offset + iomap->length); + if (fatal_signal_pending(current)) + return -EINTR; + page = grab_cache_page_write_begin(inode->i_mapping, index, flags); if (!page) return -ENOMEM; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8c514367ba5a..b6b194ec1b4f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -393,7 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); /* * We hold j_checkpoint_mutex so tail cannot change under us. * We don't need any special data guarantees for writing sb diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 85d1483939b2..a1a359bfcc9c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -944,7 +944,7 @@ out: */ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) { - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); if (tid_gt(tid, journal->j_tail_sequence)) __jbd2_update_log_tail(journal, tid, block); mutex_unlock(&journal->j_checkpoint_mutex); @@ -1304,7 +1304,7 @@ static int journal_reset(journal_t *journal) journal->j_flags |= JBD2_FLUSHED; } else { /* Lock here to make assertions happy... */ - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); /* * Update log tail information. We use REQ_FUA since new * transaction will start reusing journal space and so we @@ -1691,7 +1691,7 @@ int jbd2_journal_destroy(journal_t *journal) spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); err = jbd2_log_do_checkpoint(journal); mutex_unlock(&journal->j_checkpoint_mutex); /* @@ -1713,7 +1713,7 @@ int jbd2_journal_destroy(journal_t *journal) if (journal->j_sb_buffer) { if (!is_journal_aborted(journal)) { - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); journal->j_tail_sequence = @@ -1955,7 +1955,7 @@ int jbd2_journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); err = jbd2_log_do_checkpoint(journal); mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); @@ -1965,7 +1965,7 @@ int jbd2_journal_flush(journal_t *journal) if (is_journal_aborted(journal)) return -EIO; - mutex_lock(&journal->j_checkpoint_mutex); + mutex_lock_io(&journal->j_checkpoint_mutex); if (!err) { err = jbd2_cleanup_journal_tail(journal); if (err < 0) { diff --git a/fs/libfs.c b/fs/libfs.c index e973cd51f126..28d6f35feed6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -245,7 +245,8 @@ struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); - s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); + s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, + &init_user_ns, NULL); if (IS_ERR(s)) return ERR_CAST(s); diff --git a/fs/namespace.c b/fs/namespace.c index b5b1259e064f..487ba30bb5c6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -742,26 +742,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) return NULL; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +static struct mountpoint *get_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); - struct mountpoint *mp; + struct mountpoint *mp, *new = NULL; int ret; - mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); - if (!mp) + if (d_mountpoint(dentry)) { +mountpoint: + read_seqlock_excl(&mount_lock); + mp = lookup_mountpoint(dentry); + read_sequnlock_excl(&mount_lock); + if (mp) + goto done; + } + + if (!new) + new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); + if (!new) return ERR_PTR(-ENOMEM); + + /* Exactly one processes may set d_mounted */ ret = d_set_mounted(dentry); - if (ret) { - kfree(mp); - return ERR_PTR(ret); - } - mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); - INIT_HLIST_HEAD(&mp->m_list); + /* Someone else set d_mounted? */ + if (ret == -EBUSY) + goto mountpoint; + + /* The dentry is not available as a mountpoint? */ + mp = ERR_PTR(ret); + if (ret) + goto done; + + /* Add the new mountpoint to the hash table */ + read_seqlock_excl(&mount_lock); + new->m_dentry = dentry; + new->m_count = 1; + hlist_add_head(&new->m_hash, mp_hash(dentry)); + INIT_HLIST_HEAD(&new->m_list); + read_sequnlock_excl(&mount_lock); + + mp = new; + new = NULL; +done: + kfree(new); return mp; } @@ -1595,11 +1619,11 @@ void __detach_mounts(struct dentry *dentry) struct mount *mnt; namespace_lock(); + lock_mount_hash(); mp = lookup_mountpoint(dentry); if (IS_ERR_OR_NULL(mp)) goto out_unlock; - lock_mount_hash(); event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); @@ -1609,9 +1633,9 @@ void __detach_mounts(struct dentry *dentry) } else umount_tree(mnt, UMOUNT_CONNECTED); } - unlock_mount_hash(); put_mountpoint(mp); out_unlock: + unlock_mount_hash(); namespace_unlock(); } @@ -2038,9 +2062,7 @@ retry: namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = lookup_mountpoint(dentry); - if (!mp) - mp = new_mountpoint(dentry); + struct mountpoint *mp = get_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); inode_unlock(dentry->d_inode); @@ -2059,7 +2081,11 @@ retry: static void unlock_mount(struct mountpoint *where) { struct dentry *dentry = where->m_dentry; + + read_seqlock_excl(&mount_lock); put_mountpoint(where); + read_sequnlock_excl(&mount_lock); + namespace_unlock(); inode_unlock(dentry->d_inode); } @@ -3135,9 +3161,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, touch_mnt_namespace(current->nsproxy->mnt_ns); /* A moved mount should not expire automatically */ list_del_init(&new_mnt->mnt_expire); + put_mountpoint(root_mp); unlock_mount_hash(); chroot_fs_refs(&root, &new); - put_mountpoint(root_mp); error = 0; out4: unlock_mount(old_mp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6dcbc5defb7a..0a0eaecf9676 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,7 +38,6 @@ #include <linux/mm.h> #include <linux/delay.h> #include <linux/errno.h> -#include <linux/file.h> #include <linux/string.h> #include <linux/ratelimit.h> #include <linux/printk.h> @@ -1083,7 +1082,8 @@ int nfs4_call_sync(struct rpc_clnt *clnt, return nfs4_call_sync_sequence(clnt, server, msg, args, res); } -static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) +static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, + unsigned long timestamp) { struct nfs_inode *nfsi = NFS_I(dir); @@ -1099,6 +1099,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) NFS_INO_INVALID_ACL; } dir->i_version = cinfo->after; + nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); @@ -2391,11 +2392,13 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); if (o_arg->open_flags & O_CREAT) { - update_changeattr(dir, &o_res->cinfo); if (o_arg->open_flags & O_EXCL) data->file_created = 1; else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = 1; + if (data->file_created || dir->i_version != o_res->cinfo.after) + update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start); } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; @@ -2697,7 +2700,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, sattr->ia_valid |= ATTR_MTIME; /* Except MODE, it seems harmless of setting twice. */ - if ((attrset[1] & FATTR4_WORD1_MODE)) + if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE && + attrset[1] & FATTR4_WORD1_MODE) sattr->ia_valid &= ~ATTR_MODE; if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) @@ -4073,11 +4077,12 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; + unsigned long timestamp = jiffies; int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); if (status == 0) - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, timestamp); return status; } @@ -4125,7 +4130,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(dir, &res->cinfo); + if (task->tk_status == 0) + update_changeattr(dir, &res->cinfo, res->dir_attr->time_start); return 1; } @@ -4159,8 +4165,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(old_dir, &res->old_cinfo); - update_changeattr(new_dir, &res->new_cinfo); + if (task->tk_status == 0) { + update_changeattr(old_dir, &res->old_cinfo, res->old_fattr->time_start); + if (new_dir != old_dir) + update_changeattr(new_dir, &res->new_cinfo, res->new_fattr->time_start); + } return 1; } @@ -4197,7 +4206,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo); + update_changeattr(dir, &res.cinfo, res.fattr->time_start); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) nfs_setsecurity(inode, res.fattr, res.label); @@ -4272,7 +4281,8 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { - update_changeattr(dir, &data->res.dir_cinfo); + update_changeattr(dir, &data->res.dir_cinfo, + data->res.fattr->time_start); status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); } return status; @@ -6127,7 +6137,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -6240,7 +6249,6 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); - fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } @@ -8483,6 +8491,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } + nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1d152f4470cd..daeb94e3acd4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1091,6 +1091,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BADXDR: case -NFS4ERR_RESOURCE: case -NFS4ERR_NOFILEHANDLE: + case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; }; @@ -1729,7 +1730,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) break; case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 59554f3adf29..dd042498ce7c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1200,10 +1200,10 @@ _pnfs_return_layout(struct inode *ino) send = pnfs_prepare_layoutreturn(lo, &stateid, NULL); spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); if (send) status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); out_put_layout_hdr: + pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 596205d939a1..1fc07a9c70e9 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -223,10 +223,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, struct nfs4_layout_stateid *ls; struct nfs4_stid *stp; - stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); + stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache, + nfsd4_free_layout_stateid); if (!stp) return NULL; - stp->sc_free = nfsd4_free_layout_stateid; + get_nfs4_file(fp); stp->sc_file = fp; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b4beaaa4eaa..a0dee8ae9f97 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -633,8 +633,8 @@ out: return co; } -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab) +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)) { struct nfs4_stid *stid; int new_id; @@ -650,6 +650,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, idr_preload_end(); if (new_id < 0) goto out_free; + + stid->sc_free = sc_free; stid->sc_client = cl; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; @@ -675,15 +677,12 @@ out_free: static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; - struct nfs4_ol_stateid *stp; - stid = nfs4_alloc_stid(clp, stateid_slab); + stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid); if (!stid) return NULL; - stp = openlockstateid(stid); - stp->st_stid.sc_free = nfs4_free_ol_stateid; - return stp; + return openlockstateid(stid); } static void nfs4_free_deleg(struct nfs4_stid *stid) @@ -781,11 +780,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) goto out_dec; - dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) goto out_dec; - dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -5580,7 +5578,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; - stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -5623,7 +5620,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, lst = find_lock_stateid(lo, fi); if (lst == NULL) { spin_unlock(&clp->cl_lock); - ns = nfs4_alloc_stid(clp, stateid_slab); + ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid); if (ns == NULL) return NULL; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7ecf16be4a44..8fae53ce21d1 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2440,7 +2440,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - u32 *supp = nfsd_suppattrs[minorversion]; + u32 supp[3]; + + memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp)); if (!IS_POSIXACL(dentry->d_inode)) supp[0] &= ~FATTR4_WORD0_ACL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c9399366f9df..4516e8b7d776 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -603,8 +603,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); -struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, - struct kmem_cache *slab); +struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, + void (*sc_free)(struct nfs4_stid *)); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 27d1242c8383..564c504d6efd 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -349,7 +349,7 @@ static void sc_show_sock_container(struct seq_file *seq, " func key: 0x%08x\n" " func type: %u\n", sc, - atomic_read(&sc->sc_kref.refcount), + kref_read(&sc->sc_kref), &saddr, inet ? ntohs(sport) : 0, &daddr, inet ? ntohs(dport) : 0, sc->sc_node->nd_name, diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d4b5c81f0445..ec000575e863 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -97,7 +97,7 @@ typeof(sc) __sc = (sc); \ mlog(ML_SOCKET, "[sc %p refs %d sock %p node %u page %p " \ "pg_off %zu] " fmt, __sc, \ - atomic_read(&__sc->sc_kref.refcount), __sc->sc_sock, \ + kref_read(&__sc->sc_kref), __sc->sc_sock, \ __sc->sc_node->nd_num, __sc->sc_page, __sc->sc_page_off , \ ##args); \ } while (0) diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e7b760deefae..9b984cae4c4e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -81,7 +81,7 @@ static void __dlm_print_lock(struct dlm_lock *lock) lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount), + kref_read(&lock->lock_refs), (list_empty(&lock->ast_list) ? 'y' : 'n'), (lock->ast_pending ? 'y' : 'n'), (list_empty(&lock->bast_list) ? 'y' : 'n'), @@ -106,7 +106,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) printk("lockres: %s, owner=%u, state=%u\n", buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", - res->last_used, atomic_read(&res->refs.refcount), + res->last_used, kref_read(&res->refs), list_empty(&res->purge) ? "no" : "yes"); printk(" on dirty list: %s, on reco list: %s, " "migrating pending: %s\n", @@ -298,7 +298,7 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) mle_type, mle->master, mle->new_master, !list_empty(&mle->hb_events), !!mle->inuse, - atomic_read(&mle->mle_refs.refcount)); + kref_read(&mle->mle_refs)); out += snprintf(buf + out, len - out, "Maybe="); out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, @@ -494,7 +494,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) lock->ast_pending, lock->bast_pending, lock->convert_pending, lock->lock_pending, lock->cancel_pending, lock->unlock_pending, - atomic_read(&lock->lock_refs.refcount)); + kref_read(&lock->lock_refs)); spin_unlock(&lock->spinlock); return out; @@ -521,7 +521,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) !list_empty(&res->recovering), res->inflight_locks, res->migration_pending, atomic_read(&res->asts_reserved), - atomic_read(&res->refs.refcount)); + kref_read(&res->refs)); /* refmap */ out += snprintf(buf + out, len - out, "RMAP:"); @@ -777,7 +777,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) /* Purge Count: xxx Refs: xxx */ out += snprintf(buf + out, len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); /* Dead Node: xxx */ out += snprintf(buf + out, len - out, diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 733e4e79c8e2..32fd261ae13d 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -2072,7 +2072,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, INIT_LIST_HEAD(&dlm->dlm_eviction_callbacks); mlog(0, "context init: refcount %u\n", - atomic_read(&dlm->dlm_refs.refcount)); + kref_read(&dlm->dlm_refs)); leave: if (ret < 0 && dlm) { diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a464c8088170..7025d8c27999 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -233,7 +233,7 @@ static void __dlm_put_mle(struct dlm_master_list_entry *mle) assert_spin_locked(&dlm->spinlock); assert_spin_locked(&dlm->master_lock); - if (!atomic_read(&mle->mle_refs.refcount)) { + if (!kref_read(&mle->mle_refs)) { /* this may or may not crash, but who cares. * it's a BUG. */ mlog(ML_ERROR, "bad mle: %p\n", mle); @@ -1124,9 +1124,9 @@ recheck: unsigned long timeo = msecs_to_jiffies(DLM_MASTERY_TIMEOUT_MS); /* - if (atomic_read(&mle->mle_refs.refcount) < 2) + if (kref_read(&mle->mle_refs) < 2) mlog(ML_ERROR, "mle (%p) refs=%d, name=%.*s\n", mle, - atomic_read(&mle->mle_refs.refcount), + kref_read(&mle->mle_refs), res->lockname.len, res->lockname.name); */ atomic_set(&mle->woken, 0); @@ -1979,7 +1979,7 @@ ok: * on this mle. */ spin_lock(&dlm->master_lock); - rr = atomic_read(&mle->mle_refs.refcount); + rr = kref_read(&mle->mle_refs); if (mle->inuse > 0) { if (extra_ref && rr < 3) err = 1; diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 1082b2c3014b..63d701cd1e2e 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -251,7 +251,7 @@ leave: mlog(0, "lock %u:%llu should be gone now! refs=%d\n", dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), - atomic_read(&lock->lock_refs.refcount)-1); + kref_read(&lock->lock_refs)-1); dlm_lock_put(lock); } if (actions & DLM_UNLOCK_CALL_AST) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 83d576f6a287..77d1632e905d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name, lockres->l_level, new_level); + /* + * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always + * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that + * we can recover correctly from node failure. Otherwise, we may get + * invalid LVB in LKB, but without DLM_SBF_VALNOTVALIDÂ being set. + */ + if (!ocfs2_is_o2cb_active() && + lockres->l_ops->flags & LOCK_TYPE_USES_LVB) + lvb = 1; + if (lvb) dlm_flags |= DLM_LKF_VALBLK; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 52c07346bea3..820359096c7a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; +inline int ocfs2_is_o2cb_active(void) +{ + return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); +} +EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); + static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index f2dce10fae54..e3036e1790e8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); +/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ +int ocfs2_is_o2cb_active(void); + extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 9ad48d9202a9..023bb0b03352 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -154,29 +154,38 @@ out_err: static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d, struct dentry **ret) { - const char *s = d->name.name; + /* Counting down from the end, since the prefix can change */ + size_t rem = d->name.len - 1; struct dentry *dentry = NULL; int err; - if (*s != '/') + if (d->name.name[0] != '/') return ovl_lookup_single(base, d, d->name.name, d->name.len, 0, "", ret); - while (*s++ == '/' && !IS_ERR_OR_NULL(base) && d_can_lookup(base)) { + while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) { + const char *s = d->name.name + d->name.len - rem; const char *next = strchrnul(s, '/'); - size_t slen = strlen(s); + size_t thislen = next - s; + bool end = !next[0]; - if (WARN_ON(slen > d->name.len) || - WARN_ON(strcmp(d->name.name + d->name.len - slen, s))) + /* Verify we did not go off the rails */ + if (WARN_ON(s[-1] != '/')) return -EIO; - err = ovl_lookup_single(base, d, s, next - s, - d->name.len - slen, next, &base); + err = ovl_lookup_single(base, d, s, thislen, + d->name.len - rem, next, &base); dput(dentry); if (err) return err; dentry = base; - s = next; + if (end) + break; + + rem -= thislen + 1; + + if (WARN_ON(rem >= d->name.len)) + return -EIO; } *ret = dentry; return 0; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 595522022aca..c9d48dc78495 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -922,11 +922,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type) int error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return 0; - if (error == 0) - acl = NULL; + error = posix_acl_update_mode(inode, + &inode->i_mode, &acl); + if (error) + return error; } inode->i_ctime = current_time(inode); diff --git a/fs/proc/array.c b/fs/proc/array.c index 51a4213afa2e..fe12b519d09b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -401,8 +401,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime, utime, stime; - cputime_t cgtime, gtime; + u64 cutime, cstime, utime, stime; + u64 cgtime, gtime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; @@ -497,10 +497,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", cmin_flt); seq_put_decimal_ull(m, " ", maj_flt); seq_put_decimal_ull(m, " ", cmaj_flt); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime)); seq_put_decimal_ll(m, " ", priority); seq_put_decimal_ll(m, " ", nice); seq_put_decimal_ll(m, " ", num_threads); @@ -542,8 +542,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", task->rt_priority); seq_put_decimal_ull(m, " ", task->policy); seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cgtime)); if (mm && permitted) { seq_put_decimal_ull(m, " ", mm->start_data); diff --git a/fs/proc/base.c b/fs/proc/base.c index 8e7e61b28f31..b1f7d30e96c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2179,7 +2179,7 @@ static const struct file_operations proc_map_files_operations = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; @@ -2936,7 +2936,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), @@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[PROC_NUMBUF]; int len; + + cond_resched(); if (!has_pid_permissions(ns, iter.task, 2)) continue; diff --git a/fs/proc/page.c b/fs/proc/page.c index a2066e6dee90..2726536489b1 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -173,7 +173,8 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active); u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); - u |= kpf_copy_bit(k, KPF_SWAPCACHE, PG_swapcache); + if (PageSwapCache(page)) + u |= 1 << KPF_SWAPCACHE; u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 55313d994895..d4e37acd4821 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -709,7 +709,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) ctl_dir = container_of(head, struct ctl_dir, header); if (!dir_emit_dots(file, ctx)) - return 0; + goto out; pos = 2; @@ -719,6 +719,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx) break; } } +out: sysctl_head_finish(head); return 0; } diff --git a/fs/proc/stat.c b/fs/proc/stat.c index d700c42b3572..e47c3e8c4dfe 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -21,9 +21,9 @@ #ifdef arch_idle_time -static cputime64_t get_idle_time(int cpu) +static u64 get_idle_time(int cpu) { - cputime64_t idle; + u64 idle; idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) @@ -31,9 +31,9 @@ static cputime64_t get_idle_time(int cpu) return idle; } -static cputime64_t get_iowait_time(int cpu) +static u64 get_iowait_time(int cpu) { - cputime64_t iowait; + u64 iowait; iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; if (cpu_online(cpu) && nr_iowait_cpu(cpu)) @@ -45,32 +45,32 @@ static cputime64_t get_iowait_time(int cpu) static u64 get_idle_time(int cpu) { - u64 idle, idle_time = -1ULL; + u64 idle, idle_usecs = -1ULL; if (cpu_online(cpu)) - idle_time = get_cpu_idle_time_us(cpu, NULL); + idle_usecs = get_cpu_idle_time_us(cpu, NULL); - if (idle_time == -1ULL) + if (idle_usecs == -1ULL) /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; else - idle = usecs_to_cputime64(idle_time); + idle = idle_usecs * NSEC_PER_USEC; return idle; } static u64 get_iowait_time(int cpu) { - u64 iowait, iowait_time = -1ULL; + u64 iowait, iowait_usecs = -1ULL; if (cpu_online(cpu)) - iowait_time = get_cpu_iowait_time_us(cpu, NULL); + iowait_usecs = get_cpu_iowait_time_us(cpu, NULL); - if (iowait_time == -1ULL) + if (iowait_usecs == -1ULL) /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else - iowait = usecs_to_cputime64(iowait_time); + iowait = iowait_usecs * NSEC_PER_USEC; return iowait; } @@ -115,16 +115,16 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { @@ -140,16 +140,16 @@ static int show_stat(struct seq_file *p, void *v) guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest)); - seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(user)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); + seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); } seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 33de567c25af..7981c4ffe787 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,23 +5,20 @@ #include <linux/seq_file.h> #include <linux/time.h> #include <linux/kernel_stat.h> -#include <linux/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; struct timespec idle; - u64 idletime; u64 nsec; u32 rem; int i; - idletime = 0; + nsec = 0; for_each_possible_cpu(i) - idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; + nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; get_monotonic_boottime(&uptime); - nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; seq_printf(m, "%lu.%02lu %lu.%02lu\n", diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 27c059e1760a..1d887efaaf71 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -280,7 +280,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, 1, id, type, PSTORE_TYPE_PMSG, 0); /* ftrace is last since it may want to dynamically allocate memory. */ - if (!prz_ok(prz)) { + if (!prz_ok(prz) && cxt->fprzs) { if (!(cxt->flags & RAMOOPS_FLAG_FTRACE_PER_CPU)) { prz = ramoops_get_next_prz(cxt->fprzs, &cxt->ftrace_read_cnt, 1, id, type, diff --git a/fs/romfs/super.c b/fs/romfs/super.c index d0f8a38dfafa..0186fe6d39f3 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -74,6 +74,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/uaccess.h> +#include <linux/major.h> #include "internal.h" static struct kmem_cache *romfs_inode_cachep; @@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode) static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + /* When calling huge_encode_dev(), + * use sb->s_bdev->bd_dev when, + * - CONFIG_ROMFS_ON_BLOCK defined + * use sb->s_dev when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD defined + * leave id as 0 when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD undefined + */ + if (sb->s_bdev) + id = huge_encode_dev(sb->s_bdev->bd_dev); + else if (sb->s_dev) + id = huge_encode_dev(sb->s_dev); buf->f_type = ROMFS_MAGIC; buf->f_namelen = ROMFS_MAXFN; @@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY | MS_NOATIME; sb->s_op = &romfs_super_ops; +#ifdef CONFIG_ROMFS_ON_MTD + /* Use same dev ID from the underlying mtdblock device */ + if (sb->s_mtd) + sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); +#endif /* read the image superblock and check it */ rsb = kmalloc(512, GFP_KERNEL); if (!rsb) diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79..4ef78aa8ef61 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -204,6 +204,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, buf->len = spd->partial[page_nr].len; buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; + buf->flags = 0; pipe->nrbufs++; page_nr++; diff --git a/fs/timerfd.c b/fs/timerfd.c index c173cc196175..384fa759a563 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -40,6 +40,7 @@ struct timerfd_ctx { short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; + spinlock_t cancel_lock; bool might_cancel; }; @@ -112,7 +113,7 @@ void timerfd_clock_was_set(void) rcu_read_unlock(); } -static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; @@ -122,6 +123,13 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx) } } +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +{ + spin_lock(&ctx->cancel_lock); + __timerfd_remove_cancel(ctx); + spin_unlock(&ctx->cancel_lock); +} + static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs != KTIME_MAX) @@ -132,6 +140,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { + spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { @@ -141,9 +150,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } - } else if (ctx->might_cancel) { - timerfd_remove_cancel(ctx); + } else { + __timerfd_remove_cancel(ctx); } + spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) @@ -400,6 +410,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) return -ENOMEM; init_waitqueue_head(&ctx->wqh); + spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 0a908ae7af13..b0d0623c83ed 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -53,7 +53,7 @@ config UBIFS_ATIME_SUPPORT config UBIFS_FS_ENCRYPTION bool "UBIFS Encryption" - depends on UBIFS_FS + depends on UBIFS_FS && BLOCK select FS_ENCRYPTION default n help diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 6335abcf98df..382ed428cfd2 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -77,7 +77,7 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, return 0; } -struct fscrypt_operations ubifs_crypt_operations = { +const struct fscrypt_operations ubifs_crypt_operations = { .flags = FS_CFLG_OWN_PAGES, .key_prefix = "ubifs:", .get_context = ubifs_crypt_get_context, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 1c5331ac9614..528369f3e472 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -390,16 +390,6 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, dbg_gen("dent '%pd', mode %#hx in dir ino %lu", dentry, mode, dir->i_ino); - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - - if (!fscrypt_has_encryption_key(dir)) { - return -EPERM; - } - } - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); if (err) return err; @@ -741,17 +731,9 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(inode_is_locked(dir)); ubifs_assert(inode_is_locked(inode)); - if (ubifs_crypt_is_encrypted(dir)) { - if (!fscrypt_has_permitted_context(dir, inode)) - return -EPERM; - - err = fscrypt_get_encryption_info(inode); - if (err) - return err; - - if (!fscrypt_has_encryption_key(inode)) - return -EPERM; - } + if (ubifs_crypt_is_encrypted(dir) && + !fscrypt_has_permitted_context(dir, inode)) + return -EPERM; err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); if (err) @@ -1000,17 +982,6 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) return err; - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - goto out_budg; - - if (!fscrypt_has_encryption_key(dir)) { - err = -EPERM; - goto out_budg; - } - } - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); if (err) goto out_budg; @@ -1096,17 +1067,6 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, return err; } - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - goto out_budg; - - if (!fscrypt_has_encryption_key(dir)) { - err = -EPERM; - goto out_budg; - } - } - err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); if (err) goto out_budg; @@ -1231,18 +1191,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, goto out_inode; } - err = fscrypt_get_encryption_info(inode); - if (err) { - kfree(sd); - goto out_inode; - } - - if (!fscrypt_has_encryption_key(inode)) { - kfree(sd); - err = -EPERM; - goto out_inode; - } - ostr.name = sd->encrypted_path; ostr.len = disk_link.len; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 78d713644df3..da519ba205f6 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -217,6 +217,9 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_SETFLAGS: cmd = FS_IOC_SETFLAGS; break; + case FS_IOC_SET_ENCRYPTION_POLICY: + case FS_IOC_GET_ENCRYPTION_POLICY: + break; default: return -ENOIOCTLCMD; } diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a459211a1c21..294519b98874 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -744,6 +744,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, } else { data->compr_size = 0; + out_len = compr_len; } dlen = UBIFS_DATA_NODE_SZ + out_len; @@ -1319,6 +1320,7 @@ static int truncate_data_node(const struct ubifs_info *c, const struct inode *in dn->compr_type = cpu_to_le16(compr_type); dn->size = cpu_to_le32(*new_len); *new_len = UBIFS_DATA_NODE_SZ + out_len; + err = 0; out: kfree(buf); return err; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index e08aa04fc835..b73811bd7676 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2000,7 +2000,7 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) } #ifndef CONFIG_UBIFS_FS_ENCRYPTION -struct fscrypt_operations ubifs_crypt_operations = { +const struct fscrypt_operations ubifs_crypt_operations = { .is_encrypted = __ubifs_crypt_is_encrypted, }; #endif diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 74ae2de949df..709aa098dd46 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -34,6 +34,11 @@ #include <linux/slab.h> #include "ubifs.h" +static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs); +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_zbranch *zbr, void *node); + /* * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions. * @NAME_LESS: name corresponding to the first argument is less than second @@ -402,7 +407,19 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, return 0; } - err = ubifs_tnc_read_node(c, zbr, node); + if (c->replaying) { + err = fallible_read_node(c, &zbr->key, zbr, node); + /* + * When the node was not found, return -ENOENT, 0 otherwise. + * Negative return codes stay as-is. + */ + if (err == 0) + err = -ENOENT; + else if (err == 1) + err = 0; + } else { + err = ubifs_tnc_read_node(c, zbr, node); + } if (err) return err; @@ -2857,7 +2874,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, if (fname_len(nm) > 0) { if (err) { /* Handle collisions */ - err = resolve_collision(c, key, &znode, &n, nm); + if (c->replaying) + err = fallible_resolve_collision(c, key, &znode, &n, + nm, 0); + else + err = resolve_collision(c, key, &znode, &n, nm); dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); if (unlikely(err < 0)) diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index ca72382ce6cc..f0c86f076535 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -38,7 +38,11 @@ #include <linux/backing-dev.h> #include <linux/security.h> #include <linux/xattr.h> -#include <linux/fscrypto.h> +#ifdef CONFIG_UBIFS_FS_ENCRYPTION +#include <linux/fscrypt_supp.h> +#else +#include <linux/fscrypt_notsupp.h> +#endif #include <linux/random.h> #include "ubifs-media.h" @@ -1797,28 +1801,6 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len, #include "key.h" #ifndef CONFIG_UBIFS_FS_ENCRYPTION -#define fscrypt_set_d_op(i) -#define fscrypt_get_ctx fscrypt_notsupp_get_ctx -#define fscrypt_release_ctx fscrypt_notsupp_release_ctx -#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page -#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page -#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages -#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page -#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page -#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range -#define fscrypt_ioctl_set_policy fscrypt_notsupp_ioctl_set_policy -#define fscrypt_ioctl_get_policy fscrypt_notsupp_ioctl_get_policy -#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context -#define fscrypt_inherit_context fscrypt_notsupp_inherit_context -#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info -#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info -#define fscrypt_setup_filename fscrypt_notsupp_setup_filename -#define fscrypt_free_filename fscrypt_notsupp_free_filename -#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size -#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer -#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer -#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr -#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk static inline int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int in_len, unsigned int *out_len, @@ -1842,7 +1824,7 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, unsigned int *out_len, int block); #endif -extern struct fscrypt_operations ubifs_crypt_operations; +extern const struct fscrypt_operations ubifs_crypt_operations; static inline bool __ubifs_crypt_is_encrypted(struct inode *inode) { diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d96e2f30084b..43953e03c356 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -63,6 +63,7 @@ struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_t wq; struct userfaultfd_ctx *ctx; + bool waken; }; struct userfaultfd_wake_range { @@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode, if (len && (start > uwq->msg.arg.pagefault.address || start + len <= uwq->msg.arg.pagefault.address)) goto out; + WRITE_ONCE(uwq->waken, true); + /* + * The implicit smp_mb__before_spinlock in try_to_wake_up() + * renders uwq->waken visible to other CPUs before the task is + * waken. + */ ret = wake_up_state(wq->private, mode); if (ret) /* @@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) struct userfaultfd_wait_queue uwq; int ret; bool must_wait, return_to_userland; + long blocking_state; BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); @@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) uwq.wq.private = current; uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); uwq.ctx = ctx; + uwq.waken = false; return_to_userland = (vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); + blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : + TASK_KILLABLE; spin_lock(&ctx->fault_pending_wqh.lock); /* @@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * following the spin_unlock to happen before the list_add in * __add_wait_queue. */ - set_current_state(return_to_userland ? TASK_INTERRUPTIBLE : - TASK_KILLABLE); + set_current_state(blocking_state); spin_unlock(&ctx->fault_pending_wqh.lock); must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) wake_up_poll(&ctx->fd_wqh, POLLIN); schedule(); ret |= VM_FAULT_MAJOR; + + /* + * False wakeups can orginate even from rwsem before + * up_read() however userfaults will wait either for a + * targeted wakeup on the specific uwq waitqueue from + * wake_userfault() or for signals or for uffd + * release. + */ + while (!READ_ONCE(uwq.waken)) { + /* + * This needs the full smp_store_mb() + * guarantee as the state write must be + * visible to other CPUs before reading + * uwq.waken from other CPUs. + */ + set_current_state(blocking_state); + if (READ_ONCE(uwq.waken) || + READ_ONCE(ctx->released) || + (return_to_userland ? signal_pending(current) : + fatal_signal_pending(current))) + break; + schedule(); + } } __set_current_state(TASK_RUNNING); diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index d346d42c54d1..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -39,6 +39,7 @@ #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" +#include "xfs_ialloc_btree.h" /* * Per-AG Block Reservations @@ -200,22 +201,30 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; + xfs_extlen_t reserved; - resv = xfs_perag_resv(pag, type); if (used > ask) ask = used; - resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = ask - used; - mp->m_ag_max_usable -= ask; + reserved = ask - used; - trace_xfs_ag_resv_init(pag, type, ask); - - error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); - if (error) + error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); + xfs_warn(mp, +"Per-AG reservation for AG %u failed. Filesystem may run out of space.", + pag->pag_agno); + return error; + } - return error; + mp->m_ag_max_usable -= ask; + + resv = xfs_perag_resv(pag, type); + resv->ar_asked = ask; + resv->ar_reserved = resv->ar_orig_reserved = reserved; + + trace_xfs_ag_resv_init(pag, type, ask); + return 0; } /* Create a per-AG block reservation. */ @@ -223,6 +232,8 @@ int xfs_ag_resv_init( struct xfs_perag *pag) { + struct xfs_mount *mp = pag->pag_mount; + xfs_agnumber_t agno = pag->pag_agno; xfs_extlen_t ask; xfs_extlen_t used; int error = 0; @@ -231,23 +242,45 @@ xfs_ag_resv_init( if (pag->pag_meta_resv.ar_asked == 0) { ask = used = 0; - error = xfs_refcountbt_calc_reserves(pag->pag_mount, - pag->pag_agno, &ask, &used); + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; - error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, - ask, used); + error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) { + /* + * Because we didn't have per-AG reservations when the + * finobt feature was added we might not be able to + * reserve all needed blocks. Warn and fall back to the + * old and potentially buggy code in that case, but + * ensure we do have the reservation for the refcountbt. + */ + ask = used = 0; + + mp->m_inotbt_nores = true; + + error = xfs_refcountbt_calc_reserves(mp, agno, &ask, + &used); + if (error) + goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, + ask, used); + if (error) + goto out; + } } /* Create the AGFL metadata reservation */ if (pag->pag_agfl_resv.ar_asked == 0) { ask = used = 0; - error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, - &ask, &used); + error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); if (error) goto out; @@ -256,9 +289,16 @@ xfs_ag_resv_init( goto out; } +#ifdef DEBUG + /* need to read in the AGF for the ASSERT below to work */ + error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); + if (error) + return error; + ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount); +#endif out: return error; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 5050056a0b06..9f06a211e157 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -95,10 +95,7 @@ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - unsigned int blocks; - - blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); - return blocks; + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); } /* @@ -365,36 +362,12 @@ xfs_alloc_fix_len( return; ASSERT(rlen >= args->minlen && rlen <= args->maxlen); ASSERT(rlen % args->prod == args->mod); + ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= + rlen + args->minleft); args->len = rlen; } /* - * Fix up length if there is too little space left in the a.g. - * Return 1 if ok, 0 if too little, should give up. - */ -STATIC int -xfs_alloc_fix_minleft( - xfs_alloc_arg_t *args) /* allocation argument structure */ -{ - xfs_agf_t *agf; /* a.g. freelist header */ - int diff; /* free space difference */ - - if (args->minleft == 0) - return 1; - agf = XFS_BUF_TO_AGF(args->agbp); - diff = be32_to_cpu(agf->agf_freeblks) - - args->len - args->minleft; - if (diff >= 0) - return 1; - args->len += diff; /* shrink the allocated space */ - /* casts to (int) catch length underflows */ - if ((int)args->len >= (int)args->minlen) - return 1; - args->agbno = NULLAGBLOCK; - return 0; -} - -/* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the * actual (current) free extent fbno for flen blocks. @@ -689,8 +662,6 @@ xfs_alloc_ag_vextent( xfs_alloc_arg_t *args) /* argument structure for allocation */ { int error=0; - xfs_extlen_t reservation; - xfs_extlen_t oldmax; ASSERT(args->minlen > 0); ASSERT(args->maxlen > 0); @@ -699,20 +670,6 @@ xfs_alloc_ag_vextent( ASSERT(args->alignment > 0); /* - * Clamp maxlen to the amount of free space minus any reservations - * that have been made. - */ - oldmax = args->maxlen; - reservation = xfs_ag_resv_needed(args->pag, args->resv); - if (args->maxlen > args->pag->pagf_freeblks - reservation) - args->maxlen = args->pag->pagf_freeblks - reservation; - if (args->maxlen == 0) { - args->agbno = NULLAGBLOCK; - args->maxlen = oldmax; - return 0; - } - - /* * Branch to correct routine based on the type. */ args->wasfromfl = 0; @@ -731,8 +688,6 @@ xfs_alloc_ag_vextent( /* NOTREACHED */ } - args->maxlen = oldmax; - if (error || args->agbno == NULLAGBLOCK) return error; @@ -841,9 +796,6 @@ xfs_alloc_ag_vextent_exact( args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto not_found; - ASSERT(args->agbno + args->len <= tend); /* @@ -1149,12 +1101,7 @@ restart: XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_near_nominleft(args); - return 0; - } - blen = args->len; + /* * We are allocating starting at bnew for blen blocks. */ @@ -1346,12 +1293,6 @@ restart: */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - trace_xfs_alloc_near_nominleft(args); - xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - return 0; - } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, args->datatype, ltbnoa, ltlena, <new); @@ -1553,8 +1494,6 @@ restart: } xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) - goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); /* @@ -2056,7 +1995,7 @@ xfs_alloc_space_available( int flags) { struct xfs_perag *pag = args->pag; - xfs_extlen_t longest; + xfs_extlen_t alloc_len, longest; xfs_extlen_t reservation; /* blocks that are still reserved */ int available; @@ -2066,17 +2005,28 @@ xfs_alloc_space_available( reservation = xfs_ag_resv_needed(pag, args->resv); /* do we have enough contiguous free space for the allocation? */ + alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, reservation); - if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) + if (longest < alloc_len) return false; /* do we have enough free space remaining for the allocation? */ available = (int)(pag->pagf_freeblks + pag->pagf_flcount - - reservation - min_free - args->total); - if (available < (int)args->minleft || available <= 0) + reservation - min_free - args->minleft); + if (available < (int)max(args->total, alloc_len)) return false; + /* + * Clamp maxlen to the amount of free space available for the actual + * extent allocation. + */ + if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { + args->maxlen = available; + ASSERT(args->maxlen > 0); + ASSERT(args->maxlen >= args->minlen); + } + return true; } @@ -2122,7 +2072,8 @@ xfs_alloc_fix_freelist( } need = xfs_alloc_min_freelist(mp, pag); - if (!xfs_alloc_space_available(args, need, flags)) + if (!xfs_alloc_space_available(args, need, flags | + XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; /* @@ -2638,12 +2589,10 @@ xfs_alloc_vextent( xfs_agblock_t agsize; /* allocation group size */ int error; int flags; /* XFS_ALLOC_FLAG_... locking flags */ - xfs_extlen_t minleft;/* minimum left value, temp copy */ xfs_mount_t *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ int bump_rotor = 0; - int no_min = 0; xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ mp = args->mp; @@ -2672,7 +2621,6 @@ xfs_alloc_vextent( trace_xfs_alloc_vextent_badargs(args); return 0; } - minleft = args->minleft; switch (type) { case XFS_ALLOCTYPE_THIS_AG: @@ -2683,9 +2631,7 @@ xfs_alloc_vextent( */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->pag = xfs_perag_get(mp, args->agno); - args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2750,9 +2696,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); - args->minleft = minleft; if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -2792,20 +2736,17 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (no_min == 1) { + if (flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - if (flags == 0) { - no_min = 1; - } else { - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } + + flags = 0; + if (type == XFS_ALLOCTYPE_START_BNO) { + args->agbno = XFS_FSB_TO_AGBNO(mp, + args->fsbno); + args->type = XFS_ALLOCTYPE_NEAR_BNO; } } xfs_perag_put(args->pag); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7c404a6b0ae3..1d0f48a501a3 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -56,7 +56,7 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ - +#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ /* * Argument structure for xfs_alloc routines. diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -131,9 +131,6 @@ xfs_attr_get( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (!xfs_inode_hasattr(ip)) - return -ENOATTR; - error = xfs_attr_args_init(&args, ip, name, flags); if (error) return error; @@ -392,9 +389,6 @@ xfs_attr_remove( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; - if (!xfs_inode_hasattr(dp)) - return -ENOATTR; - error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 2760bc3b2536..bfc00de5c6f1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3629,7 +3629,7 @@ xfs_bmap_btalloc( align = xfs_get_cowextsz_hint(ap->ip); else if (xfs_alloc_is_userdata(ap->datatype)) align = xfs_get_extsz_hint(ap->ip); - if (unlikely(align)) { + if (align) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, &ap->offset, &ap->length); @@ -3701,7 +3701,7 @@ xfs_bmap_btalloc( args.minlen = ap->minlen; } /* apply extent size hints if obtained earlier */ - if (unlikely(align)) { + if (align) { args.prod = align; if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) args.mod = (xfs_extlen_t)(args.prod - args.mod); @@ -3812,7 +3812,6 @@ xfs_bmap_btalloc( args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; args.total = ap->minlen; - args.minleft = 0; if ((error = xfs_alloc_vextent(&args))) return error; ap->dfops->dop_low = true; @@ -4344,8 +4343,6 @@ xfs_bmapi_allocate( if (error) return error; - if (bma->dfops->dop_low) - bma->minleft = 0; if (bma->cur) bma->cur->bc_private.b.firstblock = *bma->firstblock; if (bma->blkno == NULLFSBLOCK) @@ -4517,8 +4514,6 @@ xfs_bmapi_write( int n; /* current extent index */ xfs_fileoff_t obno; /* old block number (offset) */ int whichfork; /* data or attr fork */ - char inhole; /* current location is hole in file */ - char wasdelay; /* old extent was delayed */ #ifdef DEBUG xfs_fileoff_t orig_bno; /* original block number value */ @@ -4606,22 +4601,44 @@ xfs_bmapi_write( bma.firstblock = firstblock; while (bno < end && n < *nmap) { - inhole = eof || bma.got.br_startoff > bno; - wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); + bool need_alloc = false, wasdelay = false; - /* - * Make sure we only reflink into a hole. - */ - if (flags & XFS_BMAPI_REMAP) - ASSERT(inhole); - if (flags & XFS_BMAPI_COWFORK) - ASSERT(!inhole); + /* in hole or beyoned EOF? */ + if (eof || bma.got.br_startoff > bno) { + if (flags & XFS_BMAPI_DELALLOC) { + /* + * For the COW fork we can reasonably get a + * request for converting an extent that races + * with other threads already having converted + * part of it, as there converting COW to + * regular blocks is not protected using the + * IOLOCK. + */ + ASSERT(flags & XFS_BMAPI_COWFORK); + if (!(flags & XFS_BMAPI_COWFORK)) { + error = -EIO; + goto error0; + } + + if (eof || bno >= end) + break; + } else { + need_alloc = true; + } + } else { + /* + * Make sure we only reflink into a hole. + */ + ASSERT(!(flags & XFS_BMAPI_REMAP)); + if (isnullstartblock(bma.got.br_startblock)) + wasdelay = true; + } /* * First, deal with the hole before the allocated space * that we found, if any. */ - if (inhole || wasdelay) { + if (need_alloc || wasdelay) { bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cecd094404cc..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -110,6 +110,9 @@ struct xfs_extent_free_item /* Map something in the CoW fork. */ #define XFS_BMAPI_COWFORK 0x200 +/* Only convert delalloc space, don't allocate entirely new extents */ +#define XFS_BMAPI_DELALLOC 0x400 + #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ @@ -120,7 +123,8 @@ struct xfs_extent_free_item { XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_ZERO, "ZERO" }, \ { XFS_BMAPI_REMAP, "REMAP" }, \ - { XFS_BMAPI_COWFORK, "COWFORK" } + { XFS_BMAPI_COWFORK, "COWFORK" }, \ + { XFS_BMAPI_DELALLOC, "DELALLOC" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index d6330c297ca0..d9be241fc86f 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -502,12 +502,11 @@ try_another_ag: if (args.fsbno == NULLFSBLOCK && args.minleft) { /* * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if + * a full btree split. Try again and if * successful activate the lowspace algorithm. */ args.fsbno = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; error = xfs_alloc_vextent(&args); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index c58d72c220f5..2f389d366e93 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -36,21 +36,29 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; /* - * @mode, if set, indicates that the type field needs to be set up. - * This uses the transformation from file mode to DT_* as defined in linux/fs.h - * for file type specification. This will be propagated into the directory - * structure if appropriate for the given operation and filesystem config. + * Convert inode mode to directory entry filetype */ -const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { - [0] = XFS_DIR3_FT_UNKNOWN, - [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, - [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -}; +unsigned char xfs_mode_to_ftype(int mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + return XFS_DIR3_FT_REG_FILE; + case S_IFDIR: + return XFS_DIR3_FT_DIR; + case S_IFCHR: + return XFS_DIR3_FT_CHRDEV; + case S_IFBLK: + return XFS_DIR3_FT_BLKDEV; + case S_IFIFO: + return XFS_DIR3_FT_FIFO; + case S_IFSOCK: + return XFS_DIR3_FT_SOCK; + case S_IFLNK: + return XFS_DIR3_FT_SYMLINK; + default: + return XFS_DIR3_FT_UNKNOWN; + } +} /* * ASCII case-insensitive (ie. A-Z) support for directories that was @@ -631,7 +639,8 @@ xfs_dir2_isblock( if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) return rval; rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; - ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize); + if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) + return -EFSCORRUPTED; *vp = rval; return 0; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 0197590fa7d7..d6e6d9d16f6c 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -18,6 +18,9 @@ #ifndef __XFS_DIR2_H__ #define __XFS_DIR2_H__ +#include "xfs_da_format.h" +#include "xfs_da_btree.h" + struct xfs_defer_ops; struct xfs_da_args; struct xfs_inode; @@ -32,10 +35,9 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* - * directory filetype conversion tables. + * Convert inode mode to directory entry filetype */ -#define S_SHIFT 12 -extern const unsigned char xfs_mode_to_ftype[]; +extern unsigned char xfs_mode_to_ftype(int mode); /* * directory operations vector for encode/decode routines diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0fd086d03d41..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -82,11 +82,12 @@ xfs_finobt_set_root( } STATIC int -xfs_inobt_alloc_block( +__xfs_inobt_alloc_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *start, union xfs_btree_ptr *new, - int *stat) + int *stat, + enum xfs_ag_resv_type resv) { xfs_alloc_arg_t args; /* block allocation args */ int error; /* error return value */ @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( args.maxlen = 1; args.prod = 1; args.type = XFS_ALLOCTYPE_NEAR_BNO; + args.resv = resv; error = xfs_alloc_vextent(&args); if (error) { @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( } STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); +} + +STATIC int +xfs_finobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + return __xfs_inobt_alloc_block(cur, start, new, stat, + XFS_AG_RESV_METADATA); +} + +STATIC int xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_finobt_set_root, - .alloc_block = xfs_inobt_alloc_block, + .alloc_block = xfs_finobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, @@ -480,3 +503,64 @@ xfs_inobt_rec_check_count( return 0; } #endif /* DEBUG */ + +static xfs_extlen_t +xfs_inobt_max_size( + struct xfs_mount *mp) +{ + /* Bail out if we're uninitialized, which can happen in mkfs. */ + if (mp->m_inobt_mxr[0] == 0) + return 0; + + return xfs_btree_calc_size(mp, mp->m_inobt_mnr, + (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / + XFS_INODES_PER_CHUNK); +} + +static int +xfs_inobt_count_blocks( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_btnum_t btnum, + xfs_extlen_t *tree_blocks) +{ + struct xfs_buf *agbp; + struct xfs_btree_cur *cur; + int error; + + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); + error = xfs_btree_count_blocks(cur, tree_blocks); + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); + + return error; +} + +/* + * Figure out how many blocks to reserve and how many are used by this btree. + */ +int +xfs_finobt_calc_reserves( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + xfs_extlen_t tree_len = 0; + int error; + + if (!xfs_sb_version_hasfinobt(&mp->m_sb)) + return 0; + + error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); + if (error) + return error; + + *ask += xfs_inobt_max_size(mp); + *used += tree_len; + return 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ +int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_extlen_t *ask, xfs_extlen_t *used); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index dd483e2767f7..d93f9d918cfc 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -29,6 +29,7 @@ #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" +#include "xfs_dir2.h" /* * Check that none of the inode's in the buffer have a next @@ -386,6 +387,7 @@ xfs_dinode_verify( xfs_ino_t ino, struct xfs_dinode *dip) { + uint16_t mode; uint16_t flags; uint64_t flags2; @@ -396,8 +398,12 @@ xfs_dinode_verify( if (be64_to_cpu(dip->di_size) & (1ULL << 63)) return false; - /* No zero-length symlinks. */ - if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) + mode = be16_to_cpu(dip->di_mode); + if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) + return false; + + /* No zero-length symlinks/dirs. */ + if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0) return false; /* only version 3 or greater inodes are extensively verified here */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -242,7 +242,7 @@ xfs_mount_validate_sb( sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || - sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0f56fcd3a5d5..631e7c0e0a29 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1152,19 +1152,22 @@ xfs_vm_releasepage( * block_invalidatepage() can send pages that are still marked dirty * but otherwise have invalidated buffers. * - * We've historically freed buffers on the latter. Instead, quietly - * filter out all dirty pages to avoid spurious buffer state warnings. - * This can likely be removed once shrink_active_list() is fixed. + * We want to release the latter to avoid unnecessary buildup of the + * LRU, skip the former and warn if we've left any lingering + * delalloc/unwritten buffers on clean pages. Skip pages with delalloc + * or unwritten buffers and warn if the page is not dirty. Otherwise + * try to release the buffers. */ - if (PageDirty(page)) - return 0; - xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON_ONCE(delalloc)) + if (delalloc) { + WARN_ON_ONCE(!PageDirty(page)); return 0; - if (WARN_ON_ONCE(unwritten)) + } + if (unwritten) { + WARN_ON_ONCE(!PageDirty(page)); return 0; + } return try_to_free_buffers(page); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index b9abce524c33..c1417919ab0a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -528,7 +528,6 @@ xfs_getbmap( xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ - int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ @@ -686,10 +685,8 @@ xfs_getbmap( goto out_free_map; } - nexleft = nex; - do { - nmap = (nexleft > subnex) ? subnex : nexleft; + nmap = (nex> subnex) ? subnex : nex; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); @@ -697,8 +694,8 @@ xfs_getbmap( goto out_free_map; ASSERT(nmap <= subnex); - for (i = 0; i < nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count; i++) { + for (i = 0; i < nmap && bmv->bmv_length && + cur_ext < bmv->bmv_count - 1; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; @@ -760,16 +757,27 @@ xfs_getbmap( continue; } + /* + * In order to report shared extents accurately, + * we report each distinct shared/unshared part + * of a single bmbt record using multiple bmap + * extents. To make that happen, we iterate the + * same map array item multiple times, each + * time trimming out the subextent that we just + * reported. + * + * Because of this, we must check the out array + * index (cur_ext) directly against bmv_count-1 + * to avoid overflows. + */ if (inject_map.br_startblock != NULLFSBLOCK) { map[i] = inject_map; i--; - } else - nexleft--; + } bmv->bmv_entries++; cur_ext++; } - } while (nmap && nexleft && bmv->bmv_length && - cur_ext < bmv->bmv_count); + } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); out_free_map: kmem_free(map); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..ac3b4db519df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -422,6 +422,7 @@ retry: out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7a30b8f11db7..9d06cc30e875 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -710,6 +710,10 @@ xfs_dq_get_next_id( /* Simple advance */ next_id = *id + 1; + /* If we'd wrap past the max ID, stop */ + if (next_id < *id) + return -ENOENT; + /* If new ID is within the current chunk, advancing it sufficed */ if (next_id % mp->m_quotainfo->qi_dqperchunk) { *id = next_id; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9557795eb74..de32f0fe47c8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1792,22 +1792,23 @@ xfs_inactive_ifree( int error; /* - * The ifree transaction might need to allocate blocks for record - * insertion to the finobt. We don't want to fail here at ENOSPC, so - * allow ifree to dip into the reserved block pool if necessary. - * - * Freeing large sets of inodes generally means freeing inode chunks, - * directory and file data blocks, so this should be relatively safe. - * Only under severe circumstances should it be possible to free enough - * inodes to exhaust the reserve block pool via finobt expansion while - * at the same time not creating free space in the filesystem. + * We try to use a per-AG reservation for any block needed by the finobt + * tree, but as the finobt feature predates the per-AG reservation + * support a degraded file system might not have enough space for the + * reservation at mount time. In that case try to dip into the reserved + * pool and pray. * * Send a warning if the reservation does happen to fail, as the inode * now remains allocated and sits on the unlinked list until the fs is * repaired. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, - XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); + if (unlikely(mp->m_inotbt_nores)) { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, + XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, + &tp); + } else { + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); + } if (error) { if (error == -ENOSPC) { xfs_warn_ratelimited(mp, diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0d147428971e..1aa3abd67b36 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -681,7 +681,7 @@ xfs_iomap_write_allocate( xfs_trans_t *tp; int nimaps; int error = 0; - int flags = 0; + int flags = XFS_BMAPI_DELALLOC; int nres; if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 308bebb6dfd2..22c16155f1b4 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -98,12 +98,27 @@ xfs_init_security( static void xfs_dentry_to_name( struct xfs_name *namep, + struct dentry *dentry) +{ + namep->name = dentry->d_name.name; + namep->len = dentry->d_name.len; + namep->type = XFS_DIR3_FT_UNKNOWN; +} + +static int +xfs_dentry_mode_to_name( + struct xfs_name *namep, struct dentry *dentry, int mode) { namep->name = dentry->d_name.name; namep->len = dentry->d_name.len; - namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT]; + namep->type = xfs_mode_to_ftype(mode); + + if (unlikely(namep->type == XFS_DIR3_FT_UNKNOWN)) + return -EFSCORRUPTED; + + return 0; } STATIC void @@ -119,7 +134,7 @@ xfs_cleanup_inode( * xfs_init_security we must back out. * ENOSPC can hit here, among other things. */ - xfs_dentry_to_name(&teardown, dentry, 0); + xfs_dentry_to_name(&teardown, dentry); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); } @@ -154,8 +169,12 @@ xfs_generic_create( if (error) return error; + /* Verify mode is valid also for tmpfile case */ + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out_free_acl; + if (!tmpfile) { - xfs_dentry_to_name(&name, dentry, mode); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); } else { error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); @@ -248,7 +267,7 @@ xfs_vn_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -275,7 +294,7 @@ xfs_vn_ci_lookup( if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&xname, dentry, 0); + xfs_dentry_to_name(&xname, dentry); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { if (unlikely(error != -ENOENT)) @@ -310,7 +329,9 @@ xfs_vn_link( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, inode->i_mode); + error = xfs_dentry_mode_to_name(&name, dentry, inode->i_mode); + if (unlikely(error)) + return error; error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) @@ -329,7 +350,7 @@ xfs_vn_unlink( struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry, 0); + xfs_dentry_to_name(&name, dentry); error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry))); if (error) @@ -359,7 +380,9 @@ xfs_vn_symlink( mode = S_IFLNK | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry, mode); + error = xfs_dentry_mode_to_name(&name, dentry, mode); + if (unlikely(error)) + goto out; error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) @@ -395,6 +418,7 @@ xfs_vn_rename( { struct inode *new_inode = d_inode(ndentry); int omode = 0; + int error; struct xfs_name oname; struct xfs_name nname; @@ -405,8 +429,14 @@ xfs_vn_rename( if (flags & RENAME_EXCHANGE) omode = d_inode(ndentry)->i_mode; - xfs_dentry_to_name(&oname, odentry, omode); - xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode); + error = xfs_dentry_mode_to_name(&oname, odentry, omode); + if (omode && unlikely(error)) + return error; + + error = xfs_dentry_mode_to_name(&nname, ndentry, + d_inode(odentry)->i_mode); + if (unlikely(error)) + return error; return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index e467218c0098..7a989de224f4 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -331,11 +331,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) } #define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifdef DEBUG #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC noinline @@ -346,7 +346,7 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) #ifdef XFS_WARN #define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) + (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC static noinline diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index c39ac14ff540..b1469f0a91a6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3317,12 +3317,8 @@ xfs_log_force( xfs_mount_t *mp, uint flags) { - int error; - trace_xfs_log_force(mp, 0, _RET_IP_); - error = _xfs_log_force(mp, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force(mp, flags, NULL); } /* @@ -3466,12 +3462,8 @@ xfs_log_force_lsn( xfs_lsn_t lsn, uint flags) { - int error; - trace_xfs_log_force(mp, lsn, _RET_IP_); - error = _xfs_log_force_lsn(mp, lsn, flags, NULL); - if (error) - xfs_warn(mp, "%s: error %d returned.", __func__, error); + _xfs_log_force_lsn(mp, lsn, flags, NULL); } /* diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 84f785218907..7f351f706b7a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -140,6 +140,7 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint m_dmevmask; /* DMI events for this FS */ __uint64_t m_flags; /* global mount flags */ + bool m_inotbt_nores; /* no per-AG finobt resv. */ int m_ialloc_inos; /* inodes in inode allocation */ int m_ialloc_blks; /* blocks in inode allocation */ int m_ialloc_min_blks;/* min blocks in sparse inode diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, + &ip); if (error) { *res = BULKSTAT_RV_NOTHING; return error; |