diff options
Diffstat (limited to 'fs')
1123 files changed, 18148 insertions, 18583 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 11045d8e356a..ac2ec4543fe1 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000)" depends on INET && NET_9P diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 9eb34701a566..995e332eee5c 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -1,23 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * V9FS cache definitions. * * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/jiffies.h> diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 247e47e54bcc..00f107af443e 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -1,23 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS cache definitions. * * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #ifndef _9P_CACHE_H diff --git a/fs/9p/fid.c b/fs/9p/fid.c index a9ef46f02354..3d681a2c2731 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -1,24 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * V9FS FID Management * * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2005, 2006 by Eric Van Hensbergen <ericvh@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 4491bcaf42b8..928b1093f511 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -1,23 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS FID Management * * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #ifndef FS_9P_FID_H #define FS_9P_FID_H diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 619128b55837..15a99f9c7253 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/v9fs.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 129e5243a6bf..7b763776306e 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -1,24 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS definitions. * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #ifndef FS_9P_V9FS_H #define FS_9P_V9FS_H diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 60cd4ba04afc..fd2a2b040250 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -1,24 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS VFS extensions. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #ifndef FS_9P_V9FS_VFS_H #define FS_9P_V9FS_VFS_H diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 0bcbcc20f769..cce9ace651a2 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_addr.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> @@ -50,8 +35,9 @@ * @page: structure to page * */ -static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) +static int v9fs_fid_readpage(void *data, struct page *page) { + struct p9_fid *fid = data; struct inode *inode = page->mapping->host; struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; struct iov_iter to; @@ -122,7 +108,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) return ret; - ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); + ret = read_cache_pages(mapping, pages, v9fs_fid_readpage, + filp->private_data); p9_debug(P9_DEBUG_VFS, " = %d\n", ret); return ret; } diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index bd456c668d39..7d6f69aefd45 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_dentry.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 00745147329d..674d22bf4f6f 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_dir.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 9a1125305d84..4cc966a31cb3 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_file.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 24050e866e64..b82423a72f68 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_inode.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a950a927a626..60328b21c5fb 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_inode_dotl.c * @@ -5,22 +6,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/module.h> diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 67d1b965adcd..08112fbcaece 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/9p/vfs_super.c * @@ -6,22 +7,6 @@ * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include <linux/kernel.h> diff --git a/fs/Kconfig b/fs/Kconfig index cbbffc8b9ef5..bfb1c6095c7a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # File system configuration # @@ -10,7 +11,6 @@ config DCACHE_WORD_ACCESS config VALIDATE_FS_PARSER bool "Validate filesystem parameter description" - default y help Enable this to perform validation of the parameter description for a filesystem when it is registered. diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index b795f8da81f3..62dc4f577ba1 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only menu "Executable file formats" @@ -90,12 +91,28 @@ config BINFMT_SCRIPT Most systems will not boot if you say M or N here. If unsure, say Y. +config ARCH_HAS_BINFMT_FLAT + bool + config BINFMT_FLAT bool "Kernel support for flat binaries" - depends on !MMU || ARM || M68K + depends on ARCH_HAS_BINFMT_FLAT help Support uClinux FLAT format binaries. +config BINFMT_FLAT_ARGVP_ENVP_ON_STACK + bool + +config BINFMT_FLAT_OLD_ALWAYS_RAM + bool + +config BINFMT_FLAT_OLD + bool "Enable support for very old legacy flat binaries" + depends on BINFMT_FLAT + help + Support decade old uClinux FLAT format binaries. Unless you know + you have some of those say N here. + config BINFMT_ZFLAT bool "Enable ZFLAT support" depends on BINFMT_FLAT diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig index c5a7787dd5e9..df4650dccf68 100644 --- a/fs/adfs/Kconfig +++ b/fs/adfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config ADFS_FS tristate "ADFS file system support" depends on BLOCK diff --git a/fs/adfs/Makefile b/fs/adfs/Makefile index 9b2d71a9a35c..cf7de6ece659 100644 --- a/fs/adfs/Makefile +++ b/fs/adfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux adfs filesystem routines. # diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index c76db75f02aa..804c6a77c5db 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -113,19 +113,6 @@ struct object_info { __u16 filetype; }; -/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */ -static inline int append_filetype_suffix(char *buf, __u16 filetype) -{ - if (filetype == 0xffff) /* no explicit 12-bit file type was set */ - return 0; - - *buf++ = ','; - *buf++ = hex_asc_lo(filetype >> 8); - *buf++ = hex_asc_lo(filetype >> 4); - *buf++ = hex_asc_lo(filetype >> 0); - return 4; -} - struct adfs_dir_ops { int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir); int (*setpos)(struct adfs_dir *dir, unsigned int fpos); @@ -172,6 +159,7 @@ extern const struct dentry_operations adfs_dentry_operations; extern const struct adfs_dir_ops adfs_f_dir_ops; extern const struct adfs_dir_ops adfs_fplus_dir_ops; +void adfs_object_fixup(struct adfs_dir *dir, struct object_info *obj); extern int adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait); diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index e18eff854e1a..35a4d9f4c3ae 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir.c * * Copyright (C) 1999-2000 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Common directory handling for ADFS */ #include "adfs.h" @@ -16,6 +13,50 @@ */ static DEFINE_RWLOCK(adfs_dir_lock); +void adfs_object_fixup(struct adfs_dir *dir, struct object_info *obj) +{ + unsigned int dots, i; + + /* + * RISC OS allows the use of '/' in directory entry names, so we need + * to fix these up. '/' is typically used for FAT compatibility to + * represent '.', so do the same conversion here. In any case, '.' + * will never be in a RISC OS name since it is used as the pathname + * separator. Handle the case where we may generate a '.' or '..' + * name, replacing the first character with '^' (the RISC OS "parent + * directory" character.) + */ + for (i = dots = 0; i < obj->name_len; i++) + if (obj->name[i] == '/') { + obj->name[i] = '.'; + dots++; + } + + if (obj->name_len <= 2 && dots == obj->name_len) + obj->name[0] = '^'; + + obj->filetype = -1; + + /* + * object is a file and is filetyped and timestamped? + * RISC OS 12-bit filetype is stored in load_address[19:8] + */ + if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && + (0xfff00000 == (0xfff00000 & obj->loadaddr))) { + obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); + + /* optionally append the ,xyz hex filetype suffix */ + if (ADFS_SB(dir->sb)->s_ftsuffix) { + __u16 filetype = obj->filetype; + + obj->name[obj->name_len++] = ','; + obj->name[obj->name_len++] = hex_asc_lo(filetype >> 8); + obj->name[obj->name_len++] = hex_asc_lo(filetype >> 4); + obj->name[obj->name_len++] = hex_asc_lo(filetype >> 0); + } + } +} + static int adfs_readdir(struct file *file, struct dir_context *ctx) { @@ -100,37 +141,36 @@ out: return ret; } -static int -adfs_match(const struct qstr *name, struct object_info *obj) +static unsigned char adfs_tolower(unsigned char c) { - int i; - - if (name->len != obj->name_len) - return 0; + if (c >= 'A' && c <= 'Z') + c += 'a' - 'A'; + return c; +} - for (i = 0; i < name->len; i++) { - char c1, c2; +static int __adfs_compare(const unsigned char *qstr, u32 qlen, + const char *str, u32 len) +{ + u32 i; - c1 = name->name[i]; - c2 = obj->name[i]; + if (qlen != len) + return 1; - if (c1 >= 'A' && c1 <= 'Z') - c1 += 'a' - 'A'; - if (c2 >= 'A' && c2 <= 'Z') - c2 += 'a' - 'A'; + for (i = 0; i < qlen; i++) + if (adfs_tolower(qstr[i]) != adfs_tolower(str[i])) + return 1; - if (c1 != c2) - return 0; - } - return 1; + return 0; } -static int -adfs_dir_lookup_byname(struct inode *inode, const struct qstr *name, struct object_info *obj) +static int adfs_dir_lookup_byname(struct inode *inode, const struct qstr *qstr, + struct object_info *obj) { struct super_block *sb = inode->i_sb; const struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; + const unsigned char *name; struct adfs_dir dir; + u32 name_len; int ret; ret = ops->read(sb, inode->i_ino, inode->i_size, &dir); @@ -153,8 +193,10 @@ adfs_dir_lookup_byname(struct inode *inode, const struct qstr *name, struct obje goto unlock_out; ret = -ENOENT; + name = qstr->name; + name_len = qstr->len; while (ops->getnext(&dir, obj) == 0) { - if (adfs_match(name, obj)) { + if (!__adfs_compare(name, name_len, obj->name, obj->name_len)) { ret = 0; break; } @@ -179,30 +221,18 @@ const struct file_operations adfs_dir_operations = { static int adfs_hash(const struct dentry *parent, struct qstr *qstr) { - const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen; const unsigned char *name; unsigned long hash; - int i; + u32 len; - if (qstr->len < name_len) - return 0; + if (qstr->len > ADFS_SB(parent->d_sb)->s_namelen) + return -ENAMETOOLONG; - /* - * Truncate the name in place, avoids - * having to define a compare function. - */ - qstr->len = i = name_len; + len = qstr->len; name = qstr->name; hash = init_name_hash(parent); - while (i--) { - char c; - - c = *name++; - if (c >= 'A' && c <= 'Z') - c += 'a' - 'A'; - - hash = partial_name_hash(c, hash); - } + while (len--) + hash = partial_name_hash(adfs_tolower(*name++), hash); qstr->hash = end_name_hash(hash); return 0; @@ -212,30 +242,10 @@ adfs_hash(const struct dentry *parent, struct qstr *qstr) * Compare two names, taking note of the name length * requirements of the underlying filesystem. */ -static int -adfs_compare(const struct dentry *dentry, - unsigned int len, const char *str, const struct qstr *name) +static int adfs_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *qstr) { - int i; - - if (len != name->len) - return 1; - - for (i = 0; i < name->len; i++) { - char a, b; - - a = str[i]; - b = name->name[i]; - - if (a >= 'A' && a <= 'Z') - a += 'a' - 'A'; - if (b >= 'A' && b <= 'Z') - b += 'a' - 'A'; - - if (a != b) - return 1; - } - return 0; + return __adfs_compare(qstr->name, qstr->len, str, len); } const struct dentry_operations adfs_dentry_operations = { diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 382c9d7ad375..7557378e58b3 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_f.c * * Copyright (C) 1997-1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * E and F format directory handling */ #include <linux/buffer_head.h> @@ -47,21 +44,6 @@ static inline void adfs_writeval(unsigned char *p, int len, unsigned int val) } } -static inline int adfs_readname(char *buf, char *ptr, int maxlen) -{ - char *old_buf = buf; - - while ((unsigned char)*ptr >= ' ' && maxlen--) { - if (*ptr == '/') - *buf++ = '.'; - else - *buf++ = *ptr; - ptr++; - } - - return buf - old_buf; -} - #define ror13(v) ((v >> 13) | (v << 19)) #define dir_u8(idx) \ @@ -216,29 +198,23 @@ static inline void adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj, struct adfs_direntry *de) { - obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN); + unsigned int name_len; + + for (name_len = 0; name_len < ADFS_F_NAME_LEN; name_len++) { + if (de->dirobname[name_len] < ' ') + break; + + obj->name[name_len] = de->dirobname[name_len]; + } + + obj->name_len = name_len; obj->file_id = adfs_readval(de->dirinddiscadd, 3); obj->loadaddr = adfs_readval(de->dirload, 4); obj->execaddr = adfs_readval(de->direxec, 4); obj->size = adfs_readval(de->dirlen, 4); obj->attr = de->newdiratts; - obj->filetype = -1; - /* - * object is a file and is filetyped and timestamped? - * RISC OS 12-bit filetype is stored in load_address[19:8] - */ - if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && - (0xfff00000 == (0xfff00000 & obj->loadaddr))) { - obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); - - /* optionally append the ,xyz hex filetype suffix */ - if (ADFS_SB(dir->sb)->s_ftsuffix) - obj->name_len += - append_filetype_suffix( - &obj->name[obj->name_len], - obj->filetype); - } + adfs_object_fixup(dir, obj); } /* diff --git a/fs/adfs/dir_f.h b/fs/adfs/dir_f.h index e4713404096c..5aec332b90f5 100644 --- a/fs/adfs/dir_f.h +++ b/fs/adfs/dir_f.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_f.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Structures of directories on the F format disk */ #ifndef ADFS_DIR_F_H diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index c92cfb638c18..6c5fbb0259c9 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/dir_fplus.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <linux/slab.h> @@ -169,7 +166,7 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data; struct adfs_bigdirentry bde; unsigned int offset; - int i, ret = -ENOENT; + int ret = -ENOENT; if (dir->pos >= le32_to_cpu(h->bigdirentries)) goto out; @@ -193,27 +190,7 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) offset += le32_to_cpu(bde.bigdirobnameptr); dir_memcpy(dir, offset, obj->name, obj->name_len); - for (i = 0; i < obj->name_len; i++) - if (obj->name[i] == '/') - obj->name[i] = '.'; - - obj->filetype = -1; - - /* - * object is a file and is filetyped and timestamped? - * RISC OS 12-bit filetype is stored in load_address[19:8] - */ - if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && - (0xfff00000 == (0xfff00000 & obj->loadaddr))) { - obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); - - /* optionally append the ,xyz hex filetype suffix */ - if (ADFS_SB(dir->sb)->s_ftsuffix) - obj->name_len += - append_filetype_suffix( - &obj->name[obj->name_len], - obj->filetype); - } + adfs_object_fixup(dir, obj); dir->pos += 1; ret = 0; diff --git a/fs/adfs/dir_fplus.h b/fs/adfs/dir_fplus.h index b55aa41a68fe..4ec0931e36ad 100644 --- a/fs/adfs/dir_fplus.h +++ b/fs/adfs/dir_fplus.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/adfs/dir_fplus.h * * Copyright (C) 1999 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Structures of directories on the F+ format disk */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 66621e96f9af..904d624541ad 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/inode.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <linux/writeback.h> diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 6935f05202ac..4d34338c6176 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/map.c * * Copyright (C) 1997-2002 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/buffer_head.h> #include <asm/unaligned.h> diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 2a83655c408f..ffb669f9bba7 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/adfs/super.c * * Copyright (C) 1997-1999 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/module.h> #include <linux/init.h> diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig index a04d9e848d05..84c46b9025c5 100644 --- a/fs/affs/Kconfig +++ b/fs/affs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config AFFS_FS tristate "Amiga FFS file system support" depends on BLOCK diff --git a/fs/affs/Makefile b/fs/affs/Makefile index 3988b4a78339..f2c811429a4e 100644 --- a/fs/affs/Makefile +++ b/fs/affs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux affs filesystem routines. # diff --git a/fs/affs/super.c b/fs/affs/super.c index 7370228eefb2..e7d036efbaa1 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/affs/inode.c * diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index 701aaa9b1899..3fb1f559e317 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config AFS_FS tristate "Andrew File System support (AFS)" depends on INET diff --git a/fs/afs/Makefile b/fs/afs/Makefile index cbf31f6cd177..10359bea7070 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -29,7 +29,6 @@ kafs-y := \ server.o \ server_list.o \ super.o \ - netdevices.o \ vlclient.o \ vl_list.o \ vl_probe.o \ diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 9eaff55df7b4..df415c05939e 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Server address list management * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> @@ -250,8 +246,8 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry _enter("%s", cell->name); - ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1", - &result, _expiry, true); + ret = dns_query(cell->net->net, "afsdb", cell->name, cell->name_len, + "srv=1", &result, _expiry, true); if (ret < 0) { _leave(" = %d [dns]", ret); return ERR_PTR(ret); diff --git a/fs/afs/afs.h b/fs/afs/afs.h index 3f4e460c6655..b6d49d646ade 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* AFS common types * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef AFS_H diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h index 255f5dd6040c..565cbe0a8af6 100644 --- a/fs/afs/afs_cm.h +++ b/fs/afs/afs_cm.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* AFS Cache Manager definitions * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef AFS_CM_H diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h index 18a54ca422f8..20ab344baf9d 100644 --- a/fs/afs/afs_fs.h +++ b/fs/afs/afs_fs.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* AFS File Service definitions * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef AFS_FS_H diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index e3c4688f573b..e9b8029920ec 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* AFS Volume Location Service client interface * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #ifndef AFS_VL_H diff --git a/fs/afs/cache.c b/fs/afs/cache.c index f6d0a21e8052..037af93e3aba 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS caching stuff * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/sched.h> diff --git a/fs/afs/callback.c b/fs/afs/callback.c index d441bef72163..6cdd7047c809 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -48,7 +48,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server, refcount_set(&new->usage, 1); new->sb = vnode->vfs_inode.i_sb; new->vid = vnode->volume->vid; - new->server = afs_get_server(server); + new->server = afs_get_server(server, afs_server_trace_get_new_cbi); INIT_HLIST_NODE(&new->cb_vlink); write_lock(&server->cb_break_lock); @@ -195,7 +195,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) write_unlock(&cbi->server->cb_break_lock); if (vi) kfree_rcu(vi, rcu); - afs_put_server(net, cbi->server); + afs_put_server(net, cbi->server, afs_server_trace_put_cbi); } kfree_rcu(cbi, rcu); } @@ -212,7 +212,7 @@ void afs_init_callback_state(struct afs_server *server) /* * actually break a callback */ -void __afs_break_callback(struct afs_vnode *vnode) +void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) { _enter(""); @@ -223,13 +223,17 @@ void __afs_break_callback(struct afs_vnode *vnode) if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); + + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true); + } else { + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false); } } -void afs_break_callback(struct afs_vnode *vnode) +void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason) { write_seqlock(&vnode->cb_lock); - __afs_break_callback(vnode); + __afs_break_callback(vnode, reason); write_sequnlock(&vnode->cb_lock); } @@ -275,9 +279,11 @@ static void afs_break_one_callback(struct afs_server *server, struct afs_super_info *as = AFS_FS_S(cbi->sb); struct afs_volume *volume = as->volume; - write_lock(&volume->cb_break_lock); + write_lock(&volume->cb_v_break_lock); volume->cb_v_break++; - write_unlock(&volume->cb_break_lock); + trace_afs_cb_break(fid, volume->cb_v_break, + afs_cb_break_for_volume_callback, false); + write_unlock(&volume->cb_v_break_lock); } else { data.volume = NULL; data.fid = *fid; @@ -285,8 +291,10 @@ static void afs_break_one_callback(struct afs_server *server, afs_iget5_test, &data); if (inode) { vnode = AFS_FS_I(inode); - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_callback); iput(inode); + } else { + trace_afs_cb_miss(fid, afs_cb_break_for_callback); } } } diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 9c3b07ba2222..a2a87117d262 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS cell and server record management * * Copyright (C) 2002, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/slab.h> diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 01437cfe5432..4f1b6f466ff5 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS Cache Manager Service * * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> @@ -260,8 +256,11 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * server holds up change visibility till it receives our reply so as * to maintain cache coherency. */ - if (call->server) + if (call->server) { + trace_afs_server(call->server, atomic_read(&call->server->usage), + afs_server_trace_callback); afs_break_callbacks(call->server, call->count, call->request); + } afs_send_empty_reply(call); afs_put_call(call); @@ -584,9 +583,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) */ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) { - struct afs_interface *ifs; struct afs_call *call = container_of(work, struct afs_call, work); - int loop, nifs; + int loop; struct { struct /* InterfaceAddr */ { @@ -604,19 +602,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) _enter(""); - nifs = 0; - ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL); - if (ifs) { - nifs = afs_get_ipv4_interfaces(call->net, ifs, 32, false); - if (nifs < 0) { - kfree(ifs); - ifs = NULL; - nifs = 0; - } - } - memset(&reply, 0, sizeof(reply)); - reply.ia.nifs = htonl(nifs); reply.ia.uuid[0] = call->net->uuid.time_low; reply.ia.uuid[1] = htonl(ntohs(call->net->uuid.time_mid)); @@ -626,15 +612,6 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) for (loop = 0; loop < 6; loop++) reply.ia.uuid[loop + 5] = htonl((s8) call->net->uuid.node[loop]); - if (ifs) { - for (loop = 0; loop < nifs; loop++) { - reply.ia.ifaddr[loop] = ifs[loop].address.s_addr; - reply.ia.netmask[loop] = ifs[loop].netmask.s_addr; - reply.ia.mtu[loop] = htonl(ifs[loop].mtu); - } - kfree(ifs); - } - reply.cap.capcount = htonl(1); reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION); afs_send_simple_reply(call, &reply, sizeof(reply)); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 79d93a26759a..e640d67274be 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* dir.c: AFS filesystem directory handling * * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -242,8 +238,7 @@ retry: if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) nr_inline = 0; - req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline, - GFP_KERNEL); + req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL); if (!req) return ERR_PTR(-ENOMEM); @@ -1367,12 +1362,12 @@ static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry, drop_nlink(&vnode->vfs_inode); if (vnode->vfs_inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - __afs_break_callback(vnode); + __afs_break_callback(vnode, afs_cb_break_for_unlink); } write_sequnlock(&vnode->cb_lock); ret = 0; } else { - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_unlink); if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) kdebug("AFS_VNODE_DELETED"); @@ -1394,7 +1389,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) { struct afs_fs_cursor fc; struct afs_status_cb *scb; - struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; + struct afs_vnode *dvnode = AFS_FS_I(dir); + struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; bool need_rehash = false; int ret; @@ -1417,15 +1413,12 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) } /* Try to make sure we have a callback promise on the victim. */ - if (d_really_is_positive(dentry)) { - vnode = AFS_FS_I(d_inode(dentry)); - ret = afs_validate(vnode, key); - if (ret < 0) - goto error_key; - } + ret = afs_validate(vnode, key); + if (ret < 0) + goto error_key; spin_lock(&dentry->d_lock); - if (vnode && d_count(dentry) > 1) { + if (d_count(dentry) > 1) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(d_inode(dentry), 0); diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index 8b400f5aead5..d4fbe5f85f1b 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS filesystem directory editing * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 28f4aa015229..361088a5edb9 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS silly rename handling * * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - Derived from NFS's sillyrename. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> @@ -64,11 +60,6 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_edit_dir_add(dvnode, &new->d_name, &vnode->fid, afs_edit_dir_for_silly_1); - - /* vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(old, 0); } kfree(scb); diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index af1689d1f32e..bcd1bafb0278 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS dynamic root handling * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/fs.h> @@ -28,6 +24,7 @@ const struct file_operations afs_dynroot_file_operations = { static int afs_probe_cell_name(struct dentry *dentry) { struct afs_cell *cell; + struct afs_net *net = afs_d2net(dentry); const char *name = dentry->d_name.name; size_t len = dentry->d_name.len; int ret; @@ -40,13 +37,14 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len); + cell = afs_lookup_cell_rcu(net, name, len); if (!IS_ERR(cell)) { - afs_put_cell(afs_d2net(dentry), cell); + afs_put_cell(net, cell); return 0; } - ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false); + ret = dns_query(net->net, "afsdb", name, len, "srv=1", + NULL, NULL, false); if (ret == -ENODATA) ret = -EDESTADDRREQ; return ret; diff --git a/fs/afs/file.c b/fs/afs/file.c index 11e69c5fb7ab..56b69576274d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS filesystem file handling * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -314,8 +310,7 @@ int afs_page_filler(void *data, struct page *page) /* fall through */ default: go_on: - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), - GFP_KERNEL); + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); if (!req) goto enomem; @@ -465,8 +460,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, n++; } - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n, - GFP_NOFS); + req = kzalloc(struct_size(req, array, n), GFP_NOFS); if (!req) return -ENOMEM; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index ed3ac03682d7..d5e5a6ddc847 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS file locking support * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include "internal.h" diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index 9b7266209343..cfe62b154f68 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS fileserver probing * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/sched.h> diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 48298408d6ac..1ce73e014139 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS File Server client stubs * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/init.h> @@ -1915,7 +1911,7 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, return ERR_PTR(-ENOMEM); call->key = key; - call->server = afs_get_server(server); + call->server = afs_get_server(server, afs_server_trace_get_caps); call->server_index = server_index; call->upgrade = true; call->async = true; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b42d9d09669c..7b1c18c32f48 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -56,6 +56,16 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->vfs_inode, size); + vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* * Initialise an inode from the vnode status. */ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, @@ -124,12 +134,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); } - /* - * Estimate 512 bytes blocks used, rounded up to nearest 1K - * for consistency with other AFS clients. - */ - inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1; - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); vnode->invalid_before = status->data_version; inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); @@ -207,11 +212,13 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (expected_version && *expected_version != status->data_version) { - kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s", - (unsigned long long) status->data_version, - vnode->fid.vid, vnode->fid.vnode, - (unsigned long long) *expected_version, - fc->type ? fc->type->name : "???"); + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + vnode->fid.vid, vnode->fid.vnode, + (unsigned long long)*expected_version, + (unsigned long long)status->data_version, + fc->type ? fc->type->name : "???"); + vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) @@ -230,7 +237,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, if (data_changed) { inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - i_size_write(&vnode->vfs_inode, status->size); + afs_set_i_size(vnode, status->size); } } @@ -276,7 +283,7 @@ void afs_vnode_commit_status(struct afs_fs_cursor *fc, if (scb->status.abort_code == VNOVNODE) { set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_nlink(&vnode->vfs_inode); - __afs_break_callback(vnode); + __afs_break_callback(vnode, afs_cb_break_for_deleted); } } else { if (scb->have_status) @@ -587,8 +594,9 @@ bool afs_check_validity(struct afs_vnode *vnode) struct afs_cb_interest *cbi; struct afs_server *server; struct afs_volume *volume = vnode->volume; + enum afs_cb_break_reason need_clear = afs_cb_break_no_break; time64_t now = ktime_get_real_seconds(); - bool valid, need_clear = false; + bool valid; unsigned int cb_break, cb_s_break, cb_v_break; int seq = 0; @@ -606,13 +614,13 @@ bool afs_check_validity(struct afs_vnode *vnode) vnode->cb_v_break != cb_v_break) { vnode->cb_s_break = cb_s_break; vnode->cb_v_break = cb_v_break; - need_clear = true; + need_clear = afs_cb_break_for_vsbreak; valid = false; } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - need_clear = true; + need_clear = afs_cb_break_for_zap; valid = false; } else if (vnode->cb_expires_at - 10 <= now) { - need_clear = true; + need_clear = afs_cb_break_for_lapsed; valid = false; } else { valid = true; @@ -628,10 +636,12 @@ bool afs_check_validity(struct afs_vnode *vnode) done_seqretry(&vnode->cb_lock, seq); - if (need_clear) { + if (need_clear != afs_cb_break_no_break) { write_seqlock(&vnode->cb_lock); if (cb_break == vnode->cb_break) - __afs_break_callback(vnode); + __afs_break_callback(vnode, need_clear); + else + trace_afs_cb_miss(&vnode->fid, need_clear); write_sequnlock(&vnode->cb_lock); valid = false; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 2073c1a3ab4b..f66a3be12fd6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal AFS stuff * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/compiler.h> @@ -113,10 +109,8 @@ struct afs_call { struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_net *net; /* The network namespace */ - union { - struct afs_server *server; - struct afs_vlserver *vlserver; - }; + struct afs_server *server; /* The fileserver record if fs op (pins ref) */ + struct afs_vlserver *vlserver; /* The vlserver record if vl op */ struct afs_cb_interest *cbi; /* Callback interest for server used */ struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ @@ -520,6 +514,7 @@ struct afs_server { atomic_t usage; u32 addr_version; /* Address list version */ u32 cm_epoch; /* Server RxRPC epoch */ + unsigned int debug_id; /* Debugging ID for traces */ /* file service access */ rwlock_t fs_lock; /* access lock */ @@ -620,7 +615,7 @@ struct afs_volume { unsigned int servers_seq; /* Incremented each time ->servers changes */ unsigned cb_v_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; + rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ short error; @@ -725,15 +720,6 @@ struct afs_permits { }; /* - * record of one of a system's set of network interfaces - */ -struct afs_interface { - struct in_addr address; /* IPv4 address bound to interface */ - struct in_addr netmask; /* netmask applied to address */ - unsigned mtu; /* MTU of interface */ -}; - -/* * Error prioritisation and accumulation. */ struct afs_error { @@ -850,9 +836,9 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def; * callback.c */ extern void afs_init_callback_state(struct afs_server *); -extern void __afs_break_callback(struct afs_vnode *); -extern void afs_break_callback(struct afs_vnode *); -extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); +extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); +extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *); extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_list *, unsigned int); @@ -1096,12 +1082,6 @@ extern struct vfsmount *afs_d_automount(struct path *); extern void afs_mntpt_kill_timer(void); /* - * netdevices.c - */ -extern int afs_get_ipv4_interfaces(struct afs_net *, struct afs_interface *, - size_t, bool); - -/* * proc.c */ #ifdef CONFIG_PROC_FS @@ -1246,17 +1226,12 @@ extern void __exit afs_clean_up_permit_cache(void); */ extern spinlock_t afs_server_peer_lock; -static inline struct afs_server *afs_get_server(struct afs_server *server) -{ - atomic_inc(&server->usage); - return server; -} - extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); -extern void afs_put_server(struct afs_net *, struct afs_server *); +extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); +extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); @@ -1440,7 +1415,7 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, { if (fc->ac.error == -ENOENT) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - afs_break_callback(vnode); + afs_break_callback(vnode, afs_cb_break_for_deleted); } } diff --git a/fs/afs/main.c b/fs/afs/main.c index 107427688edd..c9c45d7078bd 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS client file system * * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 7f2af061ea06..52b19e9c1535 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* miscellaneous bits * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -14,6 +10,7 @@ #include <linux/errno.h> #include "internal.h" #include "afs_fs.h" +#include "protocol_uae.h" /* * convert an AFS abort code to a Linux error number @@ -69,34 +66,25 @@ int afs_abort_to_error(u32 abort_code) case AFSVL_PERM: return -EACCES; case AFSVL_NOMEM: return -EREMOTEIO; - /* Unified AFS error table; ET "uae" == 0x2f6df00 */ - case 0x2f6df00: return -EPERM; - case 0x2f6df01: return -ENOENT; - case 0x2f6df04: return -EIO; - case 0x2f6df0a: return -EAGAIN; - case 0x2f6df0b: return -ENOMEM; - case 0x2f6df0c: return -EACCES; - case 0x2f6df0f: return -EBUSY; - case 0x2f6df10: return -EEXIST; - case 0x2f6df11: return -EXDEV; - case 0x2f6df12: return -ENODEV; - case 0x2f6df13: return -ENOTDIR; - case 0x2f6df14: return -EISDIR; - case 0x2f6df15: return -EINVAL; - case 0x2f6df1a: return -EFBIG; - case 0x2f6df1b: return -ENOSPC; - case 0x2f6df1d: return -EROFS; - case 0x2f6df1e: return -EMLINK; - case 0x2f6df20: return -EDOM; - case 0x2f6df21: return -ERANGE; - case 0x2f6df22: return -EDEADLK; - case 0x2f6df23: return -ENAMETOOLONG; - case 0x2f6df24: return -ENOLCK; - case 0x2f6df26: return -ENOTEMPTY; - case 0x2f6df28: return -EWOULDBLOCK; - case 0x2f6df69: return -ENOTCONN; - case 0x2f6df6c: return -ETIMEDOUT; - case 0x2f6df78: return -EDQUOT; + /* Unified AFS error table */ + case UAEPERM: return -EPERM; + case UAENOENT: return -ENOENT; + case UAEACCES: return -EACCES; + case UAEBUSY: return -EBUSY; + case UAEEXIST: return -EEXIST; + case UAENOTDIR: return -ENOTDIR; + case UAEISDIR: return -EISDIR; + case UAEFBIG: return -EFBIG; + case UAENOSPC: return -ENOSPC; + case UAEROFS: return -EROFS; + case UAEMLINK: return -EMLINK; + case UAEDEADLK: return -EDEADLK; + case UAENAMETOOLONG: return -ENAMETOOLONG; + case UAENOLCK: return -ENOLCK; + case UAENOTEMPTY: return -ENOTEMPTY; + case UAELOOP: return -ELOOP; + case UAENOMEDIUM: return -ENOMEDIUM; + case UAEDQUOT: return -EDQUOT; /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */ case RXKADINCONSISTENCY: return -EPROTO; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index eecd8b699186..f532d6d3bd28 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* mountpoint management * * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c deleted file mode 100644 index 2a009d1939d7..000000000000 --- a/fs/afs/netdevices.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* AFS network device helpers - * - * Copyright (c) 2007 Patrick McHardy <kaber@trash.net> - */ - -#include <linux/string.h> -#include <linux/rtnetlink.h> -#include <linux/inetdevice.h> -#include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <net/net_namespace.h> -#include "internal.h" - -/* - * get a list of this system's interface IPv4 addresses, netmasks and MTUs - * - maxbufs must be at least 1 - * - returns the number of interface records in the buffer - */ -int afs_get_ipv4_interfaces(struct afs_net *net, struct afs_interface *bufs, - size_t maxbufs, bool wantloopback) -{ - struct net_device *dev; - struct in_device *idev; - int n = 0; - - ASSERT(maxbufs > 0); - - rtnl_lock(); - for_each_netdev(net->net, dev) { - if (dev->type == ARPHRD_LOOPBACK && !wantloopback) - continue; - idev = __in_dev_get_rtnl(dev); - if (!idev) - continue; - for_primary_ifa(idev) { - bufs[n].address.s_addr = ifa->ifa_address; - bufs[n].netmask.s_addr = ifa->ifa_mask; - bufs[n].mtu = dev->mtu; - n++; - if (n >= maxbufs) - goto out; - } endfor_ifa(idev); - } -out: - rtnl_unlock(); - return n; -} diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 371501d28e08..fba2ec3a3a9c 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* /proc interface for AFS * * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/slab.h> diff --git a/fs/afs/protocol_uae.h b/fs/afs/protocol_uae.h new file mode 100644 index 000000000000..1b3d1060bd34 --- /dev/null +++ b/fs/afs/protocol_uae.h @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Universal AFS Error codes (UAE). + * + * Copyright (C) 2003, Daria Phoebe Brashear + * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + */ + +enum { + UAEPERM = 0x2f6df00, /* Operation not permitted */ + UAENOENT = 0x2f6df01, /* No such file or directory */ + UAESRCH = 0x2f6df02, /* No such process */ + UAEINTR = 0x2f6df03, /* Interrupted system call */ + UAEIO = 0x2f6df04, /* I/O error */ + UAENXIO = 0x2f6df05, /* No such device or address */ + UAE2BIG = 0x2f6df06, /* Arg list too long */ + UAENOEXEC = 0x2f6df07, /* Exec format error */ + UAEBADF = 0x2f6df08, /* Bad file number */ + UAECHILD = 0x2f6df09, /* No child processes */ + UAEAGAIN = 0x2f6df0a, /* Try again */ + UAENOMEM = 0x2f6df0b, /* Out of memory */ + UAEACCES = 0x2f6df0c, /* Permission denied */ + UAEFAULT = 0x2f6df0d, /* Bad address */ + UAENOTBLK = 0x2f6df0e, /* Block device required */ + UAEBUSY = 0x2f6df0f, /* Device or resource busy */ + UAEEXIST = 0x2f6df10, /* File exists */ + UAEXDEV = 0x2f6df11, /* Cross-device link */ + UAENODEV = 0x2f6df12, /* No such device */ + UAENOTDIR = 0x2f6df13, /* Not a directory */ + UAEISDIR = 0x2f6df14, /* Is a directory */ + UAEINVAL = 0x2f6df15, /* Invalid argument */ + UAENFILE = 0x2f6df16, /* File table overflow */ + UAEMFILE = 0x2f6df17, /* Too many open files */ + UAENOTTY = 0x2f6df18, /* Not a typewriter */ + UAETXTBSY = 0x2f6df19, /* Text file busy */ + UAEFBIG = 0x2f6df1a, /* File too large */ + UAENOSPC = 0x2f6df1b, /* No space left on device */ + UAESPIPE = 0x2f6df1c, /* Illegal seek */ + UAEROFS = 0x2f6df1d, /* Read-only file system */ + UAEMLINK = 0x2f6df1e, /* Too many links */ + UAEPIPE = 0x2f6df1f, /* Broken pipe */ + UAEDOM = 0x2f6df20, /* Math argument out of domain of func */ + UAERANGE = 0x2f6df21, /* Math result not representable */ + UAEDEADLK = 0x2f6df22, /* Resource deadlock would occur */ + UAENAMETOOLONG = 0x2f6df23, /* File name too long */ + UAENOLCK = 0x2f6df24, /* No record locks available */ + UAENOSYS = 0x2f6df25, /* Function not implemented */ + UAENOTEMPTY = 0x2f6df26, /* Directory not empty */ + UAELOOP = 0x2f6df27, /* Too many symbolic links encountered */ + UAEWOULDBLOCK = 0x2f6df28, /* Operation would block */ + UAENOMSG = 0x2f6df29, /* No message of desired type */ + UAEIDRM = 0x2f6df2a, /* Identifier removed */ + UAECHRNG = 0x2f6df2b, /* Channel number out of range */ + UAEL2NSYNC = 0x2f6df2c, /* Level 2 not synchronized */ + UAEL3HLT = 0x2f6df2d, /* Level 3 halted */ + UAEL3RST = 0x2f6df2e, /* Level 3 reset */ + UAELNRNG = 0x2f6df2f, /* Link number out of range */ + UAEUNATCH = 0x2f6df30, /* Protocol driver not attached */ + UAENOCSI = 0x2f6df31, /* No CSI structure available */ + UAEL2HLT = 0x2f6df32, /* Level 2 halted */ + UAEBADE = 0x2f6df33, /* Invalid exchange */ + UAEBADR = 0x2f6df34, /* Invalid request descriptor */ + UAEXFULL = 0x2f6df35, /* Exchange full */ + UAENOANO = 0x2f6df36, /* No anode */ + UAEBADRQC = 0x2f6df37, /* Invalid request code */ + UAEBADSLT = 0x2f6df38, /* Invalid slot */ + UAEBFONT = 0x2f6df39, /* Bad font file format */ + UAENOSTR = 0x2f6df3a, /* Device not a stream */ + UAENODATA = 0x2f6df3b, /* No data available */ + UAETIME = 0x2f6df3c, /* Timer expired */ + UAENOSR = 0x2f6df3d, /* Out of streams resources */ + UAENONET = 0x2f6df3e, /* Machine is not on the network */ + UAENOPKG = 0x2f6df3f, /* Package not installed */ + UAEREMOTE = 0x2f6df40, /* Object is remote */ + UAENOLINK = 0x2f6df41, /* Link has been severed */ + UAEADV = 0x2f6df42, /* Advertise error */ + UAESRMNT = 0x2f6df43, /* Srmount error */ + UAECOMM = 0x2f6df44, /* Communication error on send */ + UAEPROTO = 0x2f6df45, /* Protocol error */ + UAEMULTIHOP = 0x2f6df46, /* Multihop attempted */ + UAEDOTDOT = 0x2f6df47, /* RFS specific error */ + UAEBADMSG = 0x2f6df48, /* Not a data message */ + UAEOVERFLOW = 0x2f6df49, /* Value too large for defined data type */ + UAENOTUNIQ = 0x2f6df4a, /* Name not unique on network */ + UAEBADFD = 0x2f6df4b, /* File descriptor in bad state */ + UAEREMCHG = 0x2f6df4c, /* Remote address changed */ + UAELIBACC = 0x2f6df4d, /* Can not access a needed shared library */ + UAELIBBAD = 0x2f6df4e, /* Accessing a corrupted shared library */ + UAELIBSCN = 0x2f6df4f, /* .lib section in a.out corrupted */ + UAELIBMAX = 0x2f6df50, /* Attempting to link in too many shared libraries */ + UAELIBEXEC = 0x2f6df51, /* Cannot exec a shared library directly */ + UAEILSEQ = 0x2f6df52, /* Illegal byte sequence */ + UAERESTART = 0x2f6df53, /* Interrupted system call should be restarted */ + UAESTRPIPE = 0x2f6df54, /* Streams pipe error */ + UAEUSERS = 0x2f6df55, /* Too many users */ + UAENOTSOCK = 0x2f6df56, /* Socket operation on non-socket */ + UAEDESTADDRREQ = 0x2f6df57, /* Destination address required */ + UAEMSGSIZE = 0x2f6df58, /* Message too long */ + UAEPROTOTYPE = 0x2f6df59, /* Protocol wrong type for socket */ + UAENOPROTOOPT = 0x2f6df5a, /* Protocol not available */ + UAEPROTONOSUPPORT = 0x2f6df5b, /* Protocol not supported */ + UAESOCKTNOSUPPORT = 0x2f6df5c, /* Socket type not supported */ + UAEOPNOTSUPP = 0x2f6df5d, /* Operation not supported on transport endpoint */ + UAEPFNOSUPPORT = 0x2f6df5e, /* Protocol family not supported */ + UAEAFNOSUPPORT = 0x2f6df5f, /* Address family not supported by protocol */ + UAEADDRINUSE = 0x2f6df60, /* Address already in use */ + UAEADDRNOTAVAIL = 0x2f6df61, /* Cannot assign requested address */ + UAENETDOWN = 0x2f6df62, /* Network is down */ + UAENETUNREACH = 0x2f6df63, /* Network is unreachable */ + UAENETRESET = 0x2f6df64, /* Network dropped connection because of reset */ + UAECONNABORTED = 0x2f6df65, /* Software caused connection abort */ + UAECONNRESET = 0x2f6df66, /* Connection reset by peer */ + UAENOBUFS = 0x2f6df67, /* No buffer space available */ + UAEISCONN = 0x2f6df68, /* Transport endpoint is already connected */ + UAENOTCONN = 0x2f6df69, /* Transport endpoint is not connected */ + UAESHUTDOWN = 0x2f6df6a, /* Cannot send after transport endpoint shutdown */ + UAETOOMANYREFS = 0x2f6df6b, /* Too many references: cannot splice */ + UAETIMEDOUT = 0x2f6df6c, /* Connection timed out */ + UAECONNREFUSED = 0x2f6df6d, /* Connection refused */ + UAEHOSTDOWN = 0x2f6df6e, /* Host is down */ + UAEHOSTUNREACH = 0x2f6df6f, /* No route to host */ + UAEALREADY = 0x2f6df70, /* Operation already in progress */ + UAEINPROGRESS = 0x2f6df71, /* Operation now in progress */ + UAESTALE = 0x2f6df72, /* Stale NFS file handle */ + UAEUCLEAN = 0x2f6df73, /* Structure needs cleaning */ + UAENOTNAM = 0x2f6df74, /* Not a XENIX named type file */ + UAENAVAIL = 0x2f6df75, /* No XENIX semaphores available */ + UAEISNAM = 0x2f6df76, /* Is a named type file */ + UAEREMOTEIO = 0x2f6df77, /* Remote I/O error */ + UAEDQUOT = 0x2f6df78, /* Quota exceeded */ + UAENOMEDIUM = 0x2f6df79, /* No medium found */ + UAEMEDIUMTYPE = 0x2f6df7a, /* Wrong medium type */ +}; diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index 915b9d10cdf3..32be9c698348 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* YFS protocol bits * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #define YFS_FS_SERVICE 2500 diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index b00c739e0e63..172ba569cd60 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Handle fileserver selection and rotation. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4fa5ce92b9b9..0e5269374ac1 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Maintain an RxRPC server socket to do AFS communications through * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/slab.h> @@ -188,7 +184,7 @@ void afs_put_call(struct afs_call *call) if (call->type->destructor) call->type->destructor(call); - afs_put_server(call->net, call->server); + afs_put_server(call->net, call->server, afs_server_trace_put_call); afs_put_cb_interest(call->net, call->cbi); afs_put_addrlist(call->alist); kfree(call->request); diff --git a/fs/afs/security.c b/fs/afs/security.c index 5d8ece98561e..71e71c07568f 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS security handling * * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/init.h> diff --git a/fs/afs/server.c b/fs/afs/server.c index 52c170b59cfd..64d440aaabc0 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS server record management * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/sched.h> @@ -17,6 +13,7 @@ static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ +static atomic_t afs_server_debug_id; static void afs_inc_servers_outstanding(struct afs_net *net) { @@ -51,7 +48,7 @@ struct afs_server *afs_find_server(struct afs_net *net, do { if (server) - afs_put_server(net, server); + afs_put_server(net, server, afs_server_trace_put_find_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_addr_lock, &seq); @@ -116,7 +113,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu * changes. */ if (server) - afs_put_server(net, server); + afs_put_server(net, server, afs_server_trace_put_uuid_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_lock, &seq); @@ -131,7 +128,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu } else if (diff > 0) { p = p->rb_right; } else { - afs_get_server(server); + afs_get_server(server, afs_server_trace_get_by_uuid); break; } @@ -202,7 +199,7 @@ static struct afs_server *afs_install_server(struct afs_net *net, ret = 0; exists: - afs_get_server(server); + afs_get_server(server, afs_server_trace_get_install); write_sequnlock(&net->fs_lock); return server; } @@ -223,6 +220,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, goto enomem; atomic_set(&server->usage, 1); + server->debug_id = atomic_inc_return(&afs_server_debug_id); RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; @@ -234,6 +232,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, spin_lock_init(&server->probe_lock); afs_inc_servers_outstanding(net); + trace_afs_server(server, 1, afs_server_trace_alloc); _leave(" = %p", server); return server; @@ -329,9 +328,22 @@ void afs_servers_timer(struct timer_list *timer) } /* + * Get a reference on a server object. + */ +struct afs_server *afs_get_server(struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int u = atomic_inc_return(&server->usage); + + trace_afs_server(server, u, reason); + return server; +} + +/* * Release a reference on a server record. */ -void afs_put_server(struct afs_net *net, struct afs_server *server) +void afs_put_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) { unsigned int usage; @@ -342,7 +354,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server) usage = atomic_dec_return(&server->usage); - _enter("{%u}", usage); + trace_afs_server(server, usage, reason); if (likely(usage > 0)) return; @@ -354,6 +366,8 @@ static void afs_server_rcu(struct rcu_head *rcu) { struct afs_server *server = container_of(rcu, struct afs_server, rcu); + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_free); afs_put_addrlist(rcu_access_pointer(server->addresses)); kfree(server); } @@ -369,7 +383,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) .index = alist->preferred, .error = 0, }; - _enter("%p", server); + + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_give_up_cb); if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) afs_fs_give_up_all_callbacks(net, server, &ac, NULL); @@ -377,6 +393,8 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) wait_var_event(&server->probe_outstanding, atomic_read(&server->probe_outstanding) == 0); + trace_afs_server(server, atomic_read(&server->usage), + afs_server_trace_destroy); call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } @@ -396,6 +414,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) write_seqlock(&net->fs_lock); usage = 1; deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); + trace_afs_server(server, usage, afs_server_trace_gc); if (deleted) { rb_erase(&server->uuid_rb, &net->fs_servers); hlist_del_rcu(&server->proc_link); @@ -518,6 +537,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a _enter(""); + trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update); + alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, &server->uuid); if (IS_ERR(alist)) { diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 155dc14caef9..888d91d195d9 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS fileserver list management. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -20,7 +16,8 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) if (slist && refcount_dec_and_test(&slist->usage)) { for (i = 0; i < slist->nr_servers; i++) { afs_put_cb_interest(net, slist->servers[i].cb_interest); - afs_put_server(net, slist->servers[i].server); + afs_put_server(net, slist->servers[i].server, + afs_server_trace_put_slist); } kfree(slist); } @@ -71,7 +68,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { - afs_put_server(cell->net, server); + afs_put_server(cell->net, server, + afs_server_trace_put_slist_isort); continue; } diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index 61e25010ff33..21eb0c0be912 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS vlserver list management. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index beb991563939..858498cc1b05 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS vlserver probing * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/sched.h> diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 3f845489a9f0..9a5ce9687779 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Handle vlserver selection and rotation. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 3d4b9836a2e2..d7e0fd3c00df 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS Volume Location Service client * * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/gfp.h> diff --git a/fs/afs/volume.c b/fs/afs/volume.c index f6eba2def0a1..1a414300b654 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* AFS volume management * * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> @@ -47,6 +43,7 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, atomic_set(&volume->usage, 1); INIT_LIST_HEAD(&volume->proc_link); rwlock_init(&volume->servers_lock); + rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask); diff --git a/fs/afs/write.c b/fs/afs/write.c index 8bcab95f1127..cb76566763db 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* handling of writes to regular files and writing back to the server * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/backing-dev.h> @@ -48,8 +44,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, return 0; } - req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), - GFP_KERNEL); + req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); if (!req) return -ENOMEM; diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 17f58fea7ec1..5552d034090a 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Extended attribute handling for AFS. We use xattrs to get and set metadata * instead of providing pioctl(). * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index aa21f3068d52..94f1f398eefa 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* AFS fileserver XDR types * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #ifndef XDR_FS_H diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 10de675dc6fc..18722aaeda33 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* YFS File Server client stubs * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/init.h> @@ -423,7 +423,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, BUG_ON(PageWriteback(old)); get_page(new); - rc = migrate_page_move_mapping(mapping, new, old, mode, 1); + rc = migrate_page_move_mapping(mapping, new, old, 1); if (rc != MIGRATEPAGE_SUCCESS) { put_page(new); goto out_unlock; @@ -1477,8 +1477,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) return 0; } -static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, - bool vectored, bool compat, struct iov_iter *iter) +static ssize_t aio_setup_rw(int rw, const struct iocb *iocb, + struct iovec **iovec, bool vectored, bool compat, + struct iov_iter *iter) { void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; size_t len = iocb->aio_nbytes; @@ -1535,7 +1536,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) @@ -1563,7 +1564,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { @@ -2091,8 +2092,8 @@ SYSCALL_DEFINE6(io_pgetevents, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2101,13 +2102,15 @@ SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_saved_sigmask_unless(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2124,8 +2127,8 @@ SYSCALL_DEFINE6(io_pgetevents_time32, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2135,13 +2138,15 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_saved_sigmask_unless(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2189,8 +2194,8 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, const struct __compat_aio_sigset __user *, usig) { struct __compat_aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2199,13 +2204,15 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_saved_sigmask_unless(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2222,8 +2229,8 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, const struct __compat_aio_sigset __user *, usig) { struct __compat_aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2232,13 +2239,15 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + restore_saved_sigmask_unless(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 7ea017cb1dd4..89714308c25b 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/anon_inodes.c * diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig index eaebcd430cc3..3b3a6b1423c6 100644 --- a/fs/autofs/Kconfig +++ b/fs/autofs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config AUTOFS4_FS tristate "Old Kconfig name for Kernel automounter support" select AUTOFS_FS diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile index 1f85d35ec8b7..495ac542e172 100644 --- a/fs/autofs/Makefile +++ b/fs/autofs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux autofs-filesystem routines. # diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index e1091312abe1..8c0c11181fad 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ /* Internal header file for autofs */ diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index e9fe74d1541b..a3cdb0036c5d 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2008 Red Hat, Inc. All rights reserved. * Copyright 2008 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include <linux/miscdevice.h> diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index 28d9c2b1b3bb..cdff0567aacb 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> * Copyright 2001-2006 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include "autofs_i.h" diff --git a/fs/autofs/init.c b/fs/autofs/init.c index c0c1db2cc6ea..d3f55e874338 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include <linux/module.h> diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index fb0225f21c12..9edf243713eb 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 2005-2006 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include <linux/seq_file.h> diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 1246f396bf0e..e646569c75ed 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org> * Copyright 2001-2006 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include <linux/capability.h> diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index aad3902c0cc1..7ac67dc76039 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include "autofs_i.h" diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 15a3e31d0904..b04c528b19d3 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 2001-2006 Ian Kent <raven@themaw.net> - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. */ #include <linux/sched/signal.h> diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig index edc5cc2aefad..9550b6462b81 100644 --- a/fs/befs/Kconfig +++ b/fs/befs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config BEFS_FS tristate "BeOS file system (BeFS) support (read only)" depends on BLOCK diff --git a/fs/befs/Makefile b/fs/befs/Makefile index 8b9f66642a83..6c9c3cbc556e 100644 --- a/fs/befs/Makefile +++ b/fs/befs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux BeOS filesystem routines. # diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e273850c95af..462d096ff3e9 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/befs/linuxvfs.c * diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig index 3728a6479c64..3e1247f07913 100644 --- a/fs/bfs/Kconfig +++ b/fs/bfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config BFS_FS tristate "BFS file system support" depends on BLOCK diff --git a/fs/bfs/Makefile b/fs/bfs/Makefile index c787b36d940c..2b6bc5eb4de9 100644 --- a/fs/bfs/Makefile +++ b/fs/bfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for BFS filesystem. # diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index dc0cd2aa3d65..5e97bed073d7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/bfs/inode.c * BFS superblock and inode operations. diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 1fefd87eb4b4..8e8346a81723 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_aout.c * diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fa9e99a962e0..d4e11b2e04f6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_elf.c * @@ -1126,7 +1127,6 @@ out_free_interp: load_addr, interp_load_addr); if (retval < 0) goto out; - /* N.B. passed_fileno might not be initialized? */ current->mm->end_code = end_code; current->mm->start_code = start_code; current->mm->start_data = start_data; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index b53bb3729ac1..d86ebd0dcc3d 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* binfmt_elf_fdpic.c: FDPIC ELF binary format * * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * Derived from binfmt_elf.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index dd2d3f0cd55d..466497860c62 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_em86.c * diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 82a48e830018..831a2b25ba79 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -42,6 +42,11 @@ #include <asm/unaligned.h> #include <asm/cacheflush.h> #include <asm/page.h> +#include <asm/flat.h> + +#ifndef flat_get_relocate_addr +#define flat_get_relocate_addr(rel) (rel) +#endif /****************************************************************************/ @@ -63,6 +68,12 @@ #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ +#ifdef CONFIG_BINFMT_SHARED_FLAT +#define MAX_SHARED_LIBS (4) +#else +#define MAX_SHARED_LIBS (1) +#endif + struct lib_info { struct { unsigned long start_code; /* Start of text segment */ @@ -120,14 +131,15 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start sp -= bprm->envc + 1; sp -= bprm->argc + 1; - sp -= flat_argvp_envp_on_stack() ? 2 : 0; + if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) + sp -= 2; /* argvp + envp */ sp -= 1; /* &argc */ current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN; sp = (unsigned long __user *)current->mm->start_stack; __put_user(bprm->argc, sp++); - if (flat_argvp_envp_on_stack()) { + if (IS_ENABLED(CONFIG_BINFMT_FLAT_ARGVP_ENVP_ON_STACK)) { unsigned long argv, envp; argv = (unsigned long)(sp + 2); envp = (unsigned long)(sp + 2 + bprm->argc + 1); @@ -345,7 +357,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) start_code = p->lib_list[id].start_code; text_len = p->lib_list[id].text_len; - if (!flat_reloc_valid(r, start_brk - start_data + text_len)) { + if (r > start_brk - start_data + text_len) { pr_err("reloc outside program 0x%lx (0 - 0x%lx/0x%lx)", r, start_brk-start_data+text_len, text_len); goto failed; @@ -368,6 +380,7 @@ failed: /****************************************************************************/ +#ifdef CONFIG_BINFMT_FLAT_OLD static void old_reloc(unsigned long rl) { static const char *segment[] = { "TEXT", "DATA", "BSS", "*UNKNOWN*" }; @@ -405,6 +418,7 @@ static void old_reloc(unsigned long rl) pr_debug("Relocation became %lx\n", val); } +#endif /* CONFIG_BINFMT_FLAT_OLD */ /****************************************************************************/ @@ -415,8 +429,8 @@ static int load_flat_file(struct linux_binprm *bprm, unsigned long textpos, datapos, realdatastart; u32 text_len, data_len, bss_len, stack_len, full_data, flags; unsigned long len, memp, memp_size, extra, rlim; - u32 __user *reloc, *rp; - struct inode *inode; + __be32 __user *reloc; + u32 __user *rp; int i, rev, relocs; loff_t fpos; unsigned long start_code, end_code; @@ -424,7 +438,6 @@ static int load_flat_file(struct linux_binprm *bprm, int ret; hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ - inode = file_inode(bprm->file); text_len = ntohl(hdr->data_start); data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start); @@ -454,6 +467,7 @@ static int load_flat_file(struct linux_binprm *bprm, if (flags & FLAT_FLAG_KTRACE) pr_info("Loading file: %s\n", bprm->filename); +#ifdef CONFIG_BINFMT_FLAT_OLD if (rev != FLAT_VERSION && rev != OLD_FLAT_VERSION) { pr_err("bad flat file version 0x%x (supported 0x%lx and 0x%lx)\n", rev, FLAT_VERSION, OLD_FLAT_VERSION); @@ -470,6 +484,23 @@ static int load_flat_file(struct linux_binprm *bprm, } /* + * fix up the flags for the older format, there were all kinds + * of endian hacks, this only works for the simple cases + */ + if (rev == OLD_FLAT_VERSION && + (flags || IS_ENABLED(CONFIG_BINFMT_FLAT_OLD_ALWAYS_RAM))) + flags = FLAT_FLAG_RAM; + +#else /* CONFIG_BINFMT_FLAT_OLD */ + if (rev != FLAT_VERSION) { + pr_err("bad flat file version 0x%x (supported 0x%lx)\n", + rev, FLAT_VERSION); + ret = -ENOEXEC; + goto err; + } +#endif /* !CONFIG_BINFMT_FLAT_OLD */ + + /* * Make sure the header params are sane. * 28 bits (256 MB) is way more than reasonable in this case. * If some top bits are set we have probable binary corruption. @@ -480,13 +511,6 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - /* - * fix up the flags for the older format, there were all kinds - * of endian hacks, this only works for the simple cases - */ - if (rev == OLD_FLAT_VERSION && flat_old_ram_flag(flags)) - flags = FLAT_FLAG_RAM; - #ifndef CONFIG_BINFMT_ZFLAT if (flags & (FLAT_FLAG_GZIP|FLAT_FLAG_GZDATA)) { pr_err("Support for ZFLAT executables is not enabled.\n"); @@ -547,7 +571,7 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); + len = data_len + extra; len = PAGE_ALIGN(len); realdatastart = vm_mmap(NULL, 0, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); @@ -561,9 +585,7 @@ static int load_flat_file(struct linux_binprm *bprm, vm_munmap(textpos, text_len); goto err; } - datapos = ALIGN(realdatastart + - MAX_SHARED_LIBS * sizeof(unsigned long), - FLAT_DATA_ALIGN); + datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN); pr_debug("Allocated data+bss+stack (%u bytes): %lx\n", data_len + bss_len + stack_len, datapos); @@ -587,13 +609,13 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } - reloc = (u32 __user *) + reloc = (__be32 __user *) (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = realdatastart; memp_size = len; } else { - len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32); + len = text_len + data_len + extra; len = PAGE_ALIGN(len); textpos = vm_mmap(NULL, 0, len, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); @@ -608,11 +630,9 @@ static int load_flat_file(struct linux_binprm *bprm, } realdatastart = textpos + ntohl(hdr->data_start); - datapos = ALIGN(realdatastart + - MAX_SHARED_LIBS * sizeof(u32), - FLAT_DATA_ALIGN); + datapos = ALIGN(realdatastart, FLAT_DATA_ALIGN); - reloc = (u32 __user *) + reloc = (__be32 __user *) (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = textpos; memp_size = len; @@ -627,8 +647,9 @@ static int load_flat_file(struct linux_binprm *bprm, (text_len + full_data - sizeof(struct flat_hdr)), 0); - memmove((void *) datapos, (void *) realdatastart, - full_data); + if (datapos != realdatastart) + memmove((void *)datapos, (void *)realdatastart, + full_data); #else /* * This is used on MMU systems mainly for testing. @@ -684,8 +705,7 @@ static int load_flat_file(struct linux_binprm *bprm, if (IS_ERR_VALUE(result)) { ret = result; pr_err("Unable to read code+data+bss, errno %d\n", ret); - vm_munmap(textpos, text_len + data_len + extra + - MAX_SHARED_LIBS * sizeof(u32)); + vm_munmap(textpos, text_len + data_len + extra); goto err; } } @@ -775,20 +795,18 @@ static int load_flat_file(struct linux_binprm *bprm, * __start to address 4 so that is okay). */ if (rev > OLD_FLAT_VERSION) { - u32 __maybe_unused persistent = 0; for (i = 0; i < relocs; i++) { u32 addr, relval; + __be32 tmp; /* * Get the address of the pointer to be * relocated (of course, the address has to be * relocated first). */ - if (get_user(relval, reloc + i)) + if (get_user(tmp, reloc + i)) return -EFAULT; - relval = ntohl(relval); - if (flat_set_persistent(relval, &persistent)) - continue; + relval = ntohl(tmp); addr = flat_get_relocate_addr(relval); rp = (u32 __user *)calc_reloc(addr, libinfo, id, 1); if (rp == (u32 __user *)RELOC_FAILED) { @@ -797,8 +815,7 @@ static int load_flat_file(struct linux_binprm *bprm, } /* Get the pointer's value. */ - ret = flat_get_addr_from_rp(rp, relval, flags, - &addr, &persistent); + ret = flat_get_addr_from_rp(rp, relval, flags, &addr); if (unlikely(ret)) goto err; @@ -807,8 +824,13 @@ static int load_flat_file(struct linux_binprm *bprm, * Do the relocation. PIC relocs in the data section are * already in target order */ - if ((flags & FLAT_FLAG_GOTPIC) == 0) - addr = ntohl(addr); + if ((flags & FLAT_FLAG_GOTPIC) == 0) { + /* + * Meh, the same value can have a different + * byte order based on a flag.. + */ + addr = ntohl((__force __be32)addr); + } addr = calc_reloc(addr, libinfo, id, 0); if (addr == RELOC_FAILED) { ret = -ENOEXEC; @@ -821,14 +843,15 @@ static int load_flat_file(struct linux_binprm *bprm, goto err; } } +#ifdef CONFIG_BINFMT_FLAT_OLD } else { for (i = 0; i < relocs; i++) { - u32 relval; + __be32 relval; if (get_user(relval, reloc + i)) return -EFAULT; - relval = ntohl(relval); - old_reloc(relval); + old_reloc(ntohl(relval)); } +#endif /* CONFIG_BINFMT_FLAT_OLD */ } flush_icache_range(start_code, end_code); @@ -856,9 +879,14 @@ err: static int load_flat_shared_library(int id, struct lib_info *libs) { + /* + * This is a fake bprm struct; only the members "buf", "file" and + * "filename" are actually used. + */ struct linux_binprm bprm; int res; char buf[16]; + loff_t pos = 0; memset(&bprm, 0, sizeof(bprm)); @@ -872,25 +900,11 @@ static int load_flat_shared_library(int id, struct lib_info *libs) if (IS_ERR(bprm.file)) return res; - bprm.cred = prepare_exec_creds(); - res = -ENOMEM; - if (!bprm.cred) - goto out; - - /* We don't really care about recalculating credentials at this point - * as we're past the point of no return and are dealing with shared - * libraries. - */ - bprm.called_set_creds = 1; - - res = prepare_binprm(&bprm); + res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos); - if (!res) + if (res >= 0) res = load_flat_file(&bprm, libs, id, NULL); - abort_creds(bprm.cred); - -out: allow_write_access(bprm.file); fput(bprm.file); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 46a3d149bb7f..cdb45829354d 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * binfmt_misc.c * diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index e996174cbfc0..e9e6a6f4a35f 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/binfmt_script.c * diff --git a/fs/block_dev.c b/fs/block_dev.c index 1d051b962159..4707dfff991b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/block_dev.c * @@ -203,13 +204,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec; + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; loff_t pos = iocb->ki_pos; bool should_dirty = false; struct bio bio; ssize_t ret; blk_qc_t qc; - struct bvec_iter_all iter_all; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) @@ -259,13 +259,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, } __set_current_state(TASK_RUNNING); - bio_for_each_segment_all(bvec, &bio, iter_all) { - if (should_dirty && !PageCompound(bvec->bv_page)) - set_page_dirty_lock(bvec->bv_page); - if (!bio_flagged(&bio, BIO_NO_PAGE_REF)) - put_page(bvec->bv_page); - } - + bio_release_pages(&bio, should_dirty); if (unlikely(bio.bi_status)) ret = blk_status_to_errno(bio.bi_status); @@ -335,13 +329,7 @@ static void blkdev_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } @@ -1406,20 +1394,27 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev, */ int revalidate_disk(struct gendisk *disk) { - struct block_device *bdev; int ret = 0; if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - bdev = bdget_disk(disk, 0); - if (!bdev) - return ret; - mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev, ret == 0); - bdev->bd_invalidated = 0; - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + /* + * Hidden disks don't have associated bdev so there's no point in + * revalidating it. + */ + if (!(disk->flags & GENHD_FL_HIDDEN)) { + struct block_device *bdev = bdget_disk(disk, 0); + + if (!bdev) + return ret; + + mutex_lock(&bdev->bd_mutex); + check_disk_size_change(disk, bdev, ret == 0); + bdev->bd_invalidated = 0; + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + } return ret; } EXPORT_SYMBOL(revalidate_disk); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 23537bc8c827..212b4a854f2c 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -2,7 +2,8 @@ config BTRFS_FS tristate "Btrfs filesystem support" - select LIBCRC32C + select CRYPTO + select CRYPTO_CRC32C select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ca693dd554e9..76a843198bcb 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -10,7 +10,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o free-space-tree.o tree-checker.o + uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ + block-rsv.o delalloc-space.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 982152d3f920..89116afda7a2 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1465,12 +1465,11 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ -int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) +int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, + struct ulist *roots, struct ulist *tmp) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; - struct ulist *tmp = NULL; - struct ulist *roots = NULL; struct ulist_iterator uiter; struct ulist_node *node; struct seq_list elem = SEQ_LIST_INIT(elem); @@ -1481,12 +1480,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) .share_count = 0, }; - tmp = ulist_alloc(GFP_NOFS); - roots = ulist_alloc(GFP_NOFS); - if (!tmp || !roots) { - ret = -ENOMEM; - goto out; - } + ulist_init(roots); + ulist_init(tmp); trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { @@ -1527,8 +1522,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr) up_read(&fs_info->commit_root_sem); } out: - ulist_free(tmp); - ulist_free(roots); + ulist_release(roots); + ulist_release(tmp); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 54d58988483a..777f61dc081e 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -57,7 +57,8 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, u64 start_off, struct btrfs_path *path, struct btrfs_inode_extref **ret_extref, u64 *found_off); -int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr); +int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr, + struct ulist *roots, struct ulist *tmp_ulist); int __init btrfs_prelim_ref_init(void); void __cold btrfs_prelim_ref_exit(void); diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c new file mode 100644 index 000000000000..698470b9f32d --- /dev/null +++ b/fs/btrfs/block-rsv.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "ctree.h" +#include "block-rsv.h" +#include "space-info.h" +#include "math.h" +#include "transaction.h" + +static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *block_rsv, + struct btrfs_block_rsv *dest, u64 num_bytes, + u64 *qgroup_to_release_ret) +{ + struct btrfs_space_info *space_info = block_rsv->space_info; + u64 qgroup_to_release = 0; + u64 ret; + + spin_lock(&block_rsv->lock); + if (num_bytes == (u64)-1) { + num_bytes = block_rsv->size; + qgroup_to_release = block_rsv->qgroup_rsv_size; + } + block_rsv->size -= num_bytes; + if (block_rsv->reserved >= block_rsv->size) { + num_bytes = block_rsv->reserved - block_rsv->size; + block_rsv->reserved = block_rsv->size; + block_rsv->full = 1; + } else { + num_bytes = 0; + } + if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { + qgroup_to_release = block_rsv->qgroup_rsv_reserved - + block_rsv->qgroup_rsv_size; + block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; + } else { + qgroup_to_release = 0; + } + spin_unlock(&block_rsv->lock); + + ret = num_bytes; + if (num_bytes > 0) { + if (dest) { + spin_lock(&dest->lock); + if (!dest->full) { + u64 bytes_to_add; + + bytes_to_add = dest->size - dest->reserved; + bytes_to_add = min(num_bytes, bytes_to_add); + dest->reserved += bytes_to_add; + if (dest->reserved >= dest->size) + dest->full = 1; + num_bytes -= bytes_to_add; + } + spin_unlock(&dest->lock); + } + if (num_bytes) + btrfs_space_info_add_old_bytes(fs_info, space_info, + num_bytes); + } + if (qgroup_to_release_ret) + *qgroup_to_release_ret = qgroup_to_release; + return ret; +} + +int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, + struct btrfs_block_rsv *dst, u64 num_bytes, + bool update_size) +{ + int ret; + + ret = btrfs_block_rsv_use_bytes(src, num_bytes); + if (ret) + return ret; + + btrfs_block_rsv_add_bytes(dst, num_bytes, update_size); + return 0; +} + +void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) +{ + memset(rsv, 0, sizeof(*rsv)); + spin_lock_init(&rsv->lock); + rsv->type = type; +} + +void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv, + unsigned short type) +{ + btrfs_init_block_rsv(rsv, type); + rsv->space_info = btrfs_find_space_info(fs_info, + BTRFS_BLOCK_GROUP_METADATA); +} + +struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, + unsigned short type) +{ + struct btrfs_block_rsv *block_rsv; + + block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); + if (!block_rsv) + return NULL; + + btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); + return block_rsv; +} + +void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv) +{ + if (!rsv) + return; + btrfs_block_rsv_release(fs_info, rsv, (u64)-1); + kfree(rsv); +} + +int btrfs_block_rsv_add(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, u64 num_bytes, + enum btrfs_reserve_flush_enum flush) +{ + int ret; + + if (num_bytes == 0) + return 0; + + ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush); + if (!ret) + btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true); + + return ret; +} + +int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) +{ + u64 num_bytes = 0; + int ret = -ENOSPC; + + if (!block_rsv) + return 0; + + spin_lock(&block_rsv->lock); + num_bytes = div_factor(block_rsv->size, min_factor); + if (block_rsv->reserved >= num_bytes) + ret = 0; + spin_unlock(&block_rsv->lock); + + return ret; +} + +int btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, u64 min_reserved, + enum btrfs_reserve_flush_enum flush) +{ + u64 num_bytes = 0; + int ret = -ENOSPC; + + if (!block_rsv) + return 0; + + spin_lock(&block_rsv->lock); + num_bytes = min_reserved; + if (block_rsv->reserved >= num_bytes) + ret = 0; + else + num_bytes -= block_rsv->reserved; + spin_unlock(&block_rsv->lock); + + if (!ret) + return 0; + + ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush); + if (!ret) { + btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false); + return 0; + } + + return ret; +} + +u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes, u64 *qgroup_to_release) +{ + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *target = NULL; + + /* + * If we are the delayed_rsv then push to the global rsv, otherwise dump + * into the delayed rsv if it is not full. + */ + if (block_rsv == delayed_rsv) + target = global_rsv; + else if (block_rsv != global_rsv && !delayed_rsv->full) + target = delayed_rsv; + + if (target && block_rsv->space_info != target->space_info) + target = NULL; + + return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes, + qgroup_to_release); +} + +int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes) +{ + int ret = -ENOSPC; + + spin_lock(&block_rsv->lock); + if (block_rsv->reserved >= num_bytes) { + block_rsv->reserved -= num_bytes; + if (block_rsv->reserved < block_rsv->size) + block_rsv->full = 0; + ret = 0; + } + spin_unlock(&block_rsv->lock); + return ret; +} + +void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, + u64 num_bytes, bool update_size) +{ + spin_lock(&block_rsv->lock); + block_rsv->reserved += num_bytes; + if (update_size) + block_rsv->size += num_bytes; + else if (block_rsv->reserved >= block_rsv->size) + block_rsv->full = 1; + spin_unlock(&block_rsv->lock); +} + +int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *dest, u64 num_bytes, + int min_factor) +{ + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + u64 min_bytes; + + if (global_rsv->space_info != dest->space_info) + return -ENOSPC; + + spin_lock(&global_rsv->lock); + min_bytes = div_factor(global_rsv->size, min_factor); + if (global_rsv->reserved < min_bytes + num_bytes) { + spin_unlock(&global_rsv->lock); + return -ENOSPC; + } + global_rsv->reserved -= num_bytes; + if (global_rsv->reserved < global_rsv->size) + global_rsv->full = 0; + spin_unlock(&global_rsv->lock); + + btrfs_block_rsv_add_bytes(dest, num_bytes, true); + return 0; +} + +void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; + struct btrfs_space_info *sinfo = block_rsv->space_info; + u64 num_bytes; + + /* + * The global block rsv is based on the size of the extent tree, the + * checksum tree and the root tree. If the fs is empty we want to set + * it to a minimal amount for safety. + */ + num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + + btrfs_root_used(&fs_info->csum_root->root_item) + + btrfs_root_used(&fs_info->tree_root->root_item); + num_bytes = max_t(u64, num_bytes, SZ_16M); + + spin_lock(&sinfo->lock); + spin_lock(&block_rsv->lock); + + block_rsv->size = min_t(u64, num_bytes, SZ_512M); + + if (block_rsv->reserved < block_rsv->size) { + num_bytes = btrfs_space_info_used(sinfo, true); + if (sinfo->total_bytes > num_bytes) { + num_bytes = sinfo->total_bytes - num_bytes; + num_bytes = min(num_bytes, + block_rsv->size - block_rsv->reserved); + block_rsv->reserved += num_bytes; + btrfs_space_info_update_bytes_may_use(fs_info, sinfo, + num_bytes); + trace_btrfs_space_reservation(fs_info, "space_info", + sinfo->flags, num_bytes, + 1); + } + } else if (block_rsv->reserved > block_rsv->size) { + num_bytes = block_rsv->reserved - block_rsv->size; + btrfs_space_info_update_bytes_may_use(fs_info, sinfo, + -num_bytes); + trace_btrfs_space_reservation(fs_info, "space_info", + sinfo->flags, num_bytes, 0); + block_rsv->reserved = block_rsv->size; + } + + if (block_rsv->reserved == block_rsv->size) + block_rsv->full = 1; + else + block_rsv->full = 0; + + spin_unlock(&block_rsv->lock); + spin_unlock(&sinfo->lock); +} + +void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + + space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); + fs_info->chunk_block_rsv.space_info = space_info; + + space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + fs_info->global_block_rsv.space_info = space_info; + fs_info->trans_block_rsv.space_info = space_info; + fs_info->empty_block_rsv.space_info = space_info; + fs_info->delayed_block_rsv.space_info = space_info; + fs_info->delayed_refs_rsv.space_info = space_info; + + fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv; + fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv; + fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; + fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; + if (fs_info->quota_root) + fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; + fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; + + btrfs_update_global_block_rsv(fs_info); +} + +void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info) +{ + btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1); + WARN_ON(fs_info->trans_block_rsv.size > 0); + WARN_ON(fs_info->trans_block_rsv.reserved > 0); + WARN_ON(fs_info->chunk_block_rsv.size > 0); + WARN_ON(fs_info->chunk_block_rsv.reserved > 0); + WARN_ON(fs_info->delayed_block_rsv.size > 0); + WARN_ON(fs_info->delayed_block_rsv.reserved > 0); + WARN_ON(fs_info->delayed_refs_rsv.reserved > 0); + WARN_ON(fs_info->delayed_refs_rsv.size > 0); +} + +static struct btrfs_block_rsv *get_block_rsv( + const struct btrfs_trans_handle *trans, + const struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *block_rsv = NULL; + + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || + (root == fs_info->csum_root && trans->adding_csums) || + (root == fs_info->uuid_root)) + block_rsv = trans->block_rsv; + + if (!block_rsv) + block_rsv = root->block_rsv; + + if (!block_rsv) + block_rsv = &fs_info->empty_block_rsv; + + return block_rsv; +} + +struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *block_rsv; + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + int ret; + bool global_updated = false; + + block_rsv = get_block_rsv(trans, root); + + if (unlikely(block_rsv->size == 0)) + goto try_reserve; +again: + ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize); + if (!ret) + return block_rsv; + + if (block_rsv->failfast) + return ERR_PTR(ret); + + if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { + global_updated = true; + btrfs_update_global_block_rsv(fs_info); + goto again; + } + + /* + * The global reserve still exists to save us from ourselves, so don't + * warn_on if we are short on our delayed refs reserve. + */ + if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS && + btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL * 10, + /*DEFAULT_RATELIMIT_BURST*/ 1); + if (__ratelimit(&_rs)) + WARN(1, KERN_DEBUG + "BTRFS: block rsv returned %d\n", ret); + } +try_reserve: + ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize, + BTRFS_RESERVE_NO_FLUSH); + if (!ret) + return block_rsv; + /* + * If we couldn't reserve metadata bytes try and use some from + * the global reserve if its space type is the same as the global + * reservation. + */ + if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && + block_rsv->space_info == global_rsv->space_info) { + ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize); + if (!ret) + return global_rsv; + } + return ERR_PTR(ret); +} diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h new file mode 100644 index 000000000000..d1428bb73fc5 --- /dev/null +++ b/fs/btrfs/block-rsv.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_BLOCK_RSV_H +#define BTRFS_BLOCK_RSV_H + +struct btrfs_trans_handle; +enum btrfs_reserve_flush_enum; + +/* + * Types of block reserves + */ +enum { + BTRFS_BLOCK_RSV_GLOBAL, + BTRFS_BLOCK_RSV_DELALLOC, + BTRFS_BLOCK_RSV_TRANS, + BTRFS_BLOCK_RSV_CHUNK, + BTRFS_BLOCK_RSV_DELOPS, + BTRFS_BLOCK_RSV_DELREFS, + BTRFS_BLOCK_RSV_EMPTY, + BTRFS_BLOCK_RSV_TEMP, +}; + +struct btrfs_block_rsv { + u64 size; + u64 reserved; + struct btrfs_space_info *space_info; + spinlock_t lock; + unsigned short full; + unsigned short type; + unsigned short failfast; + + /* + * Qgroup equivalent for @size @reserved + * + * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care + * about things like csum size nor how many tree blocks it will need to + * reserve. + * + * Qgroup cares more about net change of the extent usage. + * + * So for one newly inserted file extent, in worst case it will cause + * leaf split and level increase, nodesize for each file extent is + * already too much. + * + * In short, qgroup_size/reserved is the upper limit of possible needed + * qgroup metadata reservation. + */ + u64 qgroup_rsv_size; + u64 qgroup_rsv_reserved; +}; + +void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); +struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, + unsigned short type); +void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv, + unsigned short type); +void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv); +int btrfs_block_rsv_add(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, u64 num_bytes, + enum btrfs_reserve_flush_enum flush); +int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor); +int btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, u64 min_reserved, + enum btrfs_reserve_flush_enum flush); +int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, + struct btrfs_block_rsv *dst_rsv, u64 num_bytes, + bool update_size); +int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); +int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *dest, u64 num_bytes, + int min_factor); +void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, + u64 num_bytes, bool update_size); +u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes, u64 *qgroup_to_release); +void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info); +void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info); +void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info); +struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u32 blocksize); + +static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes) +{ + __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); +} + +static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *block_rsv, + u32 blocksize) +{ + btrfs_block_rsv_add_bytes(block_rsv, blocksize, false); + btrfs_block_rsv_release(fs_info, block_rsv, 0); +} + +#endif /* BTRFS_BLOCK_RSV_H */ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d5b438706b77..f853835c409c 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -337,22 +337,34 @@ static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); } +/* Array of bytes with variable length, hexadecimal format 0x1234 */ +#define CSUM_FMT "0x%*phN" +#define CSUM_FMT_VALUE(size, bytes) size, bytes + static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, - u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) + u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num) { struct btrfs_root *root = inode->root; + struct btrfs_super_block *sb = root->fs_info->super_copy; + const u16 csum_size = btrfs_super_csum_size(sb); /* Output minus objectid, which is more meaningful */ if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) btrfs_warn_rl(root->fs_info, - "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", +"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", root->root_key.objectid, btrfs_ino(inode), - logical_start, csum, csum_expected, mirror_num); + logical_start, + CSUM_FMT_VALUE(csum_size, csum), + CSUM_FMT_VALUE(csum_size, csum_expected), + mirror_num); else btrfs_warn_rl(root->fs_info, - "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", +"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d", root->root_key.objectid, btrfs_ino(inode), - logical_start, csum, csum_expected, mirror_num); + logical_start, + CSUM_FMT_VALUE(csum_size, csum), + CSUM_FMT_VALUE(csum_size, csum_expected), + mirror_num); } #endif diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index b0c8094528d1..81a9731959a9 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -83,7 +83,7 @@ #include <linux/blkdev.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/crc32c.h> +#include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1710,9 +1710,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages) { struct btrfs_fs_info *fs_info = state->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct btrfs_header *h; u8 csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; unsigned int i; if (num_pages * PAGE_SIZE < state->metablock_size) @@ -1723,14 +1723,17 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)) return 1; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + for (i = 0; i < num_pages; i++) { u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); size_t sublen = i ? PAGE_SIZE : (PAGE_SIZE - BTRFS_CSUM_SIZE); - crc = crc32c(crc, data, sublen); + crypto_shash_update(shash, data, sublen); } - btrfs_csum_final(crc, csum); + crypto_shash_final(shash, csum); if (memcmp(csum, h->csum, state->csum_size)) return 1; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index daf7908d1e35..60c47b417a4b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -17,6 +17,7 @@ #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/log2.h> +#include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -42,6 +43,22 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type) return NULL; } +bool btrfs_compress_is_valid_type(const char *str, size_t len) +{ + int i; + + for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) { + size_t comp_len = strlen(btrfs_compress_types[i]); + + if (len < comp_len) + continue; + + if (!strncmp(btrfs_compress_types[i], str, comp_len)) + return true; + } + return false; +} + static int btrfs_decompress_bio(struct compressed_bio *cb); static inline int compressed_bio_size(struct btrfs_fs_info *fs_info, @@ -57,32 +74,37 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct compressed_bio *cb, u64 disk_start) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int ret; struct page *page; unsigned long i; char *kaddr; - u32 csum; - u32 *cb_sum = &cb->sums; + u8 csum[BTRFS_CSUM_SIZE]; + u8 *cb_sum = cb->sums; if (inode->flags & BTRFS_INODE_NODATASUM) return 0; + shash->tfm = fs_info->csum_shash; + for (i = 0; i < cb->nr_pages; i++) { page = cb->compressed_pages[i]; - csum = ~(u32)0; + crypto_shash_init(shash); kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE); - btrfs_csum_final(csum, (u8 *)&csum); + crypto_shash_update(shash, kaddr, PAGE_SIZE); kunmap_atomic(kaddr); + crypto_shash_final(shash, (u8 *)&csum); - if (csum != *cb_sum) { - btrfs_print_data_csum_error(inode, disk_start, csum, - *cb_sum, cb->mirror_num); + if (memcmp(&csum, cb_sum, csum_size)) { + btrfs_print_data_csum_error(inode, disk_start, + csum, cb_sum, cb->mirror_num); ret = -EIO; goto fail; } - cb_sum++; + cb_sum += csum_size; } ret = 0; @@ -318,7 +340,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = fs_info->fs_devices->latest_bdev; - bio = btrfs_bio_alloc(bdev, first_byte); + bio = btrfs_bio_alloc(first_byte); + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; @@ -360,7 +383,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } - bio = btrfs_bio_alloc(bdev, first_byte); + bio = btrfs_bio_alloc(first_byte); + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; @@ -536,7 +560,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, struct extent_map *em; blk_status_t ret = BLK_STS_RESOURCE; int faili = 0; - u32 *sums; + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + u8 *sums; em_tree = &BTRFS_I(inode)->extent_tree; @@ -558,7 +583,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->errors = 0; cb->inode = inode; cb->mirror_num = mirror_num; - sums = &cb->sums; + sums = cb->sums; cb->start = em->orig_start; em_len = em->len; @@ -597,7 +622,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; - comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); + comp_bio = btrfs_bio_alloc(cur_disk_byte); + bio_set_dev(comp_bio, bdev); comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -617,6 +643,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page->mapping = NULL; if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + unsigned int nr_sectors; + ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -634,8 +662,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, sums); BUG_ON(ret); /* -ENOMEM */ } - sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size, - fs_info->sectorsize); + + nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size, + fs_info->sectorsize); + sums += csum_size * nr_sectors; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); if (ret) { @@ -643,7 +673,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } - comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); + comp_bio = btrfs_bio_alloc(cur_disk_byte); + bio_set_dev(comp_bio, bdev); comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -1008,6 +1039,7 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, struct list_head *workspace; int ret; + level = btrfs_compress_op[type]->set_level(level); workspace = get_workspace(type, level); ret = btrfs_compress_op[type]->compress_pages(workspace, mapping, start, pages, diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 9976fe0f7526..2035b8eb1290 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -61,7 +61,7 @@ struct compressed_bio { * the start of a variable length array of checksums only * used by reads */ - u32 sums; + u8 sums[]; }; static inline unsigned int btrfs_compress_type(unsigned int type_level) @@ -173,6 +173,7 @@ extern const struct btrfs_compress_op btrfs_lzo_compress; extern const struct btrfs_compress_op btrfs_zstd_compress; const char* btrfs_compress_type2str(enum btrfs_compression_type type); +bool btrfs_compress_is_valid_type(const char *str, size_t len); int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0a61dff27f57..299e11e6c554 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #include <linux/kobject.h> #include <trace/events/btrfs.h> #include <asm/kmap_types.h> +#include <asm/unaligned.h> #include <linux/pagemap.h> #include <linux/btrfs.h> #include <linux/btrfs_tree.h> @@ -31,11 +32,13 @@ #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" +#include "block-rsv.h" struct btrfs_trans_handle; struct btrfs_transaction; struct btrfs_pending_snapshot; struct btrfs_delayed_ref_root; +struct btrfs_space_info; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; @@ -45,7 +48,16 @@ struct btrfs_ref; #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ -#define BTRFS_MAX_MIRRORS 3 +/* + * Maximum number of mirrors that can be available for all profiles counting + * the target device of dev-replace as one. During an active device replace + * procedure, the target device of the copy operation is a mirror for the + * filesystem data as well that can be used to read data in order to repair + * read errors on other disks. + * + * Current value is derived from RAID1 with 2 copies. + */ +#define BTRFS_MAX_MIRRORS (2 + 1) #define BTRFS_MAX_LEVEL 8 @@ -72,6 +84,7 @@ struct btrfs_ref; /* four bytes for CRC32 */ static const int btrfs_csum_sizes[] = { 4 }; +static const char *btrfs_csum_names[] = { "crc32c" }; #define BTRFS_EMPTY_DIR_SIZE 0 @@ -99,10 +112,6 @@ static inline u32 count_max_extents(u64 size) return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); } -struct btrfs_mapping_tree { - struct extent_map_tree map_tree; -}; - static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -395,115 +404,6 @@ struct raid_kobject { struct list_head list; }; -struct btrfs_space_info { - spinlock_t lock; - - u64 total_bytes; /* total bytes in the space, - this doesn't take mirrors into account */ - u64 bytes_used; /* total bytes used, - this doesn't take mirrors into account */ - u64 bytes_pinned; /* total bytes pinned, will be freed when the - transaction finishes */ - u64 bytes_reserved; /* total bytes the allocator has reserved for - current allocations */ - u64 bytes_may_use; /* number of bytes that may be used for - delalloc/allocations */ - u64 bytes_readonly; /* total bytes that are read only */ - - u64 max_extent_size; /* This will hold the maximum extent size of - the space info if we had an ENOSPC in the - allocator. */ - - unsigned int full:1; /* indicates that we cannot allocate any more - chunks for this space */ - unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ - - unsigned int flush:1; /* set if we are trying to make space */ - - unsigned int force_alloc; /* set if we need to force a chunk - alloc for this space */ - - u64 disk_used; /* total bytes used on disk */ - u64 disk_total; /* total bytes on disk, takes mirrors into - account */ - - u64 flags; - - /* - * bytes_pinned is kept in line with what is actually pinned, as in - * we've called update_block_group and dropped the bytes_used counter - * and increased the bytes_pinned counter. However this means that - * bytes_pinned does not reflect the bytes that will be pinned once the - * delayed refs are flushed, so this counter is inc'ed every time we - * call btrfs_free_extent so it is a realtime count of what will be - * freed once the transaction is committed. It will be zeroed every - * time the transaction commits. - */ - struct percpu_counter total_bytes_pinned; - - struct list_head list; - /* Protected by the spinlock 'lock'. */ - struct list_head ro_bgs; - struct list_head priority_tickets; - struct list_head tickets; - /* - * tickets_id just indicates the next ticket will be handled, so note - * it's not stored per ticket. - */ - u64 tickets_id; - - struct rw_semaphore groups_sem; - /* for block groups in our same type */ - struct list_head block_groups[BTRFS_NR_RAID_TYPES]; - wait_queue_head_t wait; - - struct kobject kobj; - struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES]; -}; - -/* - * Types of block reserves - */ -enum { - BTRFS_BLOCK_RSV_GLOBAL, - BTRFS_BLOCK_RSV_DELALLOC, - BTRFS_BLOCK_RSV_TRANS, - BTRFS_BLOCK_RSV_CHUNK, - BTRFS_BLOCK_RSV_DELOPS, - BTRFS_BLOCK_RSV_DELREFS, - BTRFS_BLOCK_RSV_EMPTY, - BTRFS_BLOCK_RSV_TEMP, -}; - -struct btrfs_block_rsv { - u64 size; - u64 reserved; - struct btrfs_space_info *space_info; - spinlock_t lock; - unsigned short full; - unsigned short type; - unsigned short failfast; - - /* - * Qgroup equivalent for @size @reserved - * - * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care - * about things like csum size nor how many tree blocks it will need to - * reserve. - * - * Qgroup cares more about net change of the extent usage. - * - * So for one newly inserted file extent, in worst case it will cause - * leaf split and level increase, nodesize for each file extent is - * already too much. - * - * In short, qgroup_size/reserved is the upper limit of possible needed - * qgroup metadata reservation. - */ - u64 qgroup_rsv_size; - u64 qgroup_rsv_reserved; -}; - /* * free clusters are used to claim free space in relatively large chunks, * allowing us to do less seeky writes. They are used for all metadata @@ -786,11 +686,18 @@ enum { /* * Indicate that balance has been set up from the ioctl and is in the * main phase. The fs_info::balance_ctl is initialized. + * Set and cleared while holding fs_info::balance_mutex. */ BTRFS_FS_BALANCE_RUNNING, /* Indicate that the cleaner thread is awake and doing something. */ BTRFS_FS_CLEANER_RUNNING, + + /* + * The checksumming has an optimized version and is considered fast, + * so we don't need to offload checksums to workqueues. + */ + BTRFS_FS_CSUM_IMPL_FAST, }; struct btrfs_fs_info { @@ -824,7 +731,7 @@ struct btrfs_fs_info { struct extent_io_tree *pinned_extents; /* logical->physical extent mapping */ - struct btrfs_mapping_tree mapping_tree; + struct extent_map_tree mapping_tree; /* * block reservation for extent, checksum, root tree and @@ -1160,6 +1067,14 @@ struct btrfs_fs_info { spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; + struct crypto_shash *csum_shash; + + /* + * Number of send operations in progress. + * Updated while holding fs_info::balance_mutex. + */ + int send_in_progress; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; @@ -2451,6 +2366,11 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) return btrfs_csum_sizes[t]; } +static inline const char *btrfs_super_csum_name(u16 csum_type) +{ + /* csum type is validated at mount time */ + return btrfs_csum_names[csum_type]; +} /* * The leaf data grows from end-to-front in the node. @@ -2642,6 +2562,16 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \ btrfs_item_offset_nr(leaf, slot))) +static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length) +{ + return crc32c(crc, address, length); +} + +static inline void btrfs_crc32c_final(u32 crc, u8 *result) +{ + put_unaligned_le32(~crc, result); +} + static inline u64 btrfs_name_hash(const char *name, int len) { return crc32c((u32)~1, name, len); @@ -2656,12 +2586,6 @@ static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, return (u64) crc32c(parent_objectid, name, len); } -static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) -{ - return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && - (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); -} - static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) { return mapping_gfp_constraint(mapping, ~__GFP_FS); @@ -2698,8 +2622,6 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info, return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; } -int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans); -bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, const u64 start); void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg); @@ -2814,17 +2736,28 @@ enum btrfs_flush_state { COMMIT_TRANS = 9, }; -int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); -int btrfs_check_data_free_space(struct inode *inode, - struct extent_changeset **reserved, u64 start, u64 len); -void btrfs_free_reserved_data_space(struct inode *inode, - struct extent_changeset *reserved, u64 start, u64 len); -void btrfs_delalloc_release_space(struct inode *inode, - struct extent_changeset *reserved, - u64 start, u64 len, bool qgroup_free); -void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, - u64 len); -void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); +/* + * control flags for do_chunk_alloc's force field + * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk + * if we really need one. + * + * CHUNK_ALLOC_LIMITED means to only try and allocate one + * if we have very few chunks already allocated. This is + * used as part of the clustering code to help make sure + * we have a good pool of storage to cluster in, without + * filling the FS with empty chunks + * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * + */ +enum btrfs_chunk_alloc_enum { + CHUNK_ALLOC_NO_FORCE, + CHUNK_ALLOC_LIMITED, + CHUNK_ALLOC_FORCE, +}; + +int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, + enum btrfs_chunk_alloc_enum force); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); @@ -2834,41 +2767,6 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, bool qgroup_free); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); -void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free); -int btrfs_delalloc_reserve_space(struct inode *inode, - struct extent_changeset **reserved, u64 start, u64 len); -void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); -struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, - unsigned short type); -void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv, - unsigned short type); -void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv); -int btrfs_block_rsv_add(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, u64 num_bytes, - enum btrfs_reserve_flush_enum flush); -int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor); -int btrfs_block_rsv_refill(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, u64 min_reserved, - enum btrfs_reserve_flush_enum flush); -int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, - struct btrfs_block_rsv *dst_rsv, u64 num_bytes, - bool update_size); -int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *dest, u64 num_bytes, - int min_factor); -void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, - u64 num_bytes); -void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr); -void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); -int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, - enum btrfs_reserve_flush_enum flush); -void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, - u64 num_bytes); int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache); void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); @@ -3186,7 +3084,8 @@ int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot, struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); -blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, + u8 *dst); blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 logical_offset); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, @@ -3514,8 +3413,7 @@ __cold static inline void assfail(const char *expr, const char *file, int line) { if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) { - pr_err("assertion failed: %s, file: %s, line: %d\n", - expr, file, line); + pr_err("assertion failed: %s, in %s:%d\n", expr, file, line); BUG(); } } @@ -3599,10 +3497,11 @@ do { \ /* compatibility and incompatibility defines */ #define btrfs_set_fs_incompat(__fs_info, opt) \ - __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ + #opt) static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, - u64 flag) + u64 flag, const char* name) { struct btrfs_super_block *disk_super; u64 features; @@ -3615,18 +3514,20 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, if (!(features & flag)) { features |= flag; btrfs_set_super_incompat_flags(disk_super, features); - btrfs_info(fs_info, "setting %llu feature flag", - flag); + btrfs_info(fs_info, + "setting incompat feature flag for %s (0x%llx)", + name, flag); } spin_unlock(&fs_info->super_lock); } } #define btrfs_clear_fs_incompat(__fs_info, opt) \ - __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ + #opt) static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, - u64 flag) + u64 flag, const char* name) { struct btrfs_super_block *disk_super; u64 features; @@ -3639,8 +3540,9 @@ static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, if (features & flag) { features &= ~flag; btrfs_set_super_incompat_flags(disk_super, features); - btrfs_info(fs_info, "clearing %llu feature flag", - flag); + btrfs_info(fs_info, + "clearing incompat feature flag for %s (0x%llx)", + name, flag); } spin_unlock(&fs_info->super_lock); } @@ -3657,10 +3559,11 @@ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) } #define btrfs_set_fs_compat_ro(__fs_info, opt) \ - __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ + #opt) static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, - u64 flag) + u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; @@ -3673,18 +3576,20 @@ static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, if (!(features & flag)) { features |= flag; btrfs_set_super_compat_ro_flags(disk_super, features); - btrfs_info(fs_info, "setting %llu ro feature flag", - flag); + btrfs_info(fs_info, + "setting compat-ro feature flag for %s (0x%llx)", + name, flag); } spin_unlock(&fs_info->super_lock); } } #define btrfs_clear_fs_compat_ro(__fs_info, opt) \ - __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ + #opt) static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, - u64 flag) + u64 flag, const char *name) { struct btrfs_super_block *disk_super; u64 features; @@ -3697,8 +3602,9 @@ static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, if (features & flag) { features &= ~flag; btrfs_set_super_compat_ro_flags(disk_super, features); - btrfs_info(fs_info, "clearing %llu ro feature flag", - flag); + btrfs_info(fs_info, + "clearing compat-ro feature flag for %s (0x%llx)", + name, flag); } spin_unlock(&fs_info->super_lock); } diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c new file mode 100644 index 000000000000..17f7c0d38768 --- /dev/null +++ b/fs/btrfs/delalloc-space.c @@ -0,0 +1,494 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "ctree.h" +#include "delalloc-space.h" +#include "block-rsv.h" +#include "btrfs_inode.h" +#include "space-info.h" +#include "transaction.h" +#include "qgroup.h" + +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) +{ + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; + u64 used; + int ret = 0; + int need_commit = 2; + int have_pinned_space; + + /* Make sure bytes are sectorsize aligned */ + bytes = ALIGN(bytes, fs_info->sectorsize); + + if (btrfs_is_free_space_inode(inode)) { + need_commit = 0; + ASSERT(current->journal_info); + } + +again: + /* Make sure we have enough space to handle the data first */ + spin_lock(&data_sinfo->lock); + used = btrfs_space_info_used(data_sinfo, true); + + if (used + bytes > data_sinfo->total_bytes) { + struct btrfs_trans_handle *trans; + + /* + * If we don't have enough free bytes in this space then we need + * to alloc a new chunk. + */ + if (!data_sinfo->full) { + u64 alloc_target; + + data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; + spin_unlock(&data_sinfo->lock); + + alloc_target = btrfs_data_alloc_profile(fs_info); + /* + * It is ugly that we don't call nolock join + * transaction for the free space inode case here. + * But it is safe because we only do the data space + * reservation for the free space cache in the + * transaction context, the common join transaction + * just increase the counter of the current transaction + * handler, doesn't try to acquire the trans_lock of + * the fs. + */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + ret = btrfs_chunk_alloc(trans, alloc_target, + CHUNK_ALLOC_NO_FORCE); + btrfs_end_transaction(trans); + if (ret < 0) { + if (ret != -ENOSPC) + return ret; + else { + have_pinned_space = 1; + goto commit_trans; + } + } + + goto again; + } + + /* + * If we don't have enough pinned space to deal with this + * allocation, and no removed chunk in current transaction, + * don't bother committing the transaction. + */ + have_pinned_space = __percpu_counter_compare( + &data_sinfo->total_bytes_pinned, + used + bytes - data_sinfo->total_bytes, + BTRFS_TOTAL_BYTES_PINNED_BATCH); + spin_unlock(&data_sinfo->lock); + + /* Commit the current transaction and try again */ +commit_trans: + if (need_commit) { + need_commit--; + + if (need_commit > 0) { + btrfs_start_delalloc_roots(fs_info, -1); + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, + (u64)-1); + } + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + if (have_pinned_space >= 0 || + test_bit(BTRFS_TRANS_HAVE_FREE_BGS, + &trans->transaction->flags) || + need_commit > 0) { + ret = btrfs_commit_transaction(trans); + if (ret) + return ret; + /* + * The cleaner kthread might still be doing iput + * operations. Wait for it to finish so that + * more space is released. We don't need to + * explicitly run the delayed iputs here because + * the commit_transaction would have woken up + * the cleaner. + */ + ret = btrfs_wait_on_delayed_iputs(fs_info); + if (ret) + return ret; + goto again; + } else { + btrfs_end_transaction(trans); + } + } + + trace_btrfs_space_reservation(fs_info, + "space_info:enospc", + data_sinfo->flags, bytes, 1); + return -ENOSPC; + } + btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, bytes); + trace_btrfs_space_reservation(fs_info, "space_info", + data_sinfo->flags, bytes, 1); + spin_unlock(&data_sinfo->lock); + + return 0; +} + +int btrfs_check_data_free_space(struct inode *inode, + struct extent_changeset **reserved, u64 start, u64 len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + int ret; + + /* align the range */ + len = round_up(start + len, fs_info->sectorsize) - + round_down(start, fs_info->sectorsize); + start = round_down(start, fs_info->sectorsize); + + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); + if (ret < 0) + return ret; + + /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ + ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); + if (ret < 0) + btrfs_free_reserved_data_space_noquota(inode, start, len); + else + ret = 0; + return ret; +} + +/* + * Called if we need to clear a data reservation for this inode + * Normally in a error case. + * + * This one will *NOT* use accurate qgroup reserved space API, just for case + * which we can't sleep and is sure it won't affect qgroup reserved space. + * Like clear_bit_hook(). + */ +void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, + u64 len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_space_info *data_sinfo; + + /* Make sure the range is aligned to sectorsize */ + len = round_up(start + len, fs_info->sectorsize) - + round_down(start, fs_info->sectorsize); + start = round_down(start, fs_info->sectorsize); + + data_sinfo = fs_info->data_sinfo; + spin_lock(&data_sinfo->lock); + btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, -len); + trace_btrfs_space_reservation(fs_info, "space_info", + data_sinfo->flags, len, 0); + spin_unlock(&data_sinfo->lock); +} + +/* + * Called if we need to clear a data reservation for this inode + * Normally in a error case. + * + * This one will handle the per-inode data rsv map for accurate reserved + * space framework. + */ +void btrfs_free_reserved_data_space(struct inode *inode, + struct extent_changeset *reserved, u64 start, u64 len) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + /* Make sure the range is aligned to sectorsize */ + len = round_up(start + len, root->fs_info->sectorsize) - + round_down(start, root->fs_info->sectorsize); + start = round_down(start, root->fs_info->sectorsize); + + btrfs_free_reserved_data_space_noquota(inode, start, len); + btrfs_qgroup_free_data(inode, reserved, start, len); +} + +/** + * btrfs_inode_rsv_release - release any excessive reservation. + * @inode - the inode we need to release from. + * @qgroup_free - free or convert qgroup meta. + * Unlike normal operation, qgroup meta reservation needs to know if we are + * freeing qgroup reservation or just converting it into per-trans. Normally + * @qgroup_free is true for error handling, and false for normal release. + * + * This is the same as btrfs_block_rsv_release, except that it handles the + * tracepoint for the reservation. + */ +static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 released = 0; + u64 qgroup_to_release = 0; + + /* + * Since we statically set the block_rsv->size we just want to say we + * are releasing 0 bytes, and then we'll just get the reservation over + * the size free'd. + */ + released = __btrfs_block_rsv_release(fs_info, block_rsv, 0, + &qgroup_to_release); + if (released > 0) + trace_btrfs_space_reservation(fs_info, "delalloc", + btrfs_ino(inode), released, 0); + if (qgroup_free) + btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); + else + btrfs_qgroup_convert_reserved_meta(inode->root, + qgroup_to_release); +} + +static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, + struct btrfs_inode *inode) +{ + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 reserve_size = 0; + u64 qgroup_rsv_size = 0; + u64 csum_leaves; + unsigned outstanding_extents; + + lockdep_assert_held(&inode->lock); + outstanding_extents = inode->outstanding_extents; + if (outstanding_extents) + reserve_size = btrfs_calc_trans_metadata_size(fs_info, + outstanding_extents + 1); + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, + inode->csum_bytes); + reserve_size += btrfs_calc_trans_metadata_size(fs_info, + csum_leaves); + /* + * For qgroup rsv, the calculation is very simple: + * account one nodesize for each outstanding extent + * + * This is overestimating in most cases. + */ + qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; + + spin_lock(&block_rsv->lock); + block_rsv->size = reserve_size; + block_rsv->qgroup_rsv_size = qgroup_rsv_size; + spin_unlock(&block_rsv->lock); +} + +static void calc_inode_reservations(struct btrfs_fs_info *fs_info, + u64 num_bytes, u64 *meta_reserve, + u64 *qgroup_reserve) +{ + u64 nr_extents = count_max_extents(num_bytes); + u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); + + /* We add one for the inode update at finish ordered time */ + *meta_reserve = btrfs_calc_trans_metadata_size(fs_info, + nr_extents + csum_leaves + 1); + *qgroup_reserve = nr_extents * fs_info->nodesize; +} + +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) +{ + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 meta_reserve, qgroup_reserve; + unsigned nr_extents; + enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; + int ret = 0; + bool delalloc_lock = true; + + /* + * If we are a free space inode we need to not flush since we will be in + * the middle of a transaction commit. We also don't need the delalloc + * mutex since we won't race with anybody. We need this mostly to make + * lockdep shut its filthy mouth. + * + * If we have a transaction open (can happen if we call truncate_block + * from truncate), then we need FLUSH_LIMIT so we don't deadlock. + */ + if (btrfs_is_free_space_inode(inode)) { + flush = BTRFS_RESERVE_NO_FLUSH; + delalloc_lock = false; + } else { + if (current->journal_info) + flush = BTRFS_RESERVE_FLUSH_LIMIT; + + if (btrfs_transaction_in_commit(fs_info)) + schedule_timeout(1); + } + + if (delalloc_lock) + mutex_lock(&inode->delalloc_mutex); + + num_bytes = ALIGN(num_bytes, fs_info->sectorsize); + + /* + * We always want to do it this way, every other way is wrong and ends + * in tears. Pre-reserving the amount we are going to add will always + * be the right way, because otherwise if we have enough parallelism we + * could end up with thousands of inodes all holding little bits of + * reservations they were able to make previously and the only way to + * reclaim that space is to ENOSPC out the operations and clear + * everything out and try again, which is bad. This way we just + * over-reserve slightly, and clean up the mess when we are done. + */ + calc_inode_reservations(fs_info, num_bytes, &meta_reserve, + &qgroup_reserve); + ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); + if (ret) + goto out_fail; + ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); + if (ret) + goto out_qgroup; + + /* + * Now we need to update our outstanding extents and csum bytes _first_ + * and then add the reservation to the block_rsv. This keeps us from + * racing with an ordered completion or some such that would think it + * needs to free the reservation we just made. + */ + spin_lock(&inode->lock); + nr_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, nr_extents); + inode->csum_bytes += num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); + spin_unlock(&inode->lock); + + /* Now we can safely add our space to our block rsv */ + btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false); + trace_btrfs_space_reservation(root->fs_info, "delalloc", + btrfs_ino(inode), meta_reserve, 1); + + spin_lock(&block_rsv->lock); + block_rsv->qgroup_rsv_reserved += qgroup_reserve; + spin_unlock(&block_rsv->lock); + + if (delalloc_lock) + mutex_unlock(&inode->delalloc_mutex); + return 0; +out_qgroup: + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); +out_fail: + btrfs_inode_rsv_release(inode, true); + if (delalloc_lock) + mutex_unlock(&inode->delalloc_mutex); + return ret; +} + +/** + * btrfs_delalloc_release_metadata - release a metadata reservation for an inode + * @inode: the inode to release the reservation for. + * @num_bytes: the number of bytes we are releasing. + * @qgroup_free: free qgroup reservation or convert it to per-trans reservation + * + * This will release the metadata reservation for an inode. This can be called + * once we complete IO for a given set of bytes to release their metadata + * reservations, or on error for the same reason. + */ +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, + bool qgroup_free) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + num_bytes = ALIGN(num_bytes, fs_info->sectorsize); + spin_lock(&inode->lock); + inode->csum_bytes -= num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); + spin_unlock(&inode->lock); + + if (btrfs_is_testing(fs_info)) + return; + + btrfs_inode_rsv_release(inode, qgroup_free); +} + +/** + * btrfs_delalloc_release_extents - release our outstanding_extents + * @inode: the inode to balance the reservation for. + * @num_bytes: the number of bytes we originally reserved with + * @qgroup_free: do we need to free qgroup meta reservation or convert them. + * + * When we reserve space we increase outstanding_extents for the extents we may + * add. Once we've set the range as delalloc or created our ordered extents we + * have outstanding_extents to track the real usage, so we use this to free our + * temporarily tracked outstanding_extents. This _must_ be used in conjunction + * with btrfs_delalloc_reserve_metadata. + */ +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, + bool qgroup_free) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + unsigned num_extents; + + spin_lock(&inode->lock); + num_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, -num_extents); + btrfs_calculate_inode_block_rsv_size(fs_info, inode); + spin_unlock(&inode->lock); + + if (btrfs_is_testing(fs_info)) + return; + + btrfs_inode_rsv_release(inode, qgroup_free); +} + +/** + * btrfs_delalloc_reserve_space - reserve data and metadata space for + * delalloc + * @inode: inode we're writing to + * @start: start range we are writing to + * @len: how long the range we are writing to + * @reserved: mandatory parameter, record actually reserved qgroup ranges of + * current reservation. + * + * This will do the following things + * + * - reserve space in data space info for num bytes + * and reserve precious corresponding qgroup space + * (Done in check_data_free_space) + * + * - reserve space for metadata space, based on the number of outstanding + * extents and how much csums will be needed + * also reserve metadata space in a per root over-reserve method. + * - add to the inodes->delalloc_bytes + * - add it to the fs_info's delalloc inodes list. + * (Above 3 all done in delalloc_reserve_metadata) + * + * Return 0 for success + * Return <0 for error(-ENOSPC or -EQUOT) + */ +int btrfs_delalloc_reserve_space(struct inode *inode, + struct extent_changeset **reserved, u64 start, u64 len) +{ + int ret; + + ret = btrfs_check_data_free_space(inode, reserved, start, len); + if (ret < 0) + return ret; + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); + if (ret < 0) + btrfs_free_reserved_data_space(inode, *reserved, start, len); + return ret; +} + +/** + * btrfs_delalloc_release_space - release data and metadata space for delalloc + * @inode: inode we're releasing space for + * @start: start position of the space already reserved + * @len: the len of the space already reserved + * @release_bytes: the len of the space we consumed or didn't use + * + * This function will release the metadata space that was not used and will + * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes + * list if there are no delalloc bytes left. + * Also it will handle the qgroup reserved space. + */ +void btrfs_delalloc_release_space(struct inode *inode, + struct extent_changeset *reserved, + u64 start, u64 len, bool qgroup_free) +{ + btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free); + btrfs_free_reserved_data_space(inode, reserved, start, len); +} diff --git a/fs/btrfs/delalloc-space.h b/fs/btrfs/delalloc-space.h new file mode 100644 index 000000000000..54466fbd7075 --- /dev/null +++ b/fs/btrfs/delalloc-space.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_DELALLOC_SPACE_H +#define BTRFS_DELALLOC_SPACE_H + +struct extent_changeset; + +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); +int btrfs_check_data_free_space(struct inode *inode, + struct extent_changeset **reserved, u64 start, u64 len); +void btrfs_free_reserved_data_space(struct inode *inode, + struct extent_changeset *reserved, u64 start, u64 len); +void btrfs_delalloc_release_space(struct inode *inode, + struct extent_changeset *reserved, + u64 start, u64 len, bool qgroup_free); +void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, + u64 len); +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, + bool qgroup_free); +int btrfs_delalloc_reserve_space(struct inode *inode, + struct extent_changeset **reserved, u64 start, u64 len); + +#endif /* BTRFS_DELALLOC_SPACE_H */ diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index a73fc23e2961..9a91d1eb0af4 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -10,6 +10,7 @@ #include "delayed-ref.h" #include "transaction.h" #include "qgroup.h" +#include "space-info.h" struct kmem_cache *btrfs_delayed_ref_head_cachep; struct kmem_cache *btrfs_delayed_tree_ref_cachep; @@ -24,6 +25,179 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep; * of hammering updates on the extent allocation tree. */ +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + bool ret = false; + u64 reserved; + + spin_lock(&global_rsv->lock); + reserved = global_rsv->reserved; + spin_unlock(&global_rsv->lock); + + /* + * Since the global reserve is just kind of magic we don't really want + * to rely on it to save our bacon, so if our size is more than the + * delayed_refs_rsv and the global rsv then it's time to think about + * bailing. + */ + spin_lock(&delayed_refs_rsv->lock); + reserved += delayed_refs_rsv->reserved; + if (delayed_refs_rsv->size >= reserved) + ret = true; + spin_unlock(&delayed_refs_rsv->lock); + return ret; +} + +int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) +{ + u64 num_entries = + atomic_read(&trans->transaction->delayed_refs.num_entries); + u64 avg_runtime; + u64 val; + + smp_mb(); + avg_runtime = trans->fs_info->avg_delayed_ref_runtime; + val = num_entries * avg_runtime; + if (val >= NSEC_PER_SEC) + return 1; + if (val >= NSEC_PER_SEC / 2) + return 2; + + return btrfs_check_space_for_delayed_refs(trans->fs_info); +} + +/** + * btrfs_delayed_refs_rsv_release - release a ref head's reservation. + * @fs_info - the fs_info for our fs. + * @nr - the number of items to drop. + * + * This drops the delayed ref head's count from the delayed refs rsv and frees + * any excess reservation we had. + */ +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) +{ + struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; + u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr); + u64 released = 0; + + released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, + NULL); + if (released) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", + 0, released, 0); +} + +/* + * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv + * @trans - the trans that may have generated delayed refs + * + * This is to be called anytime we may have adjusted trans->delayed_ref_updates, + * it'll calculate the additional size and add it to the delayed_refs_rsv. + */ +void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + u64 num_bytes; + + if (!trans->delayed_ref_updates) + return; + + num_bytes = btrfs_calc_trans_metadata_size(fs_info, + trans->delayed_ref_updates); + spin_lock(&delayed_rsv->lock); + delayed_rsv->size += num_bytes; + delayed_rsv->full = 0; + spin_unlock(&delayed_rsv->lock); + trans->delayed_ref_updates = 0; +} + +/** + * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv. + * @fs_info - the fs info for our fs. + * @src - the source block rsv to transfer from. + * @num_bytes - the number of bytes to transfer. + * + * This transfers up to the num_bytes amount from the src rsv to the + * delayed_refs_rsv. Any extra bytes are returned to the space info. + */ +void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *src, + u64 num_bytes) +{ + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + u64 to_free = 0; + + spin_lock(&src->lock); + src->reserved -= num_bytes; + src->size -= num_bytes; + spin_unlock(&src->lock); + + spin_lock(&delayed_refs_rsv->lock); + if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { + u64 delta = delayed_refs_rsv->size - + delayed_refs_rsv->reserved; + if (num_bytes > delta) { + to_free = num_bytes - delta; + num_bytes = delta; + } + } else { + to_free = num_bytes; + num_bytes = 0; + } + + if (num_bytes) + delayed_refs_rsv->reserved += num_bytes; + if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size) + delayed_refs_rsv->full = 1; + spin_unlock(&delayed_refs_rsv->lock); + + if (num_bytes) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", + 0, num_bytes, 1); + if (to_free) + btrfs_space_info_add_old_bytes(fs_info, + delayed_refs_rsv->space_info, to_free); +} + +/** + * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage. + * @fs_info - the fs_info for our fs. + * @flush - control how we can flush for this reservation. + * + * This will refill the delayed block_rsv up to 1 items size worth of space and + * will return -ENOSPC if we can't make the reservation. + */ +int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush) +{ + struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; + u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1); + u64 num_bytes = 0; + int ret = -ENOSPC; + + spin_lock(&block_rsv->lock); + if (block_rsv->reserved < block_rsv->size) { + num_bytes = block_rsv->size - block_rsv->reserved; + num_bytes = min(num_bytes, limit); + } + spin_unlock(&block_rsv->lock); + + if (!num_bytes) + return 0; + + ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv, + num_bytes, flush); + if (ret) + return ret; + btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0); + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", + 0, num_bytes, 1); + return 0; +} + /* * compare two delayed tree backrefs with same bytenr and type */ @@ -957,13 +1131,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, } /* - * this does a simple search for the head node for a given extent. - * It must be called with the delayed ref spinlock held, and it returns - * the head node if any where found, or NULL if not. + * This does a simple search for the head node for a given extent. Returns the + * head node if found, or NULL if not. */ struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr) { + lockdep_assert_held(&delayed_refs->lock); + return find_ref_head(delayed_refs, bytenr, false); } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index c18f93ea88ed..1c977e6d45dc 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -364,6 +364,16 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head( int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq); +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr); +void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans); +int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, + enum btrfs_reserve_flush_enum flush); +void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *src, + u64 num_bytes); +int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans); +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); + /* * helper functions to cast a node into its container */ diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 55c15f31d00d..6b2e9aa83ffa 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -201,7 +201,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return PTR_ERR(bdev); } - filemap_write_and_wait(bdev->bd_inode->i_mapping); + sync_blockdev(bdev); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { @@ -237,7 +237,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } rcu_assign_pointer(device->name, name); - mutex_lock(&fs_info->fs_devices->device_list_mutex); set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = 0; device->io_width = fs_info->sectorsize; @@ -256,6 +255,8 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_info->fs_devices; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); list_add(&device->dev_list, &fs_info->fs_devices->devices); fs_info->fs_devices->num_devices++; fs_info->fs_devices->open_devices++; @@ -399,7 +400,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, int ret; struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; - bool need_unlock; src_device = btrfs_find_device_by_devspec(fs_info, srcdevid, srcdev_name); @@ -413,11 +413,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, return -ETXTBSY; } - ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name, - src_device, &tgt_device); - if (ret) - return ret; - /* * Here we commit the transaction to make sure commit_total_bytes * of all the devices are updated. @@ -431,7 +426,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, return PTR_ERR(trans); } - need_unlock = true; + ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name, + src_device, &tgt_device); + if (ret) + return ret; + down_write(&dev_replace->rwsem); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: @@ -442,11 +441,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: ASSERT(0); ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; + up_write(&dev_replace->rwsem); goto leave; } dev_replace->cont_reading_from_srcdev_mode = read_src; - WARN_ON(!src_device); dev_replace->srcdev = src_device; dev_replace->tgtdev = tgt_device; @@ -471,7 +470,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, atomic64_set(&dev_replace->num_write_errors, 0); atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); up_write(&dev_replace->rwsem); - need_unlock = false; ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); if (ret) @@ -479,16 +477,16 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); - /* force writing the updated state information to disk */ - trans = btrfs_start_transaction(root, 0); + /* Commit dev_replace state and reserve 1 item for it. */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - need_unlock = true; down_write(&dev_replace->rwsem); dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED; dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; + up_write(&dev_replace->rwsem); goto leave; } @@ -510,8 +508,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, return ret; leave: - if (need_unlock) - up_write(&dev_replace->rwsem); btrfs_destroy_dev_replace_tgtdev(tgt_device); return ret; } @@ -603,17 +599,33 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); - return PTR_ERR(trans); + /* + * We have to use this loop approach because at this point src_device + * has to be available for transaction commit to complete, yet new + * chunks shouldn't be allocated on the device. + */ + while (1) { + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); + return PTR_ERR(trans); + } + ret = btrfs_commit_transaction(trans); + WARN_ON(ret); + + /* Prevent write_all_supers() during the finishing procedure */ + mutex_lock(&fs_info->fs_devices->device_list_mutex); + /* Prevent new chunks being allocated on the source device */ + mutex_lock(&fs_info->chunk_mutex); + + if (!list_empty(&src_device->post_commit_list)) { + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_info->chunk_mutex); + } else { + break; + } } - ret = btrfs_commit_transaction(trans); - WARN_ON(ret); - /* keep away write_all_supers() during the finishing procedure */ - mutex_lock(&fs_info->fs_devices->device_list_mutex); - mutex_lock(&fs_info->chunk_mutex); down_write(&dev_replace->rwsem); dev_replace->replace_state = scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED @@ -662,8 +674,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_device_set_disk_total_bytes(tgt_device, src_device->disk_total_bytes); btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); - ASSERT(list_empty(&src_device->post_commit_list)); - tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->commit_bytes_used = src_device->bytes_used; btrfs_assign_next_active_device(src_device, tgt_device); @@ -713,7 +723,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( struct btrfs_device *srcdev, struct btrfs_device *tgtdev) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; u64 start = 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index deb74a8c191a..41a2bd2e0c56 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -19,6 +19,7 @@ #include <linux/crc32c.h> #include <linux/sched/mm.h> #include <asm/unaligned.h> +#include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -40,10 +41,6 @@ #include "tree-checker.h" #include "ref-verify.h" -#ifdef CONFIG_X86 -#include <asm/cpufeature.h> -#endif - #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ @@ -249,16 +246,6 @@ out: return em; } -u32 btrfs_csum_data(const char *data, u32 seed, size_t len) -{ - return crc32c(seed, data, len); -} - -void btrfs_csum_final(u32 crc, u8 *result) -{ - put_unaligned_le32(~crc, result); -} - /* * Compute the csum of a btree block and store the result to provided buffer. * @@ -266,6 +253,8 @@ void btrfs_csum_final(u32 crc, u8 *result) */ static int csum_tree_block(struct extent_buffer *buf, u8 *result) { + struct btrfs_fs_info *fs_info = buf->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -273,9 +262,12 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result) unsigned long map_start; unsigned long map_len; int err; - u32 crc = ~(u32)0; + + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); len = buf->len - offset; + while (len > 0) { /* * Note: we don't need to check for the err == 1 case here, as @@ -288,14 +280,13 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result) if (WARN_ON(err)) return err; cur_len = min(len, map_len - (offset - map_start)); - crc = btrfs_csum_data(kaddr + offset - map_start, - crc, cur_len); + crypto_shash_update(shash, kaddr + offset - map_start, cur_len); len -= cur_len; offset += cur_len; } memset(result, 0, BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); + crypto_shash_final(shash, result); return 0; } @@ -356,6 +347,16 @@ out: return ret; } +static bool btrfs_supported_super_csum(u16 csum_type) +{ + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + return true; + default: + return false; + } +} + /* * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. @@ -365,33 +366,25 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, { struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; - u16 csum_type = btrfs_super_csum_type(disk_sb); - int ret = 0; + char result[BTRFS_CSUM_SIZE]; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); - if (csum_type == BTRFS_CSUM_TYPE_CRC32) { - u32 crc = ~(u32)0; - char result[sizeof(crc)]; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); - /* - * The super_block structure does not span the whole - * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space - * is filled with zeros and is included in the checksum. - */ - crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, - crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); + /* + * The super_block structure does not span the whole + * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is + * filled with zeros and is included in the checksum. + */ + crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + crypto_shash_final(shash, result); - if (memcmp(raw_disk_sb, result, sizeof(result))) - ret = 1; - } + if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb))) + return 1; - if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { - btrfs_err(fs_info, "unsupported checksum algorithm %u", - csum_type); - ret = 1; - } - - return ret; + return 0; } int btrfs_verify_level_key(struct extent_buffer *eb, int level, @@ -873,14 +866,13 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio, return btree_csum_one_bio(bio); } -static int check_async_write(struct btrfs_inode *bi) +static int check_async_write(struct btrfs_fs_info *fs_info, + struct btrfs_inode *bi) { if (atomic_read(&bi->sync_writers)) return 0; -#ifdef CONFIG_X86 - if (static_cpu_has(X86_FEATURE_XMM4_2)) + if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags)) return 0; -#endif return 1; } @@ -889,7 +881,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, unsigned long bio_flags) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int async = check_async_write(BTRFS_I(inode)); + int async = check_async_write(fs_info, BTRFS_I(inode)); blk_status_t ret; if (bio_op(bio) != REQ_OP_WRITE) { @@ -2262,6 +2254,29 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, return 0; } +static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) +{ + struct crypto_shash *csum_shash; + const char *csum_name = btrfs_super_csum_name(csum_type); + + csum_shash = crypto_alloc_shash(csum_name, 0, 0); + + if (IS_ERR(csum_shash)) { + btrfs_err(fs_info, "error allocating %s hash for checksum", + csum_name); + return PTR_ERR(csum_shash); + } + + fs_info->csum_shash = csum_shash; + + return 0; +} + +static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) +{ + crypto_free_shash(fs_info->csum_shash); +} + static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { @@ -2577,7 +2592,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, ret = validate_super(fs_info, sb, -1); if (ret < 0) goto out; - if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) { + if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid csum type, has %u want %u", btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); @@ -2607,6 +2622,7 @@ int open_ctree(struct super_block *sb, u32 stripesize; u64 generation; u64 features; + u16 csum_type; struct btrfs_key location; struct buffer_head *bh; struct btrfs_super_block *disk_super; @@ -2689,7 +2705,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); - btrfs_mapping_init(&fs_info->mapping_tree); + extent_map_tree_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); @@ -2793,6 +2809,8 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->swapfile_pins_lock); fs_info->swapfile_pins = RB_ROOT; + fs_info->send_in_progress = 0; + ret = btrfs_alloc_stripe_hash_table(fs_info); if (ret) { err = ret; @@ -2813,6 +2831,25 @@ int open_ctree(struct super_block *sb, } /* + * Verify the type first, if that or the the checksum value are + * corrupted, we'll find out + */ + csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data); + if (!btrfs_supported_super_csum(csum_type)) { + btrfs_err(fs_info, "unsupported checksum algorithm: %u", + csum_type); + err = -EINVAL; + brelse(bh); + goto fail_alloc; + } + + ret = btrfs_init_csum_hash(fs_info, csum_type); + if (ret) { + err = ret; + goto fail_alloc; + } + + /* * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). */ @@ -2820,7 +2857,7 @@ int open_ctree(struct super_block *sb, btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; brelse(bh); - goto fail_alloc; + goto fail_csum; } /* @@ -2857,11 +2894,11 @@ int open_ctree(struct super_block *sb, if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } if (!btrfs_super_root(disk_super)) - goto fail_alloc; + goto fail_csum; /* check FS state, whether FS is broken. */ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) @@ -2883,7 +2920,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_parse_options(fs_info, options, sb->s_flags); if (ret) { err = ret; - goto fail_alloc; + goto fail_csum; } features = btrfs_super_incompat_flags(disk_super) & @@ -2893,7 +2930,7 @@ int open_ctree(struct super_block *sb, "cannot mount because of unsupported optional features (%llx)", features); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } features = btrfs_super_incompat_flags(disk_super); @@ -2937,7 +2974,7 @@ int open_ctree(struct super_block *sb, btrfs_err(fs_info, "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", nodesize, sectorsize); - goto fail_alloc; + goto fail_csum; } /* @@ -2953,7 +2990,7 @@ int open_ctree(struct super_block *sb, "cannot mount read-write because of unsupported optional features (%llx)", features); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } ret = btrfs_init_workqueues(fs_info, fs_devices); @@ -3331,6 +3368,8 @@ fail_tree_roots: fail_sb_buffer: btrfs_stop_all_workers(fs_info); btrfs_free_block_groups(fs_info); +fail_csum: + btrfs_free_csum_hash(fs_info); fail_alloc: fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -3472,17 +3511,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { + struct btrfs_fs_info *fs_info = device->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct buffer_head *bh; int i; int ret; int errors = 0; - u32 crc; u64 bytenr; int op_flags; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; + shash->tfm = fs_info->csum_shash; + for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); if (bytenr + BTRFS_SUPER_INFO_SIZE >= @@ -3491,10 +3533,10 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr); - crc = ~(u32)0; - crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, sb->csum); + crypto_shash_init(shash); + crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + crypto_shash_final(shash, sb->csum); /* One reference for us, and we leave it for the caller */ bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, @@ -3709,7 +3751,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) - min_tolerated = min(min_tolerated, + min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[BTRFS_RAID_SINGLE]. tolerated_failures); @@ -3718,7 +3760,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) continue; if (!(flags & btrfs_raid_array[raid_type].bg_flag)) continue; - min_tolerated = min(min_tolerated, + min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[raid_type]. tolerated_failures); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0161aa1ea0b..e80f7c45a307 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -115,8 +115,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, struct btrfs_key *first_key); -u32 btrfs_csum_data(const char *data, u32 seed, size_t len); -void btrfs_csum_final(u32 crc, u8 *result); blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f79e477a378e..d3b58e388535 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -28,46 +28,12 @@ #include "sysfs.h" #include "qgroup.h" #include "ref-verify.h" +#include "space-info.h" +#include "block-rsv.h" +#include "delalloc-space.h" #undef SCRAMBLE_DELAYED_REFS -/* - * control flags for do_chunk_alloc's force field - * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk - * if we really need one. - * - * CHUNK_ALLOC_LIMITED means to only try and allocate one - * if we have very few chunks already allocated. This is - * used as part of the clustering code to help make sure - * we have a good pool of storage to cluster in, without - * filling the FS with empty chunks - * - * CHUNK_ALLOC_FORCE means it must try to allocate one - * - */ -enum { - CHUNK_ALLOC_NO_FORCE = 0, - CHUNK_ALLOC_LIMITED = 1, - CHUNK_ALLOC_FORCE = 2, -}; - -/* - * Declare a helper function to detect underflow of various space info members - */ -#define DECLARE_SPACE_INFO_UPDATE(name) \ -static inline void update_##name(struct btrfs_space_info *sinfo, \ - s64 bytes) \ -{ \ - if (bytes < 0 && sinfo->name < -bytes) { \ - WARN_ON(1); \ - sinfo->name = 0; \ - return; \ - } \ - sinfo->name += bytes; \ -} - -DECLARE_SPACE_INFO_UPDATE(bytes_may_use); -DECLARE_SPACE_INFO_UPDATE(bytes_pinned); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, u64 parent, @@ -84,21 +50,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *node, struct btrfs_delayed_extent_op *extent_op); -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, - int force); static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); -static void dump_space_info(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *info, u64 bytes, - int dump_block_groups); -static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, - u64 num_bytes); -static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 num_bytes); -static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 num_bytes); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -737,60 +690,39 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( return block_group_cache_tree_search(info, bytenr, 1); } -static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, - u64 flags) +static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) { - struct list_head *head = &info->space_info; - struct btrfs_space_info *found; - - flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; - - rcu_read_lock(); - list_for_each_entry_rcu(found, head, list) { - if (found->flags & flags) { - rcu_read_unlock(); - return found; - } + if (ref->type == BTRFS_REF_METADATA) { + if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) + return BTRFS_BLOCK_GROUP_SYSTEM; + else + return BTRFS_BLOCK_GROUP_METADATA; } - rcu_read_unlock(); - return NULL; + return BTRFS_BLOCK_GROUP_DATA; } static void add_pinned_bytes(struct btrfs_fs_info *fs_info, struct btrfs_ref *ref) { struct btrfs_space_info *space_info; - s64 num_bytes = -ref->len; - u64 flags; - - if (ref->type == BTRFS_REF_METADATA) { - if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) - flags = BTRFS_BLOCK_GROUP_SYSTEM; - else - flags = BTRFS_BLOCK_GROUP_METADATA; - } else { - flags = BTRFS_BLOCK_GROUP_DATA; - } + u64 flags = generic_ref_to_space_flags(ref); - space_info = __find_space_info(fs_info, flags); + space_info = btrfs_find_space_info(fs_info, flags); ASSERT(space_info); - percpu_counter_add_batch(&space_info->total_bytes_pinned, num_bytes, + percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len, BTRFS_TOTAL_BYTES_PINNED_BATCH); } -/* - * after adding space to the filesystem, we need to clear the full flags - * on all the space infos. - */ -void btrfs_clear_space_info_full(struct btrfs_fs_info *info) +static void sub_pinned_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_ref *ref) { - struct list_head *head = &info->space_info; - struct btrfs_space_info *found; + struct btrfs_space_info *space_info; + u64 flags = generic_ref_to_space_flags(ref); - rcu_read_lock(); - list_for_each_entry_rcu(found, head, list) - found->full = 0; - rcu_read_unlock(); + space_info = btrfs_find_space_info(fs_info, flags); + ASSERT(space_info); + percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len, + BTRFS_TOTAL_BYTES_PINNED_BATCH); } /* simple helper to search for an existing data extent at a given offset */ @@ -1119,11 +1051,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) __le64 lenum; lenum = cpu_to_le64(root_objectid); - high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); + high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(owner); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); lenum = cpu_to_le64(offset); - low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); + low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); return ((u64)high_crc << 31) ^ (u64)low_crc; } @@ -2063,7 +1995,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_ref_tree_mod(fs_info, generic_ref); if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) - add_pinned_bytes(fs_info, generic_ref); + sub_pinned_bytes(fs_info, generic_ref); return ret; } @@ -2460,7 +2392,7 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, flags = BTRFS_BLOCK_GROUP_SYSTEM; else flags = BTRFS_BLOCK_GROUP_METADATA; - space_info = __find_space_info(fs_info, flags); + space_info = btrfs_find_space_info(fs_info, flags); ASSERT(space_info); percpu_counter_add_batch(&space_info->total_bytes_pinned, -head->num_bytes, @@ -2822,49 +2754,6 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes) return num_csums; } -bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info) -{ - struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - bool ret = false; - u64 reserved; - - spin_lock(&global_rsv->lock); - reserved = global_rsv->reserved; - spin_unlock(&global_rsv->lock); - - /* - * Since the global reserve is just kind of magic we don't really want - * to rely on it to save our bacon, so if our size is more than the - * delayed_refs_rsv and the global rsv then it's time to think about - * bailing. - */ - spin_lock(&delayed_refs_rsv->lock); - reserved += delayed_refs_rsv->reserved; - if (delayed_refs_rsv->size >= reserved) - ret = true; - spin_unlock(&delayed_refs_rsv->lock); - return ret; -} - -int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans) -{ - u64 num_entries = - atomic_read(&trans->transaction->delayed_refs.num_entries); - u64 avg_runtime; - u64 val; - - smp_mb(); - avg_runtime = trans->fs_info->avg_delayed_ref_runtime; - val = num_entries * avg_runtime; - if (val >= NSEC_PER_SEC) - return 1; - if (val >= NSEC_PER_SEC / 2) - return 2; - - return btrfs_check_space_for_delayed_refs(trans->fs_info); -} - /* * this starts processing the delayed reference count updates and * extent insertions we have queued up so far. count can be @@ -3832,94 +3721,6 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers)); } -static const char *alloc_name(u64 flags) -{ - switch (flags) { - case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: - return "mixed"; - case BTRFS_BLOCK_GROUP_METADATA: - return "metadata"; - case BTRFS_BLOCK_GROUP_DATA: - return "data"; - case BTRFS_BLOCK_GROUP_SYSTEM: - return "system"; - default: - WARN_ON(1); - return "invalid-combination"; - }; -} - -static int create_space_info(struct btrfs_fs_info *info, u64 flags) -{ - - struct btrfs_space_info *space_info; - int i; - int ret; - - space_info = kzalloc(sizeof(*space_info), GFP_NOFS); - if (!space_info) - return -ENOMEM; - - ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, - GFP_KERNEL); - if (ret) { - kfree(space_info); - return ret; - } - - for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) - INIT_LIST_HEAD(&space_info->block_groups[i]); - init_rwsem(&space_info->groups_sem); - spin_lock_init(&space_info->lock); - space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; - space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; - init_waitqueue_head(&space_info->wait); - INIT_LIST_HEAD(&space_info->ro_bgs); - INIT_LIST_HEAD(&space_info->tickets); - INIT_LIST_HEAD(&space_info->priority_tickets); - - ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, - info->space_info_kobj, "%s", - alloc_name(space_info->flags)); - if (ret) { - percpu_counter_destroy(&space_info->total_bytes_pinned); - kfree(space_info); - return ret; - } - - list_add_rcu(&space_info->list, &info->space_info); - if (flags & BTRFS_BLOCK_GROUP_DATA) - info->data_sinfo = space_info; - - return ret; -} - -static void update_space_info(struct btrfs_fs_info *info, u64 flags, - u64 total_bytes, u64 bytes_used, - u64 bytes_readonly, - struct btrfs_space_info **space_info) -{ - struct btrfs_space_info *found; - int factor; - - factor = btrfs_bg_type_to_factor(flags); - - found = __find_space_info(info, flags); - ASSERT(found); - spin_lock(&found->lock); - found->total_bytes += total_bytes; - found->disk_total += total_bytes * factor; - found->bytes_used += bytes_used; - found->disk_used += bytes_used * factor; - found->bytes_readonly += bytes_readonly; - if (total_bytes > 0) - found->full = 0; - space_info_add_new_bytes(info, found, total_bytes - - bytes_used - bytes_readonly); - spin_unlock(&found->lock); - *space_info = found; -} - static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) { u64 extra_flags = chunk_to_extended(flags) & @@ -4067,215 +3868,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info) return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); } -static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, - bool may_use_included) -{ - ASSERT(s_info); - return s_info->bytes_used + s_info->bytes_reserved + - s_info->bytes_pinned + s_info->bytes_readonly + - (may_use_included ? s_info->bytes_may_use : 0); -} - -int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) -{ - struct btrfs_root *root = inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; - u64 used; - int ret = 0; - int need_commit = 2; - int have_pinned_space; - - /* make sure bytes are sectorsize aligned */ - bytes = ALIGN(bytes, fs_info->sectorsize); - - if (btrfs_is_free_space_inode(inode)) { - need_commit = 0; - ASSERT(current->journal_info); - } - -again: - /* make sure we have enough space to handle the data first */ - spin_lock(&data_sinfo->lock); - used = btrfs_space_info_used(data_sinfo, true); - - if (used + bytes > data_sinfo->total_bytes) { - struct btrfs_trans_handle *trans; - - /* - * if we don't have enough free bytes in this space then we need - * to alloc a new chunk. - */ - if (!data_sinfo->full) { - u64 alloc_target; - - data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; - spin_unlock(&data_sinfo->lock); - - alloc_target = btrfs_data_alloc_profile(fs_info); - /* - * It is ugly that we don't call nolock join - * transaction for the free space inode case here. - * But it is safe because we only do the data space - * reservation for the free space cache in the - * transaction context, the common join transaction - * just increase the counter of the current transaction - * handler, doesn't try to acquire the trans_lock of - * the fs. - */ - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = do_chunk_alloc(trans, alloc_target, - CHUNK_ALLOC_NO_FORCE); - btrfs_end_transaction(trans); - if (ret < 0) { - if (ret != -ENOSPC) - return ret; - else { - have_pinned_space = 1; - goto commit_trans; - } - } - - goto again; - } - - /* - * If we don't have enough pinned space to deal with this - * allocation, and no removed chunk in current transaction, - * don't bother committing the transaction. - */ - have_pinned_space = __percpu_counter_compare( - &data_sinfo->total_bytes_pinned, - used + bytes - data_sinfo->total_bytes, - BTRFS_TOTAL_BYTES_PINNED_BATCH); - spin_unlock(&data_sinfo->lock); - - /* commit the current transaction and try again */ -commit_trans: - if (need_commit) { - need_commit--; - - if (need_commit > 0) { - btrfs_start_delalloc_roots(fs_info, -1); - btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, - (u64)-1); - } - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - if (have_pinned_space >= 0 || - test_bit(BTRFS_TRANS_HAVE_FREE_BGS, - &trans->transaction->flags) || - need_commit > 0) { - ret = btrfs_commit_transaction(trans); - if (ret) - return ret; - /* - * The cleaner kthread might still be doing iput - * operations. Wait for it to finish so that - * more space is released. We don't need to - * explicitly run the delayed iputs here because - * the commit_transaction would have woken up - * the cleaner. - */ - ret = btrfs_wait_on_delayed_iputs(fs_info); - if (ret) - return ret; - goto again; - } else { - btrfs_end_transaction(trans); - } - } - - trace_btrfs_space_reservation(fs_info, - "space_info:enospc", - data_sinfo->flags, bytes, 1); - return -ENOSPC; - } - update_bytes_may_use(data_sinfo, bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - data_sinfo->flags, bytes, 1); - spin_unlock(&data_sinfo->lock); - - return 0; -} - -int btrfs_check_data_free_space(struct inode *inode, - struct extent_changeset **reserved, u64 start, u64 len) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int ret; - - /* align the range */ - len = round_up(start + len, fs_info->sectorsize) - - round_down(start, fs_info->sectorsize); - start = round_down(start, fs_info->sectorsize); - - ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); - if (ret < 0) - return ret; - - /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ - ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); - if (ret < 0) - btrfs_free_reserved_data_space_noquota(inode, start, len); - else - ret = 0; - return ret; -} - -/* - * Called if we need to clear a data reservation for this inode - * Normally in a error case. - * - * This one will *NOT* use accurate qgroup reserved space API, just for case - * which we can't sleep and is sure it won't affect qgroup reserved space. - * Like clear_bit_hook(). - */ -void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, - u64 len) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_space_info *data_sinfo; - - /* Make sure the range is aligned to sectorsize */ - len = round_up(start + len, fs_info->sectorsize) - - round_down(start, fs_info->sectorsize); - start = round_down(start, fs_info->sectorsize); - - data_sinfo = fs_info->data_sinfo; - spin_lock(&data_sinfo->lock); - update_bytes_may_use(data_sinfo, -len); - trace_btrfs_space_reservation(fs_info, "space_info", - data_sinfo->flags, len, 0); - spin_unlock(&data_sinfo->lock); -} - -/* - * Called if we need to clear a data reservation for this inode - * Normally in a error case. - * - * This one will handle the per-inode data rsv map for accurate reserved - * space framework. - */ -void btrfs_free_reserved_data_space(struct inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - - /* Make sure the range is aligned to sectorsize */ - len = round_up(start + len, root->fs_info->sectorsize) - - round_down(start, root->fs_info->sectorsize); - start = round_down(start, root->fs_info->sectorsize); - - btrfs_free_reserved_data_space_noquota(inode, start, len); - btrfs_qgroup_free_data(inode, reserved, start, len); -} - static void force_metadata_allocation(struct btrfs_fs_info *info) { struct list_head *head = &info->space_info; @@ -4289,11 +3881,6 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) rcu_read_unlock(); } -static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) -{ - return (global->size << 1); -} - static int should_alloc_chunk(struct btrfs_fs_info *fs_info, struct btrfs_space_info *sinfo, int force) { @@ -4324,15 +3911,9 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) { u64 num_dev; - if (type & (BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) + num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max; + if (!num_dev) num_dev = fs_info->fs_devices->rw_devices; - else if (type & BTRFS_BLOCK_GROUP_RAID1) - num_dev = 2; - else - num_dev = 1; /* DUP or single */ return num_dev; } @@ -4357,7 +3938,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) */ lockdep_assert_held(&fs_info->chunk_mutex); - info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); + info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); spin_lock(&info->lock); left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); @@ -4371,7 +3952,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", left, thresh, type); - dump_space_info(fs_info, info, 0, 0); + btrfs_dump_space_info(fs_info, info, 0, 0); } if (left < thresh) { @@ -4404,8 +3985,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. */ -static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, - int force) +int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, + enum btrfs_chunk_alloc_enum force) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *space_info; @@ -4417,7 +3998,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, if (trans->allocating_chunk) return -ENOSPC; - space_info = __find_space_info(fs_info, flags); + space_info = btrfs_find_space_info(fs_info, flags); ASSERT(space_info); do { @@ -4524,1714 +4105,6 @@ out: return ret; } -static int can_overcommit(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, u64 bytes, - enum btrfs_reserve_flush_enum flush, - bool system_chunk) -{ - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - u64 profile; - u64 space_size; - u64 avail; - u64 used; - int factor; - - /* Don't overcommit when in mixed mode. */ - if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) - return 0; - - if (system_chunk) - profile = btrfs_system_alloc_profile(fs_info); - else - profile = btrfs_metadata_alloc_profile(fs_info); - - used = btrfs_space_info_used(space_info, false); - - /* - * We only want to allow over committing if we have lots of actual space - * free, but if we don't have enough space to handle the global reserve - * space then we could end up having a real enospc problem when trying - * to allocate a chunk or some other such important allocation. - */ - spin_lock(&global_rsv->lock); - space_size = calc_global_rsv_need_space(global_rsv); - spin_unlock(&global_rsv->lock); - if (used + space_size >= space_info->total_bytes) - return 0; - - used += space_info->bytes_may_use; - - avail = atomic64_read(&fs_info->free_chunk_space); - - /* - * If we have dup, raid1 or raid10 then only half of the free - * space is actually usable. For raid56, the space info used - * doesn't include the parity drive, so we don't have to - * change the math - */ - factor = btrfs_bg_type_to_factor(profile); - avail = div_u64(avail, factor); - - /* - * If we aren't flushing all things, let us overcommit up to - * 1/2th of the space. If we can flush, don't let us overcommit - * too much, let it overcommit up to 1/8 of the space. - */ - if (flush == BTRFS_RESERVE_FLUSH_ALL) - avail >>= 3; - else - avail >>= 1; - - if (used + bytes < space_info->total_bytes + avail) - return 1; - return 0; -} - -static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, - unsigned long nr_pages, int nr_items) -{ - struct super_block *sb = fs_info->sb; - - if (down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); - up_read(&sb->s_umount); - } else { - /* - * We needn't worry the filesystem going from r/w to r/o though - * we don't acquire ->s_umount mutex, because the filesystem - * should guarantee the delalloc inodes list be empty after - * the filesystem is readonly(all dirty pages are written to - * the disk). - */ - btrfs_start_delalloc_roots(fs_info, nr_items); - if (!current->journal_info) - btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1); - } -} - -static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, - u64 to_reclaim) -{ - u64 bytes; - u64 nr; - - bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - nr = div64_u64(to_reclaim, bytes); - if (!nr) - nr = 1; - return nr; -} - -#define EXTENT_SIZE_PER_ITEM SZ_256K - -/* - * shrink metadata reservation for delalloc - */ -static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, - u64 orig, bool wait_ordered) -{ - struct btrfs_space_info *space_info; - struct btrfs_trans_handle *trans; - u64 delalloc_bytes; - u64 dio_bytes; - u64 async_pages; - u64 items; - long time_left; - unsigned long nr_pages; - int loops; - - /* Calc the number of the pages we need flush for space reservation */ - items = calc_reclaim_items_nr(fs_info, to_reclaim); - to_reclaim = items * EXTENT_SIZE_PER_ITEM; - - trans = (struct btrfs_trans_handle *)current->journal_info; - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); - - delalloc_bytes = percpu_counter_sum_positive( - &fs_info->delalloc_bytes); - dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); - if (delalloc_bytes == 0 && dio_bytes == 0) { - if (trans) - return; - if (wait_ordered) - btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); - return; - } - - /* - * If we are doing more ordered than delalloc we need to just wait on - * ordered extents, otherwise we'll waste time trying to flush delalloc - * that likely won't give us the space back we need. - */ - if (dio_bytes > delalloc_bytes) - wait_ordered = true; - - loops = 0; - while ((delalloc_bytes || dio_bytes) && loops < 3) { - nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; - - /* - * Triggers inode writeback for up to nr_pages. This will invoke - * ->writepages callback and trigger delalloc filling - * (btrfs_run_delalloc_range()). - */ - btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items); - - /* - * We need to wait for the compressed pages to start before - * we continue. - */ - async_pages = atomic_read(&fs_info->async_delalloc_pages); - if (!async_pages) - goto skip_async; - - /* - * Calculate how many compressed pages we want to be written - * before we continue. I.e if there are more async pages than we - * require wait_event will wait until nr_pages are written. - */ - if (async_pages <= nr_pages) - async_pages = 0; - else - async_pages -= nr_pages; - - wait_event(fs_info->async_submit_wait, - atomic_read(&fs_info->async_delalloc_pages) <= - (int)async_pages); -skip_async: - spin_lock(&space_info->lock); - if (list_empty(&space_info->tickets) && - list_empty(&space_info->priority_tickets)) { - spin_unlock(&space_info->lock); - break; - } - spin_unlock(&space_info->lock); - - loops++; - if (wait_ordered && !trans) { - btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); - } else { - time_left = schedule_timeout_killable(1); - if (time_left) - break; - } - delalloc_bytes = percpu_counter_sum_positive( - &fs_info->delalloc_bytes); - dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); - } -} - -struct reserve_ticket { - u64 orig_bytes; - u64 bytes; - int error; - struct list_head list; - wait_queue_head_t wait; -}; - -/** - * maybe_commit_transaction - possibly commit the transaction if its ok to - * @root - the root we're allocating for - * @bytes - the number of bytes we want to reserve - * @force - force the commit - * - * This will check to make sure that committing the transaction will actually - * get us somewhere and then commit the transaction if it does. Otherwise it - * will return -ENOSPC. - */ -static int may_commit_transaction(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info) -{ - struct reserve_ticket *ticket = NULL; - struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; - struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; - struct btrfs_trans_handle *trans; - u64 bytes_needed; - u64 reclaim_bytes = 0; - - trans = (struct btrfs_trans_handle *)current->journal_info; - if (trans) - return -EAGAIN; - - spin_lock(&space_info->lock); - if (!list_empty(&space_info->priority_tickets)) - ticket = list_first_entry(&space_info->priority_tickets, - struct reserve_ticket, list); - else if (!list_empty(&space_info->tickets)) - ticket = list_first_entry(&space_info->tickets, - struct reserve_ticket, list); - bytes_needed = (ticket) ? ticket->bytes : 0; - spin_unlock(&space_info->lock); - - if (!bytes_needed) - return 0; - - trans = btrfs_join_transaction(fs_info->extent_root); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - /* - * See if there is enough pinned space to make this reservation, or if - * we have block groups that are going to be freed, allowing us to - * possibly do a chunk allocation the next loop through. - */ - if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || - __percpu_counter_compare(&space_info->total_bytes_pinned, - bytes_needed, - BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) - goto commit; - - /* - * See if there is some space in the delayed insertion reservation for - * this reservation. - */ - if (space_info != delayed_rsv->space_info) - goto enospc; - - spin_lock(&delayed_rsv->lock); - reclaim_bytes += delayed_rsv->reserved; - spin_unlock(&delayed_rsv->lock); - - spin_lock(&delayed_refs_rsv->lock); - reclaim_bytes += delayed_refs_rsv->reserved; - spin_unlock(&delayed_refs_rsv->lock); - if (reclaim_bytes >= bytes_needed) - goto commit; - bytes_needed -= reclaim_bytes; - - if (__percpu_counter_compare(&space_info->total_bytes_pinned, - bytes_needed, - BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) - goto enospc; - -commit: - return btrfs_commit_transaction(trans); -enospc: - btrfs_end_transaction(trans); - return -ENOSPC; -} - -/* - * Try to flush some data based on policy set by @state. This is only advisory - * and may fail for various reasons. The caller is supposed to examine the - * state of @space_info to detect the outcome. - */ -static void flush_space(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, u64 num_bytes, - int state) -{ - struct btrfs_root *root = fs_info->extent_root; - struct btrfs_trans_handle *trans; - int nr; - int ret = 0; - - switch (state) { - case FLUSH_DELAYED_ITEMS_NR: - case FLUSH_DELAYED_ITEMS: - if (state == FLUSH_DELAYED_ITEMS_NR) - nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; - else - nr = -1; - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - ret = btrfs_run_delayed_items_nr(trans, nr); - btrfs_end_transaction(trans); - break; - case FLUSH_DELALLOC: - case FLUSH_DELALLOC_WAIT: - shrink_delalloc(fs_info, num_bytes * 2, num_bytes, - state == FLUSH_DELALLOC_WAIT); - break; - case FLUSH_DELAYED_REFS_NR: - case FLUSH_DELAYED_REFS: - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - if (state == FLUSH_DELAYED_REFS_NR) - nr = calc_reclaim_items_nr(fs_info, num_bytes); - else - nr = 0; - btrfs_run_delayed_refs(trans, nr); - btrfs_end_transaction(trans); - break; - case ALLOC_CHUNK: - case ALLOC_CHUNK_FORCE: - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - ret = do_chunk_alloc(trans, - btrfs_metadata_alloc_profile(fs_info), - (state == ALLOC_CHUNK) ? - CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE); - btrfs_end_transaction(trans); - if (ret > 0 || ret == -ENOSPC) - ret = 0; - break; - case COMMIT_TRANS: - /* - * If we have pending delayed iputs then we could free up a - * bunch of pinned space, so make sure we run the iputs before - * we do our pinned bytes check below. - */ - btrfs_run_delayed_iputs(fs_info); - btrfs_wait_on_delayed_iputs(fs_info); - - ret = may_commit_transaction(fs_info, space_info); - break; - default: - ret = -ENOSPC; - break; - } - - trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, - ret); - return; -} - -static inline u64 -btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - bool system_chunk) -{ - struct reserve_ticket *ticket; - u64 used; - u64 expected; - u64 to_reclaim = 0; - - list_for_each_entry(ticket, &space_info->tickets, list) - to_reclaim += ticket->bytes; - list_for_each_entry(ticket, &space_info->priority_tickets, list) - to_reclaim += ticket->bytes; - if (to_reclaim) - return to_reclaim; - - to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); - if (can_overcommit(fs_info, space_info, to_reclaim, - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) - return 0; - - used = btrfs_space_info_used(space_info, true); - - if (can_overcommit(fs_info, space_info, SZ_1M, - BTRFS_RESERVE_FLUSH_ALL, system_chunk)) - expected = div_factor_fine(space_info->total_bytes, 95); - else - expected = div_factor_fine(space_info->total_bytes, 90); - - if (used > expected) - to_reclaim = used - expected; - else - to_reclaim = 0; - to_reclaim = min(to_reclaim, space_info->bytes_may_use + - space_info->bytes_reserved); - return to_reclaim; -} - -static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 used, bool system_chunk) -{ - u64 thresh = div_factor_fine(space_info->total_bytes, 98); - - /* If we're just plain full then async reclaim just slows us down. */ - if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) - return 0; - - if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, - system_chunk)) - return 0; - - return (used >= thresh && !btrfs_fs_closing(fs_info) && - !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); -} - -static bool wake_all_tickets(struct list_head *head) -{ - struct reserve_ticket *ticket; - - while (!list_empty(head)) { - ticket = list_first_entry(head, struct reserve_ticket, list); - list_del_init(&ticket->list); - ticket->error = -ENOSPC; - wake_up(&ticket->wait); - if (ticket->bytes != ticket->orig_bytes) - return true; - } - return false; -} - -/* - * This is for normal flushers, we can wait all goddamned day if we want to. We - * will loop and continuously try to flush as long as we are making progress. - * We count progress as clearing off tickets each time we have to loop. - */ -static void btrfs_async_reclaim_metadata_space(struct work_struct *work) -{ - struct btrfs_fs_info *fs_info; - struct btrfs_space_info *space_info; - u64 to_reclaim; - int flush_state; - int commit_cycles = 0; - u64 last_tickets_id; - - fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); - - spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, - false); - if (!to_reclaim) { - space_info->flush = 0; - spin_unlock(&space_info->lock); - return; - } - last_tickets_id = space_info->tickets_id; - spin_unlock(&space_info->lock); - - flush_state = FLUSH_DELAYED_ITEMS_NR; - do { - flush_space(fs_info, space_info, to_reclaim, flush_state); - spin_lock(&space_info->lock); - if (list_empty(&space_info->tickets)) { - space_info->flush = 0; - spin_unlock(&space_info->lock); - return; - } - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, - space_info, - false); - if (last_tickets_id == space_info->tickets_id) { - flush_state++; - } else { - last_tickets_id = space_info->tickets_id; - flush_state = FLUSH_DELAYED_ITEMS_NR; - if (commit_cycles) - commit_cycles--; - } - - /* - * We don't want to force a chunk allocation until we've tried - * pretty hard to reclaim space. Think of the case where we - * freed up a bunch of space and so have a lot of pinned space - * to reclaim. We would rather use that than possibly create a - * underutilized metadata chunk. So if this is our first run - * through the flushing state machine skip ALLOC_CHUNK_FORCE and - * commit the transaction. If nothing has changed the next go - * around then we can force a chunk allocation. - */ - if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles) - flush_state++; - - if (flush_state > COMMIT_TRANS) { - commit_cycles++; - if (commit_cycles > 2) { - if (wake_all_tickets(&space_info->tickets)) { - flush_state = FLUSH_DELAYED_ITEMS_NR; - commit_cycles--; - } else { - space_info->flush = 0; - } - } else { - flush_state = FLUSH_DELAYED_ITEMS_NR; - } - } - spin_unlock(&space_info->lock); - } while (flush_state <= COMMIT_TRANS); -} - -void btrfs_init_async_reclaim_work(struct work_struct *work) -{ - INIT_WORK(work, btrfs_async_reclaim_metadata_space); -} - -static const enum btrfs_flush_state priority_flush_states[] = { - FLUSH_DELAYED_ITEMS_NR, - FLUSH_DELAYED_ITEMS, - ALLOC_CHUNK, -}; - -static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - struct reserve_ticket *ticket) -{ - u64 to_reclaim; - int flush_state; - - spin_lock(&space_info->lock); - to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, - false); - if (!to_reclaim) { - spin_unlock(&space_info->lock); - return; - } - spin_unlock(&space_info->lock); - - flush_state = 0; - do { - flush_space(fs_info, space_info, to_reclaim, - priority_flush_states[flush_state]); - flush_state++; - spin_lock(&space_info->lock); - if (ticket->bytes == 0) { - spin_unlock(&space_info->lock); - return; - } - spin_unlock(&space_info->lock); - } while (flush_state < ARRAY_SIZE(priority_flush_states)); -} - -static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - struct reserve_ticket *ticket) - -{ - DEFINE_WAIT(wait); - u64 reclaim_bytes = 0; - int ret = 0; - - spin_lock(&space_info->lock); - while (ticket->bytes > 0 && ticket->error == 0) { - ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); - if (ret) { - ret = -EINTR; - break; - } - spin_unlock(&space_info->lock); - - schedule(); - - finish_wait(&ticket->wait, &wait); - spin_lock(&space_info->lock); - } - if (!ret) - ret = ticket->error; - if (!list_empty(&ticket->list)) - list_del_init(&ticket->list); - if (ticket->bytes && ticket->bytes < ticket->orig_bytes) - reclaim_bytes = ticket->orig_bytes - ticket->bytes; - spin_unlock(&space_info->lock); - - if (reclaim_bytes) - space_info_add_old_bytes(fs_info, space_info, reclaim_bytes); - return ret; -} - -/** - * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space - * @root - the root we're allocating for - * @space_info - the space info we want to allocate from - * @orig_bytes - the number of bytes we want - * @flush - whether or not we can flush to make our reservation - * - * This will reserve orig_bytes number of bytes from the space info associated - * with the block_rsv. If there is not enough space it will make an attempt to - * flush out space to make room. It will do this by flushing delalloc if - * possible or committing the transaction. If flush is 0 then no attempts to - * regain reservations will be made and this will fail if there is not enough - * space already. - */ -static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 orig_bytes, - enum btrfs_reserve_flush_enum flush, - bool system_chunk) -{ - struct reserve_ticket ticket; - u64 used; - u64 reclaim_bytes = 0; - int ret = 0; - - ASSERT(orig_bytes); - ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); - - spin_lock(&space_info->lock); - ret = -ENOSPC; - used = btrfs_space_info_used(space_info, true); - - /* - * If we have enough space then hooray, make our reservation and carry - * on. If not see if we can overcommit, and if we can, hooray carry on. - * If not things get more complicated. - */ - if (used + orig_bytes <= space_info->total_bytes) { - update_bytes_may_use(space_info, orig_bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, orig_bytes, 1); - ret = 0; - } else if (can_overcommit(fs_info, space_info, orig_bytes, flush, - system_chunk)) { - update_bytes_may_use(space_info, orig_bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, orig_bytes, 1); - ret = 0; - } - - /* - * If we couldn't make a reservation then setup our reservation ticket - * and kick the async worker if it's not already running. - * - * If we are a priority flusher then we just need to add our ticket to - * the list and we will do our own flushing further down. - */ - if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { - ticket.orig_bytes = orig_bytes; - ticket.bytes = orig_bytes; - ticket.error = 0; - init_waitqueue_head(&ticket.wait); - if (flush == BTRFS_RESERVE_FLUSH_ALL) { - list_add_tail(&ticket.list, &space_info->tickets); - if (!space_info->flush) { - space_info->flush = 1; - trace_btrfs_trigger_flush(fs_info, - space_info->flags, - orig_bytes, flush, - "enospc"); - queue_work(system_unbound_wq, - &fs_info->async_reclaim_work); - } - } else { - list_add_tail(&ticket.list, - &space_info->priority_tickets); - } - } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { - used += orig_bytes; - /* - * We will do the space reservation dance during log replay, - * which means we won't have fs_info->fs_root set, so don't do - * the async reclaim as we will panic. - */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && - need_do_async_reclaim(fs_info, space_info, - used, system_chunk) && - !work_busy(&fs_info->async_reclaim_work)) { - trace_btrfs_trigger_flush(fs_info, space_info->flags, - orig_bytes, flush, "preempt"); - queue_work(system_unbound_wq, - &fs_info->async_reclaim_work); - } - } - spin_unlock(&space_info->lock); - if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) - return ret; - - if (flush == BTRFS_RESERVE_FLUSH_ALL) - return wait_reserve_ticket(fs_info, space_info, &ticket); - - ret = 0; - priority_reclaim_metadata_space(fs_info, space_info, &ticket); - spin_lock(&space_info->lock); - if (ticket.bytes) { - if (ticket.bytes < orig_bytes) - reclaim_bytes = orig_bytes - ticket.bytes; - list_del_init(&ticket.list); - ret = -ENOSPC; - } - spin_unlock(&space_info->lock); - - if (reclaim_bytes) - space_info_add_old_bytes(fs_info, space_info, reclaim_bytes); - ASSERT(list_empty(&ticket.list)); - return ret; -} - -/** - * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space - * @root - the root we're allocating for - * @block_rsv - the block_rsv we're allocating for - * @orig_bytes - the number of bytes we want - * @flush - whether or not we can flush to make our reservation - * - * This will reserve orig_bytes number of bytes from the space info associated - * with the block_rsv. If there is not enough space it will make an attempt to - * flush out space to make room. It will do this by flushing delalloc if - * possible or committing the transaction. If flush is 0 then no attempts to - * regain reservations will be made and this will fail if there is not enough - * space already. - */ -static int reserve_metadata_bytes(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 orig_bytes, - enum btrfs_reserve_flush_enum flush) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - int ret; - bool system_chunk = (root == fs_info->chunk_root); - - ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, - orig_bytes, flush, system_chunk); - if (ret == -ENOSPC && - unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { - if (block_rsv != global_rsv && - !block_rsv_use_bytes(global_rsv, orig_bytes)) - ret = 0; - } - if (ret == -ENOSPC) { - trace_btrfs_space_reservation(fs_info, "space_info:enospc", - block_rsv->space_info->flags, - orig_bytes, 1); - - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) - dump_space_info(fs_info, block_rsv->space_info, - orig_bytes, 0); - } - return ret; -} - -static struct btrfs_block_rsv *get_block_rsv( - const struct btrfs_trans_handle *trans, - const struct btrfs_root *root) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *block_rsv = NULL; - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || - (root == fs_info->csum_root && trans->adding_csums) || - (root == fs_info->uuid_root)) - block_rsv = trans->block_rsv; - - if (!block_rsv) - block_rsv = root->block_rsv; - - if (!block_rsv) - block_rsv = &fs_info->empty_block_rsv; - - return block_rsv; -} - -static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, - u64 num_bytes) -{ - int ret = -ENOSPC; - spin_lock(&block_rsv->lock); - if (block_rsv->reserved >= num_bytes) { - block_rsv->reserved -= num_bytes; - if (block_rsv->reserved < block_rsv->size) - block_rsv->full = 0; - ret = 0; - } - spin_unlock(&block_rsv->lock); - return ret; -} - -static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, - u64 num_bytes, bool update_size) -{ - spin_lock(&block_rsv->lock); - block_rsv->reserved += num_bytes; - if (update_size) - block_rsv->size += num_bytes; - else if (block_rsv->reserved >= block_rsv->size) - block_rsv->full = 1; - spin_unlock(&block_rsv->lock); -} - -int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *dest, u64 num_bytes, - int min_factor) -{ - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - u64 min_bytes; - - if (global_rsv->space_info != dest->space_info) - return -ENOSPC; - - spin_lock(&global_rsv->lock); - min_bytes = div_factor(global_rsv->size, min_factor); - if (global_rsv->reserved < min_bytes + num_bytes) { - spin_unlock(&global_rsv->lock); - return -ENOSPC; - } - global_rsv->reserved -= num_bytes; - if (global_rsv->reserved < global_rsv->size) - global_rsv->full = 0; - spin_unlock(&global_rsv->lock); - - block_rsv_add_bytes(dest, num_bytes, true); - return 0; -} - -/** - * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv. - * @fs_info - the fs info for our fs. - * @src - the source block rsv to transfer from. - * @num_bytes - the number of bytes to transfer. - * - * This transfers up to the num_bytes amount from the src rsv to the - * delayed_refs_rsv. Any extra bytes are returned to the space info. - */ -void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *src, - u64 num_bytes) -{ - struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; - u64 to_free = 0; - - spin_lock(&src->lock); - src->reserved -= num_bytes; - src->size -= num_bytes; - spin_unlock(&src->lock); - - spin_lock(&delayed_refs_rsv->lock); - if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { - u64 delta = delayed_refs_rsv->size - - delayed_refs_rsv->reserved; - if (num_bytes > delta) { - to_free = num_bytes - delta; - num_bytes = delta; - } - } else { - to_free = num_bytes; - num_bytes = 0; - } - - if (num_bytes) - delayed_refs_rsv->reserved += num_bytes; - if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size) - delayed_refs_rsv->full = 1; - spin_unlock(&delayed_refs_rsv->lock); - - if (num_bytes) - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); - if (to_free) - space_info_add_old_bytes(fs_info, delayed_refs_rsv->space_info, - to_free); -} - -/** - * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage. - * @fs_info - the fs_info for our fs. - * @flush - control how we can flush for this reservation. - * - * This will refill the delayed block_rsv up to 1 items size worth of space and - * will return -ENOSPC if we can't make the reservation. - */ -int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, - enum btrfs_reserve_flush_enum flush) -{ - struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; - u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1); - u64 num_bytes = 0; - int ret = -ENOSPC; - - spin_lock(&block_rsv->lock); - if (block_rsv->reserved < block_rsv->size) { - num_bytes = block_rsv->size - block_rsv->reserved; - num_bytes = min(num_bytes, limit); - } - spin_unlock(&block_rsv->lock); - - if (!num_bytes) - return 0; - - ret = reserve_metadata_bytes(fs_info->extent_root, block_rsv, - num_bytes, flush); - if (ret) - return ret; - block_rsv_add_bytes(block_rsv, num_bytes, 0); - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); - return 0; -} - -/* - * This is for space we already have accounted in space_info->bytes_may_use, so - * basically when we're returning space from block_rsv's. - */ -static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 num_bytes) -{ - struct reserve_ticket *ticket; - struct list_head *head; - u64 used; - enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; - bool check_overcommit = false; - - spin_lock(&space_info->lock); - head = &space_info->priority_tickets; - - /* - * If we are over our limit then we need to check and see if we can - * overcommit, and if we can't then we just need to free up our space - * and not satisfy any requests. - */ - used = btrfs_space_info_used(space_info, true); - if (used - num_bytes >= space_info->total_bytes) - check_overcommit = true; -again: - while (!list_empty(head) && num_bytes) { - ticket = list_first_entry(head, struct reserve_ticket, - list); - /* - * We use 0 bytes because this space is already reserved, so - * adding the ticket space would be a double count. - */ - if (check_overcommit && - !can_overcommit(fs_info, space_info, 0, flush, false)) - break; - if (num_bytes >= ticket->bytes) { - list_del_init(&ticket->list); - num_bytes -= ticket->bytes; - ticket->bytes = 0; - space_info->tickets_id++; - wake_up(&ticket->wait); - } else { - ticket->bytes -= num_bytes; - num_bytes = 0; - } - } - - if (num_bytes && head == &space_info->priority_tickets) { - head = &space_info->tickets; - flush = BTRFS_RESERVE_FLUSH_ALL; - goto again; - } - update_bytes_may_use(space_info, -num_bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, num_bytes, 0); - spin_unlock(&space_info->lock); -} - -/* - * This is for newly allocated space that isn't accounted in - * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent - * we use this helper. - */ -static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 num_bytes) -{ - struct reserve_ticket *ticket; - struct list_head *head = &space_info->priority_tickets; - -again: - while (!list_empty(head) && num_bytes) { - ticket = list_first_entry(head, struct reserve_ticket, - list); - if (num_bytes >= ticket->bytes) { - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, - ticket->bytes, 1); - list_del_init(&ticket->list); - num_bytes -= ticket->bytes; - update_bytes_may_use(space_info, ticket->bytes); - ticket->bytes = 0; - space_info->tickets_id++; - wake_up(&ticket->wait); - } else { - trace_btrfs_space_reservation(fs_info, "space_info", - space_info->flags, - num_bytes, 1); - update_bytes_may_use(space_info, num_bytes); - ticket->bytes -= num_bytes; - num_bytes = 0; - } - } - - if (num_bytes && head == &space_info->priority_tickets) { - head = &space_info->tickets; - goto again; - } -} - -static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, - struct btrfs_block_rsv *dest, u64 num_bytes, - u64 *qgroup_to_release_ret) -{ - struct btrfs_space_info *space_info = block_rsv->space_info; - u64 qgroup_to_release = 0; - u64 ret; - - spin_lock(&block_rsv->lock); - if (num_bytes == (u64)-1) { - num_bytes = block_rsv->size; - qgroup_to_release = block_rsv->qgroup_rsv_size; - } - block_rsv->size -= num_bytes; - if (block_rsv->reserved >= block_rsv->size) { - num_bytes = block_rsv->reserved - block_rsv->size; - block_rsv->reserved = block_rsv->size; - block_rsv->full = 1; - } else { - num_bytes = 0; - } - if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) { - qgroup_to_release = block_rsv->qgroup_rsv_reserved - - block_rsv->qgroup_rsv_size; - block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size; - } else { - qgroup_to_release = 0; - } - spin_unlock(&block_rsv->lock); - - ret = num_bytes; - if (num_bytes > 0) { - if (dest) { - spin_lock(&dest->lock); - if (!dest->full) { - u64 bytes_to_add; - - bytes_to_add = dest->size - dest->reserved; - bytes_to_add = min(num_bytes, bytes_to_add); - dest->reserved += bytes_to_add; - if (dest->reserved >= dest->size) - dest->full = 1; - num_bytes -= bytes_to_add; - } - spin_unlock(&dest->lock); - } - if (num_bytes) - space_info_add_old_bytes(fs_info, space_info, - num_bytes); - } - if (qgroup_to_release_ret) - *qgroup_to_release_ret = qgroup_to_release; - return ret; -} - -int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, - struct btrfs_block_rsv *dst, u64 num_bytes, - bool update_size) -{ - int ret; - - ret = block_rsv_use_bytes(src, num_bytes); - if (ret) - return ret; - - block_rsv_add_bytes(dst, num_bytes, update_size); - return 0; -} - -void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) -{ - memset(rsv, 0, sizeof(*rsv)); - spin_lock_init(&rsv->lock); - rsv->type = type; -} - -void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv, - unsigned short type) -{ - btrfs_init_block_rsv(rsv, type); - rsv->space_info = __find_space_info(fs_info, - BTRFS_BLOCK_GROUP_METADATA); -} - -struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, - unsigned short type) -{ - struct btrfs_block_rsv *block_rsv; - - block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); - if (!block_rsv) - return NULL; - - btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); - return block_rsv; -} - -void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv) -{ - if (!rsv) - return; - btrfs_block_rsv_release(fs_info, rsv, (u64)-1); - kfree(rsv); -} - -int btrfs_block_rsv_add(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, u64 num_bytes, - enum btrfs_reserve_flush_enum flush) -{ - int ret; - - if (num_bytes == 0) - return 0; - - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); - if (!ret) - block_rsv_add_bytes(block_rsv, num_bytes, true); - - return ret; -} - -int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor) -{ - u64 num_bytes = 0; - int ret = -ENOSPC; - - if (!block_rsv) - return 0; - - spin_lock(&block_rsv->lock); - num_bytes = div_factor(block_rsv->size, min_factor); - if (block_rsv->reserved >= num_bytes) - ret = 0; - spin_unlock(&block_rsv->lock); - - return ret; -} - -int btrfs_block_rsv_refill(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, u64 min_reserved, - enum btrfs_reserve_flush_enum flush) -{ - u64 num_bytes = 0; - int ret = -ENOSPC; - - if (!block_rsv) - return 0; - - spin_lock(&block_rsv->lock); - num_bytes = min_reserved; - if (block_rsv->reserved >= num_bytes) - ret = 0; - else - num_bytes -= block_rsv->reserved; - spin_unlock(&block_rsv->lock); - - if (!ret) - return 0; - - ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, false); - return 0; - } - - return ret; -} - -static u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, - u64 num_bytes, u64 *qgroup_to_release) -{ - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; - struct btrfs_block_rsv *target = delayed_rsv; - - if (target->full || target == block_rsv) - target = global_rsv; - - if (block_rsv->space_info != target->space_info) - target = NULL; - - return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes, - qgroup_to_release); -} - -void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, - u64 num_bytes) -{ - __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); -} - -/** - * btrfs_inode_rsv_release - release any excessive reservation. - * @inode - the inode we need to release from. - * @qgroup_free - free or convert qgroup meta. - * Unlike normal operation, qgroup meta reservation needs to know if we are - * freeing qgroup reservation or just converting it into per-trans. Normally - * @qgroup_free is true for error handling, and false for normal release. - * - * This is the same as btrfs_block_rsv_release, except that it handles the - * tracepoint for the reservation. - */ -static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; - u64 released = 0; - u64 qgroup_to_release = 0; - - /* - * Since we statically set the block_rsv->size we just want to say we - * are releasing 0 bytes, and then we'll just get the reservation over - * the size free'd. - */ - released = __btrfs_block_rsv_release(fs_info, block_rsv, 0, - &qgroup_to_release); - if (released > 0) - trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), released, 0); - if (qgroup_free) - btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); - else - btrfs_qgroup_convert_reserved_meta(inode->root, - qgroup_to_release); -} - -/** - * btrfs_delayed_refs_rsv_release - release a ref head's reservation. - * @fs_info - the fs_info for our fs. - * @nr - the number of items to drop. - * - * This drops the delayed ref head's count from the delayed refs rsv and frees - * any excess reservation we had. - */ -void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) -{ - struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr); - u64 released = 0; - - released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, - num_bytes, NULL); - if (released) - trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, released, 0); -} - -static void update_global_block_rsv(struct btrfs_fs_info *fs_info) -{ - struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; - struct btrfs_space_info *sinfo = block_rsv->space_info; - u64 num_bytes; - - /* - * The global block rsv is based on the size of the extent tree, the - * checksum tree and the root tree. If the fs is empty we want to set - * it to a minimal amount for safety. - */ - num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) + - btrfs_root_used(&fs_info->csum_root->root_item) + - btrfs_root_used(&fs_info->tree_root->root_item); - num_bytes = max_t(u64, num_bytes, SZ_16M); - - spin_lock(&sinfo->lock); - spin_lock(&block_rsv->lock); - - block_rsv->size = min_t(u64, num_bytes, SZ_512M); - - if (block_rsv->reserved < block_rsv->size) { - num_bytes = btrfs_space_info_used(sinfo, true); - if (sinfo->total_bytes > num_bytes) { - num_bytes = sinfo->total_bytes - num_bytes; - num_bytes = min(num_bytes, - block_rsv->size - block_rsv->reserved); - block_rsv->reserved += num_bytes; - update_bytes_may_use(sinfo, num_bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, - 1); - } - } else if (block_rsv->reserved > block_rsv->size) { - num_bytes = block_rsv->reserved - block_rsv->size; - update_bytes_may_use(sinfo, -num_bytes); - trace_btrfs_space_reservation(fs_info, "space_info", - sinfo->flags, num_bytes, 0); - block_rsv->reserved = block_rsv->size; - } - - if (block_rsv->reserved == block_rsv->size) - block_rsv->full = 1; - else - block_rsv->full = 0; - - spin_unlock(&block_rsv->lock); - spin_unlock(&sinfo->lock); -} - -static void init_global_block_rsv(struct btrfs_fs_info *fs_info) -{ - struct btrfs_space_info *space_info; - - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); - fs_info->chunk_block_rsv.space_info = space_info; - - space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); - fs_info->global_block_rsv.space_info = space_info; - fs_info->trans_block_rsv.space_info = space_info; - fs_info->empty_block_rsv.space_info = space_info; - fs_info->delayed_block_rsv.space_info = space_info; - fs_info->delayed_refs_rsv.space_info = space_info; - - fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv; - fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv; - fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; - fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; - if (fs_info->quota_root) - fs_info->quota_root->block_rsv = &fs_info->global_block_rsv; - fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; - - update_global_block_rsv(fs_info); -} - -static void release_global_block_rsv(struct btrfs_fs_info *fs_info) -{ - block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, - (u64)-1, NULL); - WARN_ON(fs_info->trans_block_rsv.size > 0); - WARN_ON(fs_info->trans_block_rsv.reserved > 0); - WARN_ON(fs_info->chunk_block_rsv.size > 0); - WARN_ON(fs_info->chunk_block_rsv.reserved > 0); - WARN_ON(fs_info->delayed_block_rsv.size > 0); - WARN_ON(fs_info->delayed_block_rsv.reserved > 0); - WARN_ON(fs_info->delayed_refs_rsv.reserved > 0); - WARN_ON(fs_info->delayed_refs_rsv.size > 0); -} - -/* - * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv - * @trans - the trans that may have generated delayed refs - * - * This is to be called anytime we may have adjusted trans->delayed_ref_updates, - * it'll calculate the additional size and add it to the delayed_refs_rsv. - */ -void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) -{ - struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; - u64 num_bytes; - - if (!trans->delayed_ref_updates) - return; - - num_bytes = btrfs_calc_trans_metadata_size(fs_info, - trans->delayed_ref_updates); - spin_lock(&delayed_rsv->lock); - delayed_rsv->size += num_bytes; - delayed_rsv->full = 0; - spin_unlock(&delayed_rsv->lock); - trans->delayed_ref_updates = 0; -} - -/* - * To be called after all the new block groups attached to the transaction - * handle have been created (btrfs_create_pending_block_groups()). - */ -void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) -{ - struct btrfs_fs_info *fs_info = trans->fs_info; - - if (!trans->chunk_bytes_reserved) - return; - - WARN_ON_ONCE(!list_empty(&trans->new_bgs)); - - block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL, - trans->chunk_bytes_reserved, NULL); - trans->chunk_bytes_reserved = 0; -} - -/* - * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation - * root: the root of the parent directory - * rsv: block reservation - * items: the number of items that we need do reservation - * use_global_rsv: allow fallback to the global block reservation - * - * This function is used to reserve the space for snapshot/subvolume - * creation and deletion. Those operations are different with the - * common file/directory operations, they change two fs/file trees - * and root tree, the number of items that the qgroup reserves is - * different with the free space reservation. So we can not use - * the space reservation mechanism in start_transaction(). - */ -int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv, int items, - bool use_global_rsv) -{ - u64 qgroup_num_bytes = 0; - u64 num_bytes; - int ret; - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { - /* One for parent inode, two for dir entries */ - qgroup_num_bytes = 3 * fs_info->nodesize; - ret = btrfs_qgroup_reserve_meta_prealloc(root, - qgroup_num_bytes, true); - if (ret) - return ret; - } - - num_bytes = btrfs_calc_trans_metadata_size(fs_info, items); - rsv->space_info = __find_space_info(fs_info, - BTRFS_BLOCK_GROUP_METADATA); - ret = btrfs_block_rsv_add(root, rsv, num_bytes, - BTRFS_RESERVE_FLUSH_ALL); - - if (ret == -ENOSPC && use_global_rsv) - ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true); - - if (ret && qgroup_num_bytes) - btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); - - return ret; -} - -void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv) -{ - btrfs_block_rsv_release(fs_info, rsv, (u64)-1); -} - -static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, - struct btrfs_inode *inode) -{ - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; - u64 reserve_size = 0; - u64 qgroup_rsv_size = 0; - u64 csum_leaves; - unsigned outstanding_extents; - - lockdep_assert_held(&inode->lock); - outstanding_extents = inode->outstanding_extents; - if (outstanding_extents) - reserve_size = btrfs_calc_trans_metadata_size(fs_info, - outstanding_extents + 1); - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_trans_metadata_size(fs_info, - csum_leaves); - /* - * For qgroup rsv, the calculation is very simple: - * account one nodesize for each outstanding extent - * - * This is overestimating in most cases. - */ - qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; - - spin_lock(&block_rsv->lock); - block_rsv->size = reserve_size; - block_rsv->qgroup_rsv_size = qgroup_rsv_size; - spin_unlock(&block_rsv->lock); -} - -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, - u64 num_bytes, u64 *meta_reserve, - u64 *qgroup_reserve) -{ - u64 nr_extents = count_max_extents(num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); - - /* We add one for the inode update at finish ordered time */ - *meta_reserve = btrfs_calc_trans_metadata_size(fs_info, - nr_extents + csum_leaves + 1); - *qgroup_reserve = nr_extents * fs_info->nodesize; -} - -int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) -{ - struct btrfs_root *root = inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *block_rsv = &inode->block_rsv; - u64 meta_reserve, qgroup_reserve; - unsigned nr_extents; - enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; - int ret = 0; - bool delalloc_lock = true; - - /* If we are a free space inode we need to not flush since we will be in - * the middle of a transaction commit. We also don't need the delalloc - * mutex since we won't race with anybody. We need this mostly to make - * lockdep shut its filthy mouth. - * - * If we have a transaction open (can happen if we call truncate_block - * from truncate), then we need FLUSH_LIMIT so we don't deadlock. - */ - if (btrfs_is_free_space_inode(inode)) { - flush = BTRFS_RESERVE_NO_FLUSH; - delalloc_lock = false; - } else { - if (current->journal_info) - flush = BTRFS_RESERVE_FLUSH_LIMIT; - - if (btrfs_transaction_in_commit(fs_info)) - schedule_timeout(1); - } - - if (delalloc_lock) - mutex_lock(&inode->delalloc_mutex); - - num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - - /* - * We always want to do it this way, every other way is wrong and ends - * in tears. Pre-reserving the amount we are going to add will always - * be the right way, because otherwise if we have enough parallelism we - * could end up with thousands of inodes all holding little bits of - * reservations they were able to make previously and the only way to - * reclaim that space is to ENOSPC out the operations and clear - * everything out and try again, which is bad. This way we just - * over-reserve slightly, and clean up the mess when we are done. - */ - calc_inode_reservations(fs_info, num_bytes, &meta_reserve, - &qgroup_reserve); - ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); - if (ret) - goto out_fail; - ret = reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); - if (ret) - goto out_qgroup; - - /* - * Now we need to update our outstanding extents and csum bytes _first_ - * and then add the reservation to the block_rsv. This keeps us from - * racing with an ordered completion or some such that would think it - * needs to free the reservation we just made. - */ - spin_lock(&inode->lock); - nr_extents = count_max_extents(num_bytes); - btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += num_bytes; - btrfs_calculate_inode_block_rsv_size(fs_info, inode); - spin_unlock(&inode->lock); - - /* Now we can safely add our space to our block rsv */ - block_rsv_add_bytes(block_rsv, meta_reserve, false); - trace_btrfs_space_reservation(root->fs_info, "delalloc", - btrfs_ino(inode), meta_reserve, 1); - - spin_lock(&block_rsv->lock); - block_rsv->qgroup_rsv_reserved += qgroup_reserve; - spin_unlock(&block_rsv->lock); - - if (delalloc_lock) - mutex_unlock(&inode->delalloc_mutex); - return 0; -out_qgroup: - btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); -out_fail: - btrfs_inode_rsv_release(inode, true); - if (delalloc_lock) - mutex_unlock(&inode->delalloc_mutex); - return ret; -} - -/** - * btrfs_delalloc_release_metadata - release a metadata reservation for an inode - * @inode: the inode to release the reservation for. - * @num_bytes: the number of bytes we are releasing. - * @qgroup_free: free qgroup reservation or convert it to per-trans reservation - * - * This will release the metadata reservation for an inode. This can be called - * once we complete IO for a given set of bytes to release their metadata - * reservations, or on error for the same reason. - */ -void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - - num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; - btrfs_calculate_inode_block_rsv_size(fs_info, inode); - spin_unlock(&inode->lock); - - if (btrfs_is_testing(fs_info)) - return; - - btrfs_inode_rsv_release(inode, qgroup_free); -} - -/** - * btrfs_delalloc_release_extents - release our outstanding_extents - * @inode: the inode to balance the reservation for. - * @num_bytes: the number of bytes we originally reserved with - * @qgroup_free: do we need to free qgroup meta reservation or convert them. - * - * When we reserve space we increase outstanding_extents for the extents we may - * add. Once we've set the range as delalloc or created our ordered extents we - * have outstanding_extents to track the real usage, so we use this to free our - * temporarily tracked outstanding_extents. This _must_ be used in conjunction - * with btrfs_delalloc_reserve_metadata. - */ -void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes, - bool qgroup_free) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - unsigned num_extents; - - spin_lock(&inode->lock); - num_extents = count_max_extents(num_bytes); - btrfs_mod_outstanding_extents(inode, -num_extents); - btrfs_calculate_inode_block_rsv_size(fs_info, inode); - spin_unlock(&inode->lock); - - if (btrfs_is_testing(fs_info)) - return; - - btrfs_inode_rsv_release(inode, qgroup_free); -} - -/** - * btrfs_delalloc_reserve_space - reserve data and metadata space for - * delalloc - * @inode: inode we're writing to - * @start: start range we are writing to - * @len: how long the range we are writing to - * @reserved: mandatory parameter, record actually reserved qgroup ranges of - * current reservation. - * - * This will do the following things - * - * o reserve space in data space info for num bytes - * and reserve precious corresponding qgroup space - * (Done in check_data_free_space) - * - * o reserve space for metadata space, based on the number of outstanding - * extents and how much csums will be needed - * also reserve metadata space in a per root over-reserve method. - * o add to the inodes->delalloc_bytes - * o add it to the fs_info's delalloc inodes list. - * (Above 3 all done in delalloc_reserve_metadata) - * - * Return 0 for success - * Return <0 for error(-ENOSPC or -EQUOT) - */ -int btrfs_delalloc_reserve_space(struct inode *inode, - struct extent_changeset **reserved, u64 start, u64 len) -{ - int ret; - - ret = btrfs_check_data_free_space(inode, reserved, start, len); - if (ret < 0) - return ret; - ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); - if (ret < 0) - btrfs_free_reserved_data_space(inode, *reserved, start, len); - return ret; -} - -/** - * btrfs_delalloc_release_space - release data and metadata space for delalloc - * @inode: inode we're releasing space for - * @start: start position of the space already reserved - * @len: the len of the space already reserved - * @release_bytes: the len of the space we consumed or didn't use - * - * This function will release the metadata space that was not used and will - * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes - * list if there are no delalloc bytes left. - * Also it will handle the qgroup reserved space. - */ -void btrfs_delalloc_release_space(struct inode *inode, - struct extent_changeset *reserved, - u64 start, u64 len, bool qgroup_free) -{ - btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free); - btrfs_free_reserved_data_space(inode, reserved, start, len); -} - static int update_block_group(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, int alloc) { @@ -6295,7 +4168,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val -= num_bytes; btrfs_set_block_group_used(&cache->item, old_val); cache->pinned += num_bytes; - update_bytes_pinned(cache->space_info, num_bytes); + btrfs_space_info_update_bytes_pinned(info, + cache->space_info, num_bytes); cache->space_info->bytes_used -= num_bytes; cache->space_info->disk_used -= num_bytes * factor; spin_unlock(&cache->lock); @@ -6370,7 +4244,8 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache, spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); cache->pinned += num_bytes; - update_bytes_pinned(cache->space_info, num_bytes); + btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info, + num_bytes); if (reserved) { cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; @@ -6579,7 +4454,8 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, } else { cache->reserved += num_bytes; space_info->bytes_reserved += num_bytes; - update_bytes_may_use(space_info, -ram_bytes); + btrfs_space_info_update_bytes_may_use(cache->fs_info, + space_info, -ram_bytes); if (delalloc) cache->delalloc_bytes += num_bytes; } @@ -6645,7 +4521,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) up_write(&fs_info->commit_root_sem); - update_global_block_rsv(fs_info); + btrfs_update_global_block_rsv(fs_info); } /* @@ -6735,7 +4611,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); spin_lock(&cache->lock); cache->pinned -= len; - update_bytes_pinned(space_info, -len); + btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len); trace_btrfs_space_reservation(fs_info, "pinned", space_info->flags, len, 0); @@ -6756,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, to_add = min(len, global_rsv->size - global_rsv->reserved); global_rsv->reserved += to_add; - update_bytes_may_use(space_info, to_add); + btrfs_space_info_update_bytes_may_use(fs_info, + space_info, to_add); if (global_rsv->reserved >= global_rsv->size) global_rsv->full = 1; trace_btrfs_space_reservation(fs_info, @@ -6768,8 +4645,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, spin_unlock(&global_rsv->lock); /* Add to any tickets we may have */ if (len) - space_info_add_new_bytes(fs_info, space_info, - len); + btrfs_space_info_add_new_bytes(fs_info, + space_info, len); } spin_unlock(&space_info->lock); } @@ -7291,10 +5168,10 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) } enum btrfs_loop_type { - LOOP_CACHING_NOWAIT = 0, - LOOP_CACHING_WAIT = 1, - LOOP_ALLOC_CHUNK = 2, - LOOP_NO_EMPTY_SIZE = 3, + LOOP_CACHING_NOWAIT, + LOOP_CACHING_WAIT, + LOOP_ALLOC_CHUNK, + LOOP_NO_EMPTY_SIZE, }; static inline void @@ -7660,8 +5537,8 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, return ret; } - ret = do_chunk_alloc(trans, ffe_ctl->flags, - CHUNK_ALLOC_FORCE); + ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, + CHUNK_ALLOC_FORCE); /* * If we can't allocate a new chunk we've already looped @@ -7757,7 +5634,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info, trace_find_free_extent(fs_info, num_bytes, empty_size, flags); - space_info = __find_space_info(fs_info, flags); + space_info = btrfs_find_space_info(fs_info, flags); if (!space_info) { btrfs_err(fs_info, "No space info for %llu", flags); return -ENOSPC; @@ -7862,9 +5739,8 @@ search: */ if (!block_group_bits(block_group, flags)) { u64 extra = BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_RAID1_MASK | + BTRFS_BLOCK_GROUP_RAID56_MASK | BTRFS_BLOCK_GROUP_RAID10; /* @@ -7983,60 +5859,6 @@ loop: return ret; } -#define DUMP_BLOCK_RSV(fs_info, rsv_name) \ -do { \ - struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \ - spin_lock(&__rsv->lock); \ - btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \ - __rsv->size, __rsv->reserved); \ - spin_unlock(&__rsv->lock); \ -} while (0) - -static void dump_space_info(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *info, u64 bytes, - int dump_block_groups) -{ - struct btrfs_block_group_cache *cache; - int index = 0; - - spin_lock(&info->lock); - btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", - info->flags, - info->total_bytes - btrfs_space_info_used(info, true), - info->full ? "" : "not "); - btrfs_info(fs_info, - "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", - info->total_bytes, info->bytes_used, info->bytes_pinned, - info->bytes_reserved, info->bytes_may_use, - info->bytes_readonly); - spin_unlock(&info->lock); - - DUMP_BLOCK_RSV(fs_info, global_block_rsv); - DUMP_BLOCK_RSV(fs_info, trans_block_rsv); - DUMP_BLOCK_RSV(fs_info, chunk_block_rsv); - DUMP_BLOCK_RSV(fs_info, delayed_block_rsv); - DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv); - - if (!dump_block_groups) - return; - - down_read(&info->groups_sem); -again: - list_for_each_entry(cache, &info->block_groups[index], list) { - spin_lock(&cache->lock); - btrfs_info(fs_info, - "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", - cache->key.objectid, cache->key.offset, - btrfs_block_group_used(&cache->item), cache->pinned, - cache->reserved, cache->ro ? "[readonly]" : ""); - btrfs_dump_free_space(cache, bytes); - spin_unlock(&cache->lock); - } - if (++index < BTRFS_NR_RAID_TYPES) - goto again; - up_read(&info->groups_sem); -} - /* * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a * hole that is at least as big as @num_bytes. @@ -8112,12 +5934,13 @@ again: } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { struct btrfs_space_info *sinfo; - sinfo = __find_space_info(fs_info, flags); + sinfo = btrfs_find_space_info(fs_info, flags); btrfs_err(fs_info, "allocation failed flags %llu, wanted %llu", flags, num_bytes); if (sinfo) - dump_space_info(fs_info, sinfo, num_bytes, 1); + btrfs_dump_space_info(fs_info, sinfo, + num_bytes, 1); } } @@ -8455,73 +6278,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, return buf; } -static struct btrfs_block_rsv * -use_block_rsv(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u32 blocksize) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_block_rsv *block_rsv; - struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; - int ret; - bool global_updated = false; - - block_rsv = get_block_rsv(trans, root); - - if (unlikely(block_rsv->size == 0)) - goto try_reserve; -again: - ret = block_rsv_use_bytes(block_rsv, blocksize); - if (!ret) - return block_rsv; - - if (block_rsv->failfast) - return ERR_PTR(ret); - - if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) { - global_updated = true; - update_global_block_rsv(fs_info); - goto again; - } - - /* - * The global reserve still exists to save us from ourselves, so don't - * warn_on if we are short on our delayed refs reserve. - */ - if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS && - btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { - static DEFINE_RATELIMIT_STATE(_rs, - DEFAULT_RATELIMIT_INTERVAL * 10, - /*DEFAULT_RATELIMIT_BURST*/ 1); - if (__ratelimit(&_rs)) - WARN(1, KERN_DEBUG - "BTRFS: block rsv returned %d\n", ret); - } -try_reserve: - ret = reserve_metadata_bytes(root, block_rsv, blocksize, - BTRFS_RESERVE_NO_FLUSH); - if (!ret) - return block_rsv; - /* - * If we couldn't reserve metadata bytes try and use some from - * the global reserve if its space type is the same as the global - * reservation. - */ - if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL && - block_rsv->space_info == global_rsv->space_info) { - ret = block_rsv_use_bytes(global_rsv, blocksize); - if (!ret) - return global_rsv; - } - return ERR_PTR(ret); -} - -static void unuse_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv, u32 blocksize) -{ - block_rsv_add_bytes(block_rsv, blocksize, false); - block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL); -} - /* * finds a free extent and does all the dirty work required for allocation * returns the tree buffer or an ERR_PTR on error. @@ -8554,7 +6310,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, } #endif - block_rsv = use_block_rsv(trans, root, blocksize); + block_rsv = btrfs_use_block_rsv(trans, root, blocksize); if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); @@ -8612,7 +6368,7 @@ out_free_buf: out_free_reserved: btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); out_unuse: - unuse_block_rsv(fs_info, block_rsv, blocksize); + btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize); return ERR_PTR(ret); } @@ -9551,9 +7307,8 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) num_devices = fs_info->fs_devices->rw_devices; - stripped = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; + stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID56_MASK | + BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10; if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; @@ -9564,7 +7319,7 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags) return stripped; /* turn mirroring into duplication */ - if (flags & (BTRFS_BLOCK_GROUP_RAID1 | + if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)) return stripped | BTRFS_BLOCK_GROUP_DUP; } else { @@ -9635,7 +7390,7 @@ out: btrfs_info(cache->fs_info, "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu", sinfo_used, num_bytes, min_allocable_bytes); - dump_space_info(cache->fs_info, cache->space_info, 0, 0); + btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); } return ret; } @@ -9677,8 +7432,7 @@ again: */ alloc_flags = update_block_group_flags(fs_info, cache->flags); if (alloc_flags != cache->flags) { - ret = do_chunk_alloc(trans, alloc_flags, - CHUNK_ALLOC_FORCE); + ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); /* * ENOSPC is allowed here, we may have enough space * already allocated at the new raid level to @@ -9694,7 +7448,7 @@ again: if (!ret) goto out; alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags); - ret = do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); + ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); @@ -9715,7 +7469,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) { u64 alloc_flags = get_alloc_profile(trans->fs_info, type); - return do_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); + return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); } /* @@ -9948,7 +7702,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree; struct extent_map *em; - em_tree = &root->fs_info->mapping_tree.map_tree; + em_tree = &root->fs_info->mapping_tree; read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, found_key.objectid, found_key.offset); @@ -10101,7 +7855,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) */ synchronize_rcu(); - release_global_block_rsv(info); + btrfs_release_global_block_rsv(info); while (!list_empty(&info->space_info)) { int i; @@ -10117,7 +7871,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) - dump_space_info(info, space_info, 0, 0); + btrfs_dump_space_info(info, space_info, 0, 0); list_del(&space_info->list); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { struct kobject *kobj; @@ -10140,7 +7894,6 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) struct btrfs_space_info *space_info; struct raid_kobject *rkobj; LIST_HEAD(list); - int index; int ret = 0; spin_lock(&fs_info->pending_raid_kobjs_lock); @@ -10148,11 +7901,10 @@ void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->pending_raid_kobjs_lock); list_for_each_entry(rkobj, &list, list) { - space_info = __find_space_info(fs_info, rkobj->flags); - index = btrfs_bg_flags_to_raid_index(rkobj->flags); + space_info = btrfs_find_space_info(fs_info, rkobj->flags); ret = kobject_add(&rkobj->kobj, &space_info->kobj, - "%s", get_raid_name(index)); + "%s", btrfs_bg_type_to_raid_name(rkobj->flags)); if (ret) { kobject_put(&rkobj->kobj); break; @@ -10242,21 +7994,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, */ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; struct btrfs_block_group_cache *bg; u64 start = 0; int ret = 0; while (1) { - read_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); /* * lookup_extent_mapping will return the first extent map * intersecting the range, so setting @len to 1 is enough to * get the first chunk. */ - em = lookup_extent_mapping(&map_tree->map_tree, start, 1); - read_unlock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(map_tree, start, 1); + read_unlock(&map_tree->lock); if (!em) break; @@ -10416,9 +8168,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) } trace_btrfs_add_block_group(info, cache, 0); - update_space_info(info, cache->flags, found_key.offset, - btrfs_block_group_used(&cache->item), - cache->bytes_super, &space_info); + btrfs_update_space_info(info, cache->flags, found_key.offset, + btrfs_block_group_used(&cache->item), + cache->bytes_super, &space_info); cache->space_info = space_info; @@ -10436,9 +8188,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) list_for_each_entry_rcu(space_info, &info->space_info, list) { if (!(get_alloc_profile(info, space_info->flags) & (BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6 | + BTRFS_BLOCK_GROUP_RAID1_MASK | + BTRFS_BLOCK_GROUP_RAID56_MASK | BTRFS_BLOCK_GROUP_DUP))) continue; /* @@ -10456,7 +8207,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) } btrfs_add_raid_kobjects(info); - init_global_block_rsv(info); + btrfs_init_global_block_rsv(info); ret = check_chunk_block_group_mappings(info); error: btrfs_free_path(path); @@ -10553,7 +8304,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, * assigned to our block group. We want our bg to be added to the rbtree * with its ->space_info set. */ - cache->space_info = __find_space_info(fs_info, cache->flags); + cache->space_info = btrfs_find_space_info(fs_info, cache->flags); ASSERT(cache->space_info); ret = btrfs_add_block_group_cache(fs_info, cache); @@ -10568,9 +8319,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used, * the rbtree, update the space info's counters. */ trace_btrfs_add_block_group(fs_info, cache, 1); - update_space_info(fs_info, cache->flags, size, bytes_used, + btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, cache->bytes_super, &cache->space_info); - update_global_block_rsv(fs_info); + btrfs_update_global_block_rsv(fs_info); link_block_group(cache); @@ -10597,6 +8348,35 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) write_sequnlock(&fs_info->profiles_lock); } +/* + * Clear incompat bits for the following feature(s): + * + * - RAID56 - in case there's neither RAID5 nor RAID6 profile block group + * in the whole filesystem + */ +static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) +{ + if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) { + struct list_head *head = &fs_info->space_info; + struct btrfs_space_info *sinfo; + + list_for_each_entry_rcu(sinfo, head, list) { + bool found = false; + + down_read(&sinfo->groups_sem); + if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5])) + found = true; + if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6])) + found = true; + up_read(&sinfo->groups_sem); + + if (found) + return; + } + btrfs_clear_fs_incompat(fs_info, RAID56); + } +} + int btrfs_remove_block_group(struct btrfs_trans_handle *trans, u64 group_start, struct extent_map *em) { @@ -10743,6 +8523,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, clear_avail_alloc_bits(fs_info, block_group->flags); } up_write(&block_group->space_info->groups_sem); + clear_incompat_bg_bits(fs_info, block_group->flags); if (kobj) { kobject_del(kobj); kobject_put(kobj); @@ -10830,17 +8611,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, remove_em = (atomic_read(&block_group->trimming) == 0); spin_unlock(&block_group->lock); - if (remove_em) { - struct extent_map_tree *em_tree; - - em_tree = &fs_info->mapping_tree.map_tree; - write_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - /* once for the tree */ - free_extent_map(em); - } - mutex_unlock(&fs_info->chunk_mutex); ret = remove_block_group_free_space(trans, block_group); @@ -10857,6 +8627,19 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + if (remove_em) { + struct extent_map_tree *em_tree; + + em_tree = &fs_info->mapping_tree; + write_lock(&em_tree->lock); + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + /* once for the tree */ + free_extent_map(em); + } out: if (remove_rsv) btrfs_delayed_refs_rsv_release(fs_info, 1); @@ -10868,7 +8651,7 @@ struct btrfs_trans_handle * btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, const u64 chunk_offset) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; unsigned int num_items; @@ -11017,7 +8800,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&space_info->lock); spin_lock(&block_group->lock); - update_bytes_pinned(space_info, -block_group->pinned); + btrfs_space_info_update_bytes_pinned(fs_info, space_info, + -block_group->pinned); space_info->bytes_readonly += block_group->pinned; percpu_counter_add_batch(&space_info->total_bytes_pinned, -block_group->pinned, @@ -11073,43 +8857,6 @@ next: spin_unlock(&fs_info->unused_bgs_lock); } -int btrfs_init_space_info(struct btrfs_fs_info *fs_info) -{ - struct btrfs_super_block *disk_super; - u64 features; - u64 flags; - int mixed = 0; - int ret; - - disk_super = fs_info->super_copy; - if (!btrfs_super_root(disk_super)) - return -EINVAL; - - features = btrfs_super_incompat_flags(disk_super); - if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) - mixed = 1; - - flags = BTRFS_BLOCK_GROUP_SYSTEM; - ret = create_space_info(fs_info, flags); - if (ret) - goto out; - - if (mixed) { - flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; - ret = create_space_info(fs_info, flags); - } else { - flags = BTRFS_BLOCK_GROUP_METADATA; - ret = create_space_info(fs_info, flags); - if (ret) - goto out; - - flags = BTRFS_BLOCK_GROUP_DATA; - ret = create_space_info(fs_info, flags); - } -out: - return ret; -} - int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end) { @@ -11136,13 +8883,11 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, * it while performing the free space search since we have already * held back allocations. */ -static int btrfs_trim_free_extents(struct btrfs_device *device, - struct fstrim_range *range, u64 *trimmed) +static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) { - u64 start, len = 0, end = 0; + u64 start = SZ_1M, len = 0, end = 0; int ret; - start = max_t(u64, range->start, SZ_1M); *trimmed = 0; /* Discard not supported = nothing to do. */ @@ -11170,12 +8915,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, find_first_clear_extent_bit(&device->alloc_state, start, &start, &end, CHUNK_TRIMMED | CHUNK_ALLOCATED); + + /* Ensure we skip the reserved area in the first 1M */ + start = max_t(u64, start, SZ_1M); + /* * If find_first_clear_extent_bit find a range that spans the * end of the device it will set end to -1, in this case it's up * to the caller to trim the value to the size of the device. */ end = min(end, device->total_bytes - 1); + len = end - start + 1; /* We didn't find any extents */ @@ -11185,22 +8935,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, break; } - /* Keep going until we satisfy minlen or reach end of space */ - if (len < range->minlen) { - mutex_unlock(&fs_info->chunk_mutex); - start += len; - continue; - } - - /* If we are out of the passed range break */ - if (start > range->start + range->len - 1) { - mutex_unlock(&fs_info->chunk_mutex); - break; - } - - start = max(range->start, start); - len = min(range->len, len); - ret = btrfs_issue_discard(device->bdev, start, len, &bytes); if (!ret) @@ -11215,10 +8949,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, start += len; *trimmed += bytes; - /* We've trimmed enough */ - if (*trimmed >= range->len) - break; - if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; break; @@ -11302,7 +9032,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { - ret = btrfs_trim_free_extents(device, range, &group_trimmed); + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; dev_ret = ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index db337e53aab3..1ff438fd5bc2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -359,6 +359,24 @@ do_insert: return NULL; } +/** + * __etree_search - searche @tree for an entry that contains @offset. Such + * entry would have entry->start <= offset && entry->end >= offset. + * + * @tree - the tree to search + * @offset - offset that should fall within an entry in @tree + * @next_ret - pointer to the first entry whose range ends after @offset + * @prev - pointer to the first entry whose range begins before @offset + * @p_ret - pointer where new node should be anchored (used when inserting an + * entry in the tree) + * @parent_ret - points to entry which would have been the parent of the entry, + * containing @offset + * + * This function returns a pointer to the entry that contains @offset byte + * address. If no such entry exists, then NULL is returned and the other + * pointer arguments to the function are filled, otherwise the found entry is + * returned and other pointers are left untouched. + */ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, struct rb_node **next_ret, struct rb_node **prev_ret, @@ -504,9 +522,11 @@ static int insert_state(struct extent_io_tree *tree, { struct rb_node *node; - if (end < start) - WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n", - end, start); + if (end < start) { + btrfs_err(tree->fs_info, + "insert state: end < start %llu %llu", end, start); + WARN_ON(1); + } state->start = start; state->end = end; @@ -516,7 +536,8 @@ static int insert_state(struct extent_io_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); - pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n", + btrfs_err(tree->fs_info, + "found node %llu %llu on insert of %llu %llu", found->start, found->end, start, end); return -EEXIST; } @@ -1537,8 +1558,8 @@ out: } /** - * find_first_clear_extent_bit - finds the first range that has @bits not set - * and that starts after @start + * find_first_clear_extent_bit - find the first range that has @bits not set. + * This range could start before @start. * * @tree - the tree to search * @start - the offset at/after which the found extent should start @@ -1578,12 +1599,52 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, goto out; } } + /* + * At this point 'node' either contains 'start' or start is + * before 'node' + */ state = rb_entry(node, struct extent_state, rb_node); - if (in_range(start, state->start, state->end - state->start + 1) && - (state->state & bits)) { - start = state->end + 1; + + if (in_range(start, state->start, state->end - state->start + 1)) { + if (state->state & bits) { + /* + * |--range with bits sets--| + * | + * start + */ + start = state->end + 1; + } else { + /* + * 'start' falls within a range that doesn't + * have the bits set, so take its start as + * the beginning of the desired range + * + * |--range with bits cleared----| + * | + * start + */ + *start_ret = state->start; + break; + } } else { - *start_ret = start; + /* + * |---prev range---|---hole/unset---|---node range---| + * | + * start + * + * or + * + * |---hole/unset--||--first node--| + * 0 | + * start + */ + if (prev) { + state = rb_entry(prev, struct extent_state, + rb_node); + *start_ret = state->end + 1; + } else { + *start_ret = 0; + } break; } } @@ -1719,10 +1780,10 @@ static noinline int lock_delalloc_pages(struct inode *inode, */ EXPORT_FOR_TESTS noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, - struct extent_io_tree *tree, struct page *locked_page, u64 *start, u64 *end) { + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; u64 delalloc_start; u64 delalloc_end; @@ -2800,12 +2861,11 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) * never fail. We're returning a bio right now but you can call btrfs_io_bio * for the appropriate container_of magic */ -struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) +struct bio *btrfs_bio_alloc(u64 first_byte) { struct bio *bio; bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; @@ -2911,12 +2971,13 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, bio = NULL; } else { if (wbc) - wbc_account_io(wbc, page, page_size); + wbc_account_cgroup_owner(wbc, page, page_size); return 0; } } - bio = btrfs_bio_alloc(bdev, offset); + bio = btrfs_bio_alloc(offset); + bio_set_dev(bio, bdev); bio_add_page(bio, page, page_size, pg_offset); bio->bi_end_io = end_io_func; bio->bi_private = tree; @@ -2924,7 +2985,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, bio->bi_opf = opf; if (wbc) { wbc_init_bio(wbc, bio); - wbc_account_io(wbc, page, page_size); + wbc_account_cgroup_owner(wbc, page, page_size); } *bio_ret = bio; @@ -3204,21 +3265,10 @@ static inline void contiguous_readpages(struct extent_io_tree *tree, unsigned long *bio_flags, u64 *prev_em_start) { - struct inode *inode; - struct btrfs_ordered_extent *ordered; + struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host); int index; - inode = pages[0]->mapping->host; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, - end - start + 1); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } + btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL); for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, @@ -3234,22 +3284,12 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unsigned long *bio_flags, unsigned int read_flags) { - struct inode *inode = page->mapping->host; - struct btrfs_ordered_extent *ordered; + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); u64 start = page_offset(page); u64 end = start + PAGE_SIZE - 1; int ret; - while (1) { - lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, - PAGE_SIZE); - if (!ordered) - break; - unlock_extent(tree, start, end); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } + btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL); ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, bio_flags, read_flags, NULL); @@ -3290,7 +3330,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, struct page *page, struct writeback_control *wbc, u64 delalloc_start, unsigned long *nr_written) { - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; u64 page_end = delalloc_start + PAGE_SIZE - 1; bool found; u64 delalloc_to_write = 0; @@ -3300,8 +3339,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, while (delalloc_end < page_end) { - found = find_lock_delalloc_range(inode, tree, - page, + found = find_lock_delalloc_range(inode, page, &delalloc_start, &delalloc_end); if (!found) { @@ -3310,7 +3348,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, } ret = btrfs_run_delalloc_range(inode, page, delalloc_start, delalloc_end, &page_started, nr_written, wbc); - /* File system has been set read-only */ if (ret) { SetPageError(page); /* @@ -4542,6 +4579,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct fiemap_cache cache = { 0 }; + struct ulist *roots; + struct ulist *tmp_ulist; int end = 0; u64 em_start = 0; u64 em_len = 0; @@ -4555,6 +4594,13 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -ENOMEM; path->leave_spinning = 1; + roots = ulist_alloc(GFP_KERNEL); + tmp_ulist = ulist_alloc(GFP_KERNEL); + if (!roots || !tmp_ulist) { + ret = -ENOMEM; + goto out_free_ulist; + } + start = round_down(start, btrfs_inode_sectorsize(inode)); len = round_up(max, btrfs_inode_sectorsize(inode)) - start; @@ -4565,8 +4611,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(BTRFS_I(inode)), -1, 0); if (ret < 0) { - btrfs_free_path(path); - return ret; + goto out_free_ulist; } else { WARN_ON(!ret); if (ret == 1) @@ -4675,7 +4720,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ ret = btrfs_check_shared(root, btrfs_ino(BTRFS_I(inode)), - bytenr); + bytenr, roots, tmp_ulist); if (ret < 0) goto out_free; if (ret) @@ -4718,9 +4763,13 @@ out_free: ret = emit_last_fiemap_cache(fieinfo, &cache); free_extent_map(em); out: - btrfs_free_path(path); unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state); + +out_free_ulist: + btrfs_free_path(path); + ulist_free(roots); + ulist_free(tmp_ulist); return ret; } @@ -4808,7 +4857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, eb->bflags = 0; rwlock_init(&eb->lock); atomic_set(&eb->blocking_readers, 0); - atomic_set(&eb->blocking_writers, 0); + eb->blocking_writers = 0; eb->lock_nested = false; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); @@ -4827,10 +4876,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); #ifdef CONFIG_BTRFS_DEBUG - atomic_set(&eb->spinning_writers, 0); + eb->spinning_writers = 0; atomic_set(&eb->spinning_readers, 0); atomic_set(&eb->read_locks, 0); - atomic_set(&eb->write_locks, 0); + eb->write_locks = 0; #endif return eb; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index aa18a16a6ed7..401423b16976 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -167,7 +167,7 @@ struct extent_buffer { struct rcu_head rcu_head; pid_t lock_owner; - atomic_t blocking_writers; + int blocking_writers; atomic_t blocking_readers; bool lock_nested; /* >= 0 if eb belongs to a log tree, -1 otherwise */ @@ -187,10 +187,10 @@ struct extent_buffer { wait_queue_head_t read_lock_wq; struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG - atomic_t spinning_writers; + int spinning_writers; atomic_t spinning_readers; atomic_t read_locks; - atomic_t write_locks; + int write_locks; struct list_head leak_list; #endif }; @@ -497,7 +497,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, u64 delalloc_end, struct page *locked_page, unsigned bits_to_clear, unsigned long page_ops); -struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte); +struct bio *btrfs_bio_alloc(u64 first_byte); struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio); struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size); @@ -549,7 +549,7 @@ int free_io_failure(struct extent_io_tree *failure_tree, struct extent_io_tree *io_tree, struct io_failure_record *rec); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -bool find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree, +bool find_lock_delalloc_range(struct inode *inode, struct page *locked_page, u64 *start, u64 *end); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d431ea8198e4..1a599f50837b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -8,6 +8,7 @@ #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/sched/mm.h> +#include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -22,9 +23,13 @@ #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ PAGE_SIZE)) -#define MAX_ORDERED_SUM_BYTES(fs_info) ((PAGE_SIZE - \ - sizeof(struct btrfs_ordered_sum)) / \ - sizeof(u32) * (fs_info)->sectorsize) +static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, + u16 csum_size) +{ + u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size; + + return ncsums * fs_info->sectorsize; +} int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -144,7 +149,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, } static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u64 logical_offset, u32 *dst, int dio) + u64 logical_offset, u8 *dst, int dio) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio_vec bvec; @@ -182,7 +187,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio } csum = btrfs_bio->csum; } else { - csum = (u8 *)dst; + csum = dst; } if (bio->bi_iter.bi_size > PAGE_SIZE * 8) @@ -211,7 +216,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio if (!dio) offset = page_offset(bvec.bv_page) + bvec.bv_offset; count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, - (u32 *)csum, nblocks); + csum, nblocks); if (count) goto found; @@ -283,7 +288,8 @@ next: return 0; } -blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, + u8 *dst) { return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); } @@ -374,7 +380,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct btrfs_csum_item); while (start < csum_end) { size = min_t(size_t, csum_end - start, - MAX_ORDERED_SUM_BYTES(fs_info)); + max_ordered_sum_bytes(fs_info, csum_size)); sums = kzalloc(btrfs_ordered_sum_size(fs_info, size), GFP_NOFS); if (!sums) { @@ -427,6 +433,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct btrfs_ordered_sum *sums; struct btrfs_ordered_extent *ordered = NULL; char *data; @@ -439,6 +446,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, int i; u64 offset; unsigned nofs_flag; + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); nofs_flag = memalloc_nofs_save(); sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), @@ -459,6 +467,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; index = 0; + shash->tfm = fs_info->csum_shash; + bio_for_each_segment(bvec, bio, iter) { if (!contig) offset = page_offset(bvec.bv_page) + bvec.bv_offset; @@ -498,17 +508,14 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, index = 0; } - sums->sums[index] = ~(u32)0; + crypto_shash_init(shash); data = kmap_atomic(bvec.bv_page); - sums->sums[index] - = btrfs_csum_data(data + bvec.bv_offset - + (i * fs_info->sectorsize), - sums->sums[index], - fs_info->sectorsize); + crypto_shash_update(shash, data + bvec.bv_offset + + (i * fs_info->sectorsize), + fs_info->sectorsize); kunmap_atomic(data); - btrfs_csum_final(sums->sums[index], - (char *)(sums->sums + index)); - index++; + crypto_shash_final(shash, (char *)(sums->sums + index)); + index += csum_size; offset += fs_info->sectorsize; this_sum_bytes += fs_info->sectorsize; total_bytes += fs_info->sectorsize; @@ -904,9 +911,9 @@ found: write_extent_buffer(leaf, sums->sums + index, (unsigned long)item, ins_size); + index += ins_size; ins_size /= csum_size; total_bytes += ins_size * fs_info->sectorsize; - index += ins_size; btrfs_mark_buffer_dirty(path->nodes[0]); if (total_bytes < sums->len) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7e85dca0e6f2..58a18ed11546 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -26,6 +26,7 @@ #include "volumes.h" #include "qgroup.h" #include "compression.h" +#include "delalloc-space.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* @@ -1550,30 +1551,20 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_root *root = inode->root; - struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; u64 num_bytes; int ret; ret = btrfs_start_write_no_snapshotting(root); if (!ret) - return -ENOSPC; + return -EAGAIN; lockstart = round_down(pos, fs_info->sectorsize); lockend = round_up(pos + *write_bytes, fs_info->sectorsize) - 1; - while (1) { - lock_extent(&inode->io_tree, lockstart, lockend); - ordered = btrfs_lookup_ordered_range(inode, lockstart, - lockend - lockstart + 1); - if (!ordered) { - break; - } - unlock_extent(&inode->io_tree, lockstart, lockend); - btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } + btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart, + lockend, NULL); num_bytes = lockend - lockstart + 1; ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, @@ -2068,6 +2059,18 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) u64 len; /* + * If the inode needs a full sync, make sure we use a full range to + * avoid log tree corruption, due to hole detection racing with ordered + * extent completion for adjacent ranges, and assertion failures during + * hole detection. + */ + if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags)) { + start = 0; + end = LLONG_MAX; + } + + /* * The range length can be represented by u64, we have to do the typecasts * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() */ @@ -2554,10 +2557,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, &cached_state); - if (ret) { - inode_unlock(inode); + if (ret) goto out_only_mutex; - } path = btrfs_alloc_path(); if (!path) { @@ -2711,6 +2712,11 @@ out_only_mutex: * for detecting, at fsync time, if the inode isn't yet in the * log tree or it's there but not up to date. */ + struct timespec64 now = current_time(inode); + + inode_inc_iversion(inode); + inode->i_mtime = now; + inode->i_ctime = now; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2791,9 +2797,9 @@ static int btrfs_fallocate_update_isize(struct inode *inode, } enum { - RANGE_BOUNDARY_WRITTEN_EXTENT = 0, - RANGE_BOUNDARY_PREALLOC_EXTENT = 1, - RANGE_BOUNDARY_HOLE = 2, + RANGE_BOUNDARY_WRITTEN_EXTENT, + RANGE_BOUNDARY_PREALLOC_EXTENT, + RANGE_BOUNDARY_HOLE, }; static int btrfs_zero_range_check_range_boundary(struct inode *inode, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f74dc259307b..062be9dde4c6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,8 @@ #include "extent_io.h" #include "inode-map.h" #include "volumes.h" +#include "space-info.h" +#include "delalloc-space.h" #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_32K @@ -465,9 +467,8 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) if (index == 0) offset = sizeof(u32) * io_ctl->num_pages; - crc = btrfs_csum_data(io_ctl->orig + offset, crc, - PAGE_SIZE - offset); - btrfs_csum_final(crc, (u8 *)&crc); + crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); + btrfs_crc32c_final(crc, (u8 *)&crc); io_ctl_unmap_page(io_ctl); tmp = page_address(io_ctl->pages[0]); tmp += index; @@ -493,9 +494,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) val = *tmp; io_ctl_map_page(io_ctl, 0); - crc = btrfs_csum_data(io_ctl->orig + offset, crc, - PAGE_SIZE - offset); - btrfs_csum_final(crc, (u8 *)&crc); + crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset); + btrfs_crc32c_final(crc, (u8 *)&crc); if (val != crc) { btrfs_err_rl(io_ctl->fs_info, "csum mismatch on free space cache"); @@ -3166,8 +3166,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group, space_info->bytes_readonly += reserved_bytes; block_group->reserved -= reserved_bytes; space_info->bytes_reserved -= reserved_bytes; - spin_unlock(&space_info->lock); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); } return ret; @@ -3358,7 +3358,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) if (cleanup) { mutex_lock(&fs_info->chunk_mutex); - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, block_group->key.objectid, 1); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ffca2abf13d0..2e8bb402050b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -11,6 +11,7 @@ #include "free-space-cache.h" #include "inode-map.h" #include "transaction.h" +#include "delalloc-space.h" static int caching_kthread(void *data) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9aba9660efe5..1af069a9a0c7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -47,6 +47,7 @@ #include "props.h" #include "qgroup.h" #include "dedupe.h" +#include "delalloc-space.h" struct btrfs_iget_args { struct btrfs_key *location; @@ -1932,17 +1933,19 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, u64 length = 0; u64 map_length; int ret; + struct btrfs_io_geometry geom; if (bio_flags & EXTENT_BIO_COMPRESSED) return 0; length = bio->bi_iter.bi_size; map_length = length; - ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, - NULL, 0); + ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length, + &geom); if (ret < 0) return ret; - if (map_length < length + size) + + if (geom.len < length + size) return 1; return 0; } @@ -3203,16 +3206,23 @@ static int __readpage_endio_check(struct inode *inode, int icsum, struct page *page, int pgoff, u64 start, size_t len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; - u32 csum_expected; - u32 csum = ~(u32)0; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + u8 *csum_expected; + u8 csum[BTRFS_CSUM_SIZE]; - csum_expected = *(((u32 *)io_bio->csum) + icsum); + csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size; kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr + pgoff, csum, len); - btrfs_csum_final(csum, (u8 *)&csum); - if (csum != csum_expected) + shash->tfm = fs_info->csum_shash; + + crypto_shash_init(shash); + crypto_shash_update(shash, kaddr + pgoff, len); + crypto_shash_final(shash, csum); + + if (memcmp(csum, csum_expected, csum_size)) goto zeroit; kunmap_atomic(kaddr); @@ -3286,6 +3296,28 @@ void btrfs_add_delayed_iput(struct inode *inode) wake_up_process(fs_info->cleaner_kthread); } +static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info, + struct btrfs_inode *inode) +{ + list_del_init(&inode->delayed_iput); + spin_unlock(&fs_info->delayed_iput_lock); + iput(&inode->vfs_inode); + if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) + wake_up(&fs_info->delayed_iputs_wait); + spin_lock(&fs_info->delayed_iput_lock); +} + +static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info, + struct btrfs_inode *inode) +{ + if (!list_empty(&inode->delayed_iput)) { + spin_lock(&fs_info->delayed_iput_lock); + if (!list_empty(&inode->delayed_iput)) + run_delayed_iput_locked(fs_info, inode); + spin_unlock(&fs_info->delayed_iput_lock); + } +} + void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) { @@ -3295,12 +3327,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) inode = list_first_entry(&fs_info->delayed_iputs, struct btrfs_inode, delayed_iput); - list_del_init(&inode->delayed_iput); - spin_unlock(&fs_info->delayed_iput_lock); - iput(&inode->vfs_inode); - if (atomic_dec_and_test(&fs_info->nr_delayed_iputs)) - wake_up(&fs_info->delayed_iputs_wait); - spin_lock(&fs_info->delayed_iput_lock); + run_delayed_iput_locked(fs_info, inode); } spin_unlock(&fs_info->delayed_iput_lock); } @@ -3935,9 +3962,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_path *path; int ret = 0; - struct extent_buffer *leaf; struct btrfs_dir_item *di; - struct btrfs_key key; u64 index; u64 ino = btrfs_ino(inode); u64 dir_ino = btrfs_ino(dir); @@ -3955,8 +3980,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = di ? PTR_ERR(di) : -ENOENT; goto err; } - leaf = path->nodes[0]; - btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; @@ -4009,6 +4032,17 @@ skip_backref: ret = 0; else if (ret) btrfs_abort_transaction(trans, ret); + + /* + * If we have a pending delayed iput we could end up with the final iput + * being run in btrfs-cleaner context. If we have enough of these built + * up we can end up burning a lot of time in btrfs-cleaner without any + * way to throttle the unlinks. Since we're currently holding a ref on + * the inode we can run the delayed iput here without any issues as the + * final iput won't be done until after we drop the ref we're currently + * holding. + */ + btrfs_run_delayed_iput(fs_info, inode); err: btrfs_free_path(path); if (ret) @@ -5008,21 +5042,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) if (size <= hole_start) return 0; - while (1) { - struct btrfs_ordered_extent *ordered; - - lock_extent_bits(io_tree, hole_start, block_end - 1, - &cached_state); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start, - block_end - hole_start); - if (!ordered) - break; - unlock_extent_cached(io_tree, hole_start, block_end - 1, - &cached_state); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - } - + btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start, + block_end - 1, &cached_state); cur_offset = hole_start; while (1) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, @@ -6433,8 +6454,18 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name_len * 2); inode_inc_iversion(&parent_inode->vfs_inode); - parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime = - current_time(&parent_inode->vfs_inode); + /* + * If we are replaying a log tree, we do not want to update the mtime + * and ctime of the parent directory with the current time, since the + * log replay procedure is responsible for setting them to their correct + * values (the ones it had when the fsync was done). + */ + if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) { + struct timespec64 now = current_time(&parent_inode->vfs_inode); + + parent_inode->vfs_inode.i_mtime = now; + parent_inode->vfs_inode.i_ctime = now; + } ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode); if (ret) btrfs_abort_transaction(trans, ret); @@ -8308,22 +8339,21 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) struct bio *orig_bio = dip->orig_bio; u64 start_sector = orig_bio->bi_iter.bi_sector; u64 file_offset = dip->logical_offset; - u64 map_length; int async_submit = 0; u64 submit_len; int clone_offset = 0; int clone_len; int ret; blk_status_t status; + struct btrfs_io_geometry geom; - map_length = orig_bio->bi_iter.bi_size; - submit_len = map_length; - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9, - &map_length, NULL, 0); + submit_len = orig_bio->bi_iter.bi_size; + ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio), + start_sector << 9, submit_len, &geom); if (ret) return -EIO; - if (map_length >= submit_len) { + if (geom.len >= submit_len) { bio = orig_bio; dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; goto submit; @@ -8336,10 +8366,10 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) async_submit = 1; /* bio split */ - ASSERT(map_length <= INT_MAX); + ASSERT(geom.len <= INT_MAX); atomic_inc(&dip->pending_bios); do { - clone_len = min_t(int, submit_len, map_length); + clone_len = min_t(int, submit_len, geom.len); /* * This will never fail as it's passing GPF_NOFS and @@ -8376,9 +8406,8 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) start_sector += clone_len >> 9; file_offset += clone_len; - map_length = submit_len; - ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), - start_sector << 9, &map_length, NULL, 0); + ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio), + start_sector << 9, submit_len, &geom); if (ret) goto out_err; } while (submit_len > 0); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6dafa857bbb9..818f7ec8bb0e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,8 @@ #include "qgroup.h" #include "tree-log.h" #include "compression.h" +#include "space-info.h" +#include "delalloc-space.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI @@ -187,7 +189,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(inode); struct btrfs_root *root = binode->root; struct btrfs_trans_handle *trans; - unsigned int fsflags; + unsigned int fsflags, old_fsflags; int ret; const char *comp = NULL; u32 binode_flags = binode->flags; @@ -212,13 +214,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode_lock(inode); fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); - if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) & - (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; - goto out_unlock; - } - } + old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out_unlock; if (fsflags & FS_SYNC_FL) binode_flags |= BTRFS_INODE_SYNC; @@ -312,8 +311,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) btrfs_abort_transaction(trans, ret); goto out_end_trans; } - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); } else { ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0); @@ -378,9 +375,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(file_inode(file)); struct fsxattr fa; - memset(&fa, 0, sizeof(fa)); - fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags); - + simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags)); if (copy_to_user(arg, &fa, sizeof(fa))) return -EFAULT; @@ -393,7 +388,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(inode); struct btrfs_root *root = binode->root; struct btrfs_trans_handle *trans; - struct fsxattr fa; + struct fsxattr fa, old_fa; unsigned old_flags; unsigned old_i_flags; int ret = 0; @@ -404,7 +399,6 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) if (btrfs_root_readonly(root)) return -EROFS; - memset(&fa, 0, sizeof(fa)); if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; @@ -424,13 +418,11 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) old_flags = binode->flags; old_i_flags = inode->i_flags; - /* We need the capabilities to change append-only or immutable inode */ - if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) || - (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) && - !capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; + simple_fill_fsxattr(&old_fa, + btrfs_inode_flags_to_xflags(binode->flags)); + ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); + if (ret) goto out_unlock; - } if (fa.fsx_xflags & FS_XFLAG_SYNC) binode->flags |= BTRFS_INODE_SYNC; @@ -2930,8 +2922,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode_lock(inode); err = btrfs_delete_subvolume(dir, dentry); inode_unlock(inode); - if (!err) + if (!err) { + fsnotify_rmdir(dir, dentry); d_delete(dentry); + } out_dput: dput(dentry); @@ -4001,6 +3995,27 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (!same_inode) inode_dio_wait(inode_out); + /* + * Workaround to make sure NOCOW buffered write reach disk as NOCOW. + * + * Btrfs' back references do not have a block level granularity, they + * work at the whole extent level. + * NOCOW buffered write without data space reserved may not be able + * to fall back to CoW due to lack of data space, thus could cause + * data loss. + * + * Here we take a shortcut by flushing the whole inode, so that all + * nocow write should reach disk as nocow before we increase the + * reference of the extent. We could do better by only flushing NOCOW + * data, but that needs extra accounting. + * + * Also we don't need to check ASYNC_EXTENT, as async extent will be + * CoWed anyway, not affecting nocow part. + */ + ret = filemap_flush(inode_in->i_mapping); + if (ret < 0) + return ret; + ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), wb_len); if (ret < 0) diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 2f6c3c7851ed..98fccce4208c 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -15,19 +15,19 @@ #ifdef CONFIG_BTRFS_DEBUG static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { - WARN_ON(atomic_read(&eb->spinning_writers)); - atomic_inc(&eb->spinning_writers); + WARN_ON(eb->spinning_writers); + eb->spinning_writers++; } static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { - WARN_ON(atomic_read(&eb->spinning_writers) != 1); - atomic_dec(&eb->spinning_writers); + WARN_ON(eb->spinning_writers != 1); + eb->spinning_writers--; } static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { - WARN_ON(atomic_read(&eb->spinning_writers)); + WARN_ON(eb->spinning_writers); } static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) @@ -58,17 +58,17 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { - atomic_inc(&eb->write_locks); + eb->write_locks++; } static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { - atomic_dec(&eb->write_locks); + eb->write_locks--; } void btrfs_assert_tree_locked(struct extent_buffer *eb) { - BUG_ON(!atomic_read(&eb->write_locks)); + BUG_ON(!eb->write_locks); } #else @@ -111,10 +111,10 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb) */ if (eb->lock_nested && current->pid == eb->lock_owner) return; - if (atomic_read(&eb->blocking_writers) == 0) { + if (eb->blocking_writers == 0) { btrfs_assert_spinning_writers_put(eb); btrfs_assert_tree_locked(eb); - atomic_inc(&eb->blocking_writers); + eb->blocking_writers++; write_unlock(&eb->lock); } } @@ -148,12 +148,11 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb) */ if (eb->lock_nested && current->pid == eb->lock_owner) return; - BUG_ON(atomic_read(&eb->blocking_writers) != 1); write_lock(&eb->lock); + BUG_ON(eb->blocking_writers != 1); btrfs_assert_spinning_writers_get(eb); - /* atomic_dec_and_test implies a barrier */ - if (atomic_dec_and_test(&eb->blocking_writers)) - cond_wake_up_nomb(&eb->write_lock_wq); + if (--eb->blocking_writers == 0) + cond_wake_up(&eb->write_lock_wq); } /* @@ -167,12 +166,10 @@ void btrfs_tree_read_lock(struct extent_buffer *eb) if (trace_btrfs_tree_read_lock_enabled()) start_ns = ktime_get_ns(); again: - BUG_ON(!atomic_read(&eb->blocking_writers) && - current->pid == eb->lock_owner); - read_lock(&eb->lock); - if (atomic_read(&eb->blocking_writers) && - current->pid == eb->lock_owner) { + BUG_ON(eb->blocking_writers == 0 && + current->pid == eb->lock_owner); + if (eb->blocking_writers && current->pid == eb->lock_owner) { /* * This extent is already write-locked by our thread. We allow * an additional read lock to be added because it's for the same @@ -185,10 +182,10 @@ again: trace_btrfs_tree_read_lock(eb, start_ns); return; } - if (atomic_read(&eb->blocking_writers)) { + if (eb->blocking_writers) { read_unlock(&eb->lock); wait_event(eb->write_lock_wq, - atomic_read(&eb->blocking_writers) == 0); + eb->blocking_writers == 0); goto again; } btrfs_assert_tree_read_locks_get(eb); @@ -203,11 +200,11 @@ again: */ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) { - if (atomic_read(&eb->blocking_writers)) + if (eb->blocking_writers) return 0; read_lock(&eb->lock); - if (atomic_read(&eb->blocking_writers)) { + if (eb->blocking_writers) { read_unlock(&eb->lock); return 0; } @@ -223,13 +220,13 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) */ int btrfs_try_tree_read_lock(struct extent_buffer *eb) { - if (atomic_read(&eb->blocking_writers)) + if (eb->blocking_writers) return 0; if (!read_trylock(&eb->lock)) return 0; - if (atomic_read(&eb->blocking_writers)) { + if (eb->blocking_writers) { read_unlock(&eb->lock); return 0; } @@ -245,13 +242,11 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb) */ int btrfs_try_tree_write_lock(struct extent_buffer *eb) { - if (atomic_read(&eb->blocking_writers) || - atomic_read(&eb->blocking_readers)) + if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) return 0; write_lock(&eb->lock); - if (atomic_read(&eb->blocking_writers) || - atomic_read(&eb->blocking_readers)) { + if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); return 0; } @@ -322,10 +317,9 @@ void btrfs_tree_lock(struct extent_buffer *eb) WARN_ON(eb->lock_owner == current->pid); again: wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); - wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); + wait_event(eb->write_lock_wq, eb->blocking_writers == 0); write_lock(&eb->lock); - if (atomic_read(&eb->blocking_readers) || - atomic_read(&eb->blocking_writers)) { + if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) { write_unlock(&eb->lock); goto again; } @@ -340,7 +334,7 @@ again: */ void btrfs_tree_unlock(struct extent_buffer *eb) { - int blockers = atomic_read(&eb->blocking_writers); + int blockers = eb->blocking_writers; BUG_ON(blockers > 1); @@ -351,7 +345,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb) if (blockers) { btrfs_assert_no_spinning_writers(eb); - atomic_dec(&eb->blocking_writers); + eb->blocking_writers--; /* Use the lighter barrier after atomic */ smp_mb__after_atomic(); cond_wake_up_nomb(&eb->write_lock_wq); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 52889da69113..1744ba8b2754 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -13,6 +13,7 @@ #include "extent_io.h" #include "disk-io.h" #include "compression.h" +#include "delalloc-space.h" static struct kmem_cache *btrfs_ordered_extent_cache; @@ -924,14 +925,16 @@ out: * be reclaimed before their checksum is actually put into the btree */ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, - u32 *sum, int len) + u8 *sum, int len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ordered_sum *ordered_sum; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; unsigned long num_sectors; unsigned long i; u32 sectorsize = btrfs_inode_sectorsize(inode); + const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int index = 0; ordered = btrfs_lookup_ordered_extent(inode, offset); @@ -947,10 +950,10 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, num_sectors = ordered_sum->len >> inode->i_sb->s_blocksize_bits; num_sectors = min_t(int, len - index, num_sectors - i); - memcpy(sum + index, ordered_sum->sums + i, - num_sectors); + memcpy(sum + index, ordered_sum->sums + i * csum_size, + num_sectors * csum_size); - index += (int)num_sectors; + index += (int)num_sectors * csum_size; if (index == len) goto out; disk_bytenr += num_sectors * sectorsize; @@ -962,6 +965,51 @@ out: return index; } +/* + * btrfs_flush_ordered_range - Lock the passed range and ensures all pending + * ordered extents in it are run to completion. + * + * @tree: IO tree used for locking out other users of the range + * @inode: Inode whose ordered tree is to be searched + * @start: Beginning of range to flush + * @end: Last byte of range to lock + * @cached_state: If passed, will return the extent state responsible for the + * locked range. It's the caller's responsibility to free the cached state. + * + * This function always returns with the given range locked, ensuring after it's + * called no order extent can be pending. + */ +void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, + struct btrfs_inode *inode, u64 start, + u64 end, + struct extent_state **cached_state) +{ + struct btrfs_ordered_extent *ordered; + struct extent_state *cachedp = NULL; + + if (cached_state) + cachedp = *cached_state; + + while (1) { + lock_extent_bits(tree, start, end, &cachedp); + ordered = btrfs_lookup_ordered_range(inode, start, + end - start + 1); + if (!ordered) { + /* + * If no external cached_state has been passed then + * decrement the extra ref taken for cachedp since we + * aren't exposing it outside of this function + */ + if (!cached_state) + refcount_dec(&cachedp->refs); + break; + } + unlock_extent_cached(tree, start, end, &cachedp); + btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } +} + int __init ordered_data_init(void) { btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4c5991c3de14..5204171ea962 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -23,7 +23,7 @@ struct btrfs_ordered_sum { int len; struct list_head list; /* last field is a variable length array of csums */ - u32 sums[]; + u8 sums[]; }; /* @@ -183,11 +183,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, - u32 *sum, int len); + u8 *sum, int len); u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, const u64 range_start, const u64 range_len); u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, const u64 range_start, const u64 range_len); +void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, + struct btrfs_inode *inode, u64 start, + u64 end, + struct extent_state **cached_state); int __init ordered_data_init(void); void __cold ordered_data_exit(void); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1141ca5fae6a..9cb50577d982 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -153,11 +153,11 @@ static void print_eb_refs_lock(struct extent_buffer *eb) #ifdef CONFIG_BTRFS_DEBUG btrfs_info(eb->fs_info, "refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u", - atomic_read(&eb->refs), atomic_read(&eb->write_locks), + atomic_read(&eb->refs), eb->write_locks, atomic_read(&eb->read_locks), - atomic_read(&eb->blocking_writers), + eb->blocking_writers, atomic_read(&eb->blocking_readers), - atomic_read(&eb->spinning_writers), + eb->spinning_writers, atomic_read(&eb->spinning_readers), eb->lock_owner, current->pid); #endif diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index ca2716917e37..e0469816c678 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -257,11 +257,7 @@ static int prop_compression_validate(const char *value, size_t len) if (!value) return 0; - if (!strncmp("lzo", value, 3)) - return 0; - else if (!strncmp("zlib", value, 4)) - return 0; - else if (!strncmp("zstd", value, 4)) + if (btrfs_compress_is_valid_type(value, len)) return 0; return -EINVAL; @@ -332,6 +328,7 @@ static int inherit_props(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; int ret; int i; + bool need_reserve = false; if (!test_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(parent)->runtime_flags)) @@ -340,7 +337,7 @@ static int inherit_props(struct btrfs_trans_handle *trans, for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) { const struct prop_handler *h = &prop_handlers[i]; const char *value; - u64 num_bytes; + u64 num_bytes = 0; if (!h->inheritable) continue; @@ -357,11 +354,20 @@ static int inherit_props(struct btrfs_trans_handle *trans, if (ret) continue; - num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - ret = btrfs_block_rsv_add(root, trans->block_rsv, - num_bytes, BTRFS_RESERVE_NO_FLUSH); - if (ret) - return ret; + /* + * Currently callers should be reserving 1 item for properties, + * since we only have 1 property that we currently support. If + * we add more in the future we need to try and reserve more + * space for them. But we should also revisit how we do space + * reservations if we do add more properties in the future. + */ + if (need_reserve) { + num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + ret = btrfs_block_rsv_add(root, trans->block_rsv, + num_bytes, BTRFS_RESERVE_NO_FLUSH); + if (ret) + return ret; + } ret = btrfs_setxattr(trans, inode, h->xattr_name, value, strlen(value), 0); @@ -375,9 +381,13 @@ static int inherit_props(struct btrfs_trans_handle *trans, &BTRFS_I(inode)->runtime_flags); } - btrfs_block_rsv_release(fs_info, trans->block_rsv, num_bytes); - if (ret) - return ret; + if (need_reserve) { + btrfs_block_rsv_release(fs_info, trans->block_rsv, + num_bytes); + if (ret) + return ret; + } + need_reserve = true; } return 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2f708f2c4e67..f8a3c1b0a15a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2614,6 +2614,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, int ret = 0; int i; u64 *i_qgroups; + bool committing = false; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *quota_root; struct btrfs_qgroup *srcgroup; @@ -2621,7 +2622,25 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, u32 level_size = 0; u64 nums; - mutex_lock(&fs_info->qgroup_ioctl_lock); + /* + * There are only two callers of this function. + * + * One in create_subvol() in the ioctl context, which needs to hold + * the qgroup_ioctl_lock. + * + * The other one in create_pending_snapshot() where no other qgroup + * code can modify the fs as they all need to either start a new trans + * or hold a trans handler, thus we don't need to hold + * qgroup_ioctl_lock. + * This would avoid long and complex lock chain and make lockdep happy. + */ + spin_lock(&fs_info->trans_lock); + if (trans->transaction->state == TRANS_STATE_COMMIT_DOING) + committing = true; + spin_unlock(&fs_info->trans_lock); + + if (!committing) + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) goto out; @@ -2785,7 +2804,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, unlock: spin_unlock(&fs_info->qgroup_lock); out: - mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (!committing) + mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -3830,7 +3850,13 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, subvol_slot); block->last_snapshot = last_snapshot; block->level = level; - if (bg->flags & BTRFS_BLOCK_GROUP_DATA) + + /* + * If we have bg == NULL, we're called from btrfs_recover_relocation(), + * no one else can modify tree blocks thus we qgroup will not change + * no matter the value of trace_leaf. + */ + if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA) block->trace_leaf = true; else block->trace_leaf = false; diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index f5d4c13a8dbc..2503485db859 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -7,7 +7,7 @@ #ifndef BTRFS_RAID56_H #define BTRFS_RAID56_H -static inline int nr_parity_stripes(struct map_lookup *map) +static inline int nr_parity_stripes(const struct map_lookup *map) { if (map->type & BTRFS_BLOCK_GROUP_RAID5) return 1; @@ -17,7 +17,7 @@ static inline int nr_parity_stripes(struct map_lookup *map) return 0; } -static inline int nr_data_stripes(struct map_lookup *map) +static inline int nr_data_stripes(const struct map_lookup *map) { return map->num_stripes - nr_parity_stripes(map); } diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 10d9589001a9..bb5bd49573b4 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -747,6 +747,7 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) u64 total = 0; int i; +again: do { enqueued = 0; mutex_lock(&fs_devices->device_list_mutex); @@ -758,6 +759,10 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info) mutex_unlock(&fs_devices->device_list_mutex); total += enqueued; } while (enqueued && total < 10000); + if (fs_devices->seed) { + fs_devices = fs_devices->seed; + goto again; + } if (enqueued == 0) return; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a459ecddcce4..7f219851fa23 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -20,6 +20,7 @@ #include "inode-map.h" #include "qgroup.h" #include "print-tree.h" +#include "delalloc-space.h" /* * backref_node, mapping_node and tree_block start with this @@ -2177,22 +2178,30 @@ static int clean_dirty_subvols(struct reloc_control *rc) struct btrfs_root *root; struct btrfs_root *next; int ret = 0; + int ret2; list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots, reloc_dirty_list) { - struct btrfs_root *reloc_root = root->reloc_root; + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + /* Merged subvolume, cleanup its reloc root */ + struct btrfs_root *reloc_root = root->reloc_root; - clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); - list_del_init(&root->reloc_dirty_list); - root->reloc_root = NULL; - if (reloc_root) { - int ret2; + clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state); + list_del_init(&root->reloc_dirty_list); + root->reloc_root = NULL; + if (reloc_root) { - ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1); + ret2 = btrfs_drop_snapshot(reloc_root, NULL, 0, 1); + if (ret2 < 0 && !ret) + ret = ret2; + } + btrfs_put_fs_root(root); + } else { + /* Orphan reloc tree, just clean it up */ + ret2 = btrfs_drop_snapshot(root, NULL, 0, 1); if (ret2 < 0 && !ret) ret = ret2; } - btrfs_put_fs_root(root); } return ret; } @@ -2480,6 +2489,9 @@ again: } } else { list_del_init(&reloc_root->root_list); + /* Don't forget to queue this reloc root for cleanup */ + list_add_tail(&reloc_root->reloc_dirty_list, + &rc->dirty_subvol_roots); } } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1b9a5d0de139..47733fb55df7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -9,6 +9,8 @@ #include "transaction.h" #include "disk-io.h" #include "print-tree.h" +#include "qgroup.h" +#include "space-info.h" /* * Read a root item from the tree. In case we detect a root item smaller then @@ -132,10 +134,8 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root return -ENOMEM; ret = btrfs_search_slot(trans, root, key, path, 0, 1); - if (ret < 0) { - btrfs_abort_transaction(trans, ret); + if (ret < 0) goto out; - } if (ret > 0) { btrfs_crit(fs_info, @@ -499,3 +499,57 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec); spin_unlock(&root->root_item_lock); } + +/* + * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation + * root: the root of the parent directory + * rsv: block reservation + * items: the number of items that we need do reservation + * use_global_rsv: allow fallback to the global block reservation + * + * This function is used to reserve the space for snapshot/subvolume + * creation and deletion. Those operations are different with the + * common file/directory operations, they change two fs/file trees + * and root tree, the number of items that the qgroup reserves is + * different with the free space reservation. So we can not use + * the space reservation mechanism in start_transaction(). + */ +int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, + struct btrfs_block_rsv *rsv, int items, + bool use_global_rsv) +{ + u64 qgroup_num_bytes = 0; + u64 num_bytes; + int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + + if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { + /* One for parent inode, two for dir entries */ + qgroup_num_bytes = 3 * fs_info->nodesize; + ret = btrfs_qgroup_reserve_meta_prealloc(root, + qgroup_num_bytes, true); + if (ret) + return ret; + } + + num_bytes = btrfs_calc_trans_metadata_size(fs_info, items); + rsv->space_info = btrfs_find_space_info(fs_info, + BTRFS_BLOCK_GROUP_METADATA); + ret = btrfs_block_rsv_add(root, rsv, num_bytes, + BTRFS_RESERVE_FLUSH_ALL); + + if (ret == -ENOSPC && use_global_rsv) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true); + + if (ret && qgroup_num_bytes) + btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes); + + return ret; +} + +void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv) +{ + btrfs_block_rsv_release(fs_info, rsv, (u64)-1); +} diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f7b29f9db5e2..0c99cf9fb595 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -6,6 +6,7 @@ #include <linux/blkdev.h> #include <linux/ratelimit.h> #include <linux/sched/mm.h> +#include <crypto/hash.h> #include "ctree.h" #include "volumes.h" #include "disk-io.h" @@ -1787,11 +1788,12 @@ static int scrub_checksum(struct scrub_block *sblock) static int scrub_checksum_data(struct scrub_block *sblock) { struct scrub_ctx *sctx = sblock->sctx; + struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 csum[BTRFS_CSUM_SIZE]; u8 *on_disk_csum; struct page *page; void *buffer; - u32 crc = ~(u32)0; u64 len; int index; @@ -1799,6 +1801,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) if (!sblock->pagev[0]->have_csum) return 0; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + on_disk_csum = sblock->pagev[0]->csum; page = sblock->pagev[0]->page; buffer = kmap_atomic(page); @@ -1808,7 +1813,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, PAGE_SIZE); - crc = btrfs_csum_data(buffer, crc, l); + crypto_shash_update(shash, buffer, l); kunmap_atomic(buffer); len -= l; if (len == 0) @@ -1820,7 +1825,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) buffer = kmap_atomic(page); } - btrfs_csum_final(crc, csum); + crypto_shash_final(shash, csum); if (memcmp(csum, on_disk_csum, sctx->csum_size)) sblock->checksum_error = 1; @@ -1832,16 +1837,19 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) struct scrub_ctx *sctx = sblock->sctx; struct btrfs_header *h; struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 calculated_csum[BTRFS_CSUM_SIZE]; u8 on_disk_csum[BTRFS_CSUM_SIZE]; struct page *page; void *mapped_buffer; u64 mapped_size; void *p; - u32 crc = ~(u32)0; u64 len; int index; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + BUG_ON(sblock->page_count < 1); page = sblock->pagev[0]->page; mapped_buffer = kmap_atomic(page); @@ -1875,7 +1883,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, mapped_size); - crc = btrfs_csum_data(p, crc, l); + crypto_shash_update(shash, p, l); kunmap_atomic(mapped_buffer); len -= l; if (len == 0) @@ -1889,7 +1897,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) p = mapped_buffer; } - btrfs_csum_final(crc, calculated_csum); + crypto_shash_final(shash, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) sblock->checksum_error = 1; @@ -1900,18 +1908,22 @@ static int scrub_checksum_super(struct scrub_block *sblock) { struct btrfs_super_block *s; struct scrub_ctx *sctx = sblock->sctx; + struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 calculated_csum[BTRFS_CSUM_SIZE]; u8 on_disk_csum[BTRFS_CSUM_SIZE]; struct page *page; void *mapped_buffer; u64 mapped_size; void *p; - u32 crc = ~(u32)0; int fail_gen = 0; int fail_cor = 0; u64 len; int index; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + BUG_ON(sblock->page_count < 1); page = sblock->pagev[0]->page; mapped_buffer = kmap_atomic(page); @@ -1934,7 +1946,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, mapped_size); - crc = btrfs_csum_data(p, crc, l); + crypto_shash_update(shash, p, l); kunmap_atomic(mapped_buffer); len -= l; if (len == 0) @@ -1948,7 +1960,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) p = mapped_buffer; } - btrfs_csum_final(crc, calculated_csum); + crypto_shash_final(shash, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) ++fail_cor; @@ -2448,7 +2460,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum) ASSERT(index < UINT_MAX); num_sectors = sum->len / sctx->fs_info->sectorsize; - memcpy(csum, sum->sums + index, sctx->csum_size); + memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size); if (index == num_sectors - 1) { list_del(&sum->list); kfree(sum); @@ -2660,18 +2672,18 @@ static int get_raid56_logic_offset(u64 physical, int num, u64 last_offset; u32 stripe_index; u32 rot; + const int data_stripes = nr_data_stripes(map); - last_offset = (physical - map->stripes[num].physical) * - nr_data_stripes(map); + last_offset = (physical - map->stripes[num].physical) * data_stripes; if (stripe_start) *stripe_start = last_offset; *offset = last_offset; - for (i = 0; i < nr_data_stripes(map); i++) { + for (i = 0; i < data_stripes; i++) { *offset = last_offset + i * map->stripe_len; stripe_nr = div64_u64(*offset, map->stripe_len); - stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); + stripe_nr = div_u64(stripe_nr, data_stripes); /* Work out the disk rotation on this stripe-set */ stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); @@ -3079,7 +3091,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, offset = map->stripe_len * (num / map->sub_stripes); increment = map->stripe_len * factor; mirror_num = num % map->sub_stripes + 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { increment = map->stripe_len; mirror_num = num % map->num_stripes + 1; } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { @@ -3410,15 +3422,15 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_block_group_cache *cache) { struct btrfs_fs_info *fs_info = sctx->fs_info; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; int i; int ret = 0; - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, chunk_offset, 1); + read_unlock(&map_tree->lock); if (!em) { /* diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index dd38dfe174df..69b59bf75882 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -686,7 +686,7 @@ static int send_cmd(struct send_ctx *sctx) hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); hdr->crc = 0; - crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); + crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); hdr->crc = cpu_to_le32(crc); ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, @@ -4999,6 +4999,12 @@ static int send_hole(struct send_ctx *sctx, u64 end) if (offset >= sctx->cur_inode_size) return 0; + /* + * Don't go beyond the inode's i_size due to prealloc extents that start + * after the i_size. + */ + end = min_t(u64, end, sctx->cur_inode_size); + if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, end - offset); @@ -5218,10 +5224,50 @@ static int clone_range(struct send_ctx *sctx, clone_len = min_t(u64, ext_len, len); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && - clone_data_offset == data_offset) - ret = send_clone(sctx, offset, clone_len, clone_root); - else + clone_data_offset == data_offset) { + const u64 src_end = clone_root->offset + clone_len; + const u64 sectorsize = SZ_64K; + + /* + * We can't clone the last block, when its size is not + * sector size aligned, into the middle of a file. If we + * do so, the receiver will get a failure (-EINVAL) when + * trying to clone or will silently corrupt the data in + * the destination file if it's on a kernel without the + * fix introduced by commit ac765f83f1397646 + * ("Btrfs: fix data corruption due to cloning of eof + * block). + * + * So issue a clone of the aligned down range plus a + * regular write for the eof block, if we hit that case. + * + * Also, we use the maximum possible sector size, 64K, + * because we don't know what's the sector size of the + * filesystem that receives the stream, so we have to + * assume the largest possible sector size. + */ + if (src_end == clone_src_i_size && + !IS_ALIGNED(src_end, sectorsize) && + offset + clone_len < sctx->cur_inode_size) { + u64 slen; + + slen = ALIGN_DOWN(src_end - clone_root->offset, + sectorsize); + if (slen > 0) { + ret = send_clone(sctx, offset, slen, + clone_root); + if (ret < 0) + goto out; + } + ret = send_extent_data(sctx, offset + slen, + clone_len - slen); + } else { + ret = send_clone(sctx, offset, clone_len, + clone_root); + } + } else { ret = send_extent_data(sctx, offset, clone_len); + } if (ret < 0) goto out; @@ -6883,9 +6929,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) if (ret) goto out; + mutex_lock(&fs_info->balance_mutex); + if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { + mutex_unlock(&fs_info->balance_mutex); + btrfs_warn_rl(fs_info, + "cannot run send because a balance operation is in progress"); + ret = -EAGAIN; + goto out; + } + fs_info->send_in_progress++; + mutex_unlock(&fs_info->balance_mutex); + current->journal_info = BTRFS_SEND_TRANS_STUB; ret = send_subvol(sctx); current->journal_info = NULL; + mutex_lock(&fs_info->balance_mutex); + fs_info->send_in_progress--; + mutex_unlock(&fs_info->balance_mutex); if (ret < 0) goto out; diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c new file mode 100644 index 000000000000..ab7b9ec4c240 --- /dev/null +++ b/fs/btrfs/space-info.c @@ -0,0 +1,1094 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "ctree.h" +#include "space-info.h" +#include "sysfs.h" +#include "volumes.h" +#include "free-space-cache.h" +#include "ordered-data.h" +#include "transaction.h" +#include "math.h" + +u64 btrfs_space_info_used(struct btrfs_space_info *s_info, + bool may_use_included) +{ + ASSERT(s_info); + return s_info->bytes_used + s_info->bytes_reserved + + s_info->bytes_pinned + s_info->bytes_readonly + + (may_use_included ? s_info->bytes_may_use : 0); +} + +/* + * after adding space to the filesystem, we need to clear the full flags + * on all the space infos. + */ +void btrfs_clear_space_info_full(struct btrfs_fs_info *info) +{ + struct list_head *head = &info->space_info; + struct btrfs_space_info *found; + + rcu_read_lock(); + list_for_each_entry_rcu(found, head, list) + found->full = 0; + rcu_read_unlock(); +} + +static const char *alloc_name(u64 flags) +{ + switch (flags) { + case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: + return "mixed"; + case BTRFS_BLOCK_GROUP_METADATA: + return "metadata"; + case BTRFS_BLOCK_GROUP_DATA: + return "data"; + case BTRFS_BLOCK_GROUP_SYSTEM: + return "system"; + default: + WARN_ON(1); + return "invalid-combination"; + }; +} + +static int create_space_info(struct btrfs_fs_info *info, u64 flags) +{ + + struct btrfs_space_info *space_info; + int i; + int ret; + + space_info = kzalloc(sizeof(*space_info), GFP_NOFS); + if (!space_info) + return -ENOMEM; + + ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, + GFP_KERNEL); + if (ret) { + kfree(space_info); + return ret; + } + + for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) + INIT_LIST_HEAD(&space_info->block_groups[i]); + init_rwsem(&space_info->groups_sem); + spin_lock_init(&space_info->lock); + space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; + space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; + init_waitqueue_head(&space_info->wait); + INIT_LIST_HEAD(&space_info->ro_bgs); + INIT_LIST_HEAD(&space_info->tickets); + INIT_LIST_HEAD(&space_info->priority_tickets); + + ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, + info->space_info_kobj, "%s", + alloc_name(space_info->flags)); + if (ret) { + kobject_put(&space_info->kobj); + return ret; + } + + list_add_rcu(&space_info->list, &info->space_info); + if (flags & BTRFS_BLOCK_GROUP_DATA) + info->data_sinfo = space_info; + + return ret; +} + +int btrfs_init_space_info(struct btrfs_fs_info *fs_info) +{ + struct btrfs_super_block *disk_super; + u64 features; + u64 flags; + int mixed = 0; + int ret; + + disk_super = fs_info->super_copy; + if (!btrfs_super_root(disk_super)) + return -EINVAL; + + features = btrfs_super_incompat_flags(disk_super); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = 1; + + flags = BTRFS_BLOCK_GROUP_SYSTEM; + ret = create_space_info(fs_info, flags); + if (ret) + goto out; + + if (mixed) { + flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; + ret = create_space_info(fs_info, flags); + } else { + flags = BTRFS_BLOCK_GROUP_METADATA; + ret = create_space_info(fs_info, flags); + if (ret) + goto out; + + flags = BTRFS_BLOCK_GROUP_DATA; + ret = create_space_info(fs_info, flags); + } +out: + return ret; +} + +void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + u64 bytes_readonly, + struct btrfs_space_info **space_info) +{ + struct btrfs_space_info *found; + int factor; + + factor = btrfs_bg_type_to_factor(flags); + + found = btrfs_find_space_info(info, flags); + ASSERT(found); + spin_lock(&found->lock); + found->total_bytes += total_bytes; + found->disk_total += total_bytes * factor; + found->bytes_used += bytes_used; + found->disk_used += bytes_used * factor; + found->bytes_readonly += bytes_readonly; + if (total_bytes > 0) + found->full = 0; + btrfs_space_info_add_new_bytes(info, found, + total_bytes - bytes_used - + bytes_readonly); + spin_unlock(&found->lock); + *space_info = found; +} + +struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, + u64 flags) +{ + struct list_head *head = &info->space_info; + struct btrfs_space_info *found; + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + rcu_read_lock(); + list_for_each_entry_rcu(found, head, list) { + if (found->flags & flags) { + rcu_read_unlock(); + return found; + } + } + rcu_read_unlock(); + return NULL; +} + +static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) +{ + return (global->size << 1); +} + +static int can_overcommit(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, u64 bytes, + enum btrfs_reserve_flush_enum flush, + bool system_chunk) +{ + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + u64 profile; + u64 space_size; + u64 avail; + u64 used; + int factor; + + /* Don't overcommit when in mixed mode. */ + if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) + return 0; + + if (system_chunk) + profile = btrfs_system_alloc_profile(fs_info); + else + profile = btrfs_metadata_alloc_profile(fs_info); + + used = btrfs_space_info_used(space_info, false); + + /* + * We only want to allow over committing if we have lots of actual space + * free, but if we don't have enough space to handle the global reserve + * space then we could end up having a real enospc problem when trying + * to allocate a chunk or some other such important allocation. + */ + spin_lock(&global_rsv->lock); + space_size = calc_global_rsv_need_space(global_rsv); + spin_unlock(&global_rsv->lock); + if (used + space_size >= space_info->total_bytes) + return 0; + + used += space_info->bytes_may_use; + + avail = atomic64_read(&fs_info->free_chunk_space); + + /* + * If we have dup, raid1 or raid10 then only half of the free + * space is actually usable. For raid56, the space info used + * doesn't include the parity drive, so we don't have to + * change the math + */ + factor = btrfs_bg_type_to_factor(profile); + avail = div_u64(avail, factor); + + /* + * If we aren't flushing all things, let us overcommit up to + * 1/2th of the space. If we can flush, don't let us overcommit + * too much, let it overcommit up to 1/8 of the space. + */ + if (flush == BTRFS_RESERVE_FLUSH_ALL) + avail >>= 3; + else + avail >>= 1; + + if (used + bytes < space_info->total_bytes + avail) + return 1; + return 0; +} + +/* + * This is for space we already have accounted in space_info->bytes_may_use, so + * basically when we're returning space from block_rsv's. + */ +void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes) +{ + struct reserve_ticket *ticket; + struct list_head *head; + u64 used; + enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; + bool check_overcommit = false; + + spin_lock(&space_info->lock); + head = &space_info->priority_tickets; + + /* + * If we are over our limit then we need to check and see if we can + * overcommit, and if we can't then we just need to free up our space + * and not satisfy any requests. + */ + used = btrfs_space_info_used(space_info, true); + if (used - num_bytes >= space_info->total_bytes) + check_overcommit = true; +again: + while (!list_empty(head) && num_bytes) { + ticket = list_first_entry(head, struct reserve_ticket, + list); + /* + * We use 0 bytes because this space is already reserved, so + * adding the ticket space would be a double count. + */ + if (check_overcommit && + !can_overcommit(fs_info, space_info, 0, flush, false)) + break; + if (num_bytes >= ticket->bytes) { + list_del_init(&ticket->list); + num_bytes -= ticket->bytes; + ticket->bytes = 0; + space_info->tickets_id++; + wake_up(&ticket->wait); + } else { + ticket->bytes -= num_bytes; + num_bytes = 0; + } + } + + if (num_bytes && head == &space_info->priority_tickets) { + head = &space_info->tickets; + flush = BTRFS_RESERVE_FLUSH_ALL; + goto again; + } + btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes); + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, num_bytes, 0); + spin_unlock(&space_info->lock); +} + +/* + * This is for newly allocated space that isn't accounted in + * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent + * we use this helper. + */ +void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes) +{ + struct reserve_ticket *ticket; + struct list_head *head = &space_info->priority_tickets; + +again: + while (!list_empty(head) && num_bytes) { + ticket = list_first_entry(head, struct reserve_ticket, + list); + if (num_bytes >= ticket->bytes) { + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, + ticket->bytes, 1); + list_del_init(&ticket->list); + num_bytes -= ticket->bytes; + btrfs_space_info_update_bytes_may_use(fs_info, + space_info, + ticket->bytes); + ticket->bytes = 0; + space_info->tickets_id++; + wake_up(&ticket->wait); + } else { + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, + num_bytes, 1); + btrfs_space_info_update_bytes_may_use(fs_info, + space_info, + num_bytes); + ticket->bytes -= num_bytes; + num_bytes = 0; + } + } + + if (num_bytes && head == &space_info->priority_tickets) { + head = &space_info->tickets; + goto again; + } +} + +#define DUMP_BLOCK_RSV(fs_info, rsv_name) \ +do { \ + struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \ + spin_lock(&__rsv->lock); \ + btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \ + __rsv->size, __rsv->reserved); \ + spin_unlock(&__rsv->lock); \ +} while (0) + +void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *info, u64 bytes, + int dump_block_groups) +{ + struct btrfs_block_group_cache *cache; + int index = 0; + + spin_lock(&info->lock); + btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull", + info->flags, + info->total_bytes - btrfs_space_info_used(info, true), + info->full ? "" : "not "); + btrfs_info(fs_info, + "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", + info->total_bytes, info->bytes_used, info->bytes_pinned, + info->bytes_reserved, info->bytes_may_use, + info->bytes_readonly); + spin_unlock(&info->lock); + + DUMP_BLOCK_RSV(fs_info, global_block_rsv); + DUMP_BLOCK_RSV(fs_info, trans_block_rsv); + DUMP_BLOCK_RSV(fs_info, chunk_block_rsv); + DUMP_BLOCK_RSV(fs_info, delayed_block_rsv); + DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv); + + if (!dump_block_groups) + return; + + down_read(&info->groups_sem); +again: + list_for_each_entry(cache, &info->block_groups[index], list) { + spin_lock(&cache->lock); + btrfs_info(fs_info, + "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", + cache->key.objectid, cache->key.offset, + btrfs_block_group_used(&cache->item), cache->pinned, + cache->reserved, cache->ro ? "[readonly]" : ""); + btrfs_dump_free_space(cache, bytes); + spin_unlock(&cache->lock); + } + if (++index < BTRFS_NR_RAID_TYPES) + goto again; + up_read(&info->groups_sem); +} + +static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info, + unsigned long nr_pages, int nr_items) +{ + struct super_block *sb = fs_info->sb; + + if (down_read_trylock(&sb->s_umount)) { + writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE); + up_read(&sb->s_umount); + } else { + /* + * We needn't worry the filesystem going from r/w to r/o though + * we don't acquire ->s_umount mutex, because the filesystem + * should guarantee the delalloc inodes list be empty after + * the filesystem is readonly(all dirty pages are written to + * the disk). + */ + btrfs_start_delalloc_roots(fs_info, nr_items); + if (!current->journal_info) + btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1); + } +} + +static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, + u64 to_reclaim) +{ + u64 bytes; + u64 nr; + + bytes = btrfs_calc_trans_metadata_size(fs_info, 1); + nr = div64_u64(to_reclaim, bytes); + if (!nr) + nr = 1; + return nr; +} + +#define EXTENT_SIZE_PER_ITEM SZ_256K + +/* + * shrink metadata reservation for delalloc + */ +static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, + u64 orig, bool wait_ordered) +{ + struct btrfs_space_info *space_info; + struct btrfs_trans_handle *trans; + u64 delalloc_bytes; + u64 dio_bytes; + u64 async_pages; + u64 items; + long time_left; + unsigned long nr_pages; + int loops; + + /* Calc the number of the pages we need flush for space reservation */ + items = calc_reclaim_items_nr(fs_info, to_reclaim); + to_reclaim = items * EXTENT_SIZE_PER_ITEM; + + trans = (struct btrfs_trans_handle *)current->journal_info; + space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + + delalloc_bytes = percpu_counter_sum_positive( + &fs_info->delalloc_bytes); + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); + if (delalloc_bytes == 0 && dio_bytes == 0) { + if (trans) + return; + if (wait_ordered) + btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); + return; + } + + /* + * If we are doing more ordered than delalloc we need to just wait on + * ordered extents, otherwise we'll waste time trying to flush delalloc + * that likely won't give us the space back we need. + */ + if (dio_bytes > delalloc_bytes) + wait_ordered = true; + + loops = 0; + while ((delalloc_bytes || dio_bytes) && loops < 3) { + nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; + + /* + * Triggers inode writeback for up to nr_pages. This will invoke + * ->writepages callback and trigger delalloc filling + * (btrfs_run_delalloc_range()). + */ + btrfs_writeback_inodes_sb_nr(fs_info, nr_pages, items); + + /* + * We need to wait for the compressed pages to start before + * we continue. + */ + async_pages = atomic_read(&fs_info->async_delalloc_pages); + if (!async_pages) + goto skip_async; + + /* + * Calculate how many compressed pages we want to be written + * before we continue. I.e if there are more async pages than we + * require wait_event will wait until nr_pages are written. + */ + if (async_pages <= nr_pages) + async_pages = 0; + else + async_pages -= nr_pages; + + wait_event(fs_info->async_submit_wait, + atomic_read(&fs_info->async_delalloc_pages) <= + (int)async_pages); +skip_async: + spin_lock(&space_info->lock); + if (list_empty(&space_info->tickets) && + list_empty(&space_info->priority_tickets)) { + spin_unlock(&space_info->lock); + break; + } + spin_unlock(&space_info->lock); + + loops++; + if (wait_ordered && !trans) { + btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); + } else { + time_left = schedule_timeout_killable(1); + if (time_left) + break; + } + delalloc_bytes = percpu_counter_sum_positive( + &fs_info->delalloc_bytes); + dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); + } +} + +/** + * maybe_commit_transaction - possibly commit the transaction if its ok to + * @root - the root we're allocating for + * @bytes - the number of bytes we want to reserve + * @force - force the commit + * + * This will check to make sure that committing the transaction will actually + * get us somewhere and then commit the transaction if it does. Otherwise it + * will return -ENOSPC. + */ +static int may_commit_transaction(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info) +{ + struct reserve_ticket *ticket = NULL; + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; + struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_trans_handle *trans; + u64 bytes_needed; + u64 reclaim_bytes = 0; + + trans = (struct btrfs_trans_handle *)current->journal_info; + if (trans) + return -EAGAIN; + + spin_lock(&space_info->lock); + if (!list_empty(&space_info->priority_tickets)) + ticket = list_first_entry(&space_info->priority_tickets, + struct reserve_ticket, list); + else if (!list_empty(&space_info->tickets)) + ticket = list_first_entry(&space_info->tickets, + struct reserve_ticket, list); + bytes_needed = (ticket) ? ticket->bytes : 0; + spin_unlock(&space_info->lock); + + if (!bytes_needed) + return 0; + + trans = btrfs_join_transaction(fs_info->extent_root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + /* + * See if there is enough pinned space to make this reservation, or if + * we have block groups that are going to be freed, allowing us to + * possibly do a chunk allocation the next loop through. + */ + if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || + __percpu_counter_compare(&space_info->total_bytes_pinned, + bytes_needed, + BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) + goto commit; + + /* + * See if there is some space in the delayed insertion reservation for + * this reservation. + */ + if (space_info != delayed_rsv->space_info) + goto enospc; + + spin_lock(&delayed_rsv->lock); + reclaim_bytes += delayed_rsv->reserved; + spin_unlock(&delayed_rsv->lock); + + spin_lock(&delayed_refs_rsv->lock); + reclaim_bytes += delayed_refs_rsv->reserved; + spin_unlock(&delayed_refs_rsv->lock); + if (reclaim_bytes >= bytes_needed) + goto commit; + bytes_needed -= reclaim_bytes; + + if (__percpu_counter_compare(&space_info->total_bytes_pinned, + bytes_needed, + BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) + goto enospc; + +commit: + return btrfs_commit_transaction(trans); +enospc: + btrfs_end_transaction(trans); + return -ENOSPC; +} + +/* + * Try to flush some data based on policy set by @state. This is only advisory + * and may fail for various reasons. The caller is supposed to examine the + * state of @space_info to detect the outcome. + */ +static void flush_space(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, u64 num_bytes, + int state) +{ + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_trans_handle *trans; + int nr; + int ret = 0; + + switch (state) { + case FLUSH_DELAYED_ITEMS_NR: + case FLUSH_DELAYED_ITEMS: + if (state == FLUSH_DELAYED_ITEMS_NR) + nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; + else + nr = -1; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_run_delayed_items_nr(trans, nr); + btrfs_end_transaction(trans); + break; + case FLUSH_DELALLOC: + case FLUSH_DELALLOC_WAIT: + shrink_delalloc(fs_info, num_bytes * 2, num_bytes, + state == FLUSH_DELALLOC_WAIT); + break; + case FLUSH_DELAYED_REFS_NR: + case FLUSH_DELAYED_REFS: + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + if (state == FLUSH_DELAYED_REFS_NR) + nr = calc_reclaim_items_nr(fs_info, num_bytes); + else + nr = 0; + btrfs_run_delayed_refs(trans, nr); + btrfs_end_transaction(trans); + break; + case ALLOC_CHUNK: + case ALLOC_CHUNK_FORCE: + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_chunk_alloc(trans, + btrfs_metadata_alloc_profile(fs_info), + (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE : + CHUNK_ALLOC_FORCE); + btrfs_end_transaction(trans); + if (ret > 0 || ret == -ENOSPC) + ret = 0; + break; + case COMMIT_TRANS: + /* + * If we have pending delayed iputs then we could free up a + * bunch of pinned space, so make sure we run the iputs before + * we do our pinned bytes check below. + */ + btrfs_run_delayed_iputs(fs_info); + btrfs_wait_on_delayed_iputs(fs_info); + + ret = may_commit_transaction(fs_info, space_info); + break; + default: + ret = -ENOSPC; + break; + } + + trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, + ret); + return; +} + +static inline u64 +btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + bool system_chunk) +{ + struct reserve_ticket *ticket; + u64 used; + u64 expected; + u64 to_reclaim = 0; + + list_for_each_entry(ticket, &space_info->tickets, list) + to_reclaim += ticket->bytes; + list_for_each_entry(ticket, &space_info->priority_tickets, list) + to_reclaim += ticket->bytes; + if (to_reclaim) + return to_reclaim; + + to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); + if (can_overcommit(fs_info, space_info, to_reclaim, + BTRFS_RESERVE_FLUSH_ALL, system_chunk)) + return 0; + + used = btrfs_space_info_used(space_info, true); + + if (can_overcommit(fs_info, space_info, SZ_1M, + BTRFS_RESERVE_FLUSH_ALL, system_chunk)) + expected = div_factor_fine(space_info->total_bytes, 95); + else + expected = div_factor_fine(space_info->total_bytes, 90); + + if (used > expected) + to_reclaim = used - expected; + else + to_reclaim = 0; + to_reclaim = min(to_reclaim, space_info->bytes_may_use + + space_info->bytes_reserved); + return to_reclaim; +} + +static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 used, bool system_chunk) +{ + u64 thresh = div_factor_fine(space_info->total_bytes, 98); + + /* If we're just plain full then async reclaim just slows us down. */ + if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) + return 0; + + if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info, + system_chunk)) + return 0; + + return (used >= thresh && !btrfs_fs_closing(fs_info) && + !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); +} + +static bool wake_all_tickets(struct list_head *head) +{ + struct reserve_ticket *ticket; + + while (!list_empty(head)) { + ticket = list_first_entry(head, struct reserve_ticket, list); + list_del_init(&ticket->list); + ticket->error = -ENOSPC; + wake_up(&ticket->wait); + if (ticket->bytes != ticket->orig_bytes) + return true; + } + return false; +} + +/* + * This is for normal flushers, we can wait all goddamned day if we want to. We + * will loop and continuously try to flush as long as we are making progress. + * We count progress as clearing off tickets each time we have to loop. + */ +static void btrfs_async_reclaim_metadata_space(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info; + struct btrfs_space_info *space_info; + u64 to_reclaim; + int flush_state; + int commit_cycles = 0; + u64 last_tickets_id; + + fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); + space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); + + spin_lock(&space_info->lock); + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, + false); + if (!to_reclaim) { + space_info->flush = 0; + spin_unlock(&space_info->lock); + return; + } + last_tickets_id = space_info->tickets_id; + spin_unlock(&space_info->lock); + + flush_state = FLUSH_DELAYED_ITEMS_NR; + do { + flush_space(fs_info, space_info, to_reclaim, flush_state); + spin_lock(&space_info->lock); + if (list_empty(&space_info->tickets)) { + space_info->flush = 0; + spin_unlock(&space_info->lock); + return; + } + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, + space_info, + false); + if (last_tickets_id == space_info->tickets_id) { + flush_state++; + } else { + last_tickets_id = space_info->tickets_id; + flush_state = FLUSH_DELAYED_ITEMS_NR; + if (commit_cycles) + commit_cycles--; + } + + /* + * We don't want to force a chunk allocation until we've tried + * pretty hard to reclaim space. Think of the case where we + * freed up a bunch of space and so have a lot of pinned space + * to reclaim. We would rather use that than possibly create a + * underutilized metadata chunk. So if this is our first run + * through the flushing state machine skip ALLOC_CHUNK_FORCE and + * commit the transaction. If nothing has changed the next go + * around then we can force a chunk allocation. + */ + if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles) + flush_state++; + + if (flush_state > COMMIT_TRANS) { + commit_cycles++; + if (commit_cycles > 2) { + if (wake_all_tickets(&space_info->tickets)) { + flush_state = FLUSH_DELAYED_ITEMS_NR; + commit_cycles--; + } else { + space_info->flush = 0; + } + } else { + flush_state = FLUSH_DELAYED_ITEMS_NR; + } + } + spin_unlock(&space_info->lock); + } while (flush_state <= COMMIT_TRANS); +} + +void btrfs_init_async_reclaim_work(struct work_struct *work) +{ + INIT_WORK(work, btrfs_async_reclaim_metadata_space); +} + +static const enum btrfs_flush_state priority_flush_states[] = { + FLUSH_DELAYED_ITEMS_NR, + FLUSH_DELAYED_ITEMS, + ALLOC_CHUNK, +}; + +static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) +{ + u64 to_reclaim; + int flush_state; + + spin_lock(&space_info->lock); + to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info, + false); + if (!to_reclaim) { + spin_unlock(&space_info->lock); + return; + } + spin_unlock(&space_info->lock); + + flush_state = 0; + do { + flush_space(fs_info, space_info, to_reclaim, + priority_flush_states[flush_state]); + flush_state++; + spin_lock(&space_info->lock); + if (ticket->bytes == 0) { + spin_unlock(&space_info->lock); + return; + } + spin_unlock(&space_info->lock); + } while (flush_state < ARRAY_SIZE(priority_flush_states)); +} + +static int wait_reserve_ticket(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + struct reserve_ticket *ticket) + +{ + DEFINE_WAIT(wait); + u64 reclaim_bytes = 0; + int ret = 0; + + spin_lock(&space_info->lock); + while (ticket->bytes > 0 && ticket->error == 0) { + ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); + if (ret) { + ret = -EINTR; + break; + } + spin_unlock(&space_info->lock); + + schedule(); + + finish_wait(&ticket->wait, &wait); + spin_lock(&space_info->lock); + } + if (!ret) + ret = ticket->error; + if (!list_empty(&ticket->list)) + list_del_init(&ticket->list); + if (ticket->bytes && ticket->bytes < ticket->orig_bytes) + reclaim_bytes = ticket->orig_bytes - ticket->bytes; + spin_unlock(&space_info->lock); + + if (reclaim_bytes) + btrfs_space_info_add_old_bytes(fs_info, space_info, + reclaim_bytes); + return ret; +} + +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @space_info - the space info we want to allocate from + * @orig_bytes - the number of bytes we want + * @flush - whether or not we can flush to make our reservation + * + * This will reserve orig_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. + */ +static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush, + bool system_chunk) +{ + struct reserve_ticket ticket; + u64 used; + u64 reclaim_bytes = 0; + int ret = 0; + + ASSERT(orig_bytes); + ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); + + spin_lock(&space_info->lock); + ret = -ENOSPC; + used = btrfs_space_info_used(space_info, true); + + /* + * Carry on if we have enough space (short-circuit) OR call + * can_overcommit() to ensure we can overcommit to continue. + */ + if ((used + orig_bytes <= space_info->total_bytes) || + can_overcommit(fs_info, space_info, orig_bytes, flush, + system_chunk)) { + btrfs_space_info_update_bytes_may_use(fs_info, space_info, + orig_bytes); + trace_btrfs_space_reservation(fs_info, "space_info", + space_info->flags, orig_bytes, 1); + ret = 0; + } + + /* + * If we couldn't make a reservation then setup our reservation ticket + * and kick the async worker if it's not already running. + * + * If we are a priority flusher then we just need to add our ticket to + * the list and we will do our own flushing further down. + */ + if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { + ticket.orig_bytes = orig_bytes; + ticket.bytes = orig_bytes; + ticket.error = 0; + init_waitqueue_head(&ticket.wait); + if (flush == BTRFS_RESERVE_FLUSH_ALL) { + list_add_tail(&ticket.list, &space_info->tickets); + if (!space_info->flush) { + space_info->flush = 1; + trace_btrfs_trigger_flush(fs_info, + space_info->flags, + orig_bytes, flush, + "enospc"); + queue_work(system_unbound_wq, + &fs_info->async_reclaim_work); + } + } else { + list_add_tail(&ticket.list, + &space_info->priority_tickets); + } + } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { + used += orig_bytes; + /* + * We will do the space reservation dance during log replay, + * which means we won't have fs_info->fs_root set, so don't do + * the async reclaim as we will panic. + */ + if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && + need_do_async_reclaim(fs_info, space_info, + used, system_chunk) && + !work_busy(&fs_info->async_reclaim_work)) { + trace_btrfs_trigger_flush(fs_info, space_info->flags, + orig_bytes, flush, "preempt"); + queue_work(system_unbound_wq, + &fs_info->async_reclaim_work); + } + } + spin_unlock(&space_info->lock); + if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) + return ret; + + if (flush == BTRFS_RESERVE_FLUSH_ALL) + return wait_reserve_ticket(fs_info, space_info, &ticket); + + ret = 0; + priority_reclaim_metadata_space(fs_info, space_info, &ticket); + spin_lock(&space_info->lock); + if (ticket.bytes) { + if (ticket.bytes < orig_bytes) + reclaim_bytes = orig_bytes - ticket.bytes; + list_del_init(&ticket.list); + ret = -ENOSPC; + } + spin_unlock(&space_info->lock); + + if (reclaim_bytes) + btrfs_space_info_add_old_bytes(fs_info, space_info, + reclaim_bytes); + ASSERT(list_empty(&ticket.list)); + return ret; +} + +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @block_rsv - the block_rsv we're allocating for + * @orig_bytes - the number of bytes we want + * @flush - whether or not we can flush to make our reservation + * + * This will reserve orig_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. + */ +int btrfs_reserve_metadata_bytes(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + int ret; + bool system_chunk = (root == fs_info->chunk_root); + + ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info, + orig_bytes, flush, system_chunk); + if (ret == -ENOSPC && + unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { + if (block_rsv != global_rsv && + !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes)) + ret = 0; + } + if (ret == -ENOSPC) { + trace_btrfs_space_reservation(fs_info, "space_info:enospc", + block_rsv->space_info->flags, + orig_bytes, 1); + + if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) + btrfs_dump_space_info(fs_info, block_rsv->space_info, + orig_bytes, 0); + } + return ret; +} diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h new file mode 100644 index 000000000000..c2b54b8e1a14 --- /dev/null +++ b/fs/btrfs/space-info.h @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_SPACE_INFO_H +#define BTRFS_SPACE_INFO_H + +struct btrfs_space_info { + spinlock_t lock; + + u64 total_bytes; /* total bytes in the space, + this doesn't take mirrors into account */ + u64 bytes_used; /* total bytes used, + this doesn't take mirrors into account */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc/allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + u64 max_extent_size; /* This will hold the maximum extent size of + the space info if we had an ENOSPC in the + allocator. */ + + unsigned int full:1; /* indicates that we cannot allocate any more + chunks for this space */ + unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ + + unsigned int flush:1; /* set if we are trying to make space */ + + unsigned int force_alloc; /* set if we need to force a chunk + alloc for this space */ + + u64 disk_used; /* total bytes used on disk */ + u64 disk_total; /* total bytes on disk, takes mirrors into + account */ + + u64 flags; + + /* + * bytes_pinned is kept in line with what is actually pinned, as in + * we've called update_block_group and dropped the bytes_used counter + * and increased the bytes_pinned counter. However this means that + * bytes_pinned does not reflect the bytes that will be pinned once the + * delayed refs are flushed, so this counter is inc'ed every time we + * call btrfs_free_extent so it is a realtime count of what will be + * freed once the transaction is committed. It will be zeroed every + * time the transaction commits. + */ + struct percpu_counter total_bytes_pinned; + + struct list_head list; + /* Protected by the spinlock 'lock'. */ + struct list_head ro_bgs; + struct list_head priority_tickets; + struct list_head tickets; + /* + * tickets_id just indicates the next ticket will be handled, so note + * it's not stored per ticket. + */ + u64 tickets_id; + + struct rw_semaphore groups_sem; + /* for block groups in our same type */ + struct list_head block_groups[BTRFS_NR_RAID_TYPES]; + wait_queue_head_t wait; + + struct kobject kobj; + struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES]; +}; + +struct reserve_ticket { + u64 orig_bytes; + u64 bytes; + int error; + struct list_head list; + wait_queue_head_t wait; +}; + +static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) +{ + return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && + (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); +} + +/* + * + * Declare a helper function to detect underflow of various space info members + */ +#define DECLARE_SPACE_INFO_UPDATE(name) \ +static inline void \ +btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ + struct btrfs_space_info *sinfo, \ + s64 bytes) \ +{ \ + lockdep_assert_held(&sinfo->lock); \ + trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \ + if (bytes < 0 && sinfo->name < -bytes) { \ + WARN_ON(1); \ + sinfo->name = 0; \ + return; \ + } \ + sinfo->name += bytes; \ +} + +DECLARE_SPACE_INFO_UPDATE(bytes_may_use); +DECLARE_SPACE_INFO_UPDATE(bytes_pinned); + +void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes); +void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + u64 num_bytes); +int btrfs_init_space_info(struct btrfs_fs_info *fs_info); +void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, + u64 total_bytes, u64 bytes_used, + u64 bytes_readonly, + struct btrfs_space_info **space_info); +struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, + u64 flags); +u64 btrfs_space_info_used(struct btrfs_space_info *s_info, + bool may_use_included); +void btrfs_clear_space_info_full(struct btrfs_fs_info *info); +void btrfs_dump_space_info(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *info, u64 bytes, + int dump_block_groups); +int btrfs_reserve_metadata_bytes(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 orig_bytes, + enum btrfs_reserve_flush_enum flush); + +#endif /* BTRFS_SPACE_INFO_H */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0645ec428b4f..78de9d5d80c6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -42,6 +42,7 @@ #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" +#include "space-info.h" #include "tests/btrfs-tests.h" #include "qgroup.h" @@ -1553,6 +1554,8 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, } else { snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); btrfs_sb(s)->bdev_holder = fs_type; + if (!strstr(crc32c_impl(), "generic")) + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); error = btrfs_fill_super(s, fs_devices, data); } if (!error) @@ -1601,14 +1604,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, { struct vfsmount *mnt_root; struct dentry *root; - fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; int error = 0; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - error = btrfs_parse_subvol_options(data, &subvol_name, &subvol_objectid); if (error) { @@ -1904,8 +1903,9 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, u64 type; u64 avail_space; u64 min_stripe_size; - int min_stripes = 1, num_stripes = 1; + int min_stripes, num_stripes = 1; int i = 0, nr_devices; + const struct btrfs_raid_attr *rattr; /* * We aren't under the device list lock, so this is racy-ish, but good @@ -1929,21 +1929,18 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, /* calc min stripe number for data space allocation */ type = btrfs_data_alloc_profile(fs_info); - if (type & BTRFS_BLOCK_GROUP_RAID0) { - min_stripes = 2; + rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; + min_stripes = rattr->devs_min; + + if (type & BTRFS_BLOCK_GROUP_RAID0) num_stripes = nr_devices; - } else if (type & BTRFS_BLOCK_GROUP_RAID1) { - min_stripes = 2; + else if (type & BTRFS_BLOCK_GROUP_RAID1) num_stripes = 2; - } else if (type & BTRFS_BLOCK_GROUP_RAID10) { - min_stripes = 4; + else if (type & BTRFS_BLOCK_GROUP_RAID10) num_stripes = 4; - } - if (type & BTRFS_BLOCK_GROUP_DUP) - min_stripe_size = 2 * BTRFS_STRIPE_LEN; - else - min_stripe_size = BTRFS_STRIPE_LEN; + /* Adjust for more than 1 stripe per device */ + min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { @@ -2466,3 +2463,4 @@ late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 5a5930e3d32b..9539f8143b7a 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,6 +16,7 @@ #include "transaction.h" #include "sysfs.h" #include "volumes.h" +#include "space-info.h" static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj); static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj); @@ -303,11 +304,12 @@ static ssize_t raid_bytes_show(struct kobject *kobj, return snprintf(buf, PAGE_SIZE, "%llu\n", val); } -static struct attribute *raid_attributes[] = { +static struct attribute *raid_attrs[] = { BTRFS_ATTR_PTR(raid, total_bytes), BTRFS_ATTR_PTR(raid, used_bytes), NULL }; +ATTRIBUTE_GROUPS(raid); static void release_raid_kobj(struct kobject *kobj) { @@ -317,7 +319,7 @@ static void release_raid_kobj(struct kobject *kobj) struct kobj_type btrfs_raid_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = release_raid_kobj, - .default_attrs = raid_attributes, + .default_groups = raid_groups, }; #define SPACE_INFO_ATTR(field) \ @@ -364,6 +366,7 @@ static struct attribute *space_info_attrs[] = { BTRFS_ATTR_PTR(space_info, total_bytes_pinned), NULL, }; +ATTRIBUTE_GROUPS(space_info); static void space_info_release(struct kobject *kobj) { @@ -375,7 +378,7 @@ static void space_info_release(struct kobject *kobj) struct kobj_type space_info_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = space_info_release, - .default_attrs = space_info_attrs, + .default_groups = space_info_groups, }; static const struct attribute *allocation_attrs[] = { @@ -825,7 +828,12 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, fs_devs->fsid_kobj.kset = btrfs_kset; error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, parent, "%pU", fs_devs->fsid); - return error; + if (error) { + kobject_put(&fs_devs->fsid_kobj); + return error; + } + + return 0; } int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) @@ -905,12 +913,10 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group); } -static int btrfs_init_debugfs(void) +static void btrfs_init_debugfs(void) { #ifdef CONFIG_DEBUG_FS btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); - if (!btrfs_debugfs_root_dentry) - return -ENOMEM; /* * Example code, how to export data through debugfs. @@ -924,7 +930,6 @@ static int btrfs_init_debugfs(void) #endif #endif - return 0; } int __init btrfs_init_sysfs(void) @@ -935,9 +940,7 @@ int __init btrfs_init_sysfs(void) if (!btrfs_kset) return -ENOMEM; - ret = btrfs_init_debugfs(); - if (ret) - goto out1; + btrfs_init_debugfs(); init_feature_attrs(); ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); @@ -954,7 +957,6 @@ out_remove_group: sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); out2: debugfs_remove_recursive(btrfs_debugfs_root_dentry); -out1: kset_unregister(btrfs_kset); return ret; diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 7bf4d5734dbe..1bf6b5a79191 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -10,6 +10,7 @@ #include "btrfs-tests.h" #include "../ctree.h" #include "../extent_io.h" +#include "../btrfs_inode.h" #define PROCESS_UNLOCK (1 << 0) #define PROCESS_RELEASE (1 << 1) @@ -58,7 +59,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end, static int test_find_delalloc(u32 sectorsize) { struct inode *inode; - struct extent_io_tree tmp; + struct extent_io_tree *tmp; struct page *page; struct page *locked_page = NULL; unsigned long index = 0; @@ -76,12 +77,13 @@ static int test_find_delalloc(u32 sectorsize) test_std_err(TEST_ALLOC_INODE); return -ENOMEM; } + tmp = &BTRFS_I(inode)->io_tree; /* * Passing NULL as we don't have fs_info but tracepoints are not used * at this point */ - extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL); + extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL); /* * First go through and create and mark all of our pages dirty, we pin @@ -108,10 +110,10 @@ static int test_find_delalloc(u32 sectorsize) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL); + set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL); start = 0; end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + found = find_lock_delalloc_range(inode, locked_page, &start, &end); if (!found) { test_err("should have found at least one delalloc"); @@ -122,7 +124,7 @@ static int test_find_delalloc(u32 sectorsize) sectorsize - 1, start, end); goto out_bits; } - unlock_extent(&tmp, start, end); + unlock_extent(tmp, start, end); unlock_page(locked_page); put_page(locked_page); @@ -139,10 +141,10 @@ static int test_find_delalloc(u32 sectorsize) test_err("couldn't find the locked page"); goto out_bits; } - set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL); + set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL); start = test_start; end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + found = find_lock_delalloc_range(inode, locked_page, &start, &end); if (!found) { test_err("couldn't find delalloc in our range"); @@ -158,7 +160,7 @@ static int test_find_delalloc(u32 sectorsize) test_err("there were unlocked pages in the range"); goto out_bits; } - unlock_extent(&tmp, start, end); + unlock_extent(tmp, start, end); /* locked_page was unlocked above */ put_page(locked_page); @@ -176,7 +178,7 @@ static int test_find_delalloc(u32 sectorsize) } start = test_start; end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + found = find_lock_delalloc_range(inode, locked_page, &start, &end); if (found) { test_err("found range when we shouldn't have"); @@ -194,10 +196,10 @@ static int test_find_delalloc(u32 sectorsize) * * We are re-using our test_start from above since it works out well. */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL); + set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL); start = test_start; end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + found = find_lock_delalloc_range(inode, locked_page, &start, &end); if (!found) { test_err("didn't find our range"); @@ -213,7 +215,7 @@ static int test_find_delalloc(u32 sectorsize) test_err("pages in range were not all locked"); goto out_bits; } - unlock_extent(&tmp, start, end); + unlock_extent(tmp, start, end); /* * Now to test where we run into a page that is no longer dirty in the @@ -238,7 +240,7 @@ static int test_find_delalloc(u32 sectorsize) * this changes at any point in the future we will need to fix this * tests expected behavior. */ - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, + found = find_lock_delalloc_range(inode, locked_page, &start, &end); if (!found) { test_err("didn't find our range"); @@ -256,7 +258,7 @@ static int test_find_delalloc(u32 sectorsize) } ret = 0; out_bits: - clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1); + clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1); out: if (locked_page) put_page(locked_page); @@ -432,6 +434,89 @@ out: return ret; } +static int test_find_first_clear_extent_bit(void) +{ + struct extent_io_tree tree; + u64 start, end; + + test_msg("running find_first_clear_extent_bit test"); + extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL); + + /* + * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between + * 4M-32M + */ + set_extent_bits(&tree, SZ_1M, SZ_4M - 1, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + + find_first_clear_extent_bit(&tree, SZ_512K, &start, &end, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + + if (start != 0 || end != SZ_1M -1) + test_err("error finding beginning range: start %llu end %llu", + start, end); + + /* Now add 32M-64M so that we have a hole between 4M-32M */ + set_extent_bits(&tree, SZ_32M, SZ_64M - 1, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + + /* + * Request first hole starting at 12M, we should get 4M-32M + */ + find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + + if (start != SZ_4M || end != SZ_32M - 1) + test_err("error finding trimmed range: start %llu end %llu", + start, end); + + /* + * Search in the middle of allocated range, should get the next one + * available, which happens to be unallocated -> 4M-32M + */ + find_first_clear_extent_bit(&tree, SZ_2M, &start, &end, + CHUNK_TRIMMED | CHUNK_ALLOCATED); + + if (start != SZ_4M || end != SZ_32M -1) + test_err("error finding next unalloc range: start %llu end %llu", + start, end); + + /* + * Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag + * being unset in this range, we should get the entry in range 64M-72M + */ + set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED); + find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end, + CHUNK_TRIMMED); + + if (start != SZ_64M || end != SZ_64M + SZ_8M - 1) + test_err("error finding exact range: start %llu end %llu", + start, end); + + find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end, + CHUNK_TRIMMED); + + /* + * Search in the middle of set range whose immediate neighbour doesn't + * have the bits set so it must be returned + */ + if (start != SZ_64M || end != SZ_64M + SZ_8M - 1) + test_err("error finding next alloc range: start %llu end %llu", + start, end); + + /* + * Search beyond any known range, shall return after last known range + * and end should be -1 + */ + find_first_clear_extent_bit(&tree, -1, &start, &end, CHUNK_TRIMMED); + if (start != SZ_64M + SZ_8M || end != -1) + test_err( + "error handling beyond end of range search: start %llu end %llu", + start, end); + + return 0; +} + int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) { int ret; @@ -442,6 +527,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize) if (ret) goto out; + ret = test_find_first_clear_extent_bit(); + if (ret) + goto out; + ret = test_eb_bitmaps(sectorsize, nodesize); out: return ret; diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 87aeabe9d610..4a7f796c9900 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -66,7 +66,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->len = SZ_16K; em->block_start = 0; em->block_len = SZ_16K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [0, 16K)"); goto out; @@ -85,7 +87,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->len = SZ_4K; em->block_start = SZ_32K; /* avoid merging */ em->block_len = SZ_4K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [16K, 20K)"); goto out; @@ -104,7 +108,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, em->len = len; em->block_start = start; em->block_len = len; + write_lock(&em_tree->lock); ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); + write_unlock(&em_tree->lock); if (ret) { test_err("case1 [%llu %llu]: ret %d", start, start + len, ret); goto out; @@ -148,7 +154,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->len = SZ_1K; em->block_start = EXTENT_MAP_INLINE; em->block_len = (u64)-1; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [0, 1K)"); goto out; @@ -167,7 +175,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->len = SZ_4K; em->block_start = SZ_4K; em->block_len = SZ_4K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [4K, 8K)"); goto out; @@ -186,7 +196,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, em->len = SZ_1K; em->block_start = EXTENT_MAP_INLINE; em->block_len = (u64)-1; + write_lock(&em_tree->lock); ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len); + write_unlock(&em_tree->lock); if (ret) { test_err("case2 [0 1K]: ret %d", ret); goto out; @@ -225,7 +237,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, em->len = SZ_4K; em->block_start = SZ_4K; em->block_len = SZ_4K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [4K, 8K)"); goto out; @@ -244,7 +258,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info, em->len = SZ_16K; em->block_start = 0; em->block_len = SZ_16K; + write_lock(&em_tree->lock); ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); + write_unlock(&em_tree->lock); if (ret) { test_err("case3 [0x%llx 0x%llx): ret %d", start, start + len, ret); @@ -320,7 +336,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->len = SZ_8K; em->block_start = 0; em->block_len = SZ_8K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [0, 8K)"); goto out; @@ -339,7 +357,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->len = 24 * SZ_1K; em->block_start = SZ_16K; /* avoid merging */ em->block_len = 24 * SZ_1K; + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); + write_unlock(&em_tree->lock); if (ret < 0) { test_err("cannot add extent range [8K, 32K)"); goto out; @@ -357,7 +377,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info, em->len = SZ_32K; em->block_start = 0; em->block_len = SZ_32K; + write_lock(&em_tree->lock); ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len); + write_unlock(&em_tree->lock); if (ret) { test_err("case4 [0x%llx 0x%llx): ret %d", start, len, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3f6811cdf803..3b8ae1a8f02d 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -129,6 +129,24 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans) } /* + * To be called after all the new block groups attached to the transaction + * handle have been created (btrfs_create_pending_block_groups()). + */ +void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + + if (!trans->chunk_bytes_reserved) + return; + + WARN_ON_ONCE(!list_empty(&trans->new_bgs)); + + btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv, + trans->chunk_bytes_reserved); + trans->chunk_bytes_reserved = 0; +} + +/* * either allocate a new transaction or hop into the existing one */ static noinline int join_transaction(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 78c446c222b7..527ea94b57d9 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -224,5 +224,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); +void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); #endif diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 748cd1598255..ccd5706199d7 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -107,13 +107,32 @@ static void file_extent_err(const struct extent_buffer *eb, int slot, (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ }) +static u64 file_extent_end(struct extent_buffer *leaf, + struct btrfs_key *key, + struct btrfs_file_extent_item *extent) +{ + u64 end; + u64 len; + + if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + end = ALIGN(key->offset + len, leaf->fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + end = key->offset + len; + } + return end; +} + static int check_extent_data_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_file_extent_item *fi; u32 sectorsize = fs_info->sectorsize; u32 item_size = btrfs_item_size_nr(leaf, slot); + u64 extent_end; if (!IS_ALIGNED(key->offset, sectorsize)) { file_extent_err(leaf, slot, @@ -188,6 +207,38 @@ static int check_extent_data_item(struct extent_buffer *leaf, CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) || CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)) return -EUCLEAN; + + /* Catch extent end overflow */ + if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi), + key->offset, &extent_end)) { + file_extent_err(leaf, slot, + "extent end overflow, have file offset %llu extent num bytes %llu", + key->offset, + btrfs_file_extent_num_bytes(leaf, fi)); + return -EUCLEAN; + } + + /* + * Check that no two consecutive file extent items, in the same leaf, + * present ranges that overlap each other. + */ + if (slot > 0 && + prev_key->objectid == key->objectid && + prev_key->type == BTRFS_EXTENT_DATA_KEY) { + struct btrfs_file_extent_item *prev_fi; + u64 prev_end; + + prev_fi = btrfs_item_ptr(leaf, slot - 1, + struct btrfs_file_extent_item); + prev_end = file_extent_end(leaf, prev_key, prev_fi); + if (prev_end > key->offset) { + file_extent_err(leaf, slot - 1, +"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent", + prev_end, key->offset); + return -EUCLEAN; + } + } + return 0; } @@ -774,14 +825,15 @@ static int check_inode_item(struct extent_buffer *leaf, * Common point to switch the item-specific validation. */ static int check_leaf_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { int ret = 0; struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(leaf, key, slot); + ret = check_extent_data_item(leaf, key, slot, prev_key); break; case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(leaf, key, slot); @@ -928,7 +980,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) * Check if the item size and content meet other * criteria */ - ret = check_leaf_item(leaf, &key, slot); + ret = check_leaf_item(leaf, &key, slot, &prev_key); if (ret < 0) return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6adcd8a2c5c7..6c8297bcfeb7 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3110,6 +3110,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, log->log_transid = root->log_transid; root->log_start_pid = 0; /* + * Update or create log root item under the root's log_mutex to prevent + * races with concurrent log syncs that can lead to failure to update + * log root item because it was not created yet. + */ + ret = update_log_root(trans, log); + /* * IO has been started, blocks of the log tree have WRITTEN flag set * in their headers. new modifications of the log will be written to * new positions. so it's safe to allow log writers to go in. @@ -3128,8 +3134,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&log_root_tree->log_mutex); - ret = update_log_root(trans, log); - mutex_lock(&log_root_tree->log_mutex); if (atomic_dec_and_test(&log_root_tree->log_writers)) { /* atomic_dec_and_test implies a barrier */ @@ -3319,6 +3323,30 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, } /* + * Check if an inode was logged in the current transaction. We can't always rely + * on an inode's logged_trans value, because it's an in-memory only field and + * therefore not persisted. This means that its value is lost if the inode gets + * evicted and loaded again from disk (in which case it has a value of 0, and + * certainly it is smaller then any possible transaction ID), when that happens + * the full_sync flag is set in the inode's runtime flags, so on that case we + * assume eviction happened and ignore the logged_trans value, assuming the + * worst case, that the inode was logged before in the current transaction. + */ +static bool inode_logged(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) +{ + if (inode->logged_trans == trans->transid) + return true; + + if (inode->last_trans == trans->transid && + test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) && + !test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags)) + return true; + + return false; +} + +/* * If both a file and directory are logged, and unlinks or renames are * mixed in, we have a few interesting corners: * @@ -3352,7 +3380,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int bytes_del = 0; u64 dir_ino = btrfs_ino(dir); - if (dir->logged_trans < trans->transid) + if (!inode_logged(trans, dir)) return 0; ret = join_running_log_trans(root); @@ -3456,7 +3484,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, u64 index; int ret; - if (inode->logged_trans < trans->transid) + if (!inode_logged(trans, inode)) return 0; ret = join_running_log_trans(root); @@ -4182,6 +4210,7 @@ fill_holes: *last_extent, 0, 0, len, 0, len, 0, 0, 0); + *last_extent += len; } } } @@ -5415,9 +5444,19 @@ log_extents: } } + /* + * Don't update last_log_commit if we logged that an inode exists after + * it was loaded to memory (full_sync bit set). + * This is to prevent data loss when we do a write to the inode, then + * the inode gets evicted after all delalloc was flushed, then we log + * it exists (due to a rename for example) and then fsync it. This last + * fsync would do nothing (not logging the extents previously written). + */ spin_lock(&inode->lock); inode->logged_trans = trans->transid; - inode->last_log_commit = inode->last_sub_trans; + if (inode_only != LOG_INODE_EXISTS || + !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags)) + inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); out_unlock: mutex_unlock(&inode->log_mutex); @@ -5477,7 +5516,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, { int ret = 0; struct dentry *old_parent = NULL; - struct btrfs_inode *orig_inode = inode; /* * for regular files, if its inode is already on disk, we don't @@ -5497,16 +5535,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, } while (1) { - /* - * If we are logging a directory then we start with our inode, - * not our parent's inode, so we need to skip setting the - * logged_trans so that further down in the log code we don't - * think this inode has already been logged. - */ - if (inode != orig_inode) - inode->logged_trans = trans->transid; - smp_mb(); - if (btrfs_must_commit_transaction(trans, inode)) { ret = 1; break; @@ -6383,7 +6411,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, * if this directory was already logged any new * names for this file/dir will get recorded */ - smp_mb(); if (dir->logged_trans == trans->transid) return; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1c2a6e4b39da..a13ddba1ebc3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,6 +28,7 @@ #include "dev-replace.h" #include "sysfs.h" #include "tree-checker.h" +#include "space-info.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -123,12 +124,14 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { }, }; -const char *get_raid_name(enum btrfs_raid_types type) +const char *btrfs_bg_type_to_raid_name(u64 flags) { - if (type >= BTRFS_NR_RAID_TYPES) + const int index = btrfs_bg_flags_to_raid_index(flags); + + if (index >= BTRFS_NR_RAID_TYPES) return NULL; - return btrfs_raid_array[type].raid_name; + return btrfs_raid_array[index].raid_name; } /* @@ -237,7 +240,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, * chunk_mutex * ----------- * protects chunks, adding or removing during allocation, trim or when a new - * device is added/removed + * device is added/removed. Additionally it also protects post_commit_list of + * individual devices, since they can be added to the transaction's + * post_commit_list only with chunk_mutex held. * * cleaner_mutex * ------------- @@ -1818,7 +1823,7 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) struct rb_node *n; u64 ret = 0; - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; read_lock(&em_tree->lock); n = rb_last(&em_tree->map.rb_root); if (n) { @@ -2941,7 +2946,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree; struct extent_map *em; - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, length); read_unlock(&em_tree->lock); @@ -3474,6 +3479,18 @@ static int chunk_devid_filter(struct extent_buffer *leaf, return 1; } +static u64 calc_data_stripes(u64 type, int num_stripes) +{ + const int index = btrfs_bg_flags_to_raid_index(type); + const int ncopies = btrfs_raid_array[index].ncopies; + const int nparity = btrfs_raid_array[index].nparity; + + if (nparity) + return num_stripes - nparity; + else + return num_stripes / ncopies; +} + /* [pstart, pend) */ static int chunk_drange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk, @@ -3483,22 +3500,15 @@ static int chunk_drange_filter(struct extent_buffer *leaf, int num_stripes = btrfs_chunk_num_stripes(leaf, chunk); u64 stripe_offset; u64 stripe_length; + u64 type; int factor; int i; if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID)) return 0; - if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP | - BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) { - factor = num_stripes / 2; - } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) { - factor = num_stripes - 1; - } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) { - factor = num_stripes - 2; - } else { - factor = num_stripes; - } + type = btrfs_chunk_type(leaf, chunk); + factor = calc_data_stripes(type, num_stripes); for (i = 0; i < num_stripes; i++) { stripe = btrfs_stripe_nr(chunk, i); @@ -3921,11 +3931,9 @@ static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf, bp += ret; \ } while (0) - if (flags & BTRFS_BALANCE_ARGS_CONVERT) { - int index = btrfs_bg_flags_to_raid_index(bargs->target); - - CHECK_APPEND_1ARG("convert=%s,", get_raid_name(index)); - } + if (flags & BTRFS_BALANCE_ARGS_CONVERT) + CHECK_APPEND_1ARG("convert=%s,", + btrfs_bg_type_to_raid_name(bargs->target)); if (flags & BTRFS_BALANCE_ARGS_SOFT) CHECK_APPEND_NOARG("soft,"); @@ -4047,6 +4055,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, u64 num_devices; unsigned seq; bool reducing_integrity; + int i; if (btrfs_fs_closing(fs_info) || atomic_read(&fs_info->balance_pause_req) || @@ -4076,48 +4085,43 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } num_devices = btrfs_num_devices(fs_info); + allowed = 0; + for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) + if (num_devices >= btrfs_raid_array[i].devs_min) + allowed |= btrfs_raid_array[i].bg_flag; - allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP; - if (num_devices > 1) - allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); - if (num_devices > 2) - allowed |= BTRFS_BLOCK_GROUP_RAID5; - if (num_devices > 3) - allowed |= (BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID6); if (validate_convert_profile(&bctl->data, allowed)) { - int index = btrfs_bg_flags_to_raid_index(bctl->data.target); - btrfs_err(fs_info, "balance: invalid convert data profile %s", - get_raid_name(index)); + btrfs_bg_type_to_raid_name(bctl->data.target)); ret = -EINVAL; goto out; } if (validate_convert_profile(&bctl->meta, allowed)) { - int index = btrfs_bg_flags_to_raid_index(bctl->meta.target); - btrfs_err(fs_info, "balance: invalid convert metadata profile %s", - get_raid_name(index)); + btrfs_bg_type_to_raid_name(bctl->meta.target)); ret = -EINVAL; goto out; } if (validate_convert_profile(&bctl->sys, allowed)) { - int index = btrfs_bg_flags_to_raid_index(bctl->sys.target); - btrfs_err(fs_info, "balance: invalid convert system profile %s", - get_raid_name(index)); + btrfs_bg_type_to_raid_name(bctl->sys.target)); ret = -EINVAL; goto out; } - /* allow to reduce meta or sys integrity only if force set */ - allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6; + /* + * Allow to reduce metadata or system integrity only if force set for + * profiles with redundancy (copies, parity) + */ + allowed = 0; + for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) { + if (btrfs_raid_array[i].ncopies >= 2 || + btrfs_raid_array[i].tolerated_failures >= 1) + allowed |= btrfs_raid_array[i].bg_flag; + } do { seq = read_seqbegin(&fs_info->profiles_lock); @@ -4152,12 +4156,18 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { - int meta_index = btrfs_bg_flags_to_raid_index(meta_target); - int data_index = btrfs_bg_flags_to_raid_index(data_target); - btrfs_warn(fs_info, "balance: metadata profile %s has lower redundancy than data profile %s", - get_raid_name(meta_index), get_raid_name(data_index)); + btrfs_bg_type_to_raid_name(meta_target), + btrfs_bg_type_to_raid_name(data_target)); + } + + if (fs_info->send_in_progress) { + btrfs_warn_rl(fs_info, +"cannot run balance while send operations are in progress (%d in progress)", + fs_info->send_in_progress); + ret = -EAGAIN; + goto out; } ret = insert_balance_item(fs_info, bctl); @@ -4949,6 +4959,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, sub_stripes = btrfs_raid_array[index].sub_stripes; dev_stripes = btrfs_raid_array[index].dev_stripes; devs_max = btrfs_raid_array[index].devs_max; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info); devs_min = btrfs_raid_array[index].devs_min; devs_increment = btrfs_raid_array[index].devs_increment; ncopies = btrfs_raid_array[index].ncopies; @@ -4957,8 +4969,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = SZ_1G; max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; - if (!devs_max) - devs_max = BTRFS_MAX_DEVS(info); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { /* for larger filesystems, use larger metadata chunks */ if (fs_devices->total_rw_bytes > 50ULL * SZ_1G) @@ -4966,13 +4976,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, else max_stripe_size = SZ_256M; max_chunk_size = max_stripe_size; - if (!devs_max) - devs_max = BTRFS_MAX_DEVS(info); } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = SZ_32M; max_chunk_size = 2 * max_stripe_size; - if (!devs_max) - devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; } else { btrfs_err(info, "invalid chunk type 0x%llx requested", type); @@ -5143,7 +5149,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em->block_len = em->len; em->orig_block_len = stripe_size; - em_tree = &info->mapping_tree.map_tree; + em_tree = &info->mapping_tree; write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); if (ret) { @@ -5324,20 +5330,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) static inline int btrfs_chunk_max_errors(struct map_lookup *map) { - int max_errors; + const int index = btrfs_bg_flags_to_raid_index(map->type); - if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10 | - BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_DUP)) { - max_errors = 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { - max_errors = 2; - } else { - max_errors = 0; - } - - return max_errors; + return btrfs_raid_array[index].tolerated_failures; } int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) @@ -5378,21 +5373,16 @@ end: return readonly; } -void btrfs_mapping_init(struct btrfs_mapping_tree *tree) -{ - extent_map_tree_init(&tree->map_tree); -} - -void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) +void btrfs_mapping_tree_free(struct extent_map_tree *tree) { struct extent_map *em; while (1) { - write_lock(&tree->map_tree.lock); - em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, 0, (u64)-1); if (em) - remove_extent_mapping(&tree->map_tree, em); - write_unlock(&tree->map_tree.lock); + remove_extent_mapping(tree, em); + write_unlock(&tree->lock); if (!em) break; /* once for us */ @@ -5419,7 +5409,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) return 1; map = em->map_lookup; - if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) + if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK)) ret = map->num_stripes; else if (map->type & BTRFS_BLOCK_GROUP_RAID10) ret = map->sub_stripes; @@ -5493,7 +5483,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev; ASSERT((map->type & - (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))); + (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10))); if (map->type & BTRFS_BLOCK_GROUP_RAID10) num_stripes = map->sub_stripes; @@ -5682,7 +5672,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, &remaining_stripes); div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); last_stripe *= sub_stripes; - } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_DUP)) { num_stripes = map->num_stripes; } else { @@ -5926,6 +5916,102 @@ static bool need_full_stripe(enum btrfs_map_op op) return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS); } +/* + * btrfs_get_io_geometry - calculates the geomery of a particular (address, len) + * tuple. This information is used to calculate how big a + * particular bio can get before it straddles a stripe. + * + * @fs_info - the filesystem + * @logical - address that we want to figure out the geometry of + * @len - the length of IO we are going to perform, starting at @logical + * @op - type of operation - write or read + * @io_geom - pointer used to return values + * + * Returns < 0 in case a chunk for the given logical address cannot be found, + * usually shouldn't happen unless @logical is corrupted, 0 otherwise. + */ +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, + u64 logical, u64 len, struct btrfs_io_geometry *io_geom) +{ + struct extent_map *em; + struct map_lookup *map; + u64 offset; + u64 stripe_offset; + u64 stripe_nr; + u64 stripe_len; + u64 raid56_full_stripe_start = (u64)-1; + int data_stripes; + + ASSERT(op != BTRFS_MAP_DISCARD); + + em = btrfs_get_chunk_map(fs_info, logical, len); + if (IS_ERR(em)) + return PTR_ERR(em); + + map = em->map_lookup; + /* Offset of this logical address in the chunk */ + offset = logical - em->start; + /* Len of a stripe in a chunk */ + stripe_len = map->stripe_len; + /* Stripe wher this block falls in */ + stripe_nr = div64_u64(offset, stripe_len); + /* Offset of stripe in the chunk */ + stripe_offset = stripe_nr * stripe_len; + if (offset < stripe_offset) { + btrfs_crit(fs_info, +"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu", + stripe_offset, offset, em->start, logical, stripe_len); + free_extent_map(em); + return -EINVAL; + } + + /* stripe_offset is the offset of this block in its stripe */ + stripe_offset = offset - stripe_offset; + data_stripes = nr_data_stripes(map); + + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + u64 max_len = stripe_len - stripe_offset; + + /* + * In case of raid56, we need to know the stripe aligned start + */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + unsigned long full_stripe_len = stripe_len * data_stripes; + raid56_full_stripe_start = offset; + + /* + * Allow a write of a full stripe, but make sure we + * don't allow straddling of stripes + */ + raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, + full_stripe_len); + raid56_full_stripe_start *= full_stripe_len; + + /* + * For writes to RAID[56], allow a full stripeset across + * all disks. For other RAID types and for RAID[56] + * reads, just allow a single stripe (on a single disk). + */ + if (op == BTRFS_MAP_WRITE) { + max_len = stripe_len * data_stripes - + (offset - raid56_full_stripe_start); + } + } + len = min_t(u64, em->len - offset, max_len); + } else { + len = em->len - offset; + } + + io_geom->len = len; + io_geom->offset = offset; + io_geom->stripe_len = stripe_len; + io_geom->stripe_nr = stripe_nr; + io_geom->stripe_offset = stripe_offset; + io_geom->raid56_stripe_offset = raid56_full_stripe_start; + + return 0; +} + static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, @@ -5939,6 +6025,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 stripe_nr; u64 stripe_len; u32 stripe_index; + int data_stripes; int i; int ret = 0; int num_stripes; @@ -5951,76 +6038,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int patch_the_first_stripe_for_dev_replace = 0; u64 physical_to_patch_in_first_stripe = 0; u64 raid56_full_stripe_start = (u64)-1; + struct btrfs_io_geometry geom; + + ASSERT(bbio_ret); if (op == BTRFS_MAP_DISCARD) return __btrfs_map_block_for_discard(fs_info, logical, *length, bbio_ret); - em = btrfs_get_chunk_map(fs_info, logical, *length); - if (IS_ERR(em)) - return PTR_ERR(em); + ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom); + if (ret < 0) + return ret; + em = btrfs_get_chunk_map(fs_info, logical, *length); + ASSERT(em); map = em->map_lookup; - offset = logical - em->start; - - stripe_len = map->stripe_len; - stripe_nr = offset; - /* - * stripe_nr counts the total number of stripes we have to stride - * to get to this block - */ - stripe_nr = div64_u64(stripe_nr, stripe_len); - - stripe_offset = stripe_nr * stripe_len; - if (offset < stripe_offset) { - btrfs_crit(fs_info, - "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu", - stripe_offset, offset, em->start, logical, - stripe_len); - free_extent_map(em); - return -EINVAL; - } - - /* stripe_offset is the offset of this block in its stripe*/ - stripe_offset = offset - stripe_offset; - - /* if we're here for raid56, we need to know the stripe aligned start */ - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); - raid56_full_stripe_start = offset; - /* allow a write of a full stripe, but make sure we don't - * allow straddling of stripes - */ - raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, - full_stripe_len); - raid56_full_stripe_start *= full_stripe_len; - } - - if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { - u64 max_len; - /* For writes to RAID[56], allow a full stripeset across all disks. - For other RAID types and for RAID[56] reads, just allow a single - stripe (on a single disk). */ - if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && - (op == BTRFS_MAP_WRITE)) { - max_len = stripe_len * nr_data_stripes(map) - - (offset - raid56_full_stripe_start); - } else { - /* we limit the length of each bio to what fits in a stripe */ - max_len = stripe_len - stripe_offset; - } - *length = min_t(u64, em->len - offset, max_len); - } else { - *length = em->len - offset; - } - - /* - * This is for when we're called from btrfs_bio_fits_in_stripe and all - * it cares about is the length - */ - if (!bbio_ret) - goto out; + *length = geom.len; + offset = geom.offset; + stripe_len = geom.stripe_len; + stripe_nr = geom.stripe_nr; + stripe_offset = geom.stripe_offset; + raid56_full_stripe_start = geom.raid56_stripe_offset; + data_stripes = nr_data_stripes(map); down_read(&dev_replace->rwsem); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); @@ -6052,7 +6092,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, &stripe_index); if (!need_full_stripe(op)) mirror_num = 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { + } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { if (need_full_stripe(op)) num_stripes = map->num_stripes; else if (mirror_num) @@ -6094,7 +6134,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ stripe_nr = div64_u64(raid56_full_stripe_start, - stripe_len * nr_data_stripes(map)); + stripe_len * data_stripes); /* RAID[56] write or recovery. Return all stripes */ num_stripes = map->num_stripes; @@ -6110,10 +6150,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, * Mirror #3 is RAID6 Q block. */ stripe_nr = div_u64_rem(stripe_nr, - nr_data_stripes(map), &stripe_index); + data_stripes, &stripe_index); if (mirror_num > 1) - stripe_index = nr_data_stripes(map) + - mirror_num - 2; + stripe_index = data_stripes + mirror_num - 2; /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, @@ -6171,8 +6210,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, div_u64_rem(stripe_nr, num_stripes, &rot); /* Fill in the logical address of each stripe */ - tmp = stripe_nr * nr_data_stripes(map); - for (i = 0; i < nr_data_stripes(map); i++) + tmp = stripe_nr * data_stripes; + for (i = 0; i < data_stripes; i++) bbio->raid_map[(i+rot) % num_stripes] = em->start + (tmp + i) * map->stripe_len; @@ -6687,7 +6726,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) { struct btrfs_fs_info *fs_info = leaf->fs_info; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; u64 logical; @@ -6712,9 +6751,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, return ret; } - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); - read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, logical, 1); + read_unlock(&map_tree->lock); /* already mapped? */ if (em && em->start <= logical && em->start + em->len > logical) { @@ -6783,9 +6822,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, } - write_lock(&map_tree->map_tree.lock); - ret = add_extent_mapping(&map_tree->map_tree, em, 0); - write_unlock(&map_tree->map_tree.lock); + write_lock(&map_tree->lock); + ret = add_extent_mapping(map_tree, em, 0); + write_unlock(&map_tree->lock); if (ret < 0) { btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", @@ -7103,14 +7142,14 @@ out_short_read: bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; u64 next_start = 0; bool ret = true; - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); - read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, 0, (u64)-1); + read_unlock(&map_tree->lock); /* No chunk at all? Return false anyway */ if (!em) { ret = false; @@ -7148,10 +7187,10 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, next_start = extent_map_end(em); free_extent_map(em); - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, next_start, + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, next_start, (u64)(-1) - next_start); - read_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->lock); } out: return ret; @@ -7600,10 +7639,9 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) */ int btrfs_bg_type_to_factor(u64 flags) { - if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10)) - return 2; - return 1; + const int index = btrfs_bg_flags_to_raid_index(flags); + + return btrfs_raid_array[index].ncopies; } @@ -7612,7 +7650,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; struct btrfs_device *dev; @@ -7701,7 +7739,7 @@ out: static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct rb_node *node; int ret = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 136a3eb64604..7f6aa1816409 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -23,6 +23,21 @@ struct btrfs_pending_bios { struct bio *tail; }; +struct btrfs_io_geometry { + /* remaining bytes before crossing a stripe */ + u64 len; + /* offset of logical address in chunk */ + u64 offset; + /* length of single IO stripe */ + u64 stripe_len; + /* number of stripe where address falls */ + u64 stripe_nr; + /* offset of address in stripe */ + u64 stripe_offset; + /* offset of raid56 stripe into the chunk */ + u64 raid56_stripe_offset; +}; + /* * Use sequence counter to get consistent device stat data on * 32-bit processors. @@ -43,8 +58,8 @@ struct btrfs_pending_bios { #define BTRFS_DEV_STATE_FLUSH_SENT (4) struct btrfs_device { - struct list_head dev_list; - struct list_head dev_alloc_list; + struct list_head dev_list; /* device_list_mutex */ + struct list_head dev_alloc_list; /* chunk mutex */ struct list_head post_commit_list; /* chunk mutex */ struct btrfs_fs_devices *fs_devices; struct btrfs_fs_info *fs_info; @@ -229,9 +244,14 @@ struct btrfs_fs_devices { * this mutex lock. */ struct mutex device_list_mutex; + + /* List of all devices, protected by device_list_mutex */ struct list_head devices; - /* devices not currently being allocated */ + /* + * Devices which can satisfy space allocation. Protected by + * chunk_mutex + */ struct list_head alloc_list; struct btrfs_fs_devices *seed; @@ -336,16 +356,16 @@ struct btrfs_device_info { }; struct btrfs_raid_attr { - int sub_stripes; /* sub_stripes info for map */ - int dev_stripes; /* stripes per dev */ - int devs_max; /* max devs to use */ - int devs_min; /* min devs needed */ - int tolerated_failures; /* max tolerated fail devs */ - int devs_increment; /* ndevs has to be a multiple of this */ - int ncopies; /* how many copies to data has */ - int nparity; /* number of stripes worth of bytes to store + u8 sub_stripes; /* sub_stripes info for map */ + u8 dev_stripes; /* stripes per dev */ + u8 devs_max; /* max devs to use */ + u8 devs_min; /* min devs needed */ + u8 tolerated_failures; /* max tolerated fail devs */ + u8 devs_increment; /* ndevs has to be a multiple of this */ + u8 ncopies; /* how many copies to data has */ + u8 nparity; /* number of stripes worth of bytes to store * parity information */ - int mindev_error; /* error code if min devs requisite is unmet */ + u8 mindev_error; /* error code if min devs requisite is unmet */ const char raid_name[8]; /* name of the raid */ u64 bg_flag; /* block group flag of the raid */ }; @@ -408,13 +428,14 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_bio **bbio_ret); +int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, + u64 logical, u64 len, struct btrfs_io_geometry *io_geom); int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); -void btrfs_mapping_init(struct btrfs_mapping_tree *tree); -void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); +void btrfs_mapping_tree_free(struct extent_map_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, @@ -557,8 +578,6 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags) return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ } -const char *get_raid_name(enum btrfs_raid_types type); - void btrfs_commit_device_sizes(struct btrfs_transaction *trans); struct list_head *btrfs_get_fs_uuids(void); @@ -568,6 +587,7 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); int btrfs_bg_type_to_factor(u64 flags); +const char *btrfs_bg_type_to_raid_name(u64 flags); int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); #endif diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 78b6ba2029e8..95d9aebff2c4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -213,6 +213,9 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, } out: btrfs_free_path(path); + if (!ret) + set_bit(BTRFS_INODE_COPY_EVERYTHING, + &BTRFS_I(inode)->runtime_flags); return ret; } @@ -236,7 +239,6 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); out: @@ -388,8 +390,6 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler, if (!ret) { inode_inc_iversion(inode); inode->i_ctime = current_time(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); } diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index a6ff07cf11d5..3837ca180d52 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer) unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES; struct list_head *pos, *next; - spin_lock(&wsm.lock); + spin_lock_bh(&wsm.lock); if (list_empty(&wsm.lru_list)) { - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); return; } @@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer) if (!list_empty(&wsm.lru_list)) mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); } /* @@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void) struct workspace *workspace; int i; - spin_lock(&wsm.lock); + spin_lock_bh(&wsm.lock); for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) { while (!list_empty(&wsm.idle_ws[i])) { workspace = container_of(wsm.idle_ws[i].next, @@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void) zstd_free_workspace(&workspace->list); } } - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); del_timer_sync(&wsm.timer); } @@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level) struct workspace *workspace; int i = level - 1; - spin_lock(&wsm.lock); + spin_lock_bh(&wsm.lock); for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) { if (!list_empty(&wsm.idle_ws[i])) { ws = wsm.idle_ws[i].next; @@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level) list_del(&workspace->lru_list); if (list_empty(&wsm.idle_ws[i])) clear_bit(i, &wsm.active_map); - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); return ws; } } - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); return NULL; } @@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws) { struct workspace *workspace = list_to_workspace(ws); - spin_lock(&wsm.lock); + spin_lock_bh(&wsm.lock); /* A node is only taken off the lru if we are the corresponding level */ if (workspace->req_level == workspace->level) { @@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws) list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]); workspace->req_level = 0; - spin_unlock(&wsm.lock); + spin_unlock_bh(&wsm.lock); if (workspace->level == ZSTD_BTRFS_MAX_LEVEL) cond_wake_up(&wsm.wait); diff --git a/fs/buffer.c b/fs/buffer.c index 0faa41fb4c88..86a38b979323 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/buffer.c * @@ -2085,38 +2086,6 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, } EXPORT_SYMBOL(block_write_begin); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page) -{ - loff_t old_size = inode->i_size; - bool i_size_changed = false; - - /* - * No need to use i_size_read() here, the i_size cannot change under us - * because we hold i_rwsem. - * - * But it's important to update i_size while still holding page lock: - * page writeout could otherwise come in and zero beyond i_size. - */ - if (pos + copied > inode->i_size) { - i_size_write(inode, pos + copied); - i_size_changed = true; - } - - unlock_page(page); - - if (old_size < pos) - pagecache_isize_extended(inode, old_size, pos); - /* - * Don't mark the inode dirty under page lock. First, it unnecessarily - * makes the holding time of page lock longer. Second, it forces lock - * ordering of page lock and transaction start for journaling - * filesystems. - */ - if (i_size_changed) - mark_inode_dirty(inode); -} - int block_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) @@ -2157,9 +2126,37 @@ int generic_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { + struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; + bool i_size_changed = false; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - __generic_write_end(mapping->host, pos, copied, page); + + /* + * No need to use i_size_read() here, the i_size cannot change under us + * because we hold i_rwsem. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = true; + } + + unlock_page(page); put_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + mark_inode_dirty(inode); return copied; } EXPORT_SYMBOL(generic_write_end); @@ -3092,7 +3089,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, if (wbc) { wbc_init_bio(wbc, bio); - wbc_account_io(wbc, bh->b_page, bh->b_size); + wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); } submit_bio(bio); diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index 80e9c6167f0b..ae559ed5b3b3 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CACHEFILES tristate "Filesystem caching on files" diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 4a717d400807..dfb14dbddf51 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Bind and unbind a cache from the filesystem backing it * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 3fdee214a5bb..752c1e43416f 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Daemon interface * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 222bc5d8b62c..4cea5fbf695e 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache interface to CacheFiles * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index d2f6f996e65a..cf9bd6401c2d 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* General netfs cache on cache files internal defs * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #ifdef pr_fmt diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 33b58c60f2d1..be96f5fc5cac 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Key to pathname encoder * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/slab.h> diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index f54d3f5b2e40..ddf0cd58d60c 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Network filesystem caching backend to use cache files on a premounted * filesystem * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d27720cd3664..ecc8ecbbfa5a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles path walking and related routines * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c index 0ce1aa56b67f..6e67aea0f24e 100644 --- a/fs/cachefiles/proc.c +++ b/fs/cachefiles/proc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles statistics * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8a577409d030..44a3ce1e4ce4 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Storage object read/write * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/mount.h> diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index 31bbc0528b11..aec13fd94692 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles security management * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/fs.h> diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 511e6c68156a..72e42438f3d7 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* CacheFiles extended attribute management * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 52095f473464..cf235f6eacf9 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CEPH_FS tristate "Ceph distributed file system" depends on INET @@ -35,3 +36,15 @@ config CEPH_FS_POSIX_ACL groups beyond the owner/group/world scheme. If you don't know what Access Control Lists are, say N + +config CEPH_FS_SECURITY_LABEL + bool "CephFS Security Labels" + depends on CEPH_FS && SECURITY + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the Ceph filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 5f0103f40079..aa55f412a6e3 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/ceph/acl.c * * Copyright (C) 2013 Guangliang Zhao, <lucienchao@gmail.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/ceph/ceph_debug.h> @@ -172,7 +159,7 @@ out: } int ceph_pre_init_acls(struct inode *dir, umode_t *mode, - struct ceph_acls_info *info) + struct ceph_acl_sec_ctx *as_ctx) { struct posix_acl *acl, *default_acl; size_t val_size1 = 0, val_size2 = 0; @@ -247,9 +234,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, kfree(tmp_buf); - info->acl = acl; - info->default_acl = default_acl; - info->pagelist = pagelist; + as_ctx->acl = acl; + as_ctx->default_acl = default_acl; + as_ctx->pagelist = pagelist; return 0; out_err: @@ -261,18 +248,10 @@ out_err: return err; } -void ceph_init_inode_acls(struct inode* inode, struct ceph_acls_info *info) +void ceph_init_inode_acls(struct inode *inode, struct ceph_acl_sec_ctx *as_ctx) { if (!inode) return; - ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, info->acl); - ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT, info->default_acl); -} - -void ceph_release_acls_info(struct ceph_acls_info *info) -{ - posix_acl_release(info->acl); - posix_acl_release(info->default_acl); - if (info->pagelist) - ceph_pagelist_release(info->pagelist); + ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, as_ctx->acl); + ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT, as_ctx->default_acl); } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index a47c541f8006..e078cc55b989 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -10,6 +10,7 @@ #include <linux/pagevec.h> #include <linux/task_io_accounting_ops.h> #include <linux/signal.h> +#include <linux/iversion.h> #include "super.h" #include "mds_client.h" @@ -1576,6 +1577,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) /* Update time before taking page lock */ file_update_time(vma->vm_file); + inode_inc_iversion_raw(inode); do { lock_page(page); diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 1bf3502bdd6f..bc90cf6ad7ed 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -1,24 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Ceph cache definitions. * * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved. * Written by Milosz Tanski (milosz@adfin.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #include "super.h" diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index 7e72c7594f0c..e486fac3434d 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -1,24 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Ceph cache definitions. * * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved. * Written by Milosz Tanski (milosz@adfin.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * */ #ifndef _CEPH_CACHE_H diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 72f8e1311392..d98dcd976c80 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -8,6 +8,7 @@ #include <linux/vmalloc.h> #include <linux/wait.h> #include <linux/writeback.h> +#include <linux/iversion.h> #include "super.h" #include "mds_client.h" @@ -1138,8 +1139,9 @@ struct cap_msg_args { u64 ino, cid, follows; u64 flush_tid, oldest_flush_tid, size, max_size; u64 xattr_version; + u64 change_attr; struct ceph_buffer *xattr_buf; - struct timespec64 atime, mtime, ctime; + struct timespec64 atime, mtime, ctime, btime; int op, caps, wanted, dirty; u32 seq, issue_seq, mseq, time_warp_seq; u32 flags; @@ -1160,7 +1162,6 @@ static int send_cap_msg(struct cap_msg_args *arg) struct ceph_msg *msg; void *p; size_t extra_len; - struct timespec64 zerotime = {0}; struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc; dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s" @@ -1245,15 +1246,10 @@ static int send_cap_msg(struct cap_msg_args *arg) /* pool namespace (version 8) (mds always ignores this) */ ceph_encode_32(&p, 0); - /* - * btime and change_attr (version 9) - * - * We just zero these out for now, as the MDS ignores them unless - * the requisite feature flags are set (which we don't do yet). - */ - ceph_encode_timespec64(p, &zerotime); + /* btime and change_attr (version 9) */ + ceph_encode_timespec64(p, &arg->btime); p += sizeof(struct ceph_timespec); - ceph_encode_64(&p, 0); + ceph_encode_64(&p, arg->change_attr); /* Advisory flags (version 10) */ ceph_encode_32(&p, arg->flags); @@ -1263,20 +1259,22 @@ static int send_cap_msg(struct cap_msg_args *arg) } /* - * Queue cap releases when an inode is dropped from our cache. Since - * inode is about to be destroyed, there is no need for i_ceph_lock. + * Queue cap releases when an inode is dropped from our cache. */ -void __ceph_remove_caps(struct inode *inode) +void __ceph_remove_caps(struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = ceph_inode(inode); struct rb_node *p; + /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU) + * may call __ceph_caps_issued_mask() on a freeing inode. */ + spin_lock(&ci->i_ceph_lock); p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); p = rb_next(p); __ceph_remove_cap(cap, true); } + spin_unlock(&ci->i_ceph_lock); } /* @@ -1297,7 +1295,7 @@ void __ceph_remove_caps(struct inode *inode) * caller should hold snap_rwsem (read), s_mutex. */ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, - int op, bool sync, int used, int want, int retain, + int op, int flags, int used, int want, int retain, int flushing, u64 flush_tid, u64 oldest_flush_tid) __releases(cap->ci->i_ceph_lock) { @@ -1377,6 +1375,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.mtime = inode->i_mtime; arg.atime = inode->i_atime; arg.ctime = inode->i_ctime; + arg.btime = ci->i_btime; + arg.change_attr = inode_peek_iversion_raw(inode); arg.op = op; arg.caps = cap->implemented; @@ -1393,12 +1393,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, arg.mode = inode->i_mode; arg.inline_data = ci->i_inline_version != CEPH_INLINE_NONE; - if (list_empty(&ci->i_cap_snaps)) - arg.flags = CEPH_CLIENT_CAPS_NO_CAPSNAP; - else - arg.flags = CEPH_CLIENT_CAPS_PENDING_CAPSNAP; - if (sync) - arg.flags |= CEPH_CLIENT_CAPS_SYNC; + if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) && + !list_empty(&ci->i_cap_snaps)) { + struct ceph_cap_snap *capsnap; + list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) { + if (capsnap->cap_flush.tid) + break; + if (capsnap->need_flush) { + flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP; + break; + } + } + } + arg.flags = flags; spin_unlock(&ci->i_ceph_lock); @@ -1436,6 +1443,8 @@ static inline int __send_flush_snap(struct inode *inode, arg.atime = capsnap->atime; arg.mtime = capsnap->mtime; arg.ctime = capsnap->ctime; + arg.btime = capsnap->btime; + arg.change_attr = capsnap->change_attr; arg.op = CEPH_CAP_OP_FLUSHSNAP; arg.caps = capsnap->issued; @@ -1603,10 +1612,8 @@ retry: } // make sure flushsnap messages are sent in proper order. - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { + if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) __kick_flushing_caps(mdsc, session, ci, 0); - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; - } __ceph_flush_snaps(ci, session); out: @@ -2048,10 +2055,8 @@ ack: if (cap == ci->i_auth_cap && (ci->i_ceph_flags & (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) { - if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { + if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) __kick_flushing_caps(mdsc, session, ci, 0); - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; - } if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) __ceph_flush_snaps(ci, session); @@ -2087,7 +2092,7 @@ ack: sent++; /* __send_cap drops i_ceph_lock */ - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, false, + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0, cap_used, want, retain, flushing, flush_tid, oldest_flush_tid); goto retry; /* retake i_ceph_lock and restart our cap scan. */ @@ -2121,6 +2126,7 @@ static int try_flush_caps(struct inode *inode, u64 *ptid) retry: spin_lock(&ci->i_ceph_lock); +retry_locked: if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { spin_unlock(&ci->i_ceph_lock); dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); @@ -2128,8 +2134,6 @@ retry: } if (ci->i_dirty_caps && ci->i_auth_cap) { struct ceph_cap *cap = ci->i_auth_cap; - int used = __ceph_caps_used(ci); - int want = __ceph_caps_wanted(ci); int delayed; if (!session || session != cap->session) { @@ -2145,13 +2149,25 @@ retry: goto out; } + if (ci->i_ceph_flags & + (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) { + if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) + __kick_flushing_caps(mdsc, session, ci, 0); + if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) + __ceph_flush_snaps(ci, session); + goto retry_locked; + } + flushing = __mark_caps_flushing(inode, session, true, &flush_tid, &oldest_flush_tid); /* __send_cap drops i_ceph_lock */ - delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, true, - used, want, (cap->issued | cap->implemented), - flushing, flush_tid, oldest_flush_tid); + delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, + CEPH_CLIENT_CAPS_SYNC, + __ceph_caps_used(ci), + __ceph_caps_wanted(ci), + (cap->issued | cap->implemented), + flushing, flush_tid, oldest_flush_tid); if (delayed) { spin_lock(&ci->i_ceph_lock); @@ -2320,6 +2336,16 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, struct ceph_cap_flush *cf; int ret; u64 first_tid = 0; + u64 last_snap_flush = 0; + + ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; + + list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { + if (!cf->caps) { + last_snap_flush = cf->tid; + break; + } + } list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) { if (cf->tid < first_tid) @@ -2338,10 +2364,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode, cap, cf->tid, ceph_cap_string(cf->caps)); ci->i_ceph_flags |= CEPH_I_NODELAY; + ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, - false, __ceph_caps_used(ci), + (cf->tid < last_snap_flush ? + CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0), + __ceph_caps_used(ci), __ceph_caps_wanted(ci), - cap->issued | cap->implemented, + (cap->issued | cap->implemented), cf->caps, cf->tid, oldest_flush_tid); if (ret) { pr_err("kick_flushing_caps: error sending " @@ -2410,7 +2439,6 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc, */ if ((cap->issued & ci->i_flushing_caps) != ci->i_flushing_caps) { - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; /* encode_caps_cb() also will reset these sequence * numbers. make sure sequence numbers in cap flush * message match later reconnect message */ @@ -2450,7 +2478,6 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, continue; } if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) { - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); } @@ -2478,7 +2505,6 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, oldest_flush_tid = __get_oldest_flush_tid(mdsc); spin_unlock(&mdsc->cap_dirty_lock); - ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid); spin_unlock(&ci->i_ceph_lock); } else { @@ -2738,15 +2764,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, _got = 0; ret = try_get_cap_refs(ci, need, want, endoff, false, &_got); - if (ret == -EAGAIN) { + if (ret == -EAGAIN) continue; - } else if (!ret) { - int err; - + if (!ret) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&ci->i_cap_wq, &wait); - while (!(err = try_get_cap_refs(ci, need, want, endoff, + while (!(ret = try_get_cap_refs(ci, need, want, endoff, true, &_got))) { if (signal_pending(current)) { ret = -ERESTARTSYS; @@ -2756,14 +2780,16 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, } remove_wait_queue(&ci->i_cap_wq, &wait); - if (err == -EAGAIN) + if (ret == -EAGAIN) continue; } - if (ret == -ESTALE) { - /* session was killed, try renew caps */ - ret = ceph_renew_caps(&ci->vfs_inode); - if (ret == 0) - continue; + if (ret < 0) { + if (ret == -ESTALE) { + /* session was killed, try renew caps */ + ret = ceph_renew_caps(&ci->vfs_inode); + if (ret == 0) + continue; + } return ret; } @@ -2992,8 +3018,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, } if (complete_capsnap) wake_up_all(&ci->i_cap_wq); - while (put-- > 0) - iput(inode); + while (put-- > 0) { + /* avoid calling iput_final() in osd dispatch threads */ + ceph_async_iput(inode); + } } /* @@ -3038,8 +3066,10 @@ struct cap_extra_info { bool dirstat_valid; u64 nfiles; u64 nsubdirs; + u64 change_attr; /* currently issued */ int issued; + struct timespec64 btime; }; /* @@ -3121,11 +3151,14 @@ static void handle_cap_grant(struct inode *inode, __check_cap_issue(ci, cap, newcaps); + inode_set_max_iversion_raw(inode, extra_info->change_attr); + if ((newcaps & CEPH_CAP_AUTH_SHARED) && (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); + ci->i_btime = extra_info->btime; dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, from_kuid(&init_user_ns, inode->i_uid), from_kgid(&init_user_ns, inode->i_gid)); @@ -3152,6 +3185,7 @@ static void handle_cap_grant(struct inode *inode, ci->i_xattrs.blob = ceph_buffer_get(xattr_buf); ci->i_xattrs.version = version; ceph_forget_all_cached_acls(inode); + ceph_security_invalidate_secctx(inode); } } @@ -3846,17 +3880,19 @@ void ceph_handle_caps(struct ceph_mds_session *session, } } - if (msg_version >= 11) { + if (msg_version >= 9) { struct ceph_timespec *btime; - u64 change_attr; - u32 flags; - /* version >= 9 */ if (p + sizeof(*btime) > end) goto bad; btime = p; + ceph_decode_timespec64(&extra_info.btime, btime); p += sizeof(*btime); - ceph_decode_64_safe(&p, end, change_attr, bad); + ceph_decode_64_safe(&p, end, extra_info.change_attr, bad); + } + + if (msg_version >= 11) { + u32 flags; /* version >= 10 */ ceph_decode_32_safe(&p, end, flags, bad); /* version >= 11 */ @@ -3964,8 +4000,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, done: mutex_unlock(&session->s_mutex); done_unlocked: - iput(inode); ceph_put_string(extra_info.pool_ns); + /* avoid calling iput_final() in mds dispatch threads */ + ceph_async_iput(inode); return; flush_cap_releases: @@ -4011,7 +4048,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) if (inode) { dout("check_delayed_caps on %p\n", inode); ceph_check_caps(ci, flags, NULL); - iput(inode); + /* avoid calling iput_final() in tick thread */ + ceph_async_iput(inode); } } spin_unlock(&mdsc->cap_delay_lock); diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index b3fc5fe26a1a..2eb88ed22993 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -52,7 +52,7 @@ static int mdsc_show(struct seq_file *s, void *p) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct rb_node *rp; - int pathlen; + int pathlen = 0; u64 pathbase; char *path; @@ -245,21 +245,17 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) debugfs_remove(fsc->debugfs_mdsc); } -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { char name[100]; - int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); - BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, fsc->client->debugfs_dir, fsc, &congestion_kb_fops); - if (!fsc->debugfs_congestion_kb) - goto out; snprintf(name, sizeof(name), "../../bdi/%s", dev_name(fsc->sb->s_bdi->dev)); @@ -267,52 +263,36 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) debugfs_create_symlink("bdi", fsc->client->debugfs_dir, name); - if (!fsc->debugfs_bdi) - goto out; fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 0400, fsc->client->debugfs_dir, fsc, &mdsmap_show_fops); - if (!fsc->debugfs_mdsmap) - goto out; fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions", 0400, fsc->client->debugfs_dir, fsc, &mds_sessions_show_fops); - if (!fsc->debugfs_mds_sessions) - goto out; fsc->debugfs_mdsc = debugfs_create_file("mdsc", 0400, fsc->client->debugfs_dir, fsc, &mdsc_show_fops); - if (!fsc->debugfs_mdsc) - goto out; fsc->debugfs_caps = debugfs_create_file("caps", 0400, fsc->client->debugfs_dir, fsc, &caps_show_fops); - if (!fsc->debugfs_caps) - goto out; - - return 0; - -out: - ceph_fs_debugfs_cleanup(fsc); - return err; } #else /* CONFIG_DEBUG_FS */ -int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) +void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) { return 0; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0637149fb9f9..aab29f48c62d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -825,7 +825,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct ceph_acls_info acls = {}; + struct ceph_acl_sec_ctx as_ctx = {}; int err; if (ceph_snap(dir) != CEPH_NOSNAP) @@ -836,7 +836,10 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, goto out; } - err = ceph_pre_init_acls(dir, &mode, &acls); + err = ceph_pre_init_acls(dir, &mode, &as_ctx); + if (err < 0) + goto out; + err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; @@ -855,9 +858,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - if (acls.pagelist) { - req->r_pagelist = acls.pagelist; - acls.pagelist = NULL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && !req->r_reply_info.head->is_dentry) @@ -865,10 +868,10 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, ceph_mdsc_put_request(req); out: if (!err) - ceph_init_inode_acls(d_inode(dentry), &acls); + ceph_init_inode_acls(d_inode(dentry), &as_ctx); else d_drop(dentry); - ceph_release_acls_info(&acls); + ceph_release_acl_sec_ctx(&as_ctx); return err; } @@ -884,6 +887,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; + struct ceph_acl_sec_ctx as_ctx = {}; int err; if (ceph_snap(dir) != CEPH_NOSNAP) @@ -894,6 +898,10 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, goto out; } + err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx); + if (err < 0) + goto out; + dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); if (IS_ERR(req)) { @@ -919,6 +927,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, out: if (err) d_drop(dentry); + ceph_release_acl_sec_ctx(&as_ctx); return err; } @@ -927,7 +936,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; - struct ceph_acls_info acls = {}; + struct ceph_acl_sec_ctx as_ctx = {}; int err = -EROFS; int op; @@ -950,7 +959,10 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } mode |= S_IFDIR; - err = ceph_pre_init_acls(dir, &mode, &acls); + err = ceph_pre_init_acls(dir, &mode, &as_ctx); + if (err < 0) + goto out; + err = ceph_security_init_secctx(dentry, mode, &as_ctx); if (err < 0) goto out; @@ -967,9 +979,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - if (acls.pagelist) { - req->r_pagelist = acls.pagelist; - acls.pagelist = NULL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; } err = ceph_mdsc_do_request(mdsc, dir, req); if (!err && @@ -979,10 +991,10 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ceph_mdsc_put_request(req); out: if (!err) - ceph_init_inode_acls(d_inode(dentry), &acls); + ceph_init_inode_acls(d_inode(dentry), &as_ctx); else d_drop(dentry); - ceph_release_acls_info(&acls); + ceph_release_acl_sec_ctx(&as_ctx); return err; } @@ -1433,8 +1445,7 @@ static bool __dentry_lease_is_valid(struct ceph_dentry_info *di) return false; } -static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags, - struct inode *dir) +static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags) { struct ceph_dentry_info *di; struct ceph_mds_session *session = NULL; @@ -1466,7 +1477,7 @@ static int dentry_lease_is_valid(struct dentry *dentry, unsigned int flags, spin_unlock(&dentry->d_lock); if (session) { - ceph_mdsc_lease_send_msg(session, dir, dentry, + ceph_mdsc_lease_send_msg(session, dentry, CEPH_MDS_LEASE_RENEW, seq); ceph_put_mds_session(session); } @@ -1512,18 +1523,26 @@ static int __dir_lease_try_check(const struct dentry *dentry) static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) { struct ceph_inode_info *ci = ceph_inode(dir); - struct ceph_dentry_info *di = ceph_dentry(dentry); - int valid = 0; + int valid; + int shared_gen; spin_lock(&ci->i_ceph_lock); - if (atomic_read(&ci->i_shared_gen) == di->lease_shared_gen) - valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); + valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); + shared_gen = atomic_read(&ci->i_shared_gen); spin_unlock(&ci->i_ceph_lock); - if (valid) - __ceph_dentry_dir_lease_touch(di); - dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", - dir, (unsigned)atomic_read(&ci->i_shared_gen), - dentry, (unsigned)di->lease_shared_gen, valid); + if (valid) { + struct ceph_dentry_info *di; + spin_lock(&dentry->d_lock); + di = ceph_dentry(dentry); + if (dir == d_inode(dentry->d_parent) && + di && di->lease_shared_gen == shared_gen) + __ceph_dentry_dir_lease_touch(di); + else + valid = 0; + spin_unlock(&dentry->d_lock); + } + dout("dir_lease_is_valid dir %p v%u dentry %p = %d\n", + dir, (unsigned)atomic_read(&ci->i_shared_gen), dentry, valid); return valid; } @@ -1558,7 +1577,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) { valid = 1; } else { - valid = dentry_lease_is_valid(dentry, flags, dir); + valid = dentry_lease_is_valid(dentry, flags); if (valid == -ECHILD) return valid; if (valid || dir_lease_is_valid(dir, dentry)) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index d3ef7ee429ec..15ff1b09cfa2 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -368,7 +368,7 @@ static struct dentry *ceph_get_parent(struct dentry *child) } out: dout("get_parent %p ino %llx.%llx err=%ld\n", - child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0)); + child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn)); return dn; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 305daf043eb0..685a03cc4b77 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -10,6 +10,7 @@ #include <linux/namei.h> #include <linux/writeback.h> #include <linux/falloc.h> +#include <linux/iversion.h> #include "super.h" #include "mds_client.h" @@ -437,7 +438,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct dentry *dn; - struct ceph_acls_info acls = {}; + struct ceph_acl_sec_ctx as_ctx = {}; int mask; int err; @@ -451,25 +452,28 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (flags & O_CREAT) { if (ceph_quota_is_max_files_exceeded(dir)) return -EDQUOT; - err = ceph_pre_init_acls(dir, &mode, &acls); + err = ceph_pre_init_acls(dir, &mode, &as_ctx); if (err < 0) return err; + err = ceph_security_init_secctx(dentry, mode, &as_ctx); + if (err < 0) + goto out_ctx; } /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) { err = PTR_ERR(req); - goto out_acl; + goto out_ctx; } req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; - if (acls.pagelist) { - req->r_pagelist = acls.pagelist; - acls.pagelist = NULL; + if (as_ctx.pagelist) { + req->r_pagelist = as_ctx.pagelist; + as_ctx.pagelist = NULL; } } @@ -507,7 +511,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } else { dout("atomic_open finish_open on dn %p\n", dn); if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { - ceph_init_inode_acls(d_inode(dentry), &acls); + ceph_init_inode_acls(d_inode(dentry), &as_ctx); file->f_mode |= FMODE_CREATED; } err = finish_open(file, dentry, ceph_open); @@ -516,8 +520,8 @@ out_req: if (!req->r_err && req->r_target_inode) ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode); ceph_mdsc_put_request(req); -out_acl: - ceph_release_acls_info(&acls); +out_ctx: + ceph_release_acl_sec_ctx(&as_ctx); dout("atomic_open result=%d\n", err); return err; } @@ -791,7 +795,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) if (aio_work) { INIT_WORK(&aio_work->work, ceph_aio_retry_work); aio_work->req = req; - queue_work(ceph_inode_to_client(inode)->wb_wq, + queue_work(ceph_inode_to_client(inode)->inode_wq, &aio_work->work); return; } @@ -1007,7 +1011,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, * may block. */ truncate_inode_pages_range(inode->i_mapping, pos, - (pos+len) | (PAGE_SIZE - 1)); + PAGE_ALIGN(pos + len) - 1); req->r_mtime = mtime; } @@ -1022,7 +1026,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, req->r_callback = ceph_aio_complete_req; req->r_inode = inode; req->r_priv = aio_req; - list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs); + list_add_tail(&req->r_private_item, &aio_req->osd_reqs); pos += len; continue; @@ -1082,8 +1086,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, while (!list_empty(&osd_reqs)) { req = list_first_entry(&osd_reqs, struct ceph_osd_request, - r_unsafe_item); - list_del_init(&req->r_unsafe_item); + r_private_item); + list_del_init(&req->r_private_item); if (ret >= 0) ret = ceph_osdc_start_request(req->r_osdc, req, false); @@ -1432,6 +1436,8 @@ retry_snap: if (err) goto out; + inode_inc_iversion_raw(inode); + if (ci->i_inline_version != CEPH_INLINE_NONE) { err = ceph_uninline_data(file, NULL); if (err < 0) @@ -1889,9 +1895,9 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode, return 0; } -static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, - size_t len, unsigned int flags) +static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) { struct inode *src_inode = file_inode(src_file); struct inode *dst_inode = file_inode(dst_file); @@ -1909,6 +1915,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, if (src_inode == dst_inode) return -EINVAL; + if (src_inode->i_sb != dst_inode->i_sb) + return -EXDEV; if (ceph_snap(dst_inode) != CEPH_NOSNAP) return -EROFS; @@ -2061,6 +2069,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, do_final_copy = true; file_update_time(dst_file); + inode_inc_iversion_raw(dst_inode); + if (endoff > size) { int caps_flags = 0; @@ -2100,6 +2110,21 @@ out: return ret; } +static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f85355bf49c4..791f84a13bb8 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -13,6 +13,7 @@ #include <linux/posix_acl.h> #include <linux/random.h> #include <linux/sort.h> +#include <linux/iversion.h> #include "super.h" #include "mds_client.h" @@ -33,9 +34,7 @@ static const struct inode_operations ceph_symlink_iops; -static void ceph_invalidate_work(struct work_struct *work); -static void ceph_writeback_work(struct work_struct *work); -static void ceph_vmtruncate_work(struct work_struct *work); +static void ceph_inode_work(struct work_struct *work); /* * find or create an inode, given the ceph ino number @@ -44,6 +43,7 @@ static int ceph_set_ino_cb(struct inode *inode, void *data) { ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); + inode_set_iversion_raw(inode, 0); return 0; } @@ -509,10 +509,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_snap_realm_item); INIT_LIST_HEAD(&ci->i_snap_flush_item); - INIT_WORK(&ci->i_wb_work, ceph_writeback_work); - INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); - - INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); + INIT_WORK(&ci->i_work, ceph_inode_work); + ci->i_work_mask = 0; + memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); ceph_fscache_inode_init(ci); @@ -527,17 +526,20 @@ void ceph_free_inode(struct inode *inode) kmem_cache_free(ceph_inode_cachep, ci); } -void ceph_destroy_inode(struct inode *inode) +void ceph_evict_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_frag *frag; struct rb_node *n; - dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); ceph_fscache_unregister_inode_cookie(ci); - __ceph_remove_caps(inode); + __ceph_remove_caps(ci); if (__ceph_has_any_quota(ci)) ceph_adjust_quota_realms_count(inode, false); @@ -582,16 +584,6 @@ void ceph_destroy_inode(struct inode *inode) ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns)); } -int ceph_drop_inode(struct inode *inode) -{ - /* - * Positve dentry and corresponding inode are always accompanied - * in MDS reply. So no need to keep inode in the cache after - * dropping all its aliases. - */ - return 1; -} - static inline blkcnt_t calc_inode_blocks(u64 size) { return (size + (1<<9) - 1) >> 9; @@ -799,6 +791,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, le64_to_cpu(info->version) > (ci->i_version & ~1))) new_version = true; + /* Update change_attribute */ + inode_set_max_iversion_raw(inode, iinfo->change_attr); + __ceph_caps_issued(ci, &issued); issued |= __ceph_caps_dirty(ci); new_issued = ~issued & info_caps; @@ -817,6 +812,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page, dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, from_kuid(&init_user_ns, inode->i_uid), from_kgid(&init_user_ns, inode->i_gid)); + ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); + ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); } if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && @@ -891,6 +888,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, iinfo->xattr_data, iinfo->xattr_len); ci->i_xattrs.version = le64_to_cpu(info->xattr_version); ceph_forget_all_cached_acls(inode); + ceph_security_invalidate_secctx(inode); xattr_blob = NULL; } @@ -1031,59 +1029,38 @@ out: } /* - * caller should hold session s_mutex. + * caller should hold session s_mutex and dentry->d_lock. */ -static void update_dentry_lease(struct dentry *dentry, - struct ceph_mds_reply_lease *lease, - struct ceph_mds_session *session, - unsigned long from_time, - struct ceph_vino *tgt_vino, - struct ceph_vino *dir_vino) +static void __update_dentry_lease(struct inode *dir, struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time, + struct ceph_mds_session **old_lease_session) { struct ceph_dentry_info *di = ceph_dentry(dentry); long unsigned duration = le32_to_cpu(lease->duration_ms); long unsigned ttl = from_time + (duration * HZ) / 1000; long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; - struct inode *dir; - struct ceph_mds_session *old_lease_session = NULL; - /* - * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that - * we expect a negative dentry. - */ - if (!tgt_vino && d_really_is_positive(dentry)) - return; - - if (tgt_vino && (d_really_is_negative(dentry) || - !ceph_ino_compare(d_inode(dentry), tgt_vino))) - return; - - spin_lock(&dentry->d_lock); dout("update_dentry_lease %p duration %lu ms ttl %lu\n", dentry, duration, ttl); - dir = d_inode(dentry->d_parent); - - /* make sure parent matches dir_vino */ - if (!ceph_ino_compare(dir, dir_vino)) - goto out_unlock; - /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) - goto out_unlock; + return; di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); if (duration == 0) { __ceph_dentry_dir_lease_touch(di); - goto out_unlock; + return; } if (di->lease_gen == session->s_cap_gen && time_before(ttl, di->time)) - goto out_unlock; /* we already have a newer lease. */ + return; /* we already have a newer lease. */ if (di->lease_session && di->lease_session != session) { - old_lease_session = di->lease_session; + *old_lease_session = di->lease_session; di->lease_session = NULL; } @@ -1096,6 +1073,62 @@ static void update_dentry_lease(struct dentry *dentry, di->time = ttl; __ceph_dentry_lease_touch(di); +} + +static inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time) +{ + struct ceph_mds_session *old_lease_session = NULL; + spin_lock(&dentry->d_lock); + __update_dentry_lease(dir, dentry, lease, session, from_time, + &old_lease_session); + spin_unlock(&dentry->d_lock); + if (old_lease_session) + ceph_put_mds_session(old_lease_session); +} + +/* + * update dentry lease without having parent inode locked + */ +static void update_dentry_lease_careful(struct dentry *dentry, + struct ceph_mds_reply_lease *lease, + struct ceph_mds_session *session, + unsigned long from_time, + char *dname, u32 dname_len, + struct ceph_vino *pdvino, + struct ceph_vino *ptvino) + +{ + struct inode *dir; + struct ceph_mds_session *old_lease_session = NULL; + + spin_lock(&dentry->d_lock); + /* make sure dentry's name matches target */ + if (dentry->d_name.len != dname_len || + memcmp(dentry->d_name.name, dname, dname_len)) + goto out_unlock; + + dir = d_inode(dentry->d_parent); + /* make sure parent matches dvino */ + if (!ceph_ino_compare(dir, pdvino)) + goto out_unlock; + + /* make sure dentry's inode matches target. NULL ptvino means that + * we expect a negative dentry */ + if (ptvino) { + if (d_really_is_negative(dentry)) + goto out_unlock; + if (!ceph_ino_compare(d_inode(dentry), ptvino)) + goto out_unlock; + } else { + if (d_really_is_positive(dentry)) + goto out_unlock; + } + + __update_dentry_lease(dir, dentry, lease, session, + from_time, &old_lease_session); out_unlock: spin_unlock(&dentry->d_lock); if (old_lease_session) @@ -1160,19 +1193,6 @@ static int splice_dentry(struct dentry **pdn, struct inode *in) return 0; } -static int d_name_cmp(struct dentry *dentry, const char *name, size_t len) -{ - int ret; - - /* take d_lock to ensure dentry->d_name stability */ - spin_lock(&dentry->d_lock); - ret = dentry->d_name.len - len; - if (!ret) - ret = memcmp(dentry->d_name.name, name, len); - spin_unlock(&dentry->d_lock); - return ret; -} - /* * Incorporate results into the local cache. This is either just * one inode, or a directory, dentry, and possibly linked-to inode (e.g., @@ -1375,10 +1395,9 @@ retry_lookup: } else if (have_lease) { if (d_unhashed(dn)) d_add(dn, NULL); - update_dentry_lease(dn, rinfo->dlease, - session, - req->r_request_started, - NULL, &dvino); + update_dentry_lease(dir, dn, + rinfo->dlease, session, + req->r_request_started); } goto done; } @@ -1400,11 +1419,9 @@ retry_lookup: } if (have_lease) { - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started, - &tvino, &dvino); + update_dentry_lease(dir, dn, + rinfo->dlease, session, + req->r_request_started); } dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || @@ -1422,27 +1439,20 @@ retry_lookup: err = splice_dentry(&req->r_dentry, in); if (err < 0) goto done; - } else if (rinfo->head->is_dentry && - !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) { + } else if (rinfo->head->is_dentry && req->r_dentry) { + /* parent inode is not locked, be carefull */ struct ceph_vino *ptvino = NULL; - - if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || - le32_to_cpu(rinfo->dlease->duration_ms)) { - dvino.ino = le64_to_cpu(rinfo->diri.in->ino); - dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); - - if (rinfo->head->is_target) { - tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); - tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - ptvino = &tvino; - } - - update_dentry_lease(req->r_dentry, rinfo->dlease, - session, req->r_request_started, ptvino, - &dvino); - } else { - dout("%s: no dentry lease or dir cap\n", __func__); + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + if (rinfo->head->is_target) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + ptvino = &tvino; } + update_dentry_lease_careful(req->r_dentry, rinfo->dlease, + session, req->r_request_started, + rinfo->dname, rinfo->dname_len, + &dvino, ptvino); } done: dout("fill_trace done err=%d\n", err); @@ -1480,7 +1490,8 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, pr_err("fill_inode badness on %p got %d\n", in, rc); err = rc; } - iput(in); + /* avoid calling iput_final() in mds dispatch threads */ + ceph_async_iput(in); } return err; @@ -1603,7 +1614,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_vino tvino, dvino; + struct ceph_vino tvino; dname.name = rde->name; dname.len = rde->name_len; @@ -1678,8 +1689,11 @@ retry_lookup: &req->r_caps_reservation); if (ret < 0) { pr_err("fill_inode badness on %p\n", in); - if (d_really_is_negative(dn)) - iput(in); + if (d_really_is_negative(dn)) { + /* avoid calling iput_final() in mds + * dispatch threads */ + ceph_async_iput(in); + } d_drop(dn); err = ret; goto next_item; @@ -1689,7 +1703,7 @@ retry_lookup: if (ceph_security_xattr_deadlock(in)) { dout(" skip splicing dn %p to inode %p" " (security xattr deadlock)\n", dn, in); - iput(in); + ceph_async_iput(in); skipped++; goto next_item; } @@ -1701,9 +1715,9 @@ retry_lookup: ceph_dentry(dn)->offset = rde->offset; - dvino = ceph_vino(d_inode(parent)); - update_dentry_lease(dn, rde->lease, req->r_session, - req->r_request_started, &tvino, &dvino); + update_dentry_lease(d_inode(parent), dn, + rde->lease, req->r_session, + req->r_request_started); if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { ret = fill_readdir_cache(d_inode(parent), dn, @@ -1741,56 +1755,86 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) } /* + * Put reference to inode, but avoid calling iput_final() in current thread. + * iput_final() may wait for reahahead pages. The wait can cause deadlock in + * some contexts. + */ +void ceph_async_iput(struct inode *inode) +{ + if (!inode) + return; + for (;;) { + if (atomic_add_unless(&inode->i_count, -1, 1)) + break; + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ceph_inode(inode)->i_work)) + break; + /* queue work failed, i_count must be at least 2 */ + } +} + +/* * Write back inode data in a worker thread. (This can't be done * in the message handler context.) */ void ceph_queue_writeback(struct inode *inode) { + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask); + ihold(inode); - if (queue_work(ceph_inode_to_client(inode)->wb_wq, - &ceph_inode(inode)->i_wb_work)) { + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ci->i_work)) { dout("ceph_queue_writeback %p\n", inode); } else { - dout("ceph_queue_writeback %p failed\n", inode); + dout("ceph_queue_writeback %p already queued, mask=%lx\n", + inode, ci->i_work_mask); iput(inode); } } -static void ceph_writeback_work(struct work_struct *work) -{ - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_wb_work); - struct inode *inode = &ci->vfs_inode; - - dout("writeback %p\n", inode); - filemap_fdatawrite(&inode->i_data); - iput(inode); -} - /* * queue an async invalidation */ void ceph_queue_invalidate(struct inode *inode) { + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask); + ihold(inode); - if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, - &ceph_inode(inode)->i_pg_inv_work)) { + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ceph_inode(inode)->i_work)) { dout("ceph_queue_invalidate %p\n", inode); } else { - dout("ceph_queue_invalidate %p failed\n", inode); + dout("ceph_queue_invalidate %p already queued, mask=%lx\n", + inode, ci->i_work_mask); iput(inode); } } /* - * Invalidate inode pages in a worker thread. (This can't be done - * in the message handler context.) + * Queue an async vmtruncate. If we fail to queue work, we will handle + * the truncation the next time we call __ceph_do_pending_vmtruncate. */ -static void ceph_invalidate_work(struct work_struct *work) +void ceph_queue_vmtruncate(struct inode *inode) { - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_pg_inv_work); - struct inode *inode = &ci->vfs_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask); + + ihold(inode); + if (queue_work(ceph_inode_to_client(inode)->inode_wq, + &ci->i_work)) { + dout("ceph_queue_vmtruncate %p\n", inode); + } else { + dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n", + inode, ci->i_work_mask); + iput(inode); + } +} + +static void ceph_do_invalidate_pages(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); u32 orig_gen; int check = 0; @@ -1842,44 +1886,6 @@ static void ceph_invalidate_work(struct work_struct *work) out: if (check) ceph_check_caps(ci, 0, NULL); - iput(inode); -} - - -/* - * called by trunc_wq; - * - * We also truncate in a separate thread as well. - */ -static void ceph_vmtruncate_work(struct work_struct *work) -{ - struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, - i_vmtruncate_work); - struct inode *inode = &ci->vfs_inode; - - dout("vmtruncate_work %p\n", inode); - __ceph_do_pending_vmtruncate(inode); - iput(inode); -} - -/* - * Queue an async vmtruncate. If we fail to queue work, we will handle - * the truncation the next time we call __ceph_do_pending_vmtruncate. - */ -void ceph_queue_vmtruncate(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - - ihold(inode); - - if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, - &ci->i_vmtruncate_work)) { - dout("ceph_queue_vmtruncate %p\n", inode); - } else { - dout("ceph_queue_vmtruncate %p failed, pending=%d\n", - inode, ci->i_truncate_pending); - iput(inode); - } } /* @@ -1943,6 +1949,25 @@ retry: wake_up_all(&ci->i_cap_wq); } +static void ceph_inode_work(struct work_struct *work) +{ + struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, + i_work); + struct inode *inode = &ci->vfs_inode; + + if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { + dout("writeback %p\n", inode); + filemap_fdatawrite(&inode->i_data); + } + if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) + ceph_do_invalidate_pages(inode); + + if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask)) + __ceph_do_pending_vmtruncate(inode); + + iput(inode); +} + /* * symlinks */ @@ -2271,7 +2296,7 @@ static int statx_to_caps(u32 want) { int mask = 0; - if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME)) + if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) mask |= CEPH_CAP_AUTH_SHARED; if (want & (STATX_NLINK|STATX_CTIME)) @@ -2296,6 +2321,7 @@ int ceph_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct ceph_inode_info *ci = ceph_inode(inode); + u32 valid_mask = STATX_BASIC_STATS; int err = 0; /* Skip the getattr altogether if we're asked not to sync */ @@ -2308,6 +2334,16 @@ int ceph_getattr(const struct path *path, struct kstat *stat, generic_fillattr(inode, stat); stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); + + /* + * btime on newly-allocated inodes is 0, so if this is still set to + * that, then assume that it's not valid. + */ + if (ci->i_btime.tv_sec || ci->i_btime.tv_nsec) { + stat->btime = ci->i_btime; + valid_mask |= STATX_BTIME; + } + if (ceph_snap(inode) == CEPH_NOSNAP) stat->dev = inode->i_sb->s_dev; else @@ -2331,7 +2367,6 @@ int ceph_getattr(const struct path *path, struct kstat *stat, stat->nlink = 1 + 1 + ci->i_subdirs; } - /* Mask off any higher bits (e.g. btime) until we have support */ - stat->result_mask = request_mask & STATX_BASIC_STATS; + stat->result_mask = request_mask & valid_mask; return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 959b1bf7c327..920e9f048bd8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -150,14 +150,13 @@ static int parse_reply_info_in(void **p, void *end, info->pool_ns_data = *p; *p += info->pool_ns_len; } - /* btime, change_attr */ - { - struct ceph_timespec btime; - u64 change_attr; - ceph_decode_need(p, end, sizeof(btime), bad); - ceph_decode_copy(p, &btime, sizeof(btime)); - ceph_decode_64_safe(p, end, change_attr, bad); - } + + /* btime */ + ceph_decode_need(p, end, sizeof(info->btime), bad); + ceph_decode_copy(p, &info->btime, sizeof(info->btime)); + + /* change attribute */ + ceph_decode_64_safe(p, end, info->change_attr, bad); /* dir pin */ if (struct_v >= 2) { @@ -166,6 +165,15 @@ static int parse_reply_info_in(void **p, void *end, info->dir_pin = -ENODATA; } + /* snapshot birth time, remains zero for v<=2 */ + if (struct_v >= 3) { + ceph_decode_need(p, end, sizeof(info->snap_btime), bad); + ceph_decode_copy(p, &info->snap_btime, + sizeof(info->snap_btime)); + } else { + memset(&info->snap_btime, 0, sizeof(info->snap_btime)); + } + *p = end; } else { if (features & CEPH_FEATURE_MDS_INLINE_DATA) { @@ -197,7 +205,14 @@ static int parse_reply_info_in(void **p, void *end, } } + if (features & CEPH_FEATURE_FS_BTIME) { + ceph_decode_need(p, end, sizeof(info->btime), bad); + ceph_decode_copy(p, &info->btime, sizeof(info->btime)); + ceph_decode_64_safe(p, end, info->change_attr, bad); + } + info->dir_pin = -ENODATA; + /* info->snap_btime remains zero */ } return 0; bad: @@ -690,11 +705,12 @@ void ceph_mdsc_release_request(struct kref *kref) ceph_msg_put(req->r_reply); if (req->r_inode) { ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); - iput(req->r_inode); + /* avoid calling iput_final() in mds dispatch threads */ + ceph_async_iput(req->r_inode); } if (req->r_parent) ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); - iput(req->r_target_inode); + ceph_async_iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); if (req->r_old_dentry) @@ -708,7 +724,7 @@ void ceph_mdsc_release_request(struct kref *kref) */ ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), CEPH_CAP_PIN); - iput(req->r_old_dentry_dir); + ceph_async_iput(req->r_old_dentry_dir); } kfree(req->r_path1); kfree(req->r_path2); @@ -716,6 +732,7 @@ void ceph_mdsc_release_request(struct kref *kref) ceph_pagelist_release(req->r_pagelist); put_request_session(req); ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); + WARN_ON_ONCE(!list_empty(&req->r_wait)); kfree(req); } @@ -818,7 +835,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, } if (req->r_unsafe_dir) { - iput(req->r_unsafe_dir); + /* avoid calling iput_final() in mds dispatch threads */ + ceph_async_iput(req->r_unsafe_dir); req->r_unsafe_dir = NULL; } @@ -901,7 +919,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, struct inode *dir; rcu_read_lock(); - parent = req->r_dentry->d_parent; + parent = READ_ONCE(req->r_dentry->d_parent); dir = req->r_parent ? : d_inode_rcu(parent); if (!dir || dir->i_sb != mdsc->fsc->sb) { @@ -983,7 +1001,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); if (!cap) { spin_unlock(&ci->i_ceph_lock); - iput(inode); + ceph_async_iput(inode); goto random; } mds = cap->session->s_mds; @@ -992,7 +1010,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); out: - iput(inode); + /* avoid calling iput_final() while holding mdsc->mutex or + * in mds dispatch threads */ + ceph_async_iput(inode); return mds; random: @@ -1302,7 +1322,9 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, spin_unlock(&session->s_cap_lock); if (last_inode) { - iput(last_inode); + /* avoid calling iput_final() while holding + * s_mutex or in mds dispatch threads */ + ceph_async_iput(last_inode); last_inode = NULL; } if (old_cap) { @@ -1335,7 +1357,7 @@ out: session->s_cap_iterator = NULL; spin_unlock(&session->s_cap_lock); - iput(last_inode); + ceph_async_iput(last_inode); if (old_cap) ceph_put_cap(session->s_mdsc, old_cap); @@ -1471,7 +1493,8 @@ static void remove_session_caps(struct ceph_mds_session *session) spin_unlock(&session->s_cap_lock); inode = ceph_find_inode(sb, vino); - iput(inode); + /* avoid calling iput_final() while holding s_mutex */ + ceph_async_iput(inode); spin_lock(&session->s_cap_lock); } @@ -2114,9 +2137,10 @@ retry: if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); - } else if (stop_on_nosnap && inode && + } else if (stop_on_nosnap && inode && dentry != temp && ceph_snap(inode) == CEPH_NOSNAP) { spin_unlock(&temp->d_lock); + pos++; /* get rid of any prepended '/' */ break; } else { pos -= temp->d_name.len; @@ -2127,7 +2151,7 @@ retry: memcpy(path + pos, temp->d_name.name, temp->d_name.len); } spin_unlock(&temp->d_lock); - temp = temp->d_parent; + temp = READ_ONCE(temp->d_parent); /* Are we at the root? */ if (IS_ROOT(temp)) @@ -3719,42 +3743,35 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", ceph_session_state_name(s->s_state)); - if (i >= newmap->m_num_mds || - memcmp(ceph_mdsmap_get_addr(oldmap, i), - ceph_mdsmap_get_addr(newmap, i), - sizeof(struct ceph_entity_addr))) { - if (s->s_state == CEPH_MDS_SESSION_OPENING) { - /* the session never opened, just close it - * out now */ - get_session(s); - __unregister_session(mdsc, s); - __wake_requests(mdsc, &s->s_waiting); - ceph_put_mds_session(s); - } else if (i >= newmap->m_num_mds) { - /* force close session for stopped mds */ - get_session(s); - __unregister_session(mdsc, s); - __wake_requests(mdsc, &s->s_waiting); - kick_requests(mdsc, i); - mutex_unlock(&mdsc->mutex); + if (i >= newmap->m_num_mds) { + /* force close session for stopped mds */ + get_session(s); + __unregister_session(mdsc, s); + __wake_requests(mdsc, &s->s_waiting); + mutex_unlock(&mdsc->mutex); - mutex_lock(&s->s_mutex); - cleanup_session_requests(mdsc, s); - remove_session_caps(s); - mutex_unlock(&s->s_mutex); + mutex_lock(&s->s_mutex); + cleanup_session_requests(mdsc, s); + remove_session_caps(s); + mutex_unlock(&s->s_mutex); - ceph_put_mds_session(s); + ceph_put_mds_session(s); - mutex_lock(&mdsc->mutex); - } else { - /* just close it */ - mutex_unlock(&mdsc->mutex); - mutex_lock(&s->s_mutex); - mutex_lock(&mdsc->mutex); - ceph_con_close(&s->s_con); - mutex_unlock(&s->s_mutex); - s->s_state = CEPH_MDS_SESSION_RESTARTING; - } + mutex_lock(&mdsc->mutex); + kick_requests(mdsc, i); + continue; + } + + if (memcmp(ceph_mdsmap_get_addr(oldmap, i), + ceph_mdsmap_get_addr(newmap, i), + sizeof(struct ceph_entity_addr))) { + /* just close it */ + mutex_unlock(&mdsc->mutex); + mutex_lock(&s->s_mutex); + mutex_lock(&mdsc->mutex); + ceph_con_close(&s->s_con); + mutex_unlock(&s->s_mutex); + s->s_state = CEPH_MDS_SESSION_RESTARTING; } else if (oldstate == newstate) { continue; /* nothing new with this mds */ } @@ -3912,8 +3929,9 @@ release: ceph_con_send(&session->s_con, msg); out: - iput(inode); mutex_unlock(&session->s_mutex); + /* avoid calling iput_final() in mds dispatch threads */ + ceph_async_iput(inode); return; bad: @@ -3922,31 +3940,33 @@ bad: } void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, - struct inode *inode, struct dentry *dentry, char action, u32 seq) { struct ceph_msg *msg; struct ceph_mds_lease *lease; - int len = sizeof(*lease) + sizeof(u32); - int dnamelen = 0; + struct inode *dir; + int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; - dout("lease_send_msg inode %p dentry %p %s to mds%d\n", - inode, dentry, ceph_lease_op_name(action), session->s_mds); - dnamelen = dentry->d_name.len; - len += dnamelen; + dout("lease_send_msg identry %p %s to mds%d\n", + dentry, ceph_lease_op_name(action), session->s_mds); msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); if (!msg) return; lease = msg->front.iov_base; lease->action = action; - lease->ino = cpu_to_le64(ceph_vino(inode).ino); - lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); - put_unaligned_le32(dnamelen, lease + 1); - memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen); + spin_lock(&dentry->d_lock); + dir = d_inode(dentry->d_parent); + lease->ino = cpu_to_le64(ceph_ino(dir)); + lease->first = lease->last = cpu_to_le64(ceph_snap(dir)); + + put_unaligned_le32(dentry->d_name.len, lease + 1); + memcpy((void *)(lease + 1) + 4, + dentry->d_name.name, dentry->d_name.len); + spin_unlock(&dentry->d_lock); /* * if this is a preemptive lease RELEASE, no need to * flush request stream, since the actual request will @@ -4148,6 +4168,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) while ((req = __get_oldest_req(mdsc))) { dout("wait_requests timed out on tid %llu\n", req->r_tid); + list_del_init(&req->r_wait); __unregister_request(mdsc, req); } } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a83f28bc2387..f7c8603484fe 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -69,6 +69,9 @@ struct ceph_mds_reply_info_in { u64 max_bytes; u64 max_files; s32 dir_pin; + struct ceph_timespec btime; + struct ceph_timespec snap_btime; + u64 change_attr; }; struct ceph_mds_reply_dir_entry { @@ -504,7 +507,6 @@ extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, - struct inode *inode, struct dentry *dentry, char action, u32 seq); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 701b4fb0fb5a..ce2d00da5096 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -107,7 +107,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) struct ceph_mdsmap *m; const void *start = *p; int i, j, n; - int err = -EINVAL; + int err; u8 mdsmap_v, mdsmap_cv; u16 mdsmap_ev; @@ -183,8 +183,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) inc = ceph_decode_32(p); state = ceph_decode_32(p); state_seq = ceph_decode_64(p); - ceph_decode_copy(p, &addr, sizeof(addr)); - ceph_decode_addr(&addr); + err = ceph_decode_entity_addr(p, end, &addr); + if (err) + goto corrupt; ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); *p += sizeof(u32); ceph_decode_32_safe(p, end, namelen, bad); @@ -357,7 +358,7 @@ bad_ext: nomem: err = -ENOMEM; goto out_err; -bad: +corrupt: pr_err("corrupt mdsmap\n"); print_hex_dump(KERN_DEBUG, "mdsmap: ", DUMP_PREFIX_OFFSET, 16, 1, @@ -365,6 +366,9 @@ bad: out_err: ceph_mdsmap_destroy(m); return ERR_PTR(err); +bad: + err = -EINVAL; + goto corrupt; } void ceph_mdsmap_destroy(struct ceph_mdsmap *m) diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index c4522212872c..de56dee60540 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -74,7 +74,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, le64_to_cpu(h->max_files)); spin_unlock(&ci->i_ceph_lock); - iput(inode); + /* avoid calling iput_final() in dispatch thread */ + ceph_async_iput(inode); } static struct ceph_quotarealm_inode * @@ -134,7 +135,7 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, return NULL; mutex_lock(&qri->mutex); - if (qri->inode) { + if (qri->inode && ceph_is_any_caps(qri->inode)) { /* A request has already returned the inode */ mutex_unlock(&qri->mutex); return qri->inode; @@ -145,7 +146,18 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, mutex_unlock(&qri->mutex); return NULL; } - in = ceph_lookup_inode(sb, realm->ino); + if (qri->inode) { + /* get caps */ + int ret = __ceph_do_getattr(qri->inode, NULL, + CEPH_STAT_CAP_INODE, true); + if (ret >= 0) + in = qri->inode; + else + in = ERR_PTR(ret); + } else { + in = ceph_lookup_inode(sb, realm->ino); + } + if (IS_ERR(in)) { pr_warn("Can't lookup inode %llx (err: %ld)\n", realm->ino, PTR_ERR(in)); @@ -235,7 +247,8 @@ restart: ci = ceph_inode(in); has_quota = __ceph_has_any_quota(ci); - iput(in); + /* avoid calling iput_final() while holding mdsc->snap_rwsem */ + ceph_async_iput(in); next = realm->parent; if (has_quota || !next) @@ -372,7 +385,8 @@ restart: pr_warn("Invalid quota check op (%d)\n", op); exceeded = true; /* Just break the loop */ } - iput(in); + /* avoid calling iput_final() while holding mdsc->snap_rwsem */ + ceph_async_iput(in); next = realm->parent; if (exceeded || !next) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b26e12cd8ec3..4c6494eb02b5 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -3,6 +3,7 @@ #include <linux/sort.h> #include <linux/slab.h> +#include <linux/iversion.h> #include "super.h" #include "mds_client.h" #include <linux/ceph/decode.h> @@ -606,6 +607,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->mtime = inode->i_mtime; capsnap->atime = inode->i_atime; capsnap->ctime = inode->i_ctime; + capsnap->btime = ci->i_btime; + capsnap->change_attr = inode_peek_iversion_raw(inode); capsnap->time_warp_seq = ci->i_time_warp_seq; capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; @@ -648,13 +651,15 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) if (!inode) continue; spin_unlock(&realm->inodes_with_caps_lock); - iput(lastinode); + /* avoid calling iput_final() while holding + * mdsc->snap_rwsem or in mds dispatch threads */ + ceph_async_iput(lastinode); lastinode = inode; ceph_queue_cap_snap(ci); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); - iput(lastinode); + ceph_async_iput(lastinode); dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); } @@ -806,7 +811,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc) ihold(inode); spin_unlock(&mdsc->snap_flush_lock); ceph_flush_snaps(ci, &session); - iput(inode); + /* avoid calling iput_final() while holding + * session->s_mutex or in mds dispatch threads */ + ceph_async_iput(inode); spin_lock(&mdsc->snap_flush_lock); } spin_unlock(&mdsc->snap_flush_lock); @@ -950,12 +957,14 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, ceph_get_snap_realm(mdsc, realm); ceph_put_snap_realm(mdsc, oldrealm); - iput(inode); + /* avoid calling iput_final() while holding + * mdsc->snap_rwsem or mds in dispatch threads */ + ceph_async_iput(inode); continue; skip_inode: spin_unlock(&ci->i_ceph_lock); - iput(inode); + ceph_async_iput(inode); } /* we may have taken some of the old realm's children. */ diff --git a/fs/ceph/super.c b/fs/ceph/super.c index c864b44c8341..ab4868c7308e 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/ceph/ceph_debug.h> @@ -671,18 +672,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, * The number of concurrent works can be high but they don't need * to be processed in parallel, limit concurrency. */ - fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); - if (!fsc->wb_wq) + fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); + if (!fsc->inode_wq) goto fail_client; - fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); - if (!fsc->pg_inv_wq) - goto fail_wb_wq; - fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); - if (!fsc->trunc_wq) - goto fail_pg_inv_wq; fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); if (!fsc->cap_wq) - goto fail_trunc_wq; + goto fail_inode_wq; /* set up mempools */ err = -ENOMEM; @@ -696,12 +691,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, fail_cap_wq: destroy_workqueue(fsc->cap_wq); -fail_trunc_wq: - destroy_workqueue(fsc->trunc_wq); -fail_pg_inv_wq: - destroy_workqueue(fsc->pg_inv_wq); -fail_wb_wq: - destroy_workqueue(fsc->wb_wq); +fail_inode_wq: + destroy_workqueue(fsc->inode_wq); fail_client: ceph_destroy_client(fsc->client); fail: @@ -714,9 +705,7 @@ fail: static void flush_fs_workqueues(struct ceph_fs_client *fsc) { - flush_workqueue(fsc->wb_wq); - flush_workqueue(fsc->pg_inv_wq); - flush_workqueue(fsc->trunc_wq); + flush_workqueue(fsc->inode_wq); flush_workqueue(fsc->cap_wq); } @@ -724,9 +713,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) { dout("destroy_fs_client %p\n", fsc); - destroy_workqueue(fsc->wb_wq); - destroy_workqueue(fsc->pg_inv_wq); - destroy_workqueue(fsc->trunc_wq); + destroy_workqueue(fsc->inode_wq); destroy_workqueue(fsc->cap_wq); mempool_destroy(fsc->wb_pagevec_pool); @@ -853,10 +840,10 @@ static int ceph_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations ceph_super_ops = { .alloc_inode = ceph_alloc_inode, - .destroy_inode = ceph_destroy_inode, .free_inode = ceph_free_inode, .write_inode = ceph_write_inode, - .drop_inode = ceph_drop_inode, + .drop_inode = generic_delete_inode, + .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .remount_fs = ceph_remount, @@ -950,9 +937,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) dout("mount opening path %s\n", path); } - err = ceph_fs_debugfs_init(fsc); - if (err < 0) - goto out; + ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); if (IS_ERR(root)) { @@ -993,7 +978,7 @@ static int ceph_set_super(struct super_block *s, void *data) s->s_d_op = &ceph_dentry_ops; s->s_export_op = &ceph_export_ops; - s->s_time_gran = 1000; /* 1000 ns == 1 us */ + s->s_time_gran = 1; ret = set_anon_super(s, NULL); /* what is that second arg for? */ if (ret != 0) @@ -1174,17 +1159,15 @@ static int __init init_ceph(void) goto out; ceph_flock_init(); - ceph_xattr_init(); ret = register_filesystem(&ceph_fs_type); if (ret) - goto out_xattr; + goto out_caches; pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); return 0; -out_xattr: - ceph_xattr_exit(); +out_caches: destroy_caches(); out: return ret; @@ -1194,7 +1177,6 @@ static void __exit exit_ceph(void) { dout("exit_ceph\n"); unregister_filesystem(&ceph_fs_type); - ceph_xattr_exit(); destroy_caches(); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6edab9a750f8..d2352fd95dbc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -109,9 +109,7 @@ struct ceph_fs_client { mempool_t *wb_pagevec_pool; atomic_long_t writeback_count; - struct workqueue_struct *wb_wq; - struct workqueue_struct *pg_inv_wq; - struct workqueue_struct *trunc_wq; + struct workqueue_struct *inode_wq; struct workqueue_struct *cap_wq; #ifdef CONFIG_DEBUG_FS @@ -199,7 +197,8 @@ struct ceph_cap_snap { u64 xattr_version; u64 size; - struct timespec64 mtime, atime, ctime; + u64 change_attr; + struct timespec64 mtime, atime, ctime, btime; u64 time_warp_seq; u64 truncate_size; u32 truncate_seq; @@ -386,11 +385,11 @@ struct ceph_inode_info { int i_snap_realm_counter; /* snap realm (if caps) */ struct list_head i_snap_realm_item; struct list_head i_snap_flush_item; + struct timespec64 i_btime; + struct timespec64 i_snap_btime; - struct work_struct i_wb_work; /* writeback work */ - struct work_struct i_pg_inv_work; /* page invalidation work */ - - struct work_struct i_vmtruncate_work; + struct work_struct i_work; + unsigned long i_work_mask; #ifdef CONFIG_CEPH_FSCACHE struct fscache_cookie *fscache; @@ -513,6 +512,13 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, /* + * Masks of ceph inode work. + */ +#define CEPH_I_WORK_WRITEBACK 0 /* writeback */ +#define CEPH_I_WORK_INVALIDATE_PAGES 1 /* invalidate pages */ +#define CEPH_I_WORK_VMTRUNCATE 2 /* vmtruncate */ + +/* * We set the ERROR_WRITE bit when we start seeing write errors on an inode * and then clear it when they start succeeding. Note that we do a lockless * check first, and only take the lock if it looks like it needs to be changed. @@ -541,7 +547,12 @@ static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, long long release_count, long long ordered_count) { - smp_mb__before_atomic(); + /* + * Makes sure operations that setup readdir cache (update page + * cache and i_size) are strongly ordered w.r.t. the following + * atomic64_set() operations. + */ + smp_mb(); atomic64_set(&ci->i_complete_seq[0], release_count); atomic64_set(&ci->i_complete_seq[1], ordered_count); } @@ -873,9 +884,8 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) extern const struct inode_operations ceph_file_iops; extern struct inode *ceph_alloc_inode(struct super_block *sb); -extern void ceph_destroy_inode(struct inode *inode); +extern void ceph_evict_inode(struct inode *inode); extern void ceph_free_inode(struct inode *inode); -extern int ceph_drop_inode(struct inode *inode); extern struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino); @@ -896,9 +906,9 @@ extern int ceph_inode_holds_cap(struct inode *inode, int mask); extern bool ceph_inode_set_size(struct inode *inode, loff_t size); extern void __ceph_do_pending_vmtruncate(struct inode *inode); extern void ceph_queue_vmtruncate(struct inode *inode); - extern void ceph_queue_invalidate(struct inode *inode); extern void ceph_queue_writeback(struct inode *inode); +extern void ceph_async_iput(struct inode *inode); extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, int mask, bool force); @@ -918,10 +928,20 @@ ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); -extern void __init ceph_xattr_init(void); -extern void ceph_xattr_exit(void); extern const struct xattr_handler *ceph_xattr_handlers[]; +struct ceph_acl_sec_ctx { +#ifdef CONFIG_CEPH_FS_POSIX_ACL + void *default_acl; + void *acl; +#endif +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL + void *sec_ctx; + u32 sec_ctxlen; +#endif + struct ceph_pagelist *pagelist; +}; + #ifdef CONFIG_SECURITY extern bool ceph_security_xattr_deadlock(struct inode *in); extern bool ceph_security_xattr_wanted(struct inode *in); @@ -936,21 +956,32 @@ static inline bool ceph_security_xattr_wanted(struct inode *in) } #endif -/* acl.c */ -struct ceph_acls_info { - void *default_acl; - void *acl; - struct ceph_pagelist *pagelist; -}; +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL +extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, + struct ceph_acl_sec_ctx *ctx); +extern void ceph_security_invalidate_secctx(struct inode *inode); +#else +static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, + struct ceph_acl_sec_ctx *ctx) +{ + return 0; +} +static inline void ceph_security_invalidate_secctx(struct inode *inode) +{ +} +#endif + +void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx); +/* acl.c */ #ifdef CONFIG_CEPH_FS_POSIX_ACL struct posix_acl *ceph_get_acl(struct inode *, int); int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type); int ceph_pre_init_acls(struct inode *dir, umode_t *mode, - struct ceph_acls_info *info); -void ceph_init_inode_acls(struct inode *inode, struct ceph_acls_info *info); -void ceph_release_acls_info(struct ceph_acls_info *info); + struct ceph_acl_sec_ctx *as_ctx); +void ceph_init_inode_acls(struct inode *inode, + struct ceph_acl_sec_ctx *as_ctx); static inline void ceph_forget_all_cached_acls(struct inode *inode) { @@ -963,15 +994,12 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) #define ceph_set_acl NULL static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, - struct ceph_acls_info *info) + struct ceph_acl_sec_ctx *as_ctx) { return 0; } static inline void ceph_init_inode_acls(struct inode *inode, - struct ceph_acls_info *info) -{ -} -static inline void ceph_release_acls_info(struct ceph_acls_info *info) + struct ceph_acl_sec_ctx *as_ctx) { } static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode) @@ -997,7 +1025,7 @@ extern void ceph_add_cap(struct inode *inode, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); -extern void __ceph_remove_caps(struct inode* inode); +extern void __ceph_remove_caps(struct ceph_inode_info *ci); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); extern int ceph_is_any_caps(struct inode *inode); @@ -1099,7 +1127,7 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, int num_fcntl_locks, int num_flock_locks); /* debugfs.c */ -extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); +extern void ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); /* quota.c */ diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 0cc42c8879e9..37b458a9af3a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -8,6 +8,7 @@ #include <linux/ceph/decode.h> #include <linux/xattr.h> +#include <linux/security.h> #include <linux/posix_acl_xattr.h> #include <linux/slab.h> @@ -17,26 +18,9 @@ static int __remove_xattr(struct ceph_inode_info *ci, struct ceph_inode_xattr *xattr); -static const struct xattr_handler ceph_other_xattr_handler; - -/* - * List of handlers for synthetic system.* attributes. Other - * attributes are handled directly. - */ -const struct xattr_handler *ceph_xattr_handlers[] = { -#ifdef CONFIG_CEPH_FS_POSIX_ACL - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, -#endif - &ceph_other_xattr_handler, - NULL, -}; - static bool ceph_is_valid_xattr(const char *name) { return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || - !strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN) || !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } @@ -48,8 +32,8 @@ static bool ceph_is_valid_xattr(const char *name) struct ceph_vxattr { char *name; size_t name_size; /* strlen(name) + 1 (for '\0') */ - size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, - size_t size); + ssize_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, + size_t size); bool (*exists_cb)(struct ceph_inode_info *ci); unsigned int flags; }; @@ -68,8 +52,8 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) rcu_dereference_raw(fl->pool_ns) != NULL); } -static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + size_t size) { struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; @@ -79,7 +63,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, const char *ns_field = " pool_namespace="; char buf[128]; size_t len, total_len = 0; - int ret; + ssize_t ret; pool_ns = ceph_try_get_string(ci->i_layout.pool_ns); @@ -96,18 +80,15 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, len = snprintf(buf, sizeof(buf), "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld", ci->i_layout.stripe_unit, ci->i_layout.stripe_count, - ci->i_layout.object_size, (unsigned long long)pool); + ci->i_layout.object_size, pool); total_len = len; } if (pool_ns) total_len += strlen(ns_field) + pool_ns->len; - if (!size) { - ret = total_len; - } else if (total_len > size) { - ret = -ERANGE; - } else { + ret = total_len; + if (size >= total_len) { memcpy(val, buf, len); ret = len; if (pool_name) { @@ -128,28 +109,55 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, return ret; } -static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, - char *val, size_t size) +/* + * The convention with strings in xattrs is that they should not be NULL + * terminated, since we're returning the length with them. snprintf always + * NULL terminates however, so call it on a temporary buffer and then memcpy + * the result into place. + */ +static int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...) { - return snprintf(val, size, "%u", ci->i_layout.stripe_unit); + int ret; + va_list args; + char buf[96]; /* NB: reevaluate size if new vxattrs are added */ + + va_start(args, fmt); + ret = vsnprintf(buf, size ? sizeof(buf) : 0, fmt, args); + va_end(args); + + /* Sanity check */ + if (size && ret + 1 > sizeof(buf)) { + WARN_ONCE(true, "Returned length too big (%d)", ret); + return -E2BIG; + } + + if (ret <= size) + memcpy(val, buf, ret); + return ret; } -static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, +static ssize_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, char *val, size_t size) { - return snprintf(val, size, "%u", ci->i_layout.stripe_count); + return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_unit); +} + +static ssize_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_count); } -static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, - char *val, size_t size) +static ssize_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, + char *val, size_t size) { - return snprintf(val, size, "%u", ci->i_layout.object_size); + return ceph_fmt_xattr(val, size, "%u", ci->i_layout.object_size); } -static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, - char *val, size_t size) +static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, + char *val, size_t size) { - int ret; + ssize_t ret; struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; s64 pool = ci->i_layout.pool_id; @@ -157,21 +165,27 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, down_read(&osdc->lock); pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); - if (pool_name) - ret = snprintf(val, size, "%s", pool_name); - else - ret = snprintf(val, size, "%lld", (unsigned long long)pool); + if (pool_name) { + ret = strlen(pool_name); + if (ret <= size) + memcpy(val, pool_name, ret); + } else { + ret = ceph_fmt_xattr(val, size, "%lld", pool); + } up_read(&osdc->lock); return ret; } -static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci, - char *val, size_t size) +static ssize_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci, + char *val, size_t size) { - int ret = 0; + ssize_t ret = 0; struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns); + if (ns) { - ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str); + ret = ns->len; + if (ret <= size) + memcpy(val, ns->str, ret); ceph_put_string(ns); } return ret; @@ -179,53 +193,54 @@ static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci, /* directories */ -static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs); + return ceph_fmt_xattr(val, size, "%lld", ci->i_files + ci->i_subdirs); } -static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_files); + return ceph_fmt_xattr(val, size, "%lld", ci->i_files); } -static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_subdirs); + return ceph_fmt_xattr(val, size, "%lld", ci->i_subdirs); } -static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs); + return ceph_fmt_xattr(val, size, "%lld", + ci->i_rfiles + ci->i_rsubdirs); } -static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_rfiles); + return ceph_fmt_xattr(val, size, "%lld", ci->i_rfiles); } -static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_rsubdirs); + return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs); } -static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld", ci->i_rbytes); + return ceph_fmt_xattr(val, size, "%lld", ci->i_rbytes); } -static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%lld.09%ld", ci->i_rctime.tv_sec, - ci->i_rctime.tv_nsec); + return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_rctime.tv_sec, + ci->i_rctime.tv_nsec); } /* dir pin */ @@ -234,10 +249,10 @@ static bool ceph_vxattrcb_dir_pin_exists(struct ceph_inode_info *ci) return ci->i_dir_pin != -ENODATA; } -static size_t ceph_vxattrcb_dir_pin(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_dir_pin(struct ceph_inode_info *ci, char *val, + size_t size) { - return snprintf(val, size, "%d", (int)ci->i_dir_pin); + return ceph_fmt_xattr(val, size, "%d", (int)ci->i_dir_pin); } /* quotas */ @@ -254,23 +269,36 @@ static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci) return ret; } -static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, - size_t size) +static ssize_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val, + size_t size) +{ + return ceph_fmt_xattr(val, size, "max_bytes=%llu max_files=%llu", + ci->i_max_bytes, ci->i_max_files); +} + +static ssize_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci, + char *val, size_t size) { - return snprintf(val, size, "max_bytes=%llu max_files=%llu", - ci->i_max_bytes, ci->i_max_files); + return ceph_fmt_xattr(val, size, "%llu", ci->i_max_bytes); } -static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci, - char *val, size_t size) +static ssize_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, + char *val, size_t size) { - return snprintf(val, size, "%llu", ci->i_max_bytes); + return ceph_fmt_xattr(val, size, "%llu", ci->i_max_files); } -static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci, - char *val, size_t size) +/* snapshots */ +static bool ceph_vxattrcb_snap_btime_exists(struct ceph_inode_info *ci) { - return snprintf(val, size, "%llu", ci->i_max_files); + return (ci->i_snap_btime.tv_sec != 0 || ci->i_snap_btime.tv_nsec != 0); +} + +static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val, + size_t size) +{ + return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_snap_btime.tv_sec, + ci->i_snap_btime.tv_nsec); } #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name @@ -327,7 +355,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_RSTAT_FIELD(dir, rctime), { .name = "ceph.dir.pin", - .name_size = sizeof("ceph.dir_pin"), + .name_size = sizeof("ceph.dir.pin"), .getxattr_cb = ceph_vxattrcb_dir_pin, .exists_cb = ceph_vxattrcb_dir_pin_exists, .flags = VXATTR_FLAG_HIDDEN, @@ -341,9 +369,15 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { }, XATTR_QUOTA_FIELD(quota, max_bytes), XATTR_QUOTA_FIELD(quota, max_files), + { + .name = "ceph.snap.btime", + .name_size = sizeof("ceph.snap.btime"), + .getxattr_cb = ceph_vxattrcb_snap_btime, + .exists_cb = ceph_vxattrcb_snap_btime_exists, + .flags = VXATTR_FLAG_READONLY, + }, { .name = NULL, 0 } /* Required table terminator */ }; -static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ /* files */ @@ -360,9 +394,15 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { XATTR_LAYOUT_FIELD(file, layout, object_size), XATTR_LAYOUT_FIELD(file, layout, pool), XATTR_LAYOUT_FIELD(file, layout, pool_namespace), + { + .name = "ceph.snap.btime", + .name_size = sizeof("ceph.snap.btime"), + .getxattr_cb = ceph_vxattrcb_snap_btime, + .exists_cb = ceph_vxattrcb_snap_btime_exists, + .flags = VXATTR_FLAG_READONLY, + }, { .name = NULL, 0 } /* Required table terminator */ }; -static size_t ceph_file_vxattrs_name_size; /* total size of all names */ static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) { @@ -373,47 +413,6 @@ static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode) return NULL; } -static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs) -{ - if (vxattrs == ceph_dir_vxattrs) - return ceph_dir_vxattrs_name_size; - if (vxattrs == ceph_file_vxattrs) - return ceph_file_vxattrs_name_size; - BUG_ON(vxattrs); - return 0; -} - -/* - * Compute the aggregate size (including terminating '\0') of all - * virtual extended attribute names in the given vxattr table. - */ -static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) -{ - struct ceph_vxattr *vxattr; - size_t size = 0; - - for (vxattr = vxattrs; vxattr->name; vxattr++) { - if (!(vxattr->flags & VXATTR_FLAG_HIDDEN)) - size += vxattr->name_size; - } - - return size; -} - -/* Routines called at initialization and exit time */ - -void __init ceph_xattr_init(void) -{ - ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs); - ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs); -} - -void ceph_xattr_exit(void) -{ - ceph_dir_vxattrs_name_size = 0; - ceph_file_vxattrs_name_size = 0; -} - static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode, const char *name) { @@ -523,8 +522,8 @@ static int __set_xattr(struct ceph_inode_info *ci, dout("__set_xattr_val p=%p\n", p); } - dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n", - ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val); + dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n", + ceph_vinop(&ci->vfs_inode), xattr, name_len, name, val_len, val); return 0; } @@ -823,7 +822,7 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, struct ceph_inode_xattr *xattr; struct ceph_vxattr *vxattr = NULL; int req_mask; - int err; + ssize_t err; /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); @@ -835,8 +834,11 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, if (err) return err; err = -ENODATA; - if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) + if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) { err = vxattr->getxattr_cb(ci, value, size); + if (size && size < err) + err = -ERANGE; + } return err; } @@ -897,10 +899,9 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) struct inode *inode = d_inode(dentry); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); - u32 vir_namelen = 0; + bool len_only = (size == 0); u32 namelen; int err; - u32 len; int i; spin_lock(&ci->i_ceph_lock); @@ -919,38 +920,45 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) err = __build_xattrs(inode); if (err < 0) goto out; - /* - * Start with virtual dir xattr names (if any) (including - * terminating '\0' characters for each). - */ - vir_namelen = ceph_vxattrs_name_size(vxattrs); - /* adding 1 byte per each variable due to the null termination */ + /* add 1 byte for each xattr due to the null termination */ namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; - err = -ERANGE; - if (size && vir_namelen + namelen > size) - goto out; - - err = namelen + vir_namelen; - if (size == 0) - goto out; + if (!len_only) { + if (namelen > size) { + err = -ERANGE; + goto out; + } + names = __copy_xattr_names(ci, names); + size -= namelen; + } - names = __copy_xattr_names(ci, names); /* virtual xattr names, too */ - err = namelen; if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - if (!(vxattrs[i].flags & VXATTR_FLAG_HIDDEN) && - !(vxattrs[i].exists_cb && - !vxattrs[i].exists_cb(ci))) { - len = sprintf(names, "%s", vxattrs[i].name); - names += len + 1; - err += len + 1; + size_t this_len; + + if (vxattrs[i].flags & VXATTR_FLAG_HIDDEN) + continue; + if (vxattrs[i].exists_cb && !vxattrs[i].exists_cb(ci)) + continue; + + this_len = strlen(vxattrs[i].name) + 1; + namelen += this_len; + if (len_only) + continue; + + if (this_len > size) { + err = -ERANGE; + goto out; } + + memcpy(names, vxattrs[i].name, this_len); + names += this_len; + size -= this_len; } } - + err = namelen; out: spin_unlock(&ci->i_ceph_lock); return err; @@ -1206,4 +1214,138 @@ bool ceph_security_xattr_deadlock(struct inode *in) spin_unlock(&ci->i_ceph_lock); return ret; } + +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL +int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, + struct ceph_acl_sec_ctx *as_ctx) +{ + struct ceph_pagelist *pagelist = as_ctx->pagelist; + const char *name; + size_t name_len; + int err; + + err = security_dentry_init_security(dentry, mode, &dentry->d_name, + &as_ctx->sec_ctx, + &as_ctx->sec_ctxlen); + if (err < 0) { + WARN_ON_ONCE(err != -EOPNOTSUPP); + err = 0; /* do nothing */ + goto out; + } + + err = -ENOMEM; + if (!pagelist) { + pagelist = ceph_pagelist_alloc(GFP_KERNEL); + if (!pagelist) + goto out; + err = ceph_pagelist_reserve(pagelist, PAGE_SIZE); + if (err) + goto out; + ceph_pagelist_encode_32(pagelist, 1); + } + + /* + * FIXME: Make security_dentry_init_security() generic. Currently + * It only supports single security module and only selinux has + * dentry_init_security hook. + */ + name = XATTR_NAME_SELINUX; + name_len = strlen(name); + err = ceph_pagelist_reserve(pagelist, + 4 * 2 + name_len + as_ctx->sec_ctxlen); + if (err) + goto out; + + if (as_ctx->pagelist) { + /* update count of KV pairs */ + BUG_ON(pagelist->length <= sizeof(__le32)); + if (list_is_singular(&pagelist->head)) { + le32_add_cpu((__le32*)pagelist->mapped_tail, 1); + } else { + struct page *page = list_first_entry(&pagelist->head, + struct page, lru); + void *addr = kmap_atomic(page); + le32_add_cpu((__le32*)addr, 1); + kunmap_atomic(addr); + } + } else { + as_ctx->pagelist = pagelist; + } + + ceph_pagelist_encode_32(pagelist, name_len); + ceph_pagelist_append(pagelist, name, name_len); + + ceph_pagelist_encode_32(pagelist, as_ctx->sec_ctxlen); + ceph_pagelist_append(pagelist, as_ctx->sec_ctx, as_ctx->sec_ctxlen); + + err = 0; +out: + if (pagelist && !as_ctx->pagelist) + ceph_pagelist_release(pagelist); + return err; +} + +void ceph_security_invalidate_secctx(struct inode *inode) +{ + security_inode_invalidate_secctx(inode); +} + +static int ceph_xattr_set_security_label(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + if (security_ismaclabel(key)) { + const char *name = xattr_full_name(handler, key); + return __ceph_setxattr(inode, name, buf, buflen, flags); + } + return -EOPNOTSUPP; +} + +static int ceph_xattr_get_security_label(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + if (security_ismaclabel(key)) { + const char *name = xattr_full_name(handler, key); + return __ceph_getxattr(inode, name, buf, buflen); + } + return -EOPNOTSUPP; +} + +static const struct xattr_handler ceph_security_label_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = ceph_xattr_get_security_label, + .set = ceph_xattr_set_security_label, +}; +#endif #endif + +void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) +{ +#ifdef CONFIG_CEPH_FS_POSIX_ACL + posix_acl_release(as_ctx->acl); + posix_acl_release(as_ctx->default_acl); +#endif +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL + security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen); +#endif + if (as_ctx->pagelist) + ceph_pagelist_release(as_ctx->pagelist); +} + +/* + * List of handlers for synthetic system.* attributes. Other + * attributes are handled directly. + */ +const struct xattr_handler *ceph_xattr_handlers[] = { +#ifdef CONFIG_CEPH_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL + &ceph_security_label_handler, +#endif + &ceph_other_xattr_handler, + NULL, +}; diff --git a/fs/char_dev.c b/fs/char_dev.c index d18cad28c1c3..00dfe17871ac 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -98,7 +98,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, int minorct, const char *name) { struct char_device_struct *cd, *curr, *prev = NULL; - int ret = -EBUSY; + int ret; int i; if (major >= CHRDEV_MAJOR_MAX) { @@ -129,6 +129,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, major = ret; } + ret = -EBUSY; i = major_to_index(major); for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) { if (curr->major < major) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 76724efc831c..b16219e5dac9 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CIFS tristate "SMB3 and CIFS support (advanced network filesystem)" depends on INET @@ -9,12 +10,14 @@ config CIFS select CRYPTO_SHA512 select CRYPTO_CMAC select CRYPTO_HMAC - select CRYPTO_ARC4 + select CRYPTO_LIB_ARC4 select CRYPTO_AEAD2 select CRYPTO_CCM + select CRYPTO_GCM select CRYPTO_ECB select CRYPTO_AES select CRYPTO_DES + select KEYS help This is the client VFS module for the SMB3 family of NAS protocols, (including support for the most recent, most secure dialect SMB3.1.1) @@ -108,7 +111,7 @@ config CIFS_WEAK_PW_HASH config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup" - depends on CIFS && KEYS + depends on CIFS select DNS_RESOLVER help Enables an upcall mechanism for CIFS which accesses userspace helper @@ -143,14 +146,6 @@ config CIFS_POSIX (such as Samba 3.10 and later) which can negotiate CIFS POSIX ACL support. If unsure, say N. -config CIFS_ACL - bool "Provide CIFS ACL support" - depends on CIFS_XATTR && KEYS - help - Allows fetching CIFS/NTFS ACL from the server. The DACL blob - is handed over to the application/caller. See the man - page for getcifsacl for more information. If unsure, say Y. - config CIFS_DEBUG bool "Enable CIFS debugging routines" default y @@ -183,7 +178,7 @@ config CIFS_DEBUG_DUMP_KEYS config CIFS_DFS_UPCALL bool "DFS feature support" - depends on CIFS && KEYS + depends on CIFS select DNS_RESOLVER help Distributed File System (DFS) support is used to access shares @@ -202,10 +197,10 @@ config CIFS_NFSD_EXPORT Allows NFS server to export a CIFS mounted share (nfsd over cifs) config CIFS_SMB_DIRECT - bool "SMB Direct support (Experimental)" + bool "SMB Direct support" depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y help - Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. + Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, say N. diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 51af69a1a328..41332f20055b 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -10,10 +10,9 @@ cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \ cifs_unicode.o nterr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \ smb2ops.o smb2maperror.o smb2transport.o \ - smb2misc.o smb2pdu.o smb2inode.o smb2file.o + smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o cifs-$(CONFIG_CIFS_XATTR) += xattr.o -cifs-$(CONFIG_CIFS_ACL) += cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 3d19595eb352..689162e2e175 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -1,20 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * The ASB.1/BER parsing code is derived from ip_nat_snmp_basic.c which was in * turn derived from the gxsnmp package by Gregory McLean & Jochen Friedrich * * Copyright (c) 2000 RP Internet (www.rpi.net.au). - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/module.h> diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 45e74da40f3a..a38d796f5ffe 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/cifs_debug.c * * Copyright (C) International Business Machines Corp., 2000,2005 * * Modified by Steve French (sfrench@us.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> #include <linux/string.h> @@ -253,9 +240,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_CIFS_XATTR seq_printf(m, ",XATTR"); #endif -#ifdef CONFIG_CIFS_ACL seq_printf(m, ",ACL"); -#endif seq_putc(m, '\n'); seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index 631dc1bb21c1..3d392620a2f4 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * * Copyright (c) International Business Machines Corp., 2000,2002 * Modified by Steve French (sfrench@us.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #ifndef _H_CIFS_DEBUG diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 5d83c924cc47..41957b82d796 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Contains the CIFS DFS referral mounting routines used for handling * traversal via DFS junction point @@ -6,10 +7,6 @@ * Copyright (C) International Business Machines Corp., 2008 * Author(s): Igor Mammedov (niallain@gmail.com) * Steve French (sfrench@us.ibm.com) - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/dcache.h> diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index ed49222abecb..b326d2ca3765 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -52,6 +52,7 @@ #define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */ #define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */ #define CIFS_MOUNT_NO_DFS 0x8000000 /* disable DFS resolving */ +#define CIFS_MOUNT_MODE_FROM_SID 0x10000000 /* retrieve mode from special ACE */ struct cifs_sb_info { struct rb_root tlink_tree; @@ -83,5 +84,10 @@ struct cifs_sb_info { * failover properly. */ char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */ + /* + * Indicate whether serverino option was turned off later + * (cifs_autodisable_serverino) in order to match new mounts. + */ + bool mnt_cifs_serverino_autodisabled; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index a2b2355e7f01..498777d859eb 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/cifs/cifs_unicode.c * * Copyright (c) International Business Machines Corp., 2000,2009 * Modified by Steve French (sfrench@us.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> #include <linux/slab.h> diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 8360b74530a9..80b3d845419f 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * cifs_unicode: Unicode kernel case support * @@ -7,28 +8,12 @@ * * Copyright (c) International Business Machines Corp., 2000,2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * * Notes: * These APIs are based on the C library functions. The semantics * should match the C functions but with expanded size operands. * * The upper/lower functions are based on a table created by mkupr. * This is a compressed table of upper and lower case conversion. - * */ #ifndef _CIFS_UNICODE_H #define _CIFS_UNICODE_H diff --git a/fs/cifs/cifs_uniupr.h b/fs/cifs/cifs_uniupr.h index 0ac7c5a8633a..7b272fcdf0d3 100644 --- a/fs/cifs/cifs_uniupr.h +++ b/fs/cifs/cifs_uniupr.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) International Business Machines Corp., 2000,2002 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * uniupr.h - Unicode compressed case ranges - * */ #ifndef UNIUPR_NOUPPER diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index d2a05e46d6f5..97b7497c13ef 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -33,7 +33,8 @@ #include <linux/ctype.h> #include <linux/random.h> #include <linux/highmem.h> -#include <crypto/skcipher.h> +#include <linux/fips.h> +#include <crypto/arc4.h> #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, @@ -772,63 +773,32 @@ setup_ntlmv2_rsp_ret: int calc_seckey(struct cifs_ses *ses) { - int rc; - struct crypto_skcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct skcipher_request *req; - unsigned char *sec_key; + unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */ + struct arc4_ctx *ctx_arc4; - sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL); - if (sec_key == NULL) - return -ENOMEM; + if (fips_enabled) + return -ENODEV; get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE); - tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm_arc4)) { - rc = PTR_ERR(tfm_arc4); - cifs_dbg(VFS, "could not allocate crypto API arc4\n"); - goto out; - } - - rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response, - CIFS_SESS_KEY_SIZE); - if (rc) { - cifs_dbg(VFS, "%s: Could not set response as a key\n", - __func__); - goto out_free_cipher; - } - - req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - cifs_dbg(VFS, "could not allocate crypto API arc4 request\n"); - goto out_free_cipher; + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); + if (!ctx_arc4) { + cifs_dbg(VFS, "could not allocate arc4 context\n"); + return -ENOMEM; } - sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); - sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); - - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL); - - rc = crypto_skcipher_encrypt(req); - skcipher_request_free(req); - if (rc) { - cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc); - goto out_free_cipher; - } + arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, + CIFS_CPHTXT_SIZE); /* make secondary_key/nonce as session key */ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE); /* and make len as that of session key only */ ses->auth_key.len = CIFS_SESS_KEY_SIZE; -out_free_cipher: - crypto_free_skcipher(tfm_arc4); -out: - kfree(sec_key); - return rc; + memzero_explicit(sec_key, CIFS_SESS_KEY_SIZE); + kzfree(ctx_arc4); + return 0; } void diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5fcd6360056..270d3c58fb3b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -303,6 +303,7 @@ cifs_alloc_inode(struct super_block *sb) cifs_inode->uniqueid = 0; cifs_inode->createtime = 0; cifs_inode->epoch = 0; + spin_lock_init(&cifs_inode->open_file_lock); generate_random_uuid(cifs_inode->lease_key); /* @@ -525,6 +526,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nobrl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE) seq_puts(s, ",nohandlecache"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) + seq_puts(s, ",modefromsid"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) seq_puts(s, ",cifsacl"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) @@ -553,6 +556,11 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",bsize=%u", cifs_sb->bsize); seq_printf(s, ",echo_interval=%lu", tcon->ses->server->echo_interval / HZ); + + /* Only display max_credits if it was overridden on mount */ + if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) + seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); + if (tcon->snapshot_time) seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); if (tcon->handle_timeout) @@ -1148,6 +1156,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, len, flags); free_xid(xid); + + if (rc == -EOPNOTSUPP || rc == -EXDEV) + rc = generic_copy_file_range(src_file, off, dst_file, + destoff, len, flags); return rc; } @@ -1512,11 +1524,9 @@ init_cifs(void) goto out_destroy_dfs_cache; #endif /* CONFIG_CIFS_UPCALL */ -#ifdef CONFIG_CIFS_ACL rc = init_cifs_idmap(); if (rc) goto out_register_key_type; -#endif /* CONFIG_CIFS_ACL */ rc = register_filesystem(&cifs_fs_type); if (rc) @@ -1531,10 +1541,8 @@ init_cifs(void) return 0; out_init_cifs_idmap: -#ifdef CONFIG_CIFS_ACL exit_cifs_idmap(); out_register_key_type: -#endif #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); out_destroy_dfs_cache: @@ -1566,9 +1574,7 @@ exit_cifs(void) unregister_filesystem(&cifs_fs_type); unregister_filesystem(&smb3_fs_type); cifs_dfs_release_automount_timer(); -#ifdef CONFIG_CIFS_ACL exit_cifs_idmap(); -#endif #ifdef CONFIG_CIFS_UPCALL exit_cifs_spnego(); #endif @@ -1590,7 +1596,6 @@ MODULE_DESCRIPTION ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " "also older servers complying with the SNIA CIFS Specification)"); MODULE_VERSION(CIFS_VERSION); -MODULE_SOFTDEP("pre: arc4"); MODULE_SOFTDEP("pre: des"); MODULE_SOFTDEP("pre: ecb"); MODULE_SOFTDEP("pre: hmac"); @@ -1603,5 +1608,6 @@ MODULE_SOFTDEP("pre: sha256"); MODULE_SOFTDEP("pre: sha512"); MODULE_SOFTDEP("pre: aead2"); MODULE_SOFTDEP("pre: ccm"); +MODULE_SOFTDEP("pre: gcm"); module_init(init_cifs) module_exit(exit_cifs) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 334ff5f9c3f3..fe610e7e3670 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -550,6 +550,7 @@ struct smb_vol { bool override_gid:1; bool dynperm:1; bool noperm:1; + bool mode_ace:1; bool no_psx_acl:1; /* set if posix acl support should be disabled */ bool cifs_acl:1; bool backupuid_specified; /* mount option backupuid is specified */ @@ -600,6 +601,7 @@ struct smb_vol { __u64 snapshot_time; /* needed for timewarp tokens */ __u32 handle_timeout; /* persistent and durable handle timeout in ms */ unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */ + __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */ }; /** @@ -617,7 +619,8 @@ struct smb_vol { CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID | \ - CIFS_MOUNT_NO_DFS) + CIFS_MOUNT_UID_FROM_ACL | CIFS_MOUNT_NO_HANDLE_CACHE | \ + CIFS_MOUNT_NO_DFS | CIFS_MOUNT_MODE_FROM_SID) /** * Generic VFS superblock mount flags (s_flags) to consider when @@ -1377,6 +1380,7 @@ struct cifsInodeInfo { struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; + spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ unsigned int epoch; /* used to track lease state changes */ @@ -1780,10 +1784,14 @@ require use of the stronger protocol */ * tcp_ses_lock protects: * list operations on tcp and SMB session lists * tcon->open_file_lock protects the list of open files hanging off the tcon + * inode->open_file_lock protects the openFileList hanging off the inode * cfile->file_info_lock protects counters and fields in cifs file struct * f_owner.lock protects certain per file struct operations * mapping->page_lock protects certain per page operations * + * Note that the cifs_tcon.open_file_lock should be taken before + * not after the cifsInodeInfo.open_file_lock + * * Semaphores * ---------- * sesSem operations on smb session @@ -1865,7 +1873,6 @@ extern unsigned int cifs_min_small; /* min size of small buf pool */ extern unsigned int cifs_max_pending; /* MAX requests at once to server*/ extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */ -#ifdef CONFIG_CIFS_ACL GLOBAL_EXTERN struct rb_root uidtree; GLOBAL_EXTERN struct rb_root gidtree; GLOBAL_EXTERN spinlock_t siduidlock; @@ -1874,7 +1881,6 @@ GLOBAL_EXTERN struct rb_root siduidtree; GLOBAL_EXTERN struct rb_root sidgidtree; GLOBAL_EXTERN spinlock_t uidsidlock; GLOBAL_EXTERN spinlock_t gidsidlock; -#endif /* CONFIG_CIFS_ACL */ void cifs_oplock_break(struct work_struct *work); void cifs_queue_oplock_break(struct cifsFileInfo *cfile); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 1fbd92843a73..e2f95965065d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3600,11 +3600,9 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen, return size; } -static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, +static void convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, const struct posix_acl_xattr_entry *local_ace) { - __u16 rc = 0; /* 0 = ACL converted ok */ - cifs_ace->cifs_e_perm = le16_to_cpu(local_ace->e_perm); cifs_ace->cifs_e_tag = le16_to_cpu(local_ace->e_tag); /* BB is there a better way to handle the large uid? */ @@ -3617,7 +3615,6 @@ static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace, cifs_dbg(FYI, "perm %d tag %d id %d\n", ace->e_perm, ace->e_tag, ace->e_id); */ - return rc; } /* Convert ACL from local Linux POSIX xattr to CIFS POSIX ACL wire format */ @@ -3653,13 +3650,8 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } - for (i = 0; i < count; i++) { - rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]); - if (rc != 0) { - /* ACE not converted */ - break; - } - } + for (i = 0; i < count; i++) + convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]); if (rc == 0) { rc = (__u16)(count * sizeof(struct cifs_posix_ace)); rc += sizeof(struct cifs_posix_acl); @@ -3920,7 +3912,6 @@ GetExtAttrOut: #endif /* CONFIG_POSIX */ -#ifdef CONFIG_CIFS_ACL /* * Initialize NT TRANSACT SMB into small smb request buffer. This assumes that * all NT TRANSACTS that we init here have total parm and data under about 400 @@ -4164,7 +4155,6 @@ setCifsAclRetry: return (rc); } -#endif /* CONFIG_CIFS_ACL */ /* Legacy Query Path Information call for lookup to old servers such as Win9x/WinME */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8c4121da624e..a4830ced0f98 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -96,7 +96,8 @@ enum { Opt_multiuser, Opt_sloppy, Opt_nosharesock, Opt_persistent, Opt_nopersistent, Opt_resilient, Opt_noresilient, - Opt_domainauto, Opt_rdma, + Opt_domainauto, Opt_rdma, Opt_modesid, + Opt_compress, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -175,6 +176,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_serverino, "serverino" }, { Opt_noserverino, "noserverino" }, { Opt_rwpidforward, "rwpidforward" }, + { Opt_modesid, "modefromsid" }, { Opt_cifsacl, "cifsacl" }, { Opt_nocifsacl, "nocifsacl" }, { Opt_acl, "acl" }, @@ -212,6 +214,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_echo_interval, "echo_interval=%s" }, { Opt_max_credits, "max_credits=%s" }, { Opt_snapshot, "snapshot=%s" }, + { Opt_compress, "compress=%s" }, { Opt_blank_user, "user=" }, { Opt_blank_user, "username=" }, @@ -476,6 +479,7 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); server->nr_targets = 1; #ifdef CONFIG_CIFS_DFS_UPCALL + spin_unlock(&GlobalMid_Lock); cifs_sb = find_super_by_tcp(server); if (IS_ERR(cifs_sb)) { rc = PTR_ERR(cifs_sb); @@ -493,6 +497,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__, server->nr_targets); + spin_lock(&GlobalMid_Lock); #endif if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally @@ -704,10 +709,10 @@ static bool server_unresponsive(struct TCP_Server_Info *server) { /* - * We need to wait 2 echo intervals to make sure we handle such + * We need to wait 3 echo intervals to make sure we handle such * situations right: * 1s client sends a normal SMB request - * 2s client gets a response + * 3s client gets a response * 30s echo workqueue job pops, and decides we got a response recently * and don't need to send another * ... @@ -716,9 +721,9 @@ server_unresponsive(struct TCP_Server_Info *server) */ if ((server->tcpStatus == CifsGood || server->tcpStatus == CifsNeedNegotiate) && - time_after(jiffies, server->lstrp + 2 * server->echo_interval)) { + time_after(jiffies, server->lstrp + 3 * server->echo_interval)) { cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n", - server->hostname, (2 * server->echo_interval) / HZ); + server->hostname, (3 * server->echo_interval) / HZ); cifs_reconnect(server); wake_up(&server->response_q); return true; @@ -1221,11 +1226,11 @@ next_pdu: atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", bufs[i], HEADER_SIZE(server)); + smb2_add_credits_from_hdr(bufs[i], server); #ifdef CONFIG_CIFS_DEBUG2 if (server->ops->dump_detail) server->ops->dump_detail(bufs[i], server); - smb2_add_credits_from_hdr(bufs[i], server); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ } @@ -1828,6 +1833,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_rwpidforward: vol->rwpidforward = 1; break; + case Opt_modesid: + vol->mode_ace = 1; + break; case Opt_cifsacl: vol->cifs_acl = 1; break; @@ -1909,6 +1917,11 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_rdma: vol->rdma = true; break; + case Opt_compress: + vol->compression = UNKNOWN_TYPE; + cifs_dbg(VFS, + "SMB3 compression support is experimental\n"); + break; /* Numeric Values */ case Opt_backupuid: @@ -2542,8 +2555,15 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) if (vol->nosharesock) return 0; - /* BB update this for smb3any and default case */ - if ((server->vals != vol->vals) || (server->ops != vol->ops)) + /* If multidialect negotiation see if existing sessions match one */ + if (strcmp(vol->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { + if (server->vals->protocol_id < SMB30_PROT_ID) + return 0; + } else if (strcmp(vol->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + if (server->vals->protocol_id < SMB21_PROT_ID) + return 0; + } else if ((server->vals != vol->vals) || (server->ops != vol->ops)) return 0; if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) @@ -2629,7 +2649,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * @@ -2678,6 +2698,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->sequence_number = 0; tcp_ses->reconnect_instance = 1; tcp_ses->lstrp = jiffies; + tcp_ses->compress_algorithm = cpu_to_le16(volume_info->compression); spin_lock_init(&tcp_ses->req_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); @@ -3458,12 +3479,16 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; + unsigned int oldflags = old->mnt_cifs_flags & CIFS_MOUNT_MASK; + unsigned int newflags = new->mnt_cifs_flags & CIFS_MOUNT_MASK; if ((sb->s_flags & CIFS_MS_MASK) != (mnt_data->flags & CIFS_MS_MASK)) return 0; - if ((old->mnt_cifs_flags & CIFS_MOUNT_MASK) != - (new->mnt_cifs_flags & CIFS_MOUNT_MASK)) + if (old->mnt_cifs_serverino_autodisabled) + newflags &= ~CIFS_MOUNT_SERVER_INUM; + + if (oldflags != newflags) return 0; /* @@ -3963,6 +3988,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL; if (pvolume_info->rwpidforward) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; + if (pvolume_info->mode_ace) + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID; if (pvolume_info->cifs_acl) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; if (pvolume_info->backupuid_specified) { @@ -4457,11 +4484,13 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, - char *full_path) + char *full_path, + int added_treename) { int rc; char *s; char sep, tmp; + int skip = added_treename ? 1 : 0; sep = CIFS_DIR_SEP(cifs_sb); s = full_path; @@ -4476,7 +4505,14 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, /* next separator */ while (*s && *s != sep) s++; - + /* + * if the treename is added, we then have to skip the first + * part within the separators + */ + if (skip) { + skip = 0; + continue; + } /* * temporarily null-terminate the path at the end of * the current component @@ -4524,8 +4560,7 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol, if (rc != -EREMOTE) { rc = cifs_are_all_path_components_accessible(server, xid, tcon, - cifs_sb, - full_path); + cifs_sb, full_path, tcon->Flags & SMB_SHARE_IS_IN_DFS); if (rc != 0) { cifs_dbg(VFS, "cannot query dirs between root and final path, " "enabling CIFS_MOUNT_USE_PREFIX_PATH\n"); diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 85dc89d3a203..1692c0c6c23a 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -132,7 +132,7 @@ static inline void flush_cache_ent(struct dfs_cache_entry *ce) return; hlist_del_init_rcu(&ce->ce_hlist); - kfree(ce->ce_path); + kfree_const(ce->ce_path); free_tgts(ce); dfs_cache_count--; call_rcu(&ce->ce_rcu, free_cache_entry); @@ -422,7 +422,7 @@ alloc_cache_entry(const char *path, const struct dfs_info3_param *refs, rc = copy_ref_data(refs, numrefs, ce, NULL); if (rc) { - kfree(ce->ce_path); + kfree_const(ce->ce_path); kmem_cache_free(dfs_cache_slab, ce); ce = ERR_PTR(rc); } @@ -492,7 +492,7 @@ static struct dfs_cache_entry *__find_cache_entry(unsigned int hash, #ifdef CONFIG_CIFS_DEBUG2 char *name = get_tgt_name(ce); - if (unlikely(IS_ERR(name))) { + if (IS_ERR(name)) { rcu_read_unlock(); return ERR_CAST(name); } diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1e21b2528cfb..534cbba72789 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -77,7 +77,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) goto name_is_IP_address; /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false); + rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len, + NULL, ip_addr, NULL, false); if (rc < 0) cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", __func__, len, len, hostname); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ce9a5be11df5..97090693d182 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -338,10 +338,12 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, atomic_inc(&tcon->num_local_opens); /* if readable file instance put first in list*/ + spin_lock(&cinode->open_file_lock); if (file->f_mode & FMODE_READ) list_add(&cfile->flist, &cinode->openFileList); else list_add_tail(&cfile->flist, &cinode->openFileList); + spin_unlock(&cinode->open_file_lock); spin_unlock(&tcon->open_file_lock); if (fid->purge_cache) @@ -413,7 +415,9 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); /* remove it from the lists */ + spin_lock(&cifsi->open_file_lock); list_del(&cifs_file->flist); + spin_unlock(&cifsi->open_file_lock); list_del(&cifs_file->tlist); atomic_dec(&tcon->num_local_opens); @@ -1950,9 +1954,9 @@ refind_writable: return 0; } - spin_lock(&tcon->open_file_lock); + spin_lock(&cifs_inode->open_file_lock); list_move_tail(&inv_file->flist, &cifs_inode->openFileList); - spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_inode->open_file_lock); cifsFileInfo_put(inv_file); ++refind; inv_file = NULL; @@ -3216,7 +3220,9 @@ cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) } if (rc) { - for (i = 0; i < nr_pages; i++) { + unsigned int nr_page_failed = i; + + for (i = 0; i < nr_page_failed; i++) { put_page(rdata->pages[i]); rdata->pages[i] = NULL; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d7cc62252634..1bffe029fb66 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -892,7 +892,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_dbg(FYI, "cifs_sfu_type failed: %d\n", tmprc); } -#ifdef CONFIG_CIFS_ACL /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { rc = cifs_acl_to_fattr(cifs_sb, &fattr, *inode, full_path, fid); @@ -902,7 +901,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, goto cgii_exit; } } -#endif /* CONFIG_CIFS_ACL */ /* fill in remaining high mode bits e.g. SUID, VTX */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) @@ -2415,7 +2413,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - cifs_dbg(FYI, "setattr on file %pd attrs->iavalid 0x%x\n", + cifs_dbg(FYI, "setattr on file %pd attrs->ia_valid 0x%x\n", direntry, attrs->ia_valid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) @@ -2466,7 +2464,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_GID) gid = attrs->ia_gid; -#ifdef CONFIG_CIFS_ACL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { if (uid_valid(uid) || gid_valid(gid)) { rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, @@ -2478,7 +2475,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) } } } else -#endif /* CONFIG_CIFS_ACL */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); @@ -2489,7 +2485,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (attrs->ia_valid & ATTR_MODE) { mode = attrs->ia_mode; rc = 0; -#ifdef CONFIG_CIFS_ACL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { rc = id_mode_to_cifs_acl(inode, full_path, mode, INVALID_UID, INVALID_GID); @@ -2499,7 +2494,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) goto cifs_setattr_exit; } } else -#endif /* CONFIG_CIFS_ACL */ if (((mode & S_IWUGO) == 0) && (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b1a696a73f7c..f383877a6511 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -539,6 +539,7 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) tcon = cifs_sb_master_tcon(cifs_sb); cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; + cifs_sb->mnt_cifs_serverino_autodisabled = true; cifs_dbg(VFS, "Autodisabling the use of server inode numbers on %s.\n", tcon ? tcon->treeName : "new server"); cifs_dbg(VFS, "The server doesn't seem to support them properly or the files might be on different servers (DFS).\n"); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index fdd908e4a26b..ed92958e842d 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/cifs/netmisc.c * @@ -6,20 +7,6 @@ * * Error mapping routines from Samba libsmb/errormap.c * Copyright (C) Andrew Tridgell 2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/net.h> diff --git a/fs/cifs/nterr.c b/fs/cifs/nterr.c index b6023c646123..358a766375b4 100644 --- a/fs/cifs/nterr.c +++ b/fs/cifs/nterr.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Unix SMB/Netbios implementation. * Version 1.9. * RPC Pipe client / server routines * Copyright (C) Luke Kenneth Casson Leighton 1997-2001. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* NT error codes - see nterr.h */ diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h index 7a0eae5ae7c9..edd4741cab0a 100644 --- a/fs/cifs/nterr.h +++ b/fs/cifs/nterr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Unix SMB/Netbios implementation. Version 1.9. @@ -7,19 +8,6 @@ Copyright (C) Luke Kenneth Casson Leighton 1996-2000 Copyright (C) Paul Ashton 1998-2000 - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index c4e75afa3258..b7421a096319 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * SMB1 (CIFS) version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> @@ -1235,16 +1223,15 @@ struct smb_version_operations smb1_operations = { .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, #endif /* CIFS_XATTR */ -#ifdef CONFIG_CIFS_ACL .get_acl = get_cifs_acl, .get_acl_by_fid = get_cifs_acl_by_fid, .set_acl = set_cifs_acl, -#endif /* CIFS_ACL */ .make_node = cifs_make_node, }; struct smb_version_values smb1_values = { .version_string = SMB1_VERSION_STRING, + .protocol_id = SMB10_PROT_ID, .large_lock_type = LOCKING_ANDX_LARGE_FILES, .exclusive_lock_type = 0, .shared_lock_type = LOCKING_ANDX_SHARED_LOCK, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 278405d26c47..d8d9cdfa30b6 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -120,6 +120,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, SMB2_O_INFO_FILE, 0, sizeof(struct smb2_file_all_info) + PATH_MAX * 2, 0, NULL); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_query_info_compound_enter(xid, ses->Suid, tcon->tid, @@ -147,6 +149,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_rmdir_enter(xid, ses->Suid, tcon->tid, full_path); @@ -163,6 +167,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path); @@ -180,6 +186,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_set_info_compound_enter(xid, ses->Suid, tcon->tid, @@ -206,6 +214,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_rename_enter(xid, ses->Suid, tcon->tid, full_path); @@ -231,6 +241,8 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto finished; smb2_set_next_command(tcon, &rqst[num_rqst]); smb2_set_related(&rqst[num_rqst++]); trace_smb3_hardlink_enter(xid, ses->Suid, tcon->tid, full_path); diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index e32c264e3adb..82ade16c9501 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -457,7 +457,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, "STATUS_ALLOTTED_SPACE_EXCEEDED"}, - {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO, + {STATUS_INSUFFICIENT_RESOURCES, -EAGAIN, "STATUS_INSUFFICIENT_RESOURCES"}, {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e921e6511728..0cdc4e47ca87 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3,19 +3,6 @@ * SMB2 version specific operations * * Copyright (c) 2012, Jeff Layton <jlayton@redhat.com> - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License v2 as published - * by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/pagemap.h> @@ -2040,6 +2027,10 @@ smb2_set_related(struct smb_rqst *rqst) struct smb2_sync_hdr *shdr; shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + if (shdr == NULL) { + cifs_dbg(FYI, "shdr NULL in smb2_set_related\n"); + return; + } shdr->Flags |= SMB2_FLAGS_RELATED_OPERATIONS; } @@ -2054,6 +2045,12 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) unsigned long len = smb_rqst_len(server, rqst); int i, num_padding; + shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); + if (shdr == NULL) { + cifs_dbg(FYI, "shdr NULL in smb2_set_next_command\n"); + return; + } + /* SMB headers in a compound are 8 byte aligned. */ /* No padding needed */ @@ -2093,7 +2090,6 @@ smb2_set_next_command(struct cifs_tcon *tcon, struct smb_rqst *rqst) } finished: - shdr = (struct smb2_sync_hdr *)(rqst->rq_iov[0].iov_base); shdr->NextCommand = cpu_to_le32(len); } @@ -2385,6 +2381,100 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, kfree(dfs_rsp); return rc; } + +static int +parse_reparse_posix(struct reparse_posix_data *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int len; + + /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */ + len = le16_to_cpu(symlink_buf->ReparseDataLength); + + if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) { + cifs_dbg(VFS, "%lld not a supported symlink type\n", + le64_to_cpu(symlink_buf->InodeType)); + return -EOPNOTSUPP; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer, + len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + +static int +parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + unsigned int sub_len; + unsigned int sub_offset; + + /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */ + + sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset); + sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength); + if (sub_offset + 20 > plen || + sub_offset + sub_len + 20 > plen) { + cifs_dbg(VFS, "srv returned malformed symlink buffer\n"); + return -EIO; + } + + *target_path = cifs_strndup_from_utf16( + symlink_buf->PathBuffer + sub_offset, + sub_len, true, cifs_sb->local_nls); + if (!(*target_path)) + return -ENOMEM; + + convert_delimiter(*target_path, '/'); + cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path); + + return 0; +} + +static int +parse_reparse_point(struct reparse_data_buffer *buf, + u32 plen, char **target_path, + struct cifs_sb_info *cifs_sb) +{ + if (plen < sizeof(struct reparse_data_buffer)) { + cifs_dbg(VFS, "reparse buffer is too small. Must be " + "at least 8 bytes but was %d\n", plen); + return -EIO; + } + + if (plen < le16_to_cpu(buf->ReparseDataLength) + + sizeof(struct reparse_data_buffer)) { + cifs_dbg(VFS, "srv returned invalid reparse buf " + "length: %d\n", plen); + return -EIO; + } + + /* See MS-FSCC 2.1.2 */ + switch (le32_to_cpu(buf->ReparseTag)) { + case IO_REPARSE_TAG_NFS: + return parse_reparse_posix( + (struct reparse_posix_data *)buf, + plen, target_path, cifs_sb); + case IO_REPARSE_TAG_SYMLINK: + return parse_reparse_symlink( + (struct reparse_symlink_data_buffer *)buf, + plen, target_path, cifs_sb); + default: + cifs_dbg(VFS, "srv returned unknown symlink buffer " + "tag:0x%08x\n", le32_to_cpu(buf->ReparseTag)); + return -EOPNOTSUPP; + } +} + #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2414,11 +2504,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct kvec close_iov[1]; struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; - char *ioctl_buf; + struct reparse_data_buffer *reparse_buf; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); + *target_path = NULL; + if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2496,17 +2588,21 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == 0) && (is_reparse_point)) { /* See MS-FSCC 2.3.23 */ - ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset); + reparse_buf = (struct reparse_data_buffer *) + ((char *)ioctl_rsp + + le32_to_cpu(ioctl_rsp->OutputOffset)); plen = le32_to_cpu(ioctl_rsp->OutputCount); if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) > rsp_iov[1].iov_len) { - cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen); + cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n", + plen); rc = -EIO; goto querty_exit; } - /* Do stuff with ioctl_buf/plen */ + rc = parse_reparse_point(reparse_buf, plen, target_path, + cifs_sb); goto querty_exit; } @@ -2518,26 +2614,32 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, err_buf = err_iov.iov_base; if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) || err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE) { - rc = -ENOENT; + rc = -EINVAL; + goto querty_exit; + } + + symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData; + if (le32_to_cpu(symlink->SymLinkErrorTag) != SYMLINK_ERROR_TAG || + le32_to_cpu(symlink->ReparseTag) != IO_REPARSE_TAG_SYMLINK) { + rc = -EINVAL; goto querty_exit; } /* open must fail on symlink - reset rc */ rc = 0; - symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData; sub_len = le16_to_cpu(symlink->SubstituteNameLength); sub_offset = le16_to_cpu(symlink->SubstituteNameOffset); print_len = le16_to_cpu(symlink->PrintNameLength); print_offset = le16_to_cpu(symlink->PrintNameOffset); if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) { - rc = -ENOENT; + rc = -EINVAL; goto querty_exit; } if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) { - rc = -ENOENT; + rc = -EINVAL; goto querty_exit; } @@ -2563,7 +2665,6 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -#ifdef CONFIG_CIFS_ACL static struct cifs_ntsd * get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb, const struct cifs_fid *cifsfid, u32 *pacllen) @@ -2648,7 +2749,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, return pntsd; } -#ifdef CONFIG_CIFS_ACL static int set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path, int aclflag) @@ -2706,7 +2806,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, free_xid(xid); return rc; } -#endif /* CIFS_ACL */ /* Retrieve an ACL from the server */ static struct cifs_ntsd * @@ -2726,7 +2825,6 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, cifsFileInfo_put(open_file); return pntsd; } -#endif static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, loff_t offset, loff_t len, bool keep_size) @@ -3324,7 +3422,7 @@ smb2_dir_needs_close(struct cifsFileInfo *cfile) static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, - struct smb_rqst *old_rq) + struct smb_rqst *old_rq, __le16 cipher_type) { struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)old_rq->rq_iov[0].iov_base; @@ -3333,7 +3431,10 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM; tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len); tr_hdr->Flags = cpu_to_le16(0x01); - get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE); + if (cipher_type == SMB2_ENCRYPTION_AES128_GCM) + get_random_bytes(&tr_hdr->Nonce, SMB3_AES128GCM_NONCE); + else + get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CCM_NONCE); memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8); } @@ -3491,8 +3592,13 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, rc = -ENOMEM; goto free_sg; } - iv[0] = 3; - memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES128CMM_NONCE); + + if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) + memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES128GCM_NONCE); + else { + iv[0] = 3; + memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES128CCM_NONCE); + } aead_request_set_crypt(req, sg, sg, crypt_len, iv); aead_request_set_ad(req, assoc_data_len); @@ -3592,7 +3698,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, } /* fill the 1st iov with a transform header */ - fill_transform_hdr(tr_hdr, orig_len, old_rq); + fill_transform_hdr(tr_hdr, orig_len, old_rq, server->cipher_type); rc = crypt_message(server, num_rqst, new_rq, 1); cifs_dbg(FYI, "Encrypt message returned %d\n", rc); @@ -4241,11 +4347,9 @@ struct smb_version_operations smb20_operations = { .query_all_EAs = smb2_query_eas, .set_EA = smb2_set_ea, #endif /* CIFS_XATTR */ -#ifdef CONFIG_CIFS_ACL .get_acl = get_smb2_acl, .get_acl_by_fid = get_smb2_acl_by_fid, .set_acl = set_smb2_acl, -#endif /* CIFS_ACL */ .next_header = smb2_next_header, .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, @@ -4342,11 +4446,9 @@ struct smb_version_operations smb21_operations = { .query_all_EAs = smb2_query_eas, .set_EA = smb2_set_ea, #endif /* CIFS_XATTR */ -#ifdef CONFIG_CIFS_ACL .get_acl = get_smb2_acl, .get_acl_by_fid = get_smb2_acl_by_fid, .set_acl = set_smb2_acl, -#endif /* CIFS_ACL */ .next_header = smb2_next_header, .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, @@ -4452,11 +4554,9 @@ struct smb_version_operations smb30_operations = { .query_all_EAs = smb2_query_eas, .set_EA = smb2_set_ea, #endif /* CIFS_XATTR */ -#ifdef CONFIG_CIFS_ACL .get_acl = get_smb2_acl, .get_acl_by_fid = get_smb2_acl_by_fid, .set_acl = set_smb2_acl, -#endif /* CIFS_ACL */ .next_header = smb2_next_header, .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, @@ -4563,11 +4663,9 @@ struct smb_version_operations smb311_operations = { .query_all_EAs = smb2_query_eas, .set_EA = smb2_set_ea, #endif /* CIFS_XATTR */ -#ifdef CONFIG_CIFS_ACL .get_acl = get_smb2_acl, .get_acl_by_fid = get_smb2_acl_by_fid, .set_acl = set_smb2_acl, -#endif /* CIFS_ACL */ .next_header = smb2_next_header, .ioctl_query_info = smb2_ioctl_query_info, .make_node = smb2_make_node, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 710ceb875161..f58e4dc3987b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -489,10 +489,25 @@ static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt) { pneg_ctxt->ContextType = SMB2_ENCRYPTION_CAPABILITIES; - pneg_ctxt->DataLength = cpu_to_le16(4); /* Cipher Count + le16 cipher */ - pneg_ctxt->CipherCount = cpu_to_le16(1); -/* pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM;*/ /* not supported yet */ - pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_CCM; + pneg_ctxt->DataLength = cpu_to_le16(6); /* Cipher Count + two ciphers */ + pneg_ctxt->CipherCount = cpu_to_le16(2); + pneg_ctxt->Ciphers[0] = SMB2_ENCRYPTION_AES128_GCM; + pneg_ctxt->Ciphers[1] = SMB2_ENCRYPTION_AES128_CCM; +} + +static unsigned int +build_netname_ctxt(struct smb2_netname_neg_context *pneg_ctxt, char *hostname) +{ + struct nls_table *cp = load_nls_default(); + + pneg_ctxt->ContextType = SMB2_NETNAME_NEGOTIATE_CONTEXT_ID; + + /* copy up to max of first 100 bytes of server name to NetName field */ + pneg_ctxt->DataLength = cpu_to_le16(2 + + (2 * cifs_strtoUTF16(pneg_ctxt->NetName, hostname, 100, cp))); + /* context size is DataLength + minimal smb2_neg_context */ + return DIV_ROUND_UP(le16_to_cpu(pneg_ctxt->DataLength) + + sizeof(struct smb2_neg_context), 8) * 8; } static void @@ -521,7 +536,7 @@ build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt) static void assemble_neg_contexts(struct smb2_negotiate_req *req, - unsigned int *total_len) + struct TCP_Server_Info *server, unsigned int *total_len) { char *pneg_ctxt = (char *)req; unsigned int ctxt_len; @@ -551,17 +566,25 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += ctxt_len; pneg_ctxt += ctxt_len; - build_compression_ctxt((struct smb2_compression_capabilities_context *) + if (server->compress_algorithm) { + build_compression_ctxt((struct smb2_compression_capabilities_context *) pneg_ctxt); - ctxt_len = DIV_ROUND_UP( - sizeof(struct smb2_compression_capabilities_context), 8) * 8; + ctxt_len = DIV_ROUND_UP( + sizeof(struct smb2_compression_capabilities_context), + 8) * 8; + *total_len += ctxt_len; + pneg_ctxt += ctxt_len; + req->NegotiateContextCount = cpu_to_le16(5); + } else + req->NegotiateContextCount = cpu_to_le16(4); + + ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, + server->hostname); *total_len += ctxt_len; pneg_ctxt += ctxt_len; build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); - - req->NegotiateContextCount = cpu_to_le16(4); } static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) @@ -829,7 +852,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if ((ses->server->vals->protocol_id == SMB311_PROT_ID) || (strcmp(ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0)) - assemble_neg_contexts(req, &total_len); + assemble_neg_contexts(req, server, &total_len); } iov[0].iov_base = (char *)req; iov[0].iov_len = total_len; @@ -1054,7 +1077,8 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) * not supported error. Client should accept it. */ cifs_dbg(VFS, "Server does not support validate negotiate\n"); - return 0; + rc = 0; + goto out_free_inbuf; } else if (rc != 0) { cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); rc = -EIO; @@ -2094,6 +2118,48 @@ add_twarp_context(struct kvec *iov, unsigned int *num_iovec, __u64 timewarp) return 0; } +static struct crt_query_id_ctxt * +create_query_id_buf(void) +{ + struct crt_query_id_ctxt *buf; + + buf = kzalloc(sizeof(struct crt_query_id_ctxt), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(0); + buf->ccontext.DataLength = cpu_to_le32(0); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct crt_query_id_ctxt, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + /* SMB2_CREATE_QUERY_ON_DISK_ID is "QFid" */ + buf->Name[0] = 'Q'; + buf->Name[1] = 'F'; + buf->Name[2] = 'i'; + buf->Name[3] = 'd'; + return buf; +} + +/* See MS-SMB2 2.2.13.2.9 */ +static int +add_query_id_context(struct kvec *iov, unsigned int *num_iovec) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + iov[num].iov_base = create_query_id_buf(); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct crt_query_id_ctxt); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = cpu_to_le32( + sizeof(struct smb2_create_req) + + iov[num - 1].iov_len); + le32_add_cpu(&req->CreateContextsLength, sizeof(struct crt_query_id_ctxt)); + *num_iovec = num + 1; + return 0; +} + static int alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, const char *treename, const __le16 *path) @@ -2422,6 +2488,12 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock, return rc; } + if (n_iov > 2) { + struct create_context *ccontext = + (struct create_context *)iov[n_iov-1].iov_base; + ccontext->Next = cpu_to_le32(iov[n_iov-1].iov_len); + } + add_query_id_context(iov, &n_iov); rqst->rq_nvec = n_iov; return 0; @@ -2549,12 +2621,11 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, * indatalen is usually small at a couple of bytes max, so * just allocate through generic pool */ - in_data_buf = kmalloc(indatalen, GFP_NOFS); + in_data_buf = kmemdup(in_data, indatalen, GFP_NOFS); if (!in_data_buf) { cifs_small_buf_release(req); return -ENOMEM; } - memcpy(in_data_buf, in_data, indatalen); } req->CtlCode = cpu_to_le32(opcode); @@ -2619,10 +2690,12 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, void SMB2_ioctl_free(struct smb_rqst *rqst) { + int i; if (rqst && rqst->rq_iov) { cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */ - if (rqst->rq_iov[1].iov_len) - kfree(rqst->rq_iov[1].iov_base); + for (i = 1; i < rqst->rq_nvec; i++) + if (rqst->rq_iov[i].iov_base != smb2_padding) + kfree(rqst->rq_iov[i].iov_base); } } @@ -3111,9 +3184,14 @@ void smb2_reconnect_server(struct work_struct *work) tcon_exist = true; } } + /* + * IPC has the same lifetime as its session and uses its + * refcount. + */ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); tcon_exist = true; + ses->ses_count++; } } /* @@ -3132,7 +3210,10 @@ void smb2_reconnect_server(struct work_struct *work) else resched = true; list_del_init(&tcon->rlist); - cifs_put_tcon(tcon); + if (tcon->ipc) + cifs_put_smb_ses(tcon->ses); + else + cifs_put_tcon(tcon); } cifs_dbg(FYI, "Reconnecting tcons finished\n"); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c7d5813bebd8..7e2e782f8edd 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -123,7 +123,7 @@ struct smb2_sync_pdu { __le16 StructureSize2; /* size of wct area (varies, request specific) */ } __packed; -#define SMB3_AES128CMM_NONCE 11 +#define SMB3_AES128CCM_NONCE 11 #define SMB3_AES128GCM_NONCE 12 struct smb2_transform_hdr { @@ -166,6 +166,8 @@ struct smb2_err_rsp { __u8 ErrorData[1]; /* variable length */ } __packed; +#define SYMLINK_ERROR_TAG 0x4c4d5953 + struct smb2_symlink_err_rsp { __le32 SymLinkLength; __le32 SymLinkErrorTag; @@ -227,6 +229,7 @@ struct smb2_negotiate_req { } __packed; /* Dialects */ +#define SMB10_PROT_ID 0x0000 /* local only, not sent on wire w/CIFS negprot */ #define SMB20_PROT_ID 0x0202 #define SMB21_PROT_ID 0x0210 #define SMB30_PROT_ID 0x0300 @@ -293,7 +296,7 @@ struct smb2_encryption_neg_context { __le16 DataLength; __le32 Reserved; __le16 CipherCount; /* AES-128-GCM and AES-128-CCM */ - __le16 Ciphers[1]; /* Ciphers[0] since only one used now */ + __le16 Ciphers[2]; } __packed; /* See MS-SMB2 2.2.3.1.3 */ @@ -316,6 +319,12 @@ struct smb2_compression_capabilities_context { * For smb2_netname_negotiate_context_id See MS-SMB2 2.2.3.1.4. * Its struct simply contains NetName, an array of Unicode characters */ +struct smb2_netname_neg_context { + __le16 ContextType; /* 0x100 */ + __le16 DataLength; + __le32 Reserved; + __le16 NetName[0]; /* hostname of target converted to UCS-2 */ +} __packed; #define POSIX_CTXT_DATA_LEN 16 struct smb2_posix_neg_context { @@ -640,6 +649,7 @@ struct smb2_tree_disconnect_rsp { #define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2 "DH2Q" #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C" #define SMB2_CREATE_APP_INSTANCE_ID 0x45BCA66AEFA7F74A9008FA462E144D74 +#define SMB2_CREATE_APP_INSTANCE_VERSION 0xB982D0B73B56074FA07B524A8116A010 #define SVHDX_OPEN_DEVICE_CONTEX 0x9CCBCF9E04C1E643980E158DA1F6EC83 #define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C @@ -654,9 +664,10 @@ struct smb2_tree_disconnect_rsp { * [3] : durable context * [4] : posix context * [5] : time warp context - * [6] : compound padding + * [6] : query id context + * [7] : compound padding */ -#define SMB2_CREATE_IOV_SIZE 7 +#define SMB2_CREATE_IOV_SIZE 8 struct smb2_create_req { struct smb2_sync_hdr sync_hdr; @@ -680,10 +691,10 @@ struct smb2_create_req { /* * Maximum size of a SMB2_CREATE response is 64 (smb2 header) + - * 88 (fixed part of create response) + 520 (path) + 150 (contexts) + + * 88 (fixed part of create response) + 520 (path) + 208 (contexts) + * 2 bytes of padding. */ -#define MAX_SMB2_CREATE_RESPONSE_SIZE 824 +#define MAX_SMB2_CREATE_RESPONSE_SIZE 880 struct smb2_create_rsp { struct smb2_sync_hdr sync_hdr; @@ -806,6 +817,13 @@ struct durable_reconnect_context_v2 { __le32 Flags; /* see above DHANDLE_FLAG_PERSISTENT */ } __packed; +/* See MS-SMB2 2.2.14.2.9 */ +struct on_disk_id { + __le64 DiskFileId; + __le64 VolumeId; + __u32 Reserved[4]; +} __packed; + /* See MS-SMB2 2.2.14.2.12 */ struct durable_reconnect_context_v2_rsp { __le32 Timeout; @@ -826,6 +844,12 @@ struct crt_twarp_ctxt { } __packed; +/* See MS-SMB2 2.2.13.2.9 */ +struct crt_query_id_ctxt { + struct create_context ccontext; + __u8 Name[8]; +} __packed; + #define COPY_CHUNK_RES_KEY_SIZE 24 struct resume_key_req { char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; @@ -914,7 +938,19 @@ struct reparse_mount_point_data_buffer { __u8 PathBuffer[0]; /* Variable Length */ } __packed; -/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */ +#define SYMLINK_FLAG_RELATIVE 0x00000001 + +struct reparse_symlink_data_buffer { + __le32 ReparseTag; + __le16 ReparseDataLength; + __u16 Reserved; + __le16 SubstituteNameOffset; + __le16 SubstituteNameLength; + __le16 PrintNameOffset; + __le16 PrintNameLength; + __le32 Flags; + __u8 PathBuffer[0]; /* Variable Length */ +} __packed; /* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */ diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index d1181572758b..1ccbcf9c2c3b 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -734,7 +734,10 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server) struct crypto_aead *tfm; if (!server->secmech.ccmaesencrypt) { - tfm = crypto_alloc_aead("ccm(aes)", 0, 0); + if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + else + tfm = crypto_alloc_aead("ccm(aes)", 0, 0); if (IS_ERR(tfm)) { cifs_dbg(VFS, "%s: Failed to alloc encrypt aead\n", __func__); @@ -744,7 +747,10 @@ smb3_crypto_aead_allocate(struct TCP_Server_Info *server) } if (!server->secmech.ccmaesdecrypt) { - tfm = crypto_alloc_aead("ccm(aes)", 0, 0); + if (server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); + else + tfm = crypto_alloc_aead("ccm(aes)", 0, 0); if (IS_ERR(tfm)) { crypto_free_aead(server->secmech.ccmaesencrypt); server->secmech.ccmaesencrypt = NULL; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index caac37b1de8c..cd07e5301d42 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1,17 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2017, Microsoft Corporation. * * Author(s): Long Li <longli@microsoft.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #include <linux/module.h> #include <linux/highmem.h> diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index f6241b8bce5f..6ff880a1e186 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -1,17 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2017, Microsoft Corporation. * * Author(s): Long Li <longli@microsoft.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #ifndef _SMBDIRECT_H #define _SMBDIRECT_H diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index a0b80ac651a6..2b6d87bfdf8e 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Unix SMB/Netbios implementation. Version 1.9. @@ -8,19 +9,6 @@ Copyright (C) Andrew Bartlett <abartlet@samba.org> 2002-2003 Modified by Steve French (sfrench@us.ibm.com) 2002-2003 - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/crypto.h> diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 60661b3f983a..5d6d44bfe10a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -979,6 +979,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, }; unsigned int instance; char *buf; + struct TCP_Server_Info *server; optype = flags & CIFS_OP_MASK; @@ -990,7 +991,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, return -EIO; } - if (ses->server->tcpStatus == CifsExiting) + server = ses->server; + if (server->tcpStatus == CifsExiting) return -ENOENT; /* @@ -1001,7 +1003,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * other requests. * This can be handled by the eventual session reconnect. */ - rc = wait_for_compound_request(ses->server, num_rqst, flags, + rc = wait_for_compound_request(server, num_rqst, flags, &instance); if (rc) return rc; @@ -1017,7 +1019,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * of smb data. */ - mutex_lock(&ses->server->srv_mutex); + mutex_lock(&server->srv_mutex); /* * All the parts of the compound chain belong obtained credits from the @@ -1026,24 +1028,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * we obtained credits and return -EAGAIN in such cases to let callers * handle it. */ - if (instance != ses->server->reconnect_instance) { - mutex_unlock(&ses->server->srv_mutex); + if (instance != server->reconnect_instance) { + mutex_unlock(&server->srv_mutex); for (j = 0; j < num_rqst; j++) - add_credits(ses->server, &credits[j], optype); + add_credits(server, &credits[j], optype); return -EAGAIN; } for (i = 0; i < num_rqst; i++) { - midQ[i] = ses->server->ops->setup_request(ses, &rqst[i]); + midQ[i] = server->ops->setup_request(ses, &rqst[i]); if (IS_ERR(midQ[i])) { - revert_current_mid(ses->server, i); + revert_current_mid(server, i); for (j = 0; j < i; j++) cifs_delete_mid(midQ[j]); - mutex_unlock(&ses->server->srv_mutex); + mutex_unlock(&server->srv_mutex); /* Update # of requests on wire to server */ for (j = 0; j < num_rqst; j++) - add_credits(ses->server, &credits[j], optype); + add_credits(server, &credits[j], optype); return PTR_ERR(midQ[i]); } @@ -1059,19 +1061,19 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, else midQ[i]->callback = cifs_compound_last_callback; } - cifs_in_send_inc(ses->server); - rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); - cifs_in_send_dec(ses->server); + cifs_in_send_inc(server); + rc = smb_send_rqst(server, num_rqst, rqst, flags); + cifs_in_send_dec(server); for (i = 0; i < num_rqst; i++) cifs_save_when_sent(midQ[i]); if (rc < 0) { - revert_current_mid(ses->server, num_rqst); - ses->server->sequence_number -= 2; + revert_current_mid(server, num_rqst); + server->sequence_number -= 2; } - mutex_unlock(&ses->server->srv_mutex); + mutex_unlock(&server->srv_mutex); /* * If sending failed for some reason or it is an oplock break that we @@ -1079,7 +1081,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, */ if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) { for (i = 0; i < num_rqst; i++) - add_credits(ses->server, &credits[i], optype); + add_credits(server, &credits[i], optype); goto out; } @@ -1099,7 +1101,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, rqst[0].rq_nvec); for (i = 0; i < num_rqst; i++) { - rc = wait_for_response(ses->server, midQ[i]); + rc = wait_for_response(server, midQ[i]); if (rc != 0) break; } @@ -1107,7 +1109,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, for (; i < num_rqst; i++) { cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); - send_cancel(ses->server, &rqst[i], midQ[i]); + send_cancel(server, &rqst[i], midQ[i]); spin_lock(&GlobalMid_Lock); if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { midQ[i]->mid_flags |= MID_WAIT_CANCELLED; @@ -1123,7 +1125,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, if (rc < 0) goto out; - rc = cifs_sync_mid_result(midQ[i], ses->server); + rc = cifs_sync_mid_result(midQ[i], server); if (rc != 0) { /* mark this mid as cancelled to not free it below */ cancelled_mid[i] = true; @@ -1140,14 +1142,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, buf = (char *)midQ[i]->resp_buf; resp_iov[i].iov_base = buf; resp_iov[i].iov_len = midQ[i]->resp_buf_size + - ses->server->vals->header_preamble_size; + server->vals->header_preamble_size; if (midQ[i]->large_buf) resp_buf_type[i] = CIFS_LARGE_BUFFER; else resp_buf_type[i] = CIFS_SMALL_BUFFER; - rc = ses->server->ops->check_receive(midQ[i], ses->server, + rc = server->ops->check_receive(midQ[i], server, flags & CIFS_LOG_ERROR); /* mark it so buf will not be freed by cifs_delete_mid */ diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c index 1506d4fddb2c..1a23a1d2ebf9 100644 --- a/fs/cifs/winucase.c +++ b/fs/cifs/winucase.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/cifs/winucase.c * * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * The const tables in this file were converted from the following info * provided by Microsoft: * diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 50ddb795aaeb..9076150758d8 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -96,7 +96,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: { -#ifdef CONFIG_CIFS_ACL struct cifs_ntsd *pacl; if (!value) @@ -117,7 +116,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler, CIFS_I(inode)->time = 0; kfree(pacl); } -#endif /* CONFIG_CIFS_ACL */ break; } @@ -247,7 +245,6 @@ static int cifs_xattr_get(const struct xattr_handler *handler, break; case XATTR_CIFS_ACL: { -#ifdef CONFIG_CIFS_ACL u32 acllen; struct cifs_ntsd *pacl; @@ -270,7 +267,6 @@ static int cifs_xattr_get(const struct xattr_handler *handler, rc = acllen; kfree(pacl); } -#endif /* CONFIG_CIFS_ACL */ break; } diff --git a/fs/coda/Kconfig b/fs/coda/Kconfig index c0e5a7fad06d..ae6759f9594a 100644 --- a/fs/coda/Kconfig +++ b/fs/coda/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CODA_FS tristate "Coda file system support (advanced network fs)" depends on INET diff --git a/fs/coda/Makefile b/fs/coda/Makefile index 1bab69a0d347..78befb8369c9 100644 --- a/fs/coda/Makefile +++ b/fs/coda/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux Coda filesystem routines. # @@ -5,7 +6,8 @@ obj-$(CONFIG_CODA_FS) += coda.o coda-objs := psdev.o cache.o cnode.o inode.o dir.o file.o upcall.o \ - coda_linux.o symlink.o pioctl.o sysctl.o + coda_linux.o symlink.o pioctl.o +coda-$(CONFIG_SYSCTL) += sysctl.o # If you want debugging output, please uncomment the following line. diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 201fc08a8b4f..3b8c4513118f 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -21,7 +21,7 @@ #include <linux/spinlock.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_cache.h" diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 845b5a66952a..06855f6c7902 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -8,8 +8,8 @@ #include <linux/time.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> #include <linux/pagemap.h> +#include "coda_psdev.h" #include "coda_linux.h" static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2) @@ -137,11 +137,6 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) struct inode *inode; unsigned long hash = coda_f2i(fid); - if ( !sb ) { - pr_warn("%s: no sb!\n", __func__); - return NULL; - } - inode = ilookup5(sb, hash, coda_test_inode, fid); if ( !inode ) return NULL; @@ -153,6 +148,16 @@ struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) return inode; } +struct coda_file_info *coda_ftoc(struct file *file) +{ + struct coda_file_info *cfi = file->private_data; + + BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + + return cfi; + +} + /* the CONTROL inode is made without asking attributes from Venus */ struct inode *coda_cnode_makectl(struct super_block *sb) { diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h index d702ba1a2bf9..1763ff95d865 100644 --- a/fs/coda/coda_fs_i.h +++ b/fs/coda/coda_fs_i.h @@ -40,10 +40,9 @@ struct coda_file_info { int cfi_magic; /* magic number */ struct file *cfi_container; /* container file for this cnode */ unsigned int cfi_mapcount; /* nr of times this file is mapped */ + bool cfi_access_intent; /* is access intent supported */ }; -#define CODA_FTOC(file) ((struct coda_file_info *)((file)->private_data)) - /* flags */ #define C_VATTR 0x1 /* Validity of vattr in inode */ #define C_FLUSH 0x2 /* used after a flush */ @@ -54,6 +53,7 @@ struct inode *coda_cnode_make(struct CodaFid *, struct super_block *); struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); struct inode *coda_cnode_makectl(struct super_block *sb); struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); +struct coda_file_info *coda_ftoc(struct file *file); void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); #endif diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index bb0b3e0ed6c2..f82b59c9dd28 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -13,9 +13,19 @@ extern int coda_fake_statfs; void coda_destroy_inodecache(void); int __init coda_init_inodecache(void); int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync); + +#ifdef CONFIG_SYSCTL void coda_sysctl_init(void); void coda_sysctl_clean(void); +#else +static inline void coda_sysctl_init(void) +{ +} +static inline void coda_sysctl_clean(void) +{ +} +#endif #endif /* _CODA_INT_ */ diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index f3d543dd9a98..2e1a5a192074 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -18,7 +18,7 @@ #include <linux/string.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> +#include "coda_psdev.h" #include "coda_linux.h" /* initialize the debugging variables */ @@ -66,6 +66,25 @@ unsigned short coda_flags_to_cflags(unsigned short flags) return coda_flags; } +static struct timespec64 coda_to_timespec64(struct coda_timespec ts) +{ + struct timespec64 ts64 = { + .tv_sec = ts.tv_sec, + .tv_nsec = ts.tv_nsec, + }; + + return ts64; +} + +static struct coda_timespec timespec64_to_coda(struct timespec64 ts64) +{ + struct coda_timespec ts = { + .tv_sec = ts64.tv_sec, + .tv_nsec = ts64.tv_nsec, + }; + + return ts; +} /* utility functions below */ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) @@ -105,11 +124,11 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) if (attr->va_size != -1) inode->i_blocks = (attr->va_size + 511) >> 9; if (attr->va_atime.tv_sec != -1) - inode->i_atime = timespec_to_timespec64(attr->va_atime); + inode->i_atime = coda_to_timespec64(attr->va_atime); if (attr->va_mtime.tv_sec != -1) - inode->i_mtime = timespec_to_timespec64(attr->va_mtime); + inode->i_mtime = coda_to_timespec64(attr->va_mtime); if (attr->va_ctime.tv_sec != -1) - inode->i_ctime = timespec_to_timespec64(attr->va_ctime); + inode->i_ctime = coda_to_timespec64(attr->va_ctime); } @@ -130,12 +149,12 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; - vattr->va_atime.tv_sec = (time_t) -1; - vattr->va_atime.tv_nsec = (time_t) -1; - vattr->va_mtime.tv_sec = (time_t) -1; - vattr->va_mtime.tv_nsec = (time_t) -1; - vattr->va_ctime.tv_sec = (time_t) -1; - vattr->va_ctime.tv_nsec = (time_t) -1; + vattr->va_atime.tv_sec = (int64_t) -1; + vattr->va_atime.tv_nsec = (long) -1; + vattr->va_mtime.tv_sec = (int64_t) -1; + vattr->va_mtime.tv_nsec = (long) -1; + vattr->va_ctime.tv_sec = (int64_t) -1; + vattr->va_ctime.tv_nsec = (long) -1; vattr->va_type = C_VNON; vattr->va_fileid = -1; vattr->va_gen = -1; @@ -175,13 +194,13 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) vattr->va_size = iattr->ia_size; } if ( valid & ATTR_ATIME ) { - vattr->va_atime = timespec64_to_timespec(iattr->ia_atime); + vattr->va_atime = timespec64_to_coda(iattr->ia_atime); } if ( valid & ATTR_MTIME ) { - vattr->va_mtime = timespec64_to_timespec(iattr->ia_mtime); + vattr->va_mtime = timespec64_to_coda(iattr->ia_mtime); } if ( valid & ATTR_CTIME ) { - vattr->va_ctime = timespec64_to_timespec(iattr->ia_ctime); + vattr->va_ctime = timespec64_to_coda(iattr->ia_ctime); } } diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 126155cadfa9..d5ebd36fb2cc 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -59,22 +59,6 @@ void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); void coda_iattr_to_vattr(struct iattr *, struct coda_vattr *); unsigned short coda_flags_to_cflags(unsigned short); -/* sysctl.h */ -void coda_sysctl_init(void); -void coda_sysctl_clean(void); - -#define CODA_ALLOC(ptr, cast, size) do { \ - if (size < PAGE_SIZE) \ - ptr = kzalloc((unsigned long) size, GFP_KERNEL); \ - else \ - ptr = (cast)vzalloc((unsigned long) size); \ - if (!ptr) \ - pr_warn("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ -} while (0) - - -#define CODA_FREE(ptr, size) kvfree((ptr)) - /* inode to cnode access functions */ static inline struct coda_inode_info *ITOC(struct inode *inode) diff --git a/fs/coda/coda_psdev.h b/fs/coda/coda_psdev.h new file mode 100644 index 000000000000..52da08c770b0 --- /dev/null +++ b/fs/coda/coda_psdev.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __CODA_PSDEV_H +#define __CODA_PSDEV_H + +#include <linux/backing-dev.h> +#include <linux/magic.h> +#include <linux/mutex.h> + +#define CODA_PSDEV_MAJOR 67 +#define MAX_CODADEVS 5 /* how many do we allow */ + +struct kstatfs; + +/* messages between coda filesystem in kernel and Venus */ +struct upc_req { + struct list_head uc_chain; + caddr_t uc_data; + u_short uc_flags; + u_short uc_inSize; /* Size is at most 5000 bytes */ + u_short uc_outSize; + u_short uc_opcode; /* copied from data to save lookup */ + int uc_unique; + wait_queue_head_t uc_sleep; /* process' wait queue */ +}; + +#define CODA_REQ_ASYNC 0x1 +#define CODA_REQ_READ 0x2 +#define CODA_REQ_WRITE 0x4 +#define CODA_REQ_ABORT 0x8 + +/* communication pending/processing queues */ +struct venus_comm { + u_long vc_seq; + wait_queue_head_t vc_waitq; /* Venus wait queue */ + struct list_head vc_pending; + struct list_head vc_processing; + int vc_inuse; + struct super_block *vc_sb; + struct mutex vc_mutex; +}; + +static inline struct venus_comm *coda_vcp(struct super_block *sb) +{ + return (struct venus_comm *)((sb)->s_fs_info); +} + +/* upcalls */ +int venus_rootfid(struct super_block *sb, struct CodaFid *fidp); +int venus_getattr(struct super_block *sb, struct CodaFid *fid, + struct coda_vattr *attr); +int venus_setattr(struct super_block *, struct CodaFid *, struct coda_vattr *); +int venus_lookup(struct super_block *sb, struct CodaFid *fid, + const char *name, int length, int *type, + struct CodaFid *resfid); +int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, + kuid_t uid); +int venus_open(struct super_block *sb, struct CodaFid *fid, int flags, + struct file **f); +int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, + const char *name, int length, + struct CodaFid *newfid, struct coda_vattr *attrs); +int venus_create(struct super_block *sb, struct CodaFid *dirfid, + const char *name, int length, int excl, int mode, + struct CodaFid *newfid, struct coda_vattr *attrs); +int venus_rmdir(struct super_block *sb, struct CodaFid *dirfid, + const char *name, int length); +int venus_remove(struct super_block *sb, struct CodaFid *dirfid, + const char *name, int length); +int venus_readlink(struct super_block *sb, struct CodaFid *fid, + char *buffer, int *length); +int venus_rename(struct super_block *sb, struct CodaFid *new_fid, + struct CodaFid *old_fid, size_t old_length, + size_t new_length, const char *old_name, + const char *new_name); +int venus_link(struct super_block *sb, struct CodaFid *fid, + struct CodaFid *dirfid, const char *name, int len ); +int venus_symlink(struct super_block *sb, struct CodaFid *fid, + const char *name, int len, const char *symname, int symlen); +int venus_access(struct super_block *sb, struct CodaFid *fid, int mask); +int venus_pioctl(struct super_block *sb, struct CodaFid *fid, + unsigned int cmd, struct PioctlData *data); +int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out, + size_t nbytes); +int venus_fsync(struct super_block *sb, struct CodaFid *fid); +int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); +int venus_access_intent(struct super_block *sb, struct CodaFid *fid, + bool *access_intent_supported, + size_t count, loff_t ppos, int type); + +/* + * Statistics + */ + +extern struct venus_comm coda_comms[]; +#endif diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 00876ddadb43..ca40c2556ba6 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -23,7 +23,7 @@ #include <linux/uaccess.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_cache.h" @@ -47,8 +47,8 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig int type = 0; if (length > CODA_MAXNAMLEN) { - pr_err("name too long: lookup, %s (%*s)\n", - coda_i2s(dir), (int)length, name); + pr_err("name too long: lookup, %s %zu\n", + coda_i2s(dir), length); return ERR_PTR(-ENAMETOOLONG); } @@ -356,8 +356,7 @@ static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) ino_t ino; int ret; - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + cfi = coda_ftoc(coda_file); host_file = cfi->cfi_container; cii = ITOC(file_inode(coda_file)); @@ -426,8 +425,7 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) struct file *host_file; int ret; - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + cfi = coda_ftoc(coda_file); host_file = cfi->cfi_container; if (host_file->f_op->iterate || host_file->f_op->iterate_shared) { diff --git a/fs/coda/file.c b/fs/coda/file.c index 1cbc1f2298ee..128d63df5bfb 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -20,22 +20,43 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/uaccess.h> +#include <linux/uio.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> - +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_int.h" +struct coda_vm_ops { + atomic_t refcnt; + struct file *coda_file; + const struct vm_operations_struct *host_vm_ops; + struct vm_operations_struct vm_ops; +}; + static ssize_t coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *coda_file = iocb->ki_filp; - struct coda_file_info *cfi = CODA_FTOC(coda_file); + struct inode *coda_inode = file_inode(coda_file); + struct coda_file_info *cfi = coda_ftoc(coda_file); + loff_t ki_pos = iocb->ki_pos; + size_t count = iov_iter_count(to); + ssize_t ret; - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_READ); + if (ret) + goto finish_read; - return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0); + ret = vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0); + +finish_read: + venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_READ_FINISH); + return ret; } static ssize_t @@ -43,13 +64,18 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *coda_file = iocb->ki_filp; struct inode *coda_inode = file_inode(coda_file); - struct coda_file_info *cfi = CODA_FTOC(coda_file); - struct file *host_file; + struct coda_file_info *cfi = coda_ftoc(coda_file); + struct file *host_file = cfi->cfi_container; + loff_t ki_pos = iocb->ki_pos; + size_t count = iov_iter_count(to); ssize_t ret; - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_WRITE); + if (ret) + goto finish_write; - host_file = cfi->cfi_container; file_start_write(host_file); inode_lock(coda_inode); ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0); @@ -58,26 +84,73 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to) coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode); inode_unlock(coda_inode); file_end_write(host_file); + +finish_write: + venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ki_pos, CODA_ACCESS_TYPE_WRITE_FINISH); return ret; } +static void +coda_vm_open(struct vm_area_struct *vma) +{ + struct coda_vm_ops *cvm_ops = + container_of(vma->vm_ops, struct coda_vm_ops, vm_ops); + + atomic_inc(&cvm_ops->refcnt); + + if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->open) + cvm_ops->host_vm_ops->open(vma); +} + +static void +coda_vm_close(struct vm_area_struct *vma) +{ + struct coda_vm_ops *cvm_ops = + container_of(vma->vm_ops, struct coda_vm_ops, vm_ops); + + if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->close) + cvm_ops->host_vm_ops->close(vma); + + if (atomic_dec_and_test(&cvm_ops->refcnt)) { + vma->vm_ops = cvm_ops->host_vm_ops; + fput(cvm_ops->coda_file); + kfree(cvm_ops); + } +} + static int coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) { - struct coda_file_info *cfi; + struct inode *coda_inode = file_inode(coda_file); + struct coda_file_info *cfi = coda_ftoc(coda_file); + struct file *host_file = cfi->cfi_container; + struct inode *host_inode = file_inode(host_file); struct coda_inode_info *cii; - struct file *host_file; - struct inode *coda_inode, *host_inode; - - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); - host_file = cfi->cfi_container; + struct coda_vm_ops *cvm_ops; + loff_t ppos; + size_t count; + int ret; if (!host_file->f_op->mmap) return -ENODEV; - coda_inode = file_inode(coda_file); - host_inode = file_inode(host_file); + if (WARN_ON(coda_file != vma->vm_file)) + return -EIO; + + count = vma->vm_end - vma->vm_start; + ppos = vma->vm_pgoff * PAGE_SIZE; + + ret = venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode), + &cfi->cfi_access_intent, + count, ppos, CODA_ACCESS_TYPE_MMAP); + if (ret) + return ret; + + cvm_ops = kmalloc(sizeof(struct coda_vm_ops), GFP_KERNEL); + if (!cvm_ops) + return -ENOMEM; cii = ITOC(coda_inode); spin_lock(&cii->c_lock); @@ -89,6 +162,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) * the container file on us! */ else if (coda_inode->i_mapping != host_inode->i_mapping) { spin_unlock(&cii->c_lock); + kfree(cvm_ops); return -EBUSY; } @@ -97,7 +171,29 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cfi->cfi_mapcount++; spin_unlock(&cii->c_lock); - return call_mmap(host_file, vma); + vma->vm_file = get_file(host_file); + ret = call_mmap(vma->vm_file, vma); + + if (ret) { + /* if call_mmap fails, our caller will put coda_file so we + * should drop the reference to the host_file that we got. + */ + fput(host_file); + kfree(cvm_ops); + } else { + /* here we add redirects for the open/close vm_operations */ + cvm_ops->host_vm_ops = vma->vm_ops; + if (vma->vm_ops) + cvm_ops->vm_ops = *vma->vm_ops; + + cvm_ops->vm_ops.open = coda_vm_open; + cvm_ops->vm_ops.close = coda_vm_close; + cvm_ops->coda_file = coda_file; + atomic_set(&cvm_ops->refcnt, 1); + + vma->vm_ops = &cvm_ops->vm_ops; + } + return ret; } int coda_open(struct inode *coda_inode, struct file *coda_file) @@ -127,6 +223,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) cfi->cfi_magic = CODA_MAGIC; cfi->cfi_mapcount = 0; cfi->cfi_container = host_file; + /* assume access intents are supported unless we hear otherwise */ + cfi->cfi_access_intent = true; BUG_ON(coda_file->private_data != NULL); coda_file->private_data = cfi; @@ -142,8 +240,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) struct inode *host_inode; int err; - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + cfi = coda_ftoc(coda_file); err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, coda_file->f_cred->fsuid); @@ -185,8 +282,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync) return err; inode_lock(coda_inode); - cfi = CODA_FTOC(coda_file); - BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); + cfi = coda_ftoc(coda_file); host_file = cfi->cfi_container; err = vfs_fsync(host_file, datasync); @@ -207,4 +303,3 @@ const struct file_operations coda_file_operations = { .fsync = coda_fsync, .splice_read = generic_file_splice_read, }; - diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 23f6ebd08e80..321f56e487cb 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -27,7 +27,7 @@ #include <linux/vmalloc.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_cache.h" @@ -236,6 +236,7 @@ static void coda_put_super(struct super_block *sb) vcp->vc_sb = NULL; sb->s_fs_info = NULL; mutex_unlock(&vcp->vc_mutex); + mutex_destroy(&vcp->vc_mutex); pr_info("Bye bye.\n"); } diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index e0c17b7dccce..644d48c12ce8 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -20,8 +20,7 @@ #include <linux/uaccess.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> - +#include "coda_psdev.h" #include "coda_linux.h" /* pioctl ops */ diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index f2bb7985d21c..240669f51eac 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * An implementation of a loadable kernel mode driver providing * multiple kernel/user space bidirectional communications links. * * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. * * Adapted to become the Linux 2.0 Coda pseudo device * Peter Braam <braam@maths.ox.ac.uk> @@ -42,8 +38,7 @@ #include <linux/uaccess.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> - +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_int.h" @@ -104,8 +99,12 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, ssize_t retval = 0, count = 0; int error; + /* make sure there is enough to copy out the (opcode, unique) values */ + if (nbytes < (2 * sizeof(u_int32_t))) + return -EINVAL; + /* Peek at the opcode, uniquefier */ - if (copy_from_user(&hdr, buf, 2 * sizeof(u_long))) + if (copy_from_user(&hdr, buf, 2 * sizeof(u_int32_t))) return -EFAULT; if (DOWNCALL(hdr.opcode)) { @@ -123,17 +122,21 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, hdr.opcode, hdr.unique); nbytes = size; } - CODA_ALLOC(dcbuf, union outputArgs *, nbytes); + dcbuf = kvmalloc(nbytes, GFP_KERNEL); + if (!dcbuf) { + retval = -ENOMEM; + goto out; + } if (copy_from_user(dcbuf, buf, nbytes)) { - CODA_FREE(dcbuf, nbytes); + kvfree(dcbuf); retval = -EFAULT; goto out; } /* what downcall errors does Venus handle ? */ - error = coda_downcall(vcp, hdr.opcode, dcbuf); + error = coda_downcall(vcp, hdr.opcode, dcbuf, nbytes); - CODA_FREE(dcbuf, nbytes); + kvfree(dcbuf); if (error) { pr_warn("%s: coda_downcall error: %d\n", __func__, error); @@ -186,8 +189,11 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, if (req->uc_opcode == CODA_OPEN_BY_FD) { struct coda_open_by_fd_out *outp = (struct coda_open_by_fd_out *)req->uc_data; - if (!outp->oh.result) + if (!outp->oh.result) { outp->fh = fget(outp->fd); + if (!outp->fh) + return -EBADF; + } } wake_up(&req->uc_sleep); @@ -256,7 +262,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, goto out; } - CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); + kvfree(req->uc_data); kfree(req); out: mutex_unlock(&vcp->vc_mutex); @@ -318,7 +324,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) /* Async requests need to be freed here */ if (req->uc_flags & CODA_REQ_ASYNC) { - CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); + kvfree(req->uc_data); kfree(req); continue; } @@ -351,13 +357,13 @@ static const struct file_operations coda_psdev_fops = { .llseek = noop_llseek, }; -static int init_coda_psdev(void) +static int __init init_coda_psdev(void) { int i, err = 0; if (register_chrdev(CODA_PSDEV_MAJOR, "coda", &coda_psdev_fops)) { pr_err("%s: unable to get major %d\n", __func__, CODA_PSDEV_MAJOR); - return -EIO; + return -EIO; } coda_psdev_class = class_create(THIS_MODULE, "coda"); if (IS_ERR(coda_psdev_class)) { @@ -382,7 +388,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -MODULE_VERSION("6.6"); +MODULE_VERSION("7.0"); static int __init init_coda(void) { diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index 202297d156df..8907d0508198 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -17,8 +17,7 @@ #include <linux/pagemap.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> - +#include "coda_psdev.h" #include "coda_linux.h" static int coda_symlink_filler(struct file *file, struct page *page) diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index 0301d45000a8..fda3b702b1c5 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -12,7 +12,6 @@ #include "coda_int.h" -#ifdef CONFIG_SYSCTL static struct ctl_table_header *fs_table_header; static struct ctl_table coda_table[] = { @@ -62,13 +61,3 @@ void coda_sysctl_clean(void) fs_table_header = NULL; } } - -#else -void coda_sysctl_init(void) -{ -} - -void coda_sysctl_clean(void) -{ -} -#endif diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 1175a1722411..eb3b1898da46 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -33,7 +33,7 @@ #include <linux/vfs.h> #include <linux/coda.h> -#include <linux/coda_psdev.h> +#include "coda_psdev.h" #include "coda_linux.h" #include "coda_cache.h" @@ -46,7 +46,7 @@ static void *alloc_upcall(int opcode, int size) { union inputArgs *inp; - CODA_ALLOC(inp, union inputArgs *, size); + inp = kvzalloc(size, GFP_KERNEL); if (!inp) return ERR_PTR(-ENOMEM); @@ -85,7 +85,7 @@ int venus_rootfid(struct super_block *sb, struct CodaFid *fidp) if (!error) *fidp = outp->coda_root.VFid; - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -104,7 +104,7 @@ int venus_getattr(struct super_block *sb, struct CodaFid *fid, if (!error) *attr = outp->coda_getattr.attr; - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -123,7 +123,7 @@ int venus_setattr(struct super_block *sb, struct CodaFid *fid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -153,7 +153,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid, *type = outp->coda_lookup.vtype; } - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -173,7 +173,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -194,7 +194,7 @@ int venus_open(struct super_block *sb, struct CodaFid *fid, if (!error) *fh = outp->coda_open_by_fd.fh; - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -224,7 +224,7 @@ int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, *newfid = outp->coda_mkdir.VFid; } - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -262,7 +262,7 @@ int venus_rename(struct super_block *sb, struct CodaFid *old_fid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -295,7 +295,7 @@ int venus_create(struct super_block *sb, struct CodaFid *dirfid, *newfid = outp->coda_create.VFid; } - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -318,7 +318,7 @@ int venus_rmdir(struct super_block *sb, struct CodaFid *dirfid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -340,7 +340,7 @@ int venus_remove(struct super_block *sb, struct CodaFid *dirfid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -370,7 +370,7 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid, *(buffer + retlen) = '\0'; } - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -398,7 +398,7 @@ int venus_link(struct super_block *sb, struct CodaFid *fid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -433,7 +433,7 @@ int venus_symlink(struct super_block *sb, struct CodaFid *fid, error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -449,7 +449,7 @@ int venus_fsync(struct super_block *sb, struct CodaFid *fid) inp->coda_fsync.VFid = *fid; error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -467,7 +467,7 @@ int venus_access(struct super_block *sb, struct CodaFid *fid, int mask) error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -543,7 +543,7 @@ int venus_pioctl(struct super_block *sb, struct CodaFid *fid, } exit: - CODA_FREE(inp, insize); + kvfree(inp); return error; } @@ -553,7 +553,7 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) union outputArgs *outp; int insize, outsize, error; - insize = max_t(unsigned int, INSIZE(statfs), OUTSIZE(statfs)); + insize = SIZE(statfs); UPARG(CODA_STATFS); error = coda_upcall(coda_vcp(dentry->d_sb), insize, &outsize, inp); @@ -565,10 +565,51 @@ int venus_statfs(struct dentry *dentry, struct kstatfs *sfs) sfs->f_ffree = outp->coda_statfs.stat.f_ffree; } - CODA_FREE(inp, insize); + kvfree(inp); return error; } +int venus_access_intent(struct super_block *sb, struct CodaFid *fid, + bool *access_intent_supported, + size_t count, loff_t ppos, int type) +{ + union inputArgs *inp; + union outputArgs *outp; + int insize, outsize, error; + bool finalizer = + type == CODA_ACCESS_TYPE_READ_FINISH || + type == CODA_ACCESS_TYPE_WRITE_FINISH; + + if (!*access_intent_supported && !finalizer) + return 0; + + insize = SIZE(access_intent); + UPARG(CODA_ACCESS_INTENT); + + inp->coda_access_intent.VFid = *fid; + inp->coda_access_intent.count = count; + inp->coda_access_intent.pos = ppos; + inp->coda_access_intent.type = type; + + error = coda_upcall(coda_vcp(sb), insize, + finalizer ? NULL : &outsize, inp); + + /* + * we have to free the request buffer for synchronous upcalls + * or when asynchronous upcalls fail, but not when asynchronous + * upcalls succeed + */ + if (!finalizer || error) + kvfree(inp); + + /* Chunked access is not supported or an old Coda client */ + if (error == -EOPNOTSUPP) { + *access_intent_supported = false; + error = 0; + } + return error; +} + /* * coda_upcall and coda_downcall routines. */ @@ -598,10 +639,12 @@ static void coda_unblock_signals(sigset_t *old) * has seen them, * - CODA_CLOSE or CODA_RELEASE upcall (to avoid reference count problems) * - CODA_STORE (to avoid data loss) + * - CODA_ACCESS_INTENT (to avoid reference count problems) */ #define CODA_INTERRUPTIBLE(r) (!coda_hard && \ (((r)->uc_opcode != CODA_CLOSE && \ (r)->uc_opcode != CODA_STORE && \ + (r)->uc_opcode != CODA_ACCESS_INTENT && \ (r)->uc_opcode != CODA_RELEASE) || \ (r)->uc_flags & CODA_REQ_READ)) @@ -687,21 +730,25 @@ static int coda_upcall(struct venus_comm *vcp, goto exit; } + buffer->ih.unique = ++vcp->vc_seq; + req->uc_data = (void *)buffer; - req->uc_flags = 0; + req->uc_flags = outSize ? 0 : CODA_REQ_ASYNC; req->uc_inSize = inSize; - req->uc_outSize = *outSize ? *outSize : inSize; - req->uc_opcode = ((union inputArgs *)buffer)->ih.opcode; - req->uc_unique = ++vcp->vc_seq; + req->uc_outSize = (outSize && *outSize) ? *outSize : inSize; + req->uc_opcode = buffer->ih.opcode; + req->uc_unique = buffer->ih.unique; init_waitqueue_head(&req->uc_sleep); - /* Fill in the common input args. */ - ((union inputArgs *)buffer)->ih.unique = req->uc_unique; - /* Append msg to pending queue and poke Venus. */ list_add_tail(&req->uc_chain, &vcp->vc_pending); - wake_up_interruptible(&vcp->vc_waitq); + + if (req->uc_flags & CODA_REQ_ASYNC) { + mutex_unlock(&vcp->vc_mutex); + return 0; + } + /* We can be interrupted while we wait for Venus to process * our request. If the interrupt occurs before Venus has read * the request, we dequeue and return. If it occurs after the @@ -743,20 +790,20 @@ static int coda_upcall(struct venus_comm *vcp, sig_req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); if (!sig_req) goto exit; - CODA_ALLOC((sig_req->uc_data), char *, sizeof(struct coda_in_hdr)); - if (!sig_req->uc_data) { + sig_inputArgs = kvzalloc(sizeof(struct coda_in_hdr), GFP_KERNEL); + if (!sig_inputArgs) { kfree(sig_req); goto exit; } error = -EINTR; - sig_inputArgs = (union inputArgs *)sig_req->uc_data; sig_inputArgs->ih.opcode = CODA_SIGNAL; sig_inputArgs->ih.unique = req->uc_unique; sig_req->uc_flags = CODA_REQ_ASYNC; sig_req->uc_opcode = sig_inputArgs->ih.opcode; sig_req->uc_unique = sig_inputArgs->ih.unique; + sig_req->uc_data = (void *)sig_inputArgs; sig_req->uc_inSize = sizeof(struct coda_in_hdr); sig_req->uc_outSize = sizeof(struct coda_in_hdr); @@ -804,12 +851,44 @@ exit: * * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */ -int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out) +int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out, + size_t nbytes) { struct inode *inode = NULL; struct CodaFid *fid = NULL, *newfid; struct super_block *sb; + /* + * Make sure we have received enough data from the cache + * manager to populate the necessary fields in the buffer + */ + switch (opcode) { + case CODA_PURGEUSER: + if (nbytes < sizeof(struct coda_purgeuser_out)) + return -EINVAL; + break; + + case CODA_ZAPDIR: + if (nbytes < sizeof(struct coda_zapdir_out)) + return -EINVAL; + break; + + case CODA_ZAPFILE: + if (nbytes < sizeof(struct coda_zapfile_out)) + return -EINVAL; + break; + + case CODA_PURGEFID: + if (nbytes < sizeof(struct coda_purgefid_out)) + return -EINVAL; + break; + + case CODA_REPLACE: + if (nbytes < sizeof(struct coda_replace_out)) + return -EINVAL; + break; + } + /* Handle invalidation requests. */ mutex_lock(&vcp->vc_mutex); sb = vcp->vc_sb; @@ -879,4 +958,3 @@ unlock_out: iput(inode); return 0; } - diff --git a/fs/compat.c b/fs/compat.c index 4a0aaaf53217..436d228cf71c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/compat.c * @@ -9,10 +10,6 @@ * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/compat.h> diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 15f6e96b3bd9..b7f9ffa1d5f1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * 32-bit compatibility support for ELF format executables and core dumps. * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * * Red Hat Author: Roland McGrath. * * This file is used in a 64-bit kernel that wants to support 32-bit ELF. diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig index 9febcdefdfdc..272b64456999 100644 --- a/fs/configfs/Kconfig +++ b/fs/configfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CONFIGFS_FS tristate "Userspace-driven configuration filesystem" select SYSFS diff --git a/fs/configfs/Makefile b/fs/configfs/Makefile index 00ffb278e98c..0200498ede27 100644 --- a/fs/configfs/Makefile +++ b/fs/configfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the configfs virtual filesystem # diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index ccc31fa6f1a7..f752d83a9c44 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset:8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * configfs_internal.h - Internal stuff for configfs * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5e7932d668ab..92112915de8e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dir.c - Operations for configfs directories. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * @@ -27,6 +13,7 @@ #undef DEBUG #include <linux/fs.h> +#include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/module.h> #include <linux/slab.h> @@ -58,15 +45,13 @@ static void configfs_d_iput(struct dentry * dentry, if (sd) { /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - /* Coordinate with configfs_attach_attr where will increase - * sd->s_count and update sd->s_dentry to new allocated one. - * Only set sd->dentry to null when this dentry is the only - * sd owner. - * If not do so, configfs_d_iput may run just after - * configfs_attach_attr and set sd->s_dentry to null - * even it's still in use. + /* + * Set sd->s_dentry to null only when this dentry is the one + * that is going to be killed. Otherwise configfs_d_iput may + * run just after configfs_attach_attr and set sd->s_dentry to + * NULL even it's still in use. */ - if (atomic_read(&sd->s_count) <= 2) + if (sd->s_dentry == dentry) sd->s_dentry = NULL; spin_unlock(&configfs_dirent_lock); @@ -1804,6 +1789,7 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(parent), dentry); d_delete(dentry); inode_unlock(d_inode(parent)); @@ -1932,6 +1918,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); d_delete(dentry); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 62580dba3552..61e4db4390a1 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * file.c - operations for regular (text) files. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 28ef9e528853..ab0284321912 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * inode.c - basic inode and dentry operations. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 99d491cd01f9..6e0f1fcb8a5b 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * item.c - library routines for handling generic config items * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on kobject: * kobject is Copyright (c) 2002-2003 Patrick Mochel * diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 13c93715864b..55438dd58189 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * mount.c - operations for initializing and mounting configfs. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index a5c54af861f7..91eac6c55e07 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * symlink.c - operations for configfs symlinks. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index 5933f995309a..c8bebb70a971 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config CRAMFS tristate "Compressed ROM file system support (cramfs)" select ZLIB_INFLATE diff --git a/fs/cramfs/Makefile b/fs/cramfs/Makefile index 92ebb464a725..8c3ed2982419 100644 --- a/fs/cramfs/Makefile +++ b/fs/cramfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux cramfs routines. # diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index f0de238000c0..5fdf24877c17 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO @@ -6,7 +7,6 @@ config FS_ENCRYPTION select CRYPTO_ECB select CRYPTO_XTS select CRYPTO_CTS - select CRYPTO_SHA256 select KEYS help Enable encryption of files and directories. This diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index cb496989a6b6..4f0df5e682e4 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o fscrypto-y := crypto.o fname.o hooks.o keyinfo.o policy.o diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b46021ebde85..82da2510721f 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -33,9 +33,8 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) bio_for_each_segment_all(bv, bio, iter_all) { struct page *page = bv->bv_page; - int ret = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); - + int ret = fscrypt_decrypt_pagecache_blocks(page, bv->bv_len, + bv->bv_offset); if (ret) SetPageError(page); else if (done) @@ -53,9 +52,8 @@ EXPORT_SYMBOL(fscrypt_decrypt_bio); static void completion_pages(struct work_struct *work) { - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; + struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work); + struct bio *bio = ctx->bio; __fscrypt_decrypt_bio(bio, true); fscrypt_release_ctx(ctx); @@ -64,57 +62,29 @@ static void completion_pages(struct work_struct *work) void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) { - INIT_WORK(&ctx->r.work, completion_pages); - ctx->r.bio = bio; - fscrypt_enqueue_decrypt_work(&ctx->r.work); + INIT_WORK(&ctx->work, completion_pages); + ctx->bio = bio; + fscrypt_enqueue_decrypt_work(&ctx->work); } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); -void fscrypt_pullback_bio_page(struct page **page, bool restore) -{ - struct fscrypt_ctx *ctx; - struct page *bounce_page; - - /* The bounce data pages are unmapped. */ - if ((*page)->mapping) - return; - - /* The bounce data page is unmapped. */ - bounce_page = *page; - ctx = (struct fscrypt_ctx *)page_private(bounce_page); - - /* restore control page */ - *page = ctx->w.control_page; - - if (restore) - fscrypt_restore_control_page(bounce_page); -} -EXPORT_SYMBOL(fscrypt_pullback_bio_page); - int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = NULL; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + struct page *ciphertext_page; struct bio *bio; int ret, err = 0; - BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - - ctx = fscrypt_get_ctx(GFP_NOFS); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT); - if (IS_ERR(ciphertext_page)) { - err = PTR_ERR(ciphertext_page); - goto errout; - } + ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); + if (!ciphertext_page) + return -ENOMEM; while (len--) { - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - PAGE_SIZE, 0, GFP_NOFS); + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), ciphertext_page, + blocksize, 0, GFP_NOFS); if (err) goto errout; @@ -124,14 +94,11 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, goto errout; } bio_set_dev(bio, inode->i_sb->s_bdev); - bio->bi_iter.bi_sector = - pblk << (inode->i_sb->s_blocksize_bits - 9); + bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, - inode->i_sb->s_blocksize, 0); - if (ret != inode->i_sb->s_blocksize) { + ret = bio_add_page(bio, ciphertext_page, blocksize, 0); + if (WARN_ON(ret != blocksize)) { /* should never happen! */ - WARN_ON(1); bio_put(bio); err = -EIO; goto errout; @@ -147,7 +114,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, } err = 0; errout: - fscrypt_release_ctx(ctx); + fscrypt_free_bounce_page(ciphertext_page); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 68e2ca4c4af6..45c3d0427fb2 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This contains encryption functions for per-file encryption. * @@ -58,23 +59,16 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work) EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); /** - * fscrypt_release_ctx() - Releases an encryption context - * @ctx: The encryption context to release. + * fscrypt_release_ctx() - Release a decryption context + * @ctx: The decryption context to release. * - * If the encryption context was allocated from the pre-allocated pool, returns - * it to that pool. Else, frees it. - * - * If there's a bounce page in the context, this frees that. + * If the decryption context was allocated from the pre-allocated pool, return + * it to that pool. Else, free it. */ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) { unsigned long flags; - if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) { - mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool); - ctx->w.bounce_page = NULL; - } - ctx->w.control_page = NULL; if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { kmem_cache_free(fscrypt_ctx_cachep, ctx); } else { @@ -86,12 +80,12 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx) EXPORT_SYMBOL(fscrypt_release_ctx); /** - * fscrypt_get_ctx() - Gets an encryption context + * fscrypt_get_ctx() - Get a decryption context * @gfp_flags: The gfp flag for memory allocation * - * Allocates and initializes an encryption context. + * Allocate and initialize a decryption context. * - * Return: A new encryption context on success; an ERR_PTR() otherwise. + * Return: A new decryption context on success; an ERR_PTR() otherwise. */ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) { @@ -99,14 +93,8 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) unsigned long flags; /* - * We first try getting the ctx from a free list because in - * the common case the ctx will have an allocated and - * initialized crypto tfm, so it's probably a worthwhile - * optimization. For the bounce page, we first try getting it - * from the kernel allocator because that's just about as fast - * as getting it from a list and because a cache of free pages - * should generally be a "last resort" option for a filesystem - * to be able to do its job. + * First try getting a ctx from the free list so that we don't have to + * call into the slab allocator. */ spin_lock_irqsave(&fscrypt_ctx_lock, flags); ctx = list_first_entry_or_null(&fscrypt_free_ctxs, @@ -122,11 +110,31 @@ struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) } else { ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; } - ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL; return ctx; } EXPORT_SYMBOL(fscrypt_get_ctx); +struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) +{ + return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); +} + +/** + * fscrypt_free_bounce_page() - free a ciphertext bounce page + * + * Free a bounce page that was allocated by fscrypt_encrypt_pagecache_blocks(), + * or by fscrypt_alloc_bounce_page() directly. + */ +void fscrypt_free_bounce_page(struct page *bounce_page) +{ + if (!bounce_page) + return; + set_page_private(bounce_page, (unsigned long)NULL); + ClearPagePrivate(bounce_page); + mempool_free(bounce_page, fscrypt_bounce_page_pool); +} +EXPORT_SYMBOL(fscrypt_free_bounce_page); + void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { @@ -140,10 +148,11 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); } -int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, - u64 lblk_num, struct page *src_page, - struct page *dest_page, unsigned int len, - unsigned int offs, gfp_t gfp_flags) +/* Encrypt or decrypt a single filesystem block of file contents */ +int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, + u64 lblk_num, struct page *src_page, + struct page *dest_page, unsigned int len, + unsigned int offs, gfp_t gfp_flags) { union fscrypt_iv iv; struct skcipher_request *req = NULL; @@ -153,7 +162,10 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; - BUG_ON(len == 0); + if (WARN_ON_ONCE(len <= 0)) + return -EINVAL; + if (WARN_ON_ONCE(len % FS_CRYPTO_BLOCK_SIZE != 0)) + return -EINVAL; fscrypt_generate_iv(&iv, lblk_num, ci); @@ -185,126 +197,158 @@ int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, return 0; } -struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags) -{ - ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); - if (ctx->w.bounce_page == NULL) - return ERR_PTR(-ENOMEM); - ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL; - return ctx->w.bounce_page; -} - /** - * fscypt_encrypt_page() - Encrypts a page - * @inode: The inode for which the encryption should take place - * @page: The page to encrypt. Must be locked for bounce-page - * encryption. - * @len: Length of data to encrypt in @page and encrypted - * data in returned page. - * @offs: Offset of data within @page and returned - * page holding encrypted data. - * @lblk_num: Logical block number. This must be unique for multiple - * calls with same inode, except when overwriting - * previously written data. - * @gfp_flags: The gfp flag for memory allocation - * - * Encrypts @page using the ctx encryption context. Performs encryption - * either in-place or into a newly allocated bounce page. - * Called on the page write path. + * fscrypt_encrypt_pagecache_blocks() - Encrypt filesystem blocks from a pagecache page + * @page: The locked pagecache page containing the block(s) to encrypt + * @len: Total size of the block(s) to encrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to encrypt. Must be + * a multiple of the filesystem's block size. + * @gfp_flags: Memory allocation flags * - * Bounce page allocation is the default. - * In this case, the contents of @page are encrypted and stored in an - * allocated bounce page. @page has to be locked and the caller must call - * fscrypt_restore_control_page() on the returned ciphertext page to - * release the bounce buffer and the encryption context. + * A new bounce page is allocated, and the specified block(s) are encrypted into + * it. In the bounce page, the ciphertext block(s) will be located at the same + * offsets at which the plaintext block(s) were located in the source page; any + * other parts of the bounce page will be left uninitialized. However, normally + * blocksize == PAGE_SIZE and the whole page is encrypted at once. * - * In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in - * fscrypt_operations. Here, the input-page is returned with its content - * encrypted. + * This is for use by the filesystem's ->writepages() method. * - * Return: A page with the encrypted content on success. Else, an - * error value or NULL. + * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ -struct page *fscrypt_encrypt_page(const struct inode *inode, - struct page *page, - unsigned int len, - unsigned int offs, - u64 lblk_num, gfp_t gfp_flags) +struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, + unsigned int len, + unsigned int offs, + gfp_t gfp_flags) { - struct fscrypt_ctx *ctx; - struct page *ciphertext_page = page; + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + struct page *ciphertext_page; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; int err; - BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0); + if (WARN_ON_ONCE(!PageLocked(page))) + return ERR_PTR(-EINVAL); - if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) { - /* with inplace-encryption we just encrypt the page */ - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page, - ciphertext_page, len, offs, - gfp_flags); - if (err) - return ERR_PTR(err); + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return ERR_PTR(-EINVAL); - return ciphertext_page; - } - - BUG_ON(!PageLocked(page)); - - ctx = fscrypt_get_ctx(gfp_flags); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* The encryption operation will require a bounce page. */ - ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags); - if (IS_ERR(ciphertext_page)) - goto errout; + ciphertext_page = fscrypt_alloc_bounce_page(gfp_flags); + if (!ciphertext_page) + return ERR_PTR(-ENOMEM); - ctx->w.control_page = page; - err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, - page, ciphertext_page, len, offs, - gfp_flags); - if (err) { - ciphertext_page = ERR_PTR(err); - goto errout; + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, + page, ciphertext_page, + blocksize, i, gfp_flags); + if (err) { + fscrypt_free_bounce_page(ciphertext_page); + return ERR_PTR(err); + } } SetPagePrivate(ciphertext_page); - set_page_private(ciphertext_page, (unsigned long)ctx); - lock_page(ciphertext_page); + set_page_private(ciphertext_page, (unsigned long)page); return ciphertext_page; +} +EXPORT_SYMBOL(fscrypt_encrypt_pagecache_blocks); -errout: - fscrypt_release_ctx(ctx); - return ciphertext_page; +/** + * fscrypt_encrypt_block_inplace() - Encrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to encrypt + * @len: Size of block to encrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to encrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * @gfp_flags: Memory allocation flags + * + * Encrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. + * + * Return: 0 on success; -errno on failure + */ +int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num, gfp_t gfp_flags) +{ + return fscrypt_crypt_block(inode, FS_ENCRYPT, lblk_num, page, page, + len, offs, gfp_flags); } -EXPORT_SYMBOL(fscrypt_encrypt_page); +EXPORT_SYMBOL(fscrypt_encrypt_block_inplace); /** - * fscrypt_decrypt_page() - Decrypts a page in-place - * @inode: The corresponding inode for the page to decrypt. - * @page: The page to decrypt. Must be locked in case - * it is a writeback page (FS_CFLG_OWN_PAGES unset). - * @len: Number of bytes in @page to be decrypted. - * @offs: Start of data in @page. - * @lblk_num: Logical block number. + * fscrypt_decrypt_pagecache_blocks() - Decrypt filesystem blocks in a pagecache page + * @page: The locked pagecache page containing the block(s) to decrypt + * @len: Total size of the block(s) to decrypt. Must be a nonzero + * multiple of the filesystem's block size. + * @offs: Byte offset within @page of the first block to decrypt. Must be + * a multiple of the filesystem's block size. * - * Decrypts page in-place using the ctx encryption context. + * The specified block(s) are decrypted in-place within the pagecache page, + * which must still be locked and not uptodate. Normally, blocksize == + * PAGE_SIZE and the whole page is decrypted at once. * - * Called from the read completion callback. + * This is for use by the filesystem's ->readpages() method. * - * Return: Zero on success, non-zero otherwise. + * Return: 0 on success; -errno on failure */ -int fscrypt_decrypt_page(const struct inode *inode, struct page *page, - unsigned int len, unsigned int offs, u64 lblk_num) +int fscrypt_decrypt_pagecache_blocks(struct page *page, unsigned int len, + unsigned int offs) { - if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES)) - BUG_ON(!PageLocked(page)); + const struct inode *inode = page->mapping->host; + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + u64 lblk_num = ((u64)page->index << (PAGE_SHIFT - blockbits)) + + (offs >> blockbits); + unsigned int i; + int err; + + if (WARN_ON_ONCE(!PageLocked(page))) + return -EINVAL; + + if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offs, blocksize))) + return -EINVAL; + + for (i = offs; i < offs + len; i += blocksize, lblk_num++) { + err = fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, + page, blocksize, i, GFP_NOFS); + if (err) + return err; + } + return 0; +} +EXPORT_SYMBOL(fscrypt_decrypt_pagecache_blocks); - return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page, - len, offs, GFP_NOFS); +/** + * fscrypt_decrypt_block_inplace() - Decrypt a filesystem block in-place + * @inode: The inode to which this block belongs + * @page: The page containing the block to decrypt + * @len: Size of block to decrypt. Doesn't need to be a multiple of the + * fs block size, but must be a multiple of FS_CRYPTO_BLOCK_SIZE. + * @offs: Byte offset within @page at which the block to decrypt begins + * @lblk_num: Filesystem logical block number of the block, i.e. the 0-based + * number of the block within the file + * + * Decrypt a possibly-compressed filesystem block that is located in an + * arbitrary page, not necessarily in the original pagecache page. The @inode + * and @lblk_num must be specified, as they can't be determined from @page. + * + * Return: 0 on success; -errno on failure + */ +int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, + unsigned int len, unsigned int offs, + u64 lblk_num) +{ + return fscrypt_crypt_block(inode, FS_DECRYPT, lblk_num, page, page, + len, offs, GFP_NOFS); } -EXPORT_SYMBOL(fscrypt_decrypt_page); +EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); /* * Validate dentries in encrypted directories to make sure we aren't potentially @@ -354,18 +398,6 @@ const struct dentry_operations fscrypt_d_ops = { .d_revalidate = fscrypt_d_revalidate, }; -void fscrypt_restore_control_page(struct page *page) -{ - struct fscrypt_ctx *ctx; - - ctx = (struct fscrypt_ctx *)page_private(page); - set_page_private(page, (unsigned long)NULL); - ClearPagePrivate(page); - unlock_page(page); - fscrypt_release_ctx(ctx); -} -EXPORT_SYMBOL(fscrypt_restore_control_page); - static void fscrypt_destroy(void) { struct fscrypt_ctx *pos, *n; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index eccea3d8f923..00d150ff3033 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -12,7 +12,6 @@ */ #include <linux/scatterlist.h> -#include <linux/ratelimit.h> #include <crypto/skcipher.h> #include "fscrypt_private.h" diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 7da276159593..8978eec9d766 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -94,7 +94,6 @@ typedef enum { } fscrypt_direction_t; #define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 -#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002 static inline bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) @@ -117,14 +116,12 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); -extern int fscrypt_do_page_crypto(const struct inode *inode, - fscrypt_direction_t rw, u64 lblk_num, - struct page *src_page, - struct page *dest_page, - unsigned int len, unsigned int offs, - gfp_t gfp_flags); -extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, - gfp_t gfp_flags); +extern int fscrypt_crypt_block(const struct inode *inode, + fscrypt_direction_t rw, u64 lblk_num, + struct page *src_page, struct page *dest_page, + unsigned int len, unsigned int offs, + gfp_t gfp_flags); +extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 2dc22549d724..c1d6715d88e9 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -1,10 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/crypto/hooks.c * * Encryption hooks for higher-level filesystem operations. */ -#include <linux/ratelimit.h> #include "fscrypt_private.h" /** diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index dcd91a3fbe49..207ebed918c1 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -12,7 +12,6 @@ #include <keys/user-type.h> #include <linux/hashtable.h> #include <linux/scatterlist.h> -#include <linux/ratelimit.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/sha.h> diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index d536889ac31b..4941fe8471ce 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -81,6 +81,8 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) if (ret == -ENODATA) { if (!S_ISDIR(inode->i_mode)) ret = -ENOTDIR; + else if (IS_DEADDIR(inode)) + ret = -ENOENT; else if (!inode->i_sb->s_cop->empty_dir(inode)) ret = -ENOTEMPTY; else @@ -1,17 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/dax.c - Direct Access filesystem code * Copyright (c) 2013-2014 Intel Corporation * Author: Matthew Wilcox <matthew.r.wilcox@intel.com> * Author: Ross Zwisler <ross.zwisler@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include <linux/atomic.h> @@ -132,6 +124,15 @@ static int dax_is_empty_entry(void *entry) } /* + * true if the entry that was found is of a smaller order than the entry + * we were looking for + */ +static bool dax_is_conflict(void *entry) +{ + return entry == XA_RETRY_ENTRY; +} + +/* * DAX page cache entry locking */ struct exceptional_entry_key { @@ -203,11 +204,13 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) * Look up entry in page cache, wait for it to become unlocked if it * is a DAX entry and return it. The caller must subsequently call * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry() - * if it did. + * if it did. The entry returned may have a larger order than @order. + * If @order is larger than the order of the entry found in i_pages, this + * function returns a dax_is_conflict entry. * * Must be called with the i_pages lock held. */ -static void *get_unlocked_entry(struct xa_state *xas) +static void *get_unlocked_entry(struct xa_state *xas, unsigned int order) { void *entry; struct wait_exceptional_entry_queue ewait; @@ -218,6 +221,8 @@ static void *get_unlocked_entry(struct xa_state *xas) for (;;) { entry = xas_find_conflict(xas); + if (dax_entry_order(entry) < order) + return XA_RETRY_ENTRY; if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) || !dax_is_locked(entry)) return entry; @@ -262,7 +267,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) static void put_unlocked_entry(struct xa_state *xas, void *entry) { /* If we were the only waiter woken, wake the next one */ - if (entry) + if (entry && dax_is_conflict(entry)) dax_wake_entry(xas, entry, false); } @@ -469,7 +474,7 @@ void dax_unlock_page(struct page *page, dax_entry_t cookie) * overlap with xarray value entries. */ static void *grab_mapping_entry(struct xa_state *xas, - struct address_space *mapping, unsigned long size_flag) + struct address_space *mapping, unsigned int order) { unsigned long index = xas->xa_index; bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */ @@ -477,20 +482,17 @@ static void *grab_mapping_entry(struct xa_state *xas, retry: xas_lock_irq(xas); - entry = get_unlocked_entry(xas); + entry = get_unlocked_entry(xas, order); if (entry) { + if (dax_is_conflict(entry)) + goto fallback; if (!xa_is_value(entry)) { xas_set_err(xas, EIO); goto out_unlock; } - if (size_flag & DAX_PMD) { - if (dax_is_pte_entry(entry)) { - put_unlocked_entry(xas, entry); - goto fallback; - } - } else { /* trying to grab a PTE entry */ + if (order == 0) { if (dax_is_pmd_entry(entry) && (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))) { @@ -531,7 +533,11 @@ retry: if (entry) { dax_lock_entry(xas, entry); } else { - entry = dax_make_entry(pfn_to_pfn_t(0), size_flag | DAX_EMPTY); + unsigned long flags = DAX_EMPTY; + + if (order > 0) + flags |= DAX_PMD; + entry = dax_make_entry(pfn_to_pfn_t(0), flags); dax_lock_entry(xas, entry); if (xas_error(xas)) goto out_unlock; @@ -602,7 +608,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) if (WARN_ON_ONCE(!xa_is_value(entry))) continue; if (unlikely(dax_is_locked(entry))) - entry = get_unlocked_entry(&xas); + entry = get_unlocked_entry(&xas, 0); if (entry) page = dax_busy_page(entry); put_unlocked_entry(&xas, entry); @@ -629,7 +635,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, void *entry; xas_lock_irq(&xas); - entry = get_unlocked_entry(&xas); + entry = get_unlocked_entry(&xas, 0); if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) goto out; if (!trunc && @@ -728,12 +734,11 @@ static void *dax_insert_entry(struct xa_state *xas, xas_reset(xas); xas_lock_irq(xas); - if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + void *old; + dax_disassociate_entry(entry, mapping, false); dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address); - } - - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* * Only swap our new entry into the page cache if the current * entry is a zero page or an empty entry. If a normal PTE or @@ -742,7 +747,7 @@ static void *dax_insert_entry(struct xa_state *xas, * existing entry is a PMD, we will just leave the PMD in the * tree and dirty it if necessary. */ - void *old = dax_lock_entry(xas, new_entry); + old = dax_lock_entry(xas, new_entry); WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) | DAX_LOCKED)); entry = new_entry; @@ -857,7 +862,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, if (unlikely(dax_is_locked(entry))) { void *old_entry = entry; - entry = get_unlocked_entry(xas); + entry = get_unlocked_entry(xas, 0); /* Entry got punched out / reallocated? */ if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) @@ -1196,7 +1201,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned flags = 0; if (iov_iter_rw(iter) == WRITE) { - lockdep_assert_held_exclusive(&inode->i_rwsem); + lockdep_assert_held_write(&inode->i_rwsem); flags |= IOMAP_WRITE; } else { lockdep_assert_held(&inode->i_rwsem); @@ -1518,7 +1523,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * entry is already in the array, for instance), it will return * VM_FAULT_FALLBACK. */ - entry = grab_mapping_entry(&xas, mapping, DAX_PMD); + entry = grab_mapping_entry(&xas, mapping, PMD_ORDER); if (xa_is_internal(entry)) { result = xa_to_internal(entry); goto fallback; @@ -1667,11 +1672,10 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) vm_fault_t ret; xas_lock_irq(&xas); - entry = get_unlocked_entry(&xas); + entry = get_unlocked_entry(&xas, order); /* Did we race with someone splitting entry or so? */ - if (!entry || - (order == 0 && !dax_is_pte_entry(entry)) || - (order == PMD_ORDER && !dax_is_pmd_entry(entry))) { + if (!entry || dax_is_conflict(entry) || + (order == 0 && !dax_is_pte_entry(entry))) { put_unlocked_entry(&xas, entry); xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, diff --git a/fs/dcache.c b/fs/dcache.c index 8136bda27a1f..f41121e5d1ec 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/dcache.c * @@ -2371,7 +2372,6 @@ EXPORT_SYMBOL(d_hash_and_lookup); void d_delete(struct dentry * dentry) { struct inode *inode = dentry->d_inode; - int isdir = d_is_dir(dentry); spin_lock(&inode->i_lock); spin_lock(&dentry->d_lock); @@ -2386,7 +2386,6 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); } - fsnotify_nameremove(dentry, isdir); } EXPORT_SYMBOL(d_delete); diff --git a/fs/dcookies.c b/fs/dcookies.c index 57bc96435feb..6eeb61100a09 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * dcookies.c * diff --git a/fs/debugfs/Makefile b/fs/debugfs/Makefile index 840c45696668..9c0fe38cfb5e 100644 --- a/fs/debugfs/Makefile +++ b/fs/debugfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only debugfs-objs := inode.o file.o obj-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index ddd708b09fa1..93e4ca6b2ad7 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -997,25 +997,19 @@ static const struct file_operations u32_array_fops = { * @array as data. If the @mode variable is so set it can be read from. * Writing is not supported. Seek within the file is also not supported. * Once array is created its size can not be changed. - * - * The function returns a pointer to dentry on success. If an error occurs, - * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in - * the kernel, the value %ERR_PTR(-ENODEV) will be returned. */ -struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements) +void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, u32 *array, u32 elements) { struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) - return NULL; + return; data->array = array; data->elements = elements; - return debugfs_create_file_unsafe(name, mode, parent, data, - &u32_array_fops); + debugfs_create_file_unsafe(name, mode, parent, data, &u32_array_fops); } EXPORT_SYMBOL_GPL(debugfs_create_u32_array); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index acef14ad53db..042b688ed124 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -2,13 +2,16 @@ /* * inode.c - part of debugfs, a tiny little debug file system * - * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004,2019 Greg Kroah-Hartman <greg@kroah.com> * Copyright (C) 2004 IBM Inc. + * Copyright (C) 2019 Linux Foundation <gregkh@linuxfoundation.org> * * debugfs is for people to use instead of /proc or /sys. * See ./Documentation/core-api/kernel-api.rst for more details. */ +#define pr_fmt(fmt) "debugfs: " fmt + #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> @@ -285,15 +288,17 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) struct dentry *dentry; int error; - pr_debug("debugfs: creating file '%s'\n",name); + pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) return parent; error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); - if (error) + if (error) { + pr_err("Unable to pin filesystem for file '%s'\n", name); return ERR_PTR(error); + } /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super @@ -306,6 +311,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) inode_lock(d_inode(parent)); dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { + if (d_is_dir(dentry)) + pr_err("Directory '%s' with parent '%s' already present!\n", + name, parent->d_name.name); + else + pr_err("File '%s' in directory '%s' already present!\n", + name, parent->d_name.name); dput(dentry); dentry = ERR_PTR(-EEXIST); } @@ -349,8 +360,11 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create file '%s'\n", + name); return failed_creating(dentry); + } inode->i_mode = mode; inode->i_private = data; @@ -511,8 +525,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create directory '%s'\n", + name); return failed_creating(dentry); + } inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &simple_dir_inode_operations; @@ -550,8 +567,11 @@ struct dentry *debugfs_create_automount(const char *name, return dentry; inode = debugfs_get_inode(dentry->d_sb); - if (unlikely(!inode)) + if (unlikely(!inode)) { + pr_err("out of free dentries, can not create automount '%s'\n", + name); return failed_creating(dentry); + } make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; @@ -606,6 +626,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { + pr_err("out of free dentries, can not create symlink '%s'\n", + name); kfree(link); return failed_creating(dentry); } @@ -617,13 +639,10 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, } EXPORT_SYMBOL_GPL(debugfs_create_symlink); -static void __debugfs_remove_file(struct dentry *dentry, struct dentry *parent) +static void __debugfs_file_removed(struct dentry *dentry) { struct debugfs_fsdata *fsd; - simple_unlink(d_inode(parent), dentry); - d_delete(dentry); - /* * Paired with the closing smp_mb() implied by a successful * cmpxchg() in debugfs_file_get(): either @@ -644,16 +663,18 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (simple_positive(dentry)) { dget(dentry); - if (!d_is_reg(dentry)) { - if (d_is_dir(dentry)) - ret = simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); + if (d_is_dir(dentry)) { + ret = simple_rmdir(d_inode(parent), dentry); if (!ret) - d_delete(dentry); + fsnotify_rmdir(d_inode(parent), dentry); } else { - __debugfs_remove_file(dentry, parent); + simple_unlink(d_inode(parent), dentry); + fsnotify_unlink(d_inode(parent), dentry); } + if (!ret) + d_delete(dentry); + if (d_is_reg(dentry)) + __debugfs_file_removed(dentry); dput(dentry); } return ret; diff --git a/fs/devpts/Makefile b/fs/devpts/Makefile index 236696efcbac..66064c8fcb3e 100644 --- a/fs/devpts/Makefile +++ b/fs/devpts/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux /dev/pts virtual filesystem. # diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 553a3f3300ae..beeadca23b05 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- linux-c -*- --------------------------------------------------------- * * * linux/fs/devpts/inode.c * * Copyright 1998-2004 H. Peter Anvin -- All Rights Reserved * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * * ------------------------------------------------------------------------- */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -624,6 +621,7 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_delete(dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/direct-io.c b/fs/direct-io.c index fbe885d68035..ae196784f487 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/direct-io.c * @@ -537,8 +538,8 @@ static struct bio *dio_await_one(struct dio *dio) */ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { - struct bio_vec *bvec; blk_status_t err = bio->bi_status; + bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; if (err) { if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) @@ -547,19 +548,10 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) dio->io_error = -EIO; } - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { + if (dio->is_async && should_dirty) { bio_check_pages_dirty(bio); /* transfers ownership */ } else { - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - if (dio->op == REQ_OP_READ && !PageCompound(page) && - dio->should_dirty) - set_page_dirty_lock(page); - put_page(page); - } + bio_release_pages(bio, should_dirty); bio_put(bio); } return err; diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index e4242c3f8486..f82a4952769d 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only menuconfig DLM tristate "Distributed Lock Manager (DLM)" depends on INET diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 47ee66d70109..283c7b94edda 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h index 757b551c6820..181ad7d20c4d 100644 --- a/fs/dlm/ast.h +++ b/fs/dlm/ast.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 1270551d24e3..3b21082e1b55 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 6041eec886ab..2b471aae4e61 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index fa08448e35dd..d6bbccb0ed15 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ @@ -739,7 +737,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_toss_dentry); } -int dlm_create_debug_file(struct dlm_ls *ls) +void dlm_create_debug_file(struct dlm_ls *ls) { char name[DLM_LOCKSPACE_LEN + 8]; @@ -750,8 +748,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format1_fops); - if (!ls->ls_debug_rsb_dentry) - goto fail; /* format 2 */ @@ -763,8 +759,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format2_fops); - if (!ls->ls_debug_locks_dentry) - goto fail; /* format 3 */ @@ -776,8 +770,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format3_fops); - if (!ls->ls_debug_all_dentry) - goto fail; /* format 4 */ @@ -789,8 +781,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format4_fops); - if (!ls->ls_debug_toss_dentry) - goto fail; memset(name, 0, sizeof(name)); snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name); @@ -800,21 +790,12 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &waiters_fops); - if (!ls->ls_debug_waiters_dentry) - goto fail; - - return 0; - - fail: - dlm_delete_debug_file(ls); - return -ENOMEM; } -int __init dlm_register_debugfs(void) +void __init dlm_register_debugfs(void) { mutex_init(&debug_buf_lock); dlm_root = debugfs_create_dir("dlm", NULL); - return dlm_root ? 0 : -ENOMEM; } void dlm_unregister_debugfs(void) diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index d975851a7e1e..10c36ae1a8f9 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/dir.h b/fs/dlm/dir.h index 417506344456..03844d086be2 100644 --- a/fs/dlm/dir.h +++ b/fs/dlm/dir.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 748e8d59e611..416d9de35679 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ @@ -721,14 +719,14 @@ int dlm_plock_init(void); void dlm_plock_exit(void); #ifdef CONFIG_DLM_DEBUG -int dlm_register_debugfs(void); +void dlm_register_debugfs(void); void dlm_unregister_debugfs(void); -int dlm_create_debug_file(struct dlm_ls *ls); +void dlm_create_debug_file(struct dlm_ls *ls); void dlm_delete_debug_file(struct dlm_ls *ls); #else -static inline int dlm_register_debugfs(void) { return 0; } +static inline void dlm_register_debugfs(void) { } static inline void dlm_unregister_debugfs(void) { } -static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } +static inline void dlm_create_debug_file(struct dlm_ls *ls) { } static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } #endif diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index a928ba008d7d..18d81599522f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index ed8ebd3a8593..456c6ec3ef6f 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index db43b98c4d64..afb8340918b8 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ @@ -160,6 +158,7 @@ static struct attribute *dlm_attrs[] = { &dlm_attr_recover_nodeid.attr, NULL, }; +ATTRIBUTE_GROUPS(dlm); static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -189,7 +188,7 @@ static const struct sysfs_ops dlm_attr_ops = { }; static struct kobj_type dlm_ktype = { - .default_attrs = dlm_attrs, + .default_groups = dlm_groups, .sysfs_ops = &dlm_attr_ops, .release = lockspace_kobj_release, }; diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index f879f87901f8..a78d853b9342 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index c98ad9777ad9..3951d39b9b75 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ @@ -1630,8 +1628,10 @@ static void clean_writequeues(void) static void work_stop(void) { - destroy_workqueue(recv_workqueue); - destroy_workqueue(send_workqueue); + if (recv_workqueue) + destroy_workqueue(recv_workqueue); + if (send_workqueue) + destroy_workqueue(send_workqueue); } static int work_start(void) @@ -1691,13 +1691,17 @@ static void work_flush(void) struct hlist_node *n; struct connection *con; - flush_workqueue(recv_workqueue); - flush_workqueue(send_workqueue); + if (recv_workqueue) + flush_workqueue(recv_workqueue); + if (send_workqueue) + flush_workqueue(send_workqueue); do { ok = 1; foreach_conn(stop_conn); - flush_workqueue(recv_workqueue); - flush_workqueue(send_workqueue); + if (recv_workqueue) + flush_workqueue(recv_workqueue); + if (send_workqueue) + flush_workqueue(send_workqueue); for (i = 0; i < CONN_HASH_SIZE && ok; i++) { hlist_for_each_entry_safe(con, n, &connection_hash[i], list) { diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 67462e54fc2f..687b2894e469 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/lvb_table.h b/fs/dlm/lvb_table.h index cc3e92f3feef..09052d967174 100644 --- a/fs/dlm/lvb_table.h +++ b/fs/dlm/lvb_table.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 8e1b618891be..afc66a1346d3 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ @@ -37,9 +35,7 @@ static int __init init_dlm(void) if (error) goto out_lockspace; - error = dlm_register_debugfs(); - if (error) - goto out_config; + dlm_register_debugfs(); error = dlm_user_init(); if (error) @@ -63,7 +59,6 @@ static int __init init_dlm(void) dlm_user_exit(); out_debug: dlm_unregister_debugfs(); - out_config: dlm_config_exit(); out_lockspace: dlm_lockspace_exit(); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 0bc43b35d2c5..7ad83deb4505 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/member.h b/fs/dlm/member.h index 3deb70661c69..433b2fac9f4a 100644 --- a/fs/dlm/member.h +++ b/fs/dlm/member.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 37be29f21d04..5918f4d39586 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h index 177c11cbb0a6..4f218ea4b187 100644 --- a/fs/dlm/memory.h +++ b/fs/dlm/memory.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index f3396c622aec..921322d133e3 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h index 95852a5f111d..2e122e81c8d0 100644 --- a/fs/dlm/midcomms.h +++ b/fs/dlm/midcomms.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index d8e27defa89f..e7f550327d5d 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. */ #include <net/genetlink.h> diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index c7d5a2ea3d03..c38b2b8ffd1d 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/fs.h> diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 70c625999d36..e3d9f72c640d 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index 206723ab744d..454d3c4814ab 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index ce2aa54ca2e2..8928e99dfd47 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/recover.h b/fs/dlm/recover.h index d8c8738c70eb..235e0d25cd48 100644 --- a/fs/dlm/recover.h +++ b/fs/dlm/recover.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 6f4e1d42d733..85e245392715 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 8856079733fa..d1944dc5f9e6 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h @@ -1,12 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 1695f1b0dd45..e89e0ff8bfa3 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 10ce449b77da..4e403469a845 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 3c84c62dadb7..5264bac75115 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. */ #include <linux/miscdevice.h> diff --git a/fs/dlm/user.h b/fs/dlm/user.h index 00499ab8835f..6b9bce6b96e0 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h @@ -1,9 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. */ #ifndef __USER_DOT_H__ diff --git a/fs/dlm/util.c b/fs/dlm/util.c index e36520af7cc0..cfd0d00b19ae 100644 --- a/fs/dlm/util.c +++ b/fs/dlm/util.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/dlm/util.h b/fs/dlm/util.h index 2b25915161c0..cc719ca9397e 100644 --- a/fs/dlm/util.h +++ b/fs/dlm/util.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** ******************************************************************************* ** ** Copyright (C) 2005 Red Hat, Inc. All rights reserved. ** -** This copyrighted material is made available to anyone wishing to use, -** modify, copy, or redistribute it subject to the terms and conditions -** of the GNU General Public License v.2. ** ******************************************************************************* ******************************************************************************/ diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 434aa313f077..522c35d5292b 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config ECRYPT_FS tristate "eCrypt filesystem layer support" depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n) diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 49678a69947d..4f2cc5b2542d 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux eCryptfs # diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 491cf5baa8c2..f91db24bbf3b 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -6,21 +7,6 @@ * Copyright (C) 2004-2007 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Michael C. Thompson <mcthomps@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <crypto/hash.h> @@ -37,6 +23,7 @@ #include <linux/slab.h> #include <asm/unaligned.h> #include <linux/kernel.h> +#include <linux/xattr.h> #include "ecryptfs_kernel.h" #define DECRYPT 0 @@ -874,13 +861,10 @@ static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = { * @crypt_stat: The cryptographic context * @page_virt: Source data to be parsed * @bytes_read: Updated with the number of bytes read - * - * Returns zero on success; non-zero if the flag set is invalid */ -static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, +static void ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, char *page_virt, int *bytes_read) { - int rc = 0; int i; u32 flags; @@ -893,7 +877,6 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat, /* Version is in top 8 bits of the 32-bit flag vector */ crypt_stat->file_version = ((flags >> 24) & 0xFF); (*bytes_read) = 4; - return rc; } /** @@ -1018,8 +1001,10 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode) rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, inode); - if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) - return rc >= 0 ? -EINVAL : rc; + if (rc < 0) + return rc; + else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return -EINVAL; rc = ecryptfs_validate_marker(marker); if (!rc) ecryptfs_i_size_init(file_size, inode); @@ -1129,9 +1114,21 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, char *page_virt, size_t size) { int rc; + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); + struct inode *lower_inode = d_inode(lower_dentry); + + if (!(lower_inode->i_opflags & IOP_XATTR)) { + rc = -EOPNOTSUPP; + goto out; + } - rc = ecryptfs_setxattr(ecryptfs_dentry, ecryptfs_inode, - ECRYPTFS_XATTR_NAME, page_virt, size, 0); + inode_lock(lower_inode); + rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME, + page_virt, size, 0); + if (!rc && ecryptfs_inode) + fsstack_copy_attr_all(ecryptfs_inode, lower_inode); + inode_unlock(lower_inode); +out: return rc; } @@ -1305,12 +1302,7 @@ static int ecryptfs_read_headers_virt(char *page_virt, if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED)) ecryptfs_i_size_init(page_virt, d_inode(ecryptfs_dentry)); offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; - rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset), - &bytes_read); - if (rc) { - ecryptfs_printk(KERN_WARNING, "Error processing flags\n"); - goto out; - } + ecryptfs_process_flags(crypt_stat, (page_virt + offset), &bytes_read); if (crypt_stat->file_version > ECRYPTFS_SUPPORTED_FILE_VERSION) { ecryptfs_printk(KERN_WARNING, "File version is [%d]; only " "file version [%d] is supported by this " @@ -1381,8 +1373,10 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, ecryptfs_inode_to_lower(inode), ECRYPTFS_XATTR_NAME, file_size, ECRYPTFS_SIZE_AND_MARKER_BYTES); - if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) - return rc >= 0 ? -EINVAL : rc; + if (rc < 0) + return rc; + else if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return -EINVAL; rc = ecryptfs_validate_marker(marker); if (!rc) ecryptfs_i_size_init(file_size, inode); diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c index 3d2bdf546ec6..1f65e99f9a41 100644 --- a/fs/ecryptfs/debug.c +++ b/fs/ecryptfs/debug.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * Functions only useful for debugging. * * Copyright (C) 2006 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include "ecryptfs_kernel.h" @@ -97,25 +83,9 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok) */ void ecryptfs_dump_hex(char *data, int bytes) { - int i = 0; - int add_newline = 1; - if (ecryptfs_verbosity < 1) return; - if (bytes != 0) { - printk(KERN_DEBUG "0x%.2x.", (unsigned char)data[i]); - i++; - } - while (i < bytes) { - printk("0x%.2x.", (unsigned char)data[i]); - i++; - if (i % 16 == 0) { - printk("\n"); - add_newline = 0; - } else - add_newline = 1; - } - if (add_newline) - printk("\n"); -} + print_hex_dump(KERN_DEBUG, "ecryptfs: ", DUMP_PREFIX_OFFSET, 16, 1, + data, bytes, false); +} diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 63cd2c147221..44606f079efb 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -5,21 +6,6 @@ * Copyright (C) 2001-2003 Stony Brook University * Copyright (C) 2004-2006 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/dcache.h> diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index e74cb2a0b299..1c1a56be7ea2 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /** * eCryptfs: Linux filesystem encryption layer * Kernel declarations. @@ -8,21 +9,6 @@ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Trevor S. Highland <trevor.highland@gmail.com> * Tyler Hicks <tyhicks@ou.edu> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #ifndef ECRYPTFS_KERNEL_H diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b76a9853325e..feecb57defa7 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -6,21 +7,6 @@ * Copyright (C) 2004-2007 International Business Machines Corp. * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> * Michael C. Thompson <mcthomps@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/file.h> diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5c36ceecb5c1..18426f4855f1 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -6,21 +7,6 @@ * Copyright (C) 2004-2007 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Michael C. Thompsion <mcthomps@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/file.h> @@ -1135,7 +1121,7 @@ static int ecryptfs_xattr_set(const struct xattr_handler *handler, } } -const struct xattr_handler ecryptfs_xattr_handler = { +static const struct xattr_handler ecryptfs_xattr_handler = { .prefix = "", /* match anything */ .get = ecryptfs_xattr_get, .set = ecryptfs_xattr_set, diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 90fbac5d485b..216fbe6a4837 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * In-kernel key management code. Includes functions to parse and @@ -8,21 +9,6 @@ * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> * Michael C. Thompson <mcthomps@us.ibm.com> * Trevor S. Highland <trevor.highland@gmail.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <crypto/hash.h> @@ -1062,8 +1048,9 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, "rc = [%d]\n", __func__, rc); goto out_free_unlock; } - while (s->decrypted_filename[s->i] != '\0' - && s->i < s->block_aligned_filename_size) + + while (s->i < s->block_aligned_filename_size && + s->decrypted_filename[s->i] != '\0') s->i++; if (s->i == s->block_aligned_filename_size) { printk(KERN_WARNING "%s: Invalid tag 70 packet; could not " @@ -1625,9 +1612,9 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, int rc = 0; (*auth_tok_key) = request_key(&key_type_user, sig, NULL); - if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { + if (IS_ERR(*auth_tok_key)) { (*auth_tok_key) = ecryptfs_get_encrypted_key(sig); - if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { + if (IS_ERR(*auth_tok_key)) { printk(KERN_ERR "Could not find key with description: [%s]\n", sig); rc = process_request_key_err(PTR_ERR(*auth_tok_key)); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index e00d45af84ea..a7c903cb01a0 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * * Copyright (C) 2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/kthread.h> diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 025d66a705db..b8a7ce379ffe 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -7,21 +8,6 @@ * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Michael C. Thompson <mcthomps@us.ibm.com> * Tyler Hicks <tyhicks@ou.edu> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/dcache.h> diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 9fdd5bcf4564..d668e60b85b5 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /** * eCryptfs: Linux filesystem encryption layer * * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> * Tyler Hicks <tyhicks@ou.edu> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/sched.h> #include <linux/slab.h> diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 2d1158e5f950..742ece22c1d4 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -1,22 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /** * eCryptfs: Linux filesystem encryption layer * * Copyright (C) 2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index cdf358b209d9..cffa0c1ec829 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * This is where eCryptfs coordinates the symmetric encryption and @@ -8,21 +9,6 @@ * Copyright (C) 2001-2003 Stony Brook University * Copyright (C) 2004-2007 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/pagemap.h> diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index c596e7c03424..0438997ac9d8 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * * Copyright (C) 2007 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index c3e511f2b6c0..6b1853f1c06a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * eCryptfs: Linux filesystem encryption layer * @@ -6,21 +7,6 @@ * Copyright (C) 2004-2006 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Michael C. Thompson <mcthomps@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. */ #include <linux/fs.h> diff --git a/fs/efivarfs/Kconfig b/fs/efivarfs/Kconfig index c2499ef174a2..edec8a19c894 100644 --- a/fs/efivarfs/Kconfig +++ b/fs/efivarfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config EFIVAR_FS tristate "EFI Variable filesystem" depends on EFI diff --git a/fs/efivarfs/Makefile b/fs/efivarfs/Makefile index 955d478177d5..0b1c5e63eb71 100644 --- a/fs/efivarfs/Makefile +++ b/fs/efivarfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the efivarfs filesystem # diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8e568428c88b..e9e27a271af0 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> @@ -110,16 +107,22 @@ out_free: return size; } -static int -efivarfs_ioc_getxflags(struct file *file, void __user *arg) +static inline unsigned int efivarfs_getflags(struct inode *inode) { - struct inode *inode = file->f_mapping->host; unsigned int i_flags; unsigned int flags = 0; i_flags = inode->i_flags; if (i_flags & S_IMMUTABLE) flags |= FS_IMMUTABLE_FL; + return flags; +} + +static int +efivarfs_ioc_getxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int flags = efivarfs_getflags(inode); if (copy_to_user(arg, &flags, sizeof(flags))) return -EFAULT; @@ -132,6 +135,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) struct inode *inode = file->f_mapping->host; unsigned int flags; unsigned int i_flags = 0; + unsigned int oldflags = efivarfs_getflags(inode); int error; if (!inode_owner_or_capable(inode)) @@ -143,9 +147,6 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) if (flags & ~FS_IMMUTABLE_FL) return -EOPNOTSUPP; - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - if (flags & FS_IMMUTABLE_FL) i_flags |= S_IMMUTABLE; @@ -155,12 +156,16 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) return error; inode_lock(inode); + + error = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (error) + goto out; + inode_set_flags(inode, i_flags, S_IMMUTABLE); +out: inode_unlock(inode); - mnt_drop_write_file(file); - - return 0; + return error; } static long diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 8c6ab6c95727..96c0c86f3fff 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/efi.h> diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b4505188e799..30ae44cb7453 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef EFIVAR_FS_INTERNAL_H #define EFIVAR_FS_INTERNAL_H diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 124f1aa9ab03..fa4f6447ddad 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Red Hat, Inc. * Copyright (C) 2012 Jeremy Kerr <jeremy.kerr@canonical.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/ctype.h> diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig index d020e3c30fea..2df1bac8b375 100644 --- a/fs/efs/Kconfig +++ b/fs/efs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config EFS_FS tristate "EFS file system support (read only)" depends on BLOCK diff --git a/fs/efs/Makefile b/fs/efs/Makefile index 963543d46f0d..85e5b88f9471 100644 --- a/fs/efs/Makefile +++ b/fs/efs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux efs-filesystem routines. # diff --git a/fs/efs/inode.c b/fs/efs/inode.c index cdf0872382af..89e73a6f0d36 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * inode.c * diff --git a/fs/eventfd.c b/fs/eventfd.c index 93b1fa7bb298..8aa0ea8c55e8 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/eventfd.c * diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4a0e98d87fcc..d7f1f5011fac 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,14 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/eventpoll.c (Efficient event retrieval implementation) * Copyright (C) 2001,...,2009 Davide Libenzi * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * * Davide Libenzi <davidel@xmailserver.org> - * */ #include <linux/init.h> @@ -296,7 +291,7 @@ static LIST_HEAD(tfile_check_list); #include <linux/sysctl.h> -static long zero; +static long long_zero; static long long_max = LONG_MAX; struct ctl_table epoll_table[] = { @@ -306,7 +301,7 @@ struct ctl_table epoll_table[] = { .maxlen = sizeof(max_user_watches), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &long_zero, .extra2 = &long_max, }, { } @@ -2318,19 +2313,17 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, size_t, sigsetsize) { int error; - sigset_t ksigmask, sigsaved; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ - error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + error = set_user_sigmask(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, timeout); - - restore_user_sigmask(sigmask, &sigsaved); + restore_saved_sigmask_unless(error == -EINTR); return error; } @@ -2343,19 +2336,17 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, compat_size_t, sigsetsize) { long err; - sigset_t ksigmask, sigsaved; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ - err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + err = set_compat_user_sigmask(sigmask, sigsetsize); if (err) return err; err = do_epoll_wait(epfd, events, maxevents, timeout); - - restore_user_sigmask(sigmask, &sigsaved); + restore_saved_sigmask_unless(err == -EINTR); return err; } diff --git a/fs/exec.c b/fs/exec.c index d88584ebf07f..c71cbfe6826a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/exec.c * @@ -1662,7 +1663,7 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval < 0 && !bprm->mm) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return retval; } if (retval != -ENOEXEC || !bprm->file) { diff --git a/fs/exportfs/Makefile b/fs/exportfs/Makefile index d7c5d4ddb34b..a04a8af83efd 100644 --- a/fs/exportfs/Makefile +++ b/fs/exportfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the filesystem export support routines. diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c69927bed4ef..f0e549783caf 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Neil Brown 2002 * Copyright (C) Christoph Hellwig 2007 diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig index 894e4c53d1d2..54eec9185627 100644 --- a/fs/ext2/Kconfig +++ b/fs/ext2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config EXT2_FS tristate "Second extended fs support" help diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 33db13365c5e..547c165299c0 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -1197,7 +1197,7 @@ static int ext2_has_free_blocks(struct ext2_sb_info *sbi) /* * Returns 1 if the passed-in block region is valid; 0 if some part overlaps - * with filesystem metadata blocksi. + * with filesystem metadata blocks. */ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, unsigned int count) @@ -1212,7 +1212,6 @@ int ext2_data_block_valid(struct ext2_sb_info *sbi, ext2_fsblk_t start_blk, (start_blk + count >= sbi->s_sb_block)) return 0; - return 1; } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index a0c5ea91fcd4..fda7d3f5b4be 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -172,9 +172,7 @@ static void ext2_preread_inode(struct inode *inode) struct backing_dev_info *bdi; bdi = inode_to_bdi(inode); - if (bdi_read_congested(bdi)) - return; - if (bdi_write_congested(bdi)) + if (bdi_rw_congested(bdi)) return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); @@ -511,6 +509,7 @@ repeat_in_this_group: /* * Scanned all blockgroups. */ + brelse(bitmap_bh); err = -ENOSPC; goto fail; got: diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e474127dd255..7004ce581a32 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1400,7 +1400,7 @@ void ext2_set_file_ops(struct inode *inode) struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct ext2_inode *raw_inode; struct inode *inode; long ret = -EIO; @@ -1446,7 +1446,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) */ if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ - brelse (bh); ret = -ESTALE; goto bad_inode; } @@ -1463,7 +1462,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) !ext2_data_block_valid(EXT2_SB(sb), ei->i_file_acl, 1)) { ext2_error(sb, "ext2_iget", "bad extended attribute block %u", ei->i_file_acl); - brelse(bh); ret = -EFSCORRUPTED; goto bad_inode; } @@ -1526,6 +1524,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) return inode; bad_inode: + brelse(bh); iget_failed(inode); return ERR_PTR(ret); } @@ -1640,7 +1639,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) } int ext2_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_falgs) + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct ext2_inode_info *ei = EXT2_I(inode); diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 0367c0039e68..1b853fb0b163 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -60,18 +60,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } oldflags = ei->i_flags; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); - ret = -EPERM; - goto setflags_out; - } + ret = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (ret) { + inode_unlock(inode); + goto setflags_out; } flags = flags & EXT2_FL_USER_MODIFIABLE; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3988633789cb..44eb6e7eb492 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/ext2/super.c * @@ -302,16 +303,16 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); - if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); - if (sbi->s_mount_opt & EXT2_MOUNT_XIP) + if (test_opt(sb, XIP)) seq_puts(seq, ",xip"); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) + if (test_opt(sb, DAX)) seq_puts(seq, ",dax"); if (!test_opt(sb, RESERVATION)) @@ -934,8 +935,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_resgid = opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? - SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); sb->s_iflags |= SB_I_CGROUPWB; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && @@ -966,11 +966,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { + if (test_opt(sb, DAX)) { if (!bdev_dax_supported(sb->s_bdev, blocksize)) { ext2_msg(sb, KERN_ERR, "DAX unsupported by block device. Turning off DAX."); - sbi->s_mount_opt &= ~EXT2_MOUNT_DAX; + clear_opt(sbi->s_mount_opt, DAX); } } @@ -1403,7 +1403,7 @@ out_set: sbi->s_resuid = new_opts.s_resuid; sbi->s_resgid = new_opts.s_resgid; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); spin_unlock(&sbi->s_lock); return 0; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 1e33e0ac8cf1..79369c13cc55 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -134,6 +134,53 @@ ext2_xattr_handler(int name_index) return handler; } +static bool +ext2_xattr_header_valid(struct ext2_xattr_header *header) +{ + if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || + header->h_blocks != cpu_to_le32(1)) + return false; + + return true; +} + +static bool +ext2_xattr_entry_valid(struct ext2_xattr_entry *entry, + char *end, size_t end_offs) +{ + struct ext2_xattr_entry *next; + size_t size; + + next = EXT2_XATTR_NEXT(entry); + if ((char *)next >= end) + return false; + + if (entry->e_value_block != 0) + return false; + + size = le32_to_cpu(entry->e_value_size); + if (size > end_offs || + le16_to_cpu(entry->e_value_offs) + size > end_offs) + return false; + + return true; +} + +static int +ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name, + struct ext2_xattr_entry *entry) +{ + int cmp; + + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + + return cmp; +} + /* * ext2_xattr_get() * @@ -152,7 +199,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, struct ext2_xattr_entry *entry; size_t name_len, size; char *end; - int error; + int error, not_found; struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode); ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", @@ -176,9 +223,9 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_get", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -188,29 +235,25 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = - EXT2_XATTR_NEXT(entry); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - if (name_index == entry->e_name_index && - name_len == entry->e_name_len && - memcmp(name, entry->e_name, name_len) == 0) + + not_found = ext2_xattr_cmp_entry(name_index, name_len, name, + entry); + if (!not_found) goto found; - entry = next; + if (not_found < 0) + break; + + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; goto cleanup; found: - /* check the buffer size */ - if (entry->e_value_block != 0) - goto bad_block; size = le32_to_cpu(entry->e_value_size); - if (size > inode->i_sb->s_blocksize || - le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) - goto bad_block; - if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); if (buffer) { @@ -266,9 +309,9 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); end = bh->b_data + bh->b_size; - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", + if (!ext2_xattr_header_valid(HDR(bh))) { +bad_block: + ext2_error(inode->i_sb, "ext2_xattr_list", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; @@ -278,11 +321,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", /* check the on-disk data structure */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(entry); - - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(entry, end, + inode->i_sb->s_blocksize)) goto bad_block; - entry = next; + entry = EXT2_XATTR_NEXT(entry); } if (ext2_xattr_cache_insert(ea_block_cache, bh)) ea_idebug(inode, "cache insert failed"); @@ -367,7 +409,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; struct ext2_xattr_header *header = NULL; - struct ext2_xattr_entry *here, *last; + struct ext2_xattr_entry *here = NULL, *last = NULL; size_t name_len, free, min_offs = sb->s_blocksize; int not_found = 1, error; char *end; @@ -406,47 +448,39 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, le32_to_cpu(HDR(bh)->h_refcount)); header = HDR(bh); end = bh->b_data + bh->b_size; - if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - header->h_blocks != cpu_to_le32(1)) { -bad_block: ext2_error(sb, "ext2_xattr_set", + if (!ext2_xattr_header_valid(header)) { +bad_block: + ext2_error(sb, "ext2_xattr_set", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - /* Find the named attribute. */ - here = FIRST_ENTRY(bh); - while (!IS_LAST_ENTRY(here)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); - if ((char *)next >= end) - goto bad_block; - if (!here->e_value_block && here->e_value_size) { - size_t offs = le16_to_cpu(here->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - not_found = name_index - here->e_name_index; - if (!not_found) - not_found = name_len - here->e_name_len; - if (!not_found) - not_found = memcmp(name, here->e_name,name_len); - if (not_found <= 0) - break; - here = next; - } - last = here; - /* We still need to compute min_offs and last. */ + /* + * Find the named attribute. If not found, 'here' will point + * to entry where the new attribute should be inserted to + * maintain sorting. + */ + last = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(last)) { - struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); - if ((char *)next >= end) + if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize)) goto bad_block; - if (!last->e_value_block && last->e_value_size) { + if (last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) min_offs = offs; } - last = next; + if (not_found > 0) { + not_found = ext2_xattr_cmp_entry(name_index, + name_len, + name, last); + if (not_found <= 0) + here = last; + } + last = EXT2_XATTR_NEXT(last); } + if (not_found > 0) + here = last; /* Check whether we have enough space left. */ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); @@ -454,7 +488,6 @@ bad_block: ext2_error(sb, "ext2_xattr_set", /* We will use a new extended attribute block. */ free = sb->s_blocksize - sizeof(struct ext2_xattr_header) - sizeof(__u32); - here = last = NULL; /* avoid gcc uninitialized warning. */ } if (not_found) { @@ -470,14 +503,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", error = -EEXIST; if (flags & XATTR_CREATE) goto cleanup; - if (!here->e_value_block && here->e_value_size) { - size_t size = le32_to_cpu(here->e_value_size); - - if (le16_to_cpu(here->e_value_offs) + size > - sb->s_blocksize || size > sb->s_blocksize) - goto bad_block; - free += EXT2_XATTR_SIZE(size); - } + free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size)); free += EXT2_XATTR_LEN(name_len); } error = -ENOSPC; @@ -506,11 +532,10 @@ bad_block: ext2_error(sb, "ext2_xattr_set", unlock_buffer(bh); ea_bdebug(bh, "cloning"); - header = kmalloc(bh->b_size, GFP_KERNEL); + header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); error = -ENOMEM; if (header == NULL) goto cleanup; - memcpy(header, HDR(bh), bh->b_size); header->h_refcount = cpu_to_le32(1); offset = (char *)here - bh->b_data; @@ -542,7 +567,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", here->e_name_len = name_len; memcpy(here->e_name, name, name_len); } else { - if (!here->e_value_block && here->e_value_size) { + if (here->e_value_size) { char *first_val = (char *)header + min_offs; size_t offs = le16_to_cpu(here->e_value_offs); char *val = (char *)header + offs; @@ -569,7 +594,7 @@ bad_block: ext2_error(sb, "ext2_xattr_set", last = ENTRY(header+1); while (!IS_LAST_ENTRY(last)) { size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && o < offs) + if (o < offs) last->e_value_offs = cpu_to_le16(o + size); last = EXT2_XATTR_NEXT(last); @@ -784,8 +809,7 @@ ext2_xattr_delete_inode(struct inode *inode) goto cleanup; } ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); - if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || - HDR(bh)->h_blocks != cpu_to_le32(1)) { + if (!ext2_xattr_header_valid(HDR(bh))) { ext2_error(inode->i_sb, "ext2_xattr_delete_inode", "inode %ld: bad block %d", inode->i_ino, EXT2_I(inode)->i_file_acl); diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 06f77ca7f36e..cbb5ca830e57 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # Ext3 configs are here for backward compatibility with old configs which may # have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable # kernels after the removal of ext3 driver. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e5d6ee61ff48..0b202e00d93f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -603,9 +603,9 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, } /** - * ext4_should_retry_alloc() + * ext4_should_retry_alloc() - check if a block allocation should be retried * @sb: super block - * @retries number of attemps has been made + * @retries: number of attemps has been made * * ext4_should_retry_alloc() is called when ENOSPC is returned, and if * it is profitable to retry the operation, this function will wait diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 884a6e776809..86054f31fe4d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -33,6 +33,9 @@ static int ext4_dx_readdir(struct file *, struct dir_context *); /** + * is_dx_dir() - check if a directory is using htree indexing + * @inode: directory inode + * * Check if the given dir-inode refers to an htree-indexed directory * (or a directory which could potentially get converted to use htree * indexing). @@ -109,7 +112,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; - int dir_has_error = 0; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (IS_ENCRYPTED(inode)) { @@ -145,8 +147,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - offset = ctx->pos & (sb->s_blocksize - 1); - while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; @@ -155,9 +155,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto errout; } cond_resched(); + offset = ctx->pos & (sb->s_blocksize - 1); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); + if (err == 0) { + /* m_len should never be zero but let's avoid + * an infinite loop if it somehow is */ + if (map.m_len == 0) + map.m_len = 1; + ctx->pos += map.m_len * sb->s_blocksize; + continue; + } if (err > 0) { pgoff_t index = map.m_pblk >> (PAGE_SHIFT - inode->i_blkbits); @@ -176,13 +185,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if (!bh) { - if (!dir_has_error) { - EXT4_ERROR_FILE(file, 0, - "directory contains a " - "hole at offset %llu", - (unsigned long long) ctx->pos); - dir_has_error = 1; - } /* corrupt size? Maybe no more blocks to read */ if (ctx->pos > inode->i_blocks << 9) break; @@ -192,8 +194,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) /* Check the checksum */ if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(inode, bh)) { EXT4_ERROR_FILE(file, 0, "directory fails checksum " "at offset %llu", (unsigned long long)ctx->pos); @@ -671,10 +672,10 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { if (len != name->len) return -1; - return !memcmp(str, name, len); + return memcmp(str, name->name, len); } - return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr); + return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); } static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1cb67859e051..bf660aa7a9e0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -421,7 +421,8 @@ struct flex_groups { EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL)) +#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ + EXT4_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) @@ -2077,6 +2078,9 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif +#ifdef CONFIG_UNICODE + struct fscrypt_str cf_name; +#endif }; #define fname_name(p) ((p)->disk_name.name) @@ -2302,6 +2306,12 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); +#ifdef CONFIG_UNICODE +extern void ext4_fname_setup_ci_filename(struct inode *dir, + const struct qstr *iname, + struct fscrypt_str *fname); +#endif + #ifdef CONFIG_FS_ENCRYPTION static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, const struct fscrypt_name *src) @@ -2328,6 +2338,10 @@ static inline int ext4_fname_setup_filename(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif return 0; } @@ -2343,6 +2357,10 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return err; ext4_fname_from_fscrypt_name(fname, &name); + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name); +#endif return 0; } @@ -2356,6 +2374,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->crypto_buf.name = NULL; fname->usr_fname = NULL; fname->disk_name.name = NULL; + +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif } #else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, @@ -2366,6 +2389,11 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; + +#ifdef CONFIG_UNICODE + ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name); +#endif + return 0; } @@ -2376,7 +2404,13 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +static inline void ext4_fname_free_filename(struct ext4_filename *fname) +{ +#ifdef CONFIG_UNICODE + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} #endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ @@ -2568,8 +2602,8 @@ extern int ext4_ext_migrate(struct inode *); extern int ext4_ind_migrate(struct inode *inode); /* namei.c */ -extern int ext4_dirent_csum_verify(struct inode *inode, - struct ext4_dir_entry *dirent); +extern int ext4_dirblock_csum_verify(struct inode *inode, + struct buffer_head *bh); extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, @@ -3070,11 +3104,11 @@ extern int ext4_try_create_inline_dir(handle_t *handle, extern int ext4_read_inline_dir(struct file *filp, struct dir_context *ctx, int *has_inline_data); -extern int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data); +extern int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data); extern struct buffer_head *ext4_find_inline_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir, @@ -3113,14 +3147,13 @@ extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, struct ext4_dir_entry_2 *de, int blocksize, int csum_size, unsigned int parent_ino, int dotdot_real_len); -extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize); -extern int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh); +extern void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize); +extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode, + struct buffer_head *bh); extern int ext4_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry); + const struct qstr *fname, + const struct qstr *entry, bool quick); #define S_SHIFT 12 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 75a5309f2231..ef8fcf7d0d3b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -361,20 +361,20 @@ static inline int ext4_journal_force_commit(journal_t *journal) } static inline int ext4_jbd2_inode_add_write(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_write(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_write(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_wait(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_wait(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f2c62e2a0c98..92266a2da7d6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -518,10 +518,14 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); - if (err) - goto errout; + if (!ext4_has_feature_journal(inode->i_sb) || + (inode->i_ino != + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { + err = __ext4_ext_check(function, line, inode, + ext_block_hdr(bh), depth, pblk); + if (err) + goto errout; + } set_buffer_verified(bh); /* * If this is a leaf block, cache all of its entries @@ -5672,8 +5676,8 @@ out_mutex: } /** - * ext4_swap_extents - Swap extents between two inodes - * + * ext4_swap_extents() - Swap extents between two inodes + * @handle: handle for this transaction * @inode1: First inode * @inode2: Second inode * @lblk1: Start block for first inode diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 023a3eb3afa3..7521de2dcf3a 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1317,7 +1317,6 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); if (!es) goto out_wrap; - node = &es->rb_node; while (*nr_to_scan > 0) { if (es->es_lblk > end) { ei->i_es_shrink_lblk = end + 1; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2c5baa5e8291..70b0438dbc94 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -165,6 +165,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret <= 0) return ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. @@ -367,15 +371,17 @@ static const struct vm_operations_struct ext4_file_vm_ops = { static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_mapping->host; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct dax_device *dax_dev = sbi->s_daxdev; - if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + if (unlikely(ext4_forced_shutdown(sbi))) return -EIO; /* - * We don't support synchronous mappings for non-DAX files. At least - * until someone comes with a sensible use case. + * We don't support synchronous mappings for non-DAX files and + * for DAX files if underneath dax_device is not synchronous. */ - if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC)) + if (!daxdev_mapping_supported(vma, dax_dev)) return -EOPNOTSUPP; file_accessed(file); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 2024d3fa5504..36699a131168 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -294,14 +294,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, } /** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. + * ext4_alloc_branch() - allocate and set up a chain of blocks + * @handle: handle for this transaction + * @ar: structure describing the allocation request + * @indirect_blks: number of allocated indirect blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. * * This function allocates blocks, zeroes out all but the last one, * links them into chain and (if we are synchronous) writes them to disk. @@ -396,15 +394,11 @@ failed: } /** - * ext4_splice_branch - splice the allocated branch onto inode. + * ext4_splice_branch() - splice the allocated branch onto inode. * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) + * @ar: structure describing the allocation request * @where: location of missing link * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding * * This function fills the missing link and does all housekeeping needed in * inode (->i_blocks, etc.). In case of success we end up with the full diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f73bc3925282..88cdf3c90bd1 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1132,7 +1132,6 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, { int err, csum_size = 0, header_size = 0; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; void *target = dir_block->b_data; /* @@ -1158,13 +1157,11 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size, inode->i_sb->s_blocksize - csum_size); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, - inode->i_sb->s_blocksize); - initialize_dirent_tail(t, inode->i_sb->s_blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, + inode->i_sb->s_blocksize); set_buffer_uptodate(dir_block); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) return err; set_buffer_verified(dir_block); @@ -1327,11 +1324,11 @@ out: * inlined dir. It returns the number directory entries loaded * into the tree. If there is an error it is returned in err. */ -int htree_inlinedir_to_tree(struct file *dir_file, - struct inode *dir, ext4_lblk_t block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash, - int *has_inline_data) +int ext4_inlinedir_to_tree(struct file *dir_file, + struct inode *dir, ext4_lblk_t block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash, + int *has_inline_data) { int err = 0, count = 0; unsigned int parent_ino; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 82298c63ea6d..420fe3deed39 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -731,10 +731,16 @@ out_sem: !(flags & EXT4_GET_BLOCKS_ZERO) && !ext4_is_quota_file(inode) && ext4_should_order_data(inode)) { + loff_t start_byte = + (loff_t)map->m_lblk << inode->i_blkbits; + loff_t length = (loff_t)map->m_len << inode->i_blkbits; + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) - ret = ext4_jbd2_inode_add_wait(handle, inode); + ret = ext4_jbd2_inode_add_wait(handle, inode, + start_byte, length); else - ret = ext4_jbd2_inode_add_write(handle, inode); + ret = ext4_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret) return ret; } @@ -1164,8 +1170,9 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, int err = 0; unsigned blocksize = inode->i_sb->s_blocksize; unsigned bbits; - struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; - bool decrypt = false; + struct buffer_head *bh, *head, *wait[2]; + int nr_wait = 0; + int i; BUG_ON(!PageLocked(page)); BUG_ON(from > PAGE_SIZE); @@ -1217,23 +1224,32 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, !buffer_unwritten(bh) && (block_start < from || block_end > to)) { ll_rw_block(REQ_OP_READ, 0, 1, &bh); - *wait_bh++ = bh; - decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); + wait[nr_wait++] = bh; } } /* * If we issued read requests, let them complete. */ - while (wait_bh > wait) { - wait_on_buffer(*--wait_bh); - if (!buffer_uptodate(*wait_bh)) + for (i = 0; i < nr_wait; i++) { + wait_on_buffer(wait[i]); + if (!buffer_uptodate(wait[i])) err = -EIO; } - if (unlikely(err)) + if (unlikely(err)) { page_zero_new_buffers(page, from, to); - else if (decrypt) - err = fscrypt_decrypt_page(page->mapping->host, page, - PAGE_SIZE, 0, page->index); + } else if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) { + for (i = 0; i < nr_wait; i++) { + int err2; + + err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize, + bh_offset(wait[i])); + if (err2) { + clear_buffer_uptodate(wait[i]); + err = err2; + } + } + } + return err; } #endif @@ -4065,9 +4081,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) { /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); - BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, - page, PAGE_SIZE, 0, page->index)); + WARN_ON_ONCE(fscrypt_decrypt_pagecache_blocks( + page, blocksize, bh_offset(bh))); } } if (ext4_should_journal_data(inode)) { @@ -4085,7 +4100,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, err = 0; mark_buffer_dirty(bh); if (ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode); + err = ext4_jbd2_inode_add_write(handle, inode, from, + length); } unlock: @@ -4570,6 +4586,7 @@ static int __ext4_get_inode_loc(struct inode *inode, struct buffer_head *bh; struct super_block *sb = inode->i_sb; ext4_fsblk_t block; + struct blk_plug plug; int inodes_per_block, inode_offset; iloc->bh = NULL; @@ -4658,6 +4675,7 @@ make_io: * If we need to do any I/O, try to pre-readahead extra * blocks from the inode table. */ + blk_start_plug(&plug); if (EXT4_SB(sb)->s_inode_readahead_blks) { ext4_fsblk_t b, end, table; unsigned num; @@ -4688,6 +4706,7 @@ make_io: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); + blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, @@ -5520,6 +5539,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -5571,7 +5598,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; - int shrink = (attr->ia_size <= inode->i_size); + int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -5585,18 +5612,33 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) inode_inc_iversion(inode); - if (ext4_should_order_data(inode) && - (attr->ia_size < inode->i_size)) { - error = ext4_begin_ordered_truncate(inode, + if (shrink) { + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) - goto err_out; + if (error) + goto err_out; + } + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. + */ + inode_dio_wait(inode); + } + + down_write(&EXT4_I(inode)->i_mmap_sem); + + rc = ext4_break_layouts(inode); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + return rc; } + if (attr->ia_size != inode->i_size) { handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); - goto err_out; + goto out_mmap_sem; } if (ext4_handle_valid(handle) && shrink) { error = ext4_orphan_add(handle, inode); @@ -5624,33 +5666,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) i_size_write(inode, attr->ia_size); up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); - if (error) { - if (orphan) - ext4_orphan_del(NULL, inode); - goto err_out; - } - } - if (!shrink) - pagecache_isize_extended(inode, oldsize, inode->i_size); - - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. Temporarily disable - * dioread_nolock to prevent livelock. - */ - if (orphan) { - if (!ext4_should_journal_data(inode)) { - inode_dio_wait(inode); - } else + if (error) + goto out_mmap_sem; + if (!shrink) { + pagecache_isize_extended(inode, oldsize, + inode->i_size); + } else if (ext4_should_journal_data(inode)) { ext4_wait_for_tail_page_commit(inode); - } - down_write(&EXT4_I(inode)->i_mmap_sem); - - rc = ext4_break_layouts(inode); - if (rc) { - up_write(&EXT4_I(inode)->i_mmap_sem); - error = rc; - goto err_out; + } } /* @@ -5658,11 +5681,16 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) * in data=journal mode to make pages freeable. */ truncate_pagecache(inode, inode->i_size); - if (shrink) { + /* + * Call ext4_truncate() even if i_size didn't change to + * truncate possible preallocated blocks. + */ + if (attr->ia_size <= oldsize) { rc = ext4_truncate(inode); if (rc) error = rc; } +out_mmap_sem: up_write(&EXT4_I(inode)->i_mmap_sem); } @@ -6193,6 +6221,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) get_block_t *get_block; int retries = 0; + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e486e49b31ed..442f7ef873fc 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -269,6 +269,29 @@ static int uuid_is_zero(__u8 u[16]) } #endif +/* + * If immutable is set and we are not clearing it, we're not allowed to change + * anything else in the inode. Don't error out if we're only trying to set + * immutable on an immutable file. + */ +static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, + unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int oldflags = ei->i_flags; + + if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) + return 0; + + if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) + return -EPERM; + if (ext4_has_feature_project(inode->i_sb) && + __kprojid_val(ei->i_projid) != new_projid) + return -EPERM; + + return 0; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -289,16 +312,9 @@ static int ext4_ioctl_setflags(struct inode *inode, /* The JOURNAL_DATA flag is modifiable only by root */ jflag = flags & EXT4_JOURNAL_DATA_FL; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto flags_out; - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto flags_out; /* * The JOURNAL_DATA flag can only be changed by @@ -340,6 +356,20 @@ static int ext4_ioctl_setflags(struct inode *inode, } } + /* + * Wait for all pending directio and then flush all the dirty pages + * for this file. The flush marks all the pages readonly, so any + * subsequent attempt to write to the file (particularly mmap pages) + * will come through the filesystem and fail. + */ + if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && + (flags & EXT4_IMMUTABLE_FL)) { + inode_dio_wait(inode); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto flags_out; + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -704,28 +734,15 @@ group_add_out: return err; } -static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa) { - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid) - return -EINVAL; + struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) { - if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) - return -EINVAL; - } else { - if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) - return -EINVAL; - } + simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags & + EXT4_FL_USER_VISIBLE)); - return 0; + if (ext4_has_feature_project(inode->i_sb)) + fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); } long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) @@ -742,6 +759,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ext4_ioc_getfsmap(sb, (void __user *)arg); case EXT4_IOC_GETFLAGS: flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (S_ISREG(inode->i_mode)) + flags &= ~EXT4_PROJINHERIT_FL; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { int err; @@ -769,7 +788,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; inode_lock(inode); - err = ext4_ioctl_setflags(inode, flags); + err = ext4_ioctl_check_immutable(inode, + from_kprojid(&init_user_ns, ei->i_projid), + flags); + if (!err) + err = ext4_ioctl_setflags(inode, flags); inode_unlock(inode); mnt_drop_write_file(filp); return err; @@ -1096,13 +1119,7 @@ resizefs_out: { struct fsxattr fa; - memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); - - if (ext4_has_feature_project(inode->i_sb)) { - fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, - EXT4_I(inode)->i_projid); - } + ext4_fill_fsxattr(inode, &fa); if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) @@ -1111,7 +1128,7 @@ resizefs_out: } case EXT4_IOC_FSSETXATTR: { - struct fsxattr fa; + struct fsxattr fa, old_fa; int err; if (copy_from_user(&fa, (struct fsxattr __user *)arg, @@ -1134,11 +1151,15 @@ resizefs_out: return err; inode_lock(inode); - err = ext4_ioctl_check_project(inode, &fa); + ext4_fill_fsxattr(inode, &old_fa); + err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); if (err) goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); + err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); + if (err) + goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99ba720dbb7a..a3e2767bdf2f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4696,8 +4696,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * ext4_free_blocks() -- Free given blocks and update quota * @handle: handle for this transaction * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count + * @bh: optional buffer of the block to be freed + * @block: starting physical block to be freed + * @count: number of blocks to be freed * @flags: flags used by ext4_free_blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1083a9f3f16a..30ce3dc69378 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -13,11 +13,10 @@ #include "ext4_extents.h" /** - * get_ext_path - Find an extent path for designated logical block number. - * - * @inode: an inode which is searched + * get_ext_path() - Find an extent path for designated logical block number. + * @inode: inode to be searched * @lblock: logical block number to find an extent path - * @path: pointer to an extent path pointer (for output) + * @ppath: pointer to an extent path pointer (for output) * * ext4_find_extent wrapper. Return 0 on success, or a negative error value * on failure. @@ -42,8 +41,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, } /** - * ext4_double_down_write_data_sem - Acquire two inodes' write lock - * of i_data_sem + * ext4_double_down_write_data_sem() - write lock two inodes's i_data_sem + * @first: inode to be locked + * @second: inode to be locked * * Acquire write lock of i_data_sem of the two inodes */ @@ -390,7 +390,8 @@ data_copy: /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ - *err = ext4_jbd2_inode_add_write(handle, orig_inode); + *err = ext4_jbd2_inode_add_write(handle, orig_inode, + (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); unlock_pages: unlock_page(pagep[0]); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cd01c4a67ffb..129029534075 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -82,8 +82,18 @@ static struct buffer_head *ext4_append(handle_t *handle, static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -109,11 +119,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + if (!bh && (type == INDEX || type == DIRENT_HTREE)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block", + (type == INDEX) ? "index" : "leaf"); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -150,7 +163,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, } } if (!is_dx_block) { - if (ext4_dirent_csum_verify(inode, dirent)) + if (ext4_dirblock_csum_verify(inode, bh)) set_buffer_verified(bh); else { ext4_error_inode(inode, func, line, block, @@ -280,9 +293,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode); /* checksumming functions */ -void initialize_dirent_tail(struct ext4_dir_entry_tail *t, - unsigned int blocksize) +void ext4_initialize_dirent_tail(struct buffer_head *bh, + unsigned int blocksize) { + struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); + memset(t, 0, sizeof(struct ext4_dir_entry_tail)); t->det_rec_len = ext4_rec_len_to_disk( sizeof(struct ext4_dir_entry_tail), blocksize); @@ -291,17 +306,17 @@ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, /* Walk through a dirent block to find a checksum "dirent" at the tail */ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, - struct ext4_dir_entry *de) + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; #ifdef PARANOID struct ext4_dir_entry *d, *top; - d = de; - top = (struct ext4_dir_entry *)(((void *)de) + + d = (struct ext4_dir_entry *)bh->b_data; + top = (struct ext4_dir_entry *)(bh->b_data + (EXT4_BLOCK_SIZE(inode->i_sb) - - sizeof(struct ext4_dir_entry_tail))); + sizeof(struct ext4_dir_entry_tail))); while (d < top && d->rec_len) d = (struct ext4_dir_entry *)(((void *)d) + le16_to_cpu(d->rec_len)); @@ -311,7 +326,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, t = (struct ext4_dir_entry_tail *)d; #else - t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); + t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb)); #endif if (t->det_reserved_zero1 || @@ -323,8 +338,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, return t; } -static __le32 ext4_dirent_csum(struct inode *inode, - struct ext4_dir_entry *dirent, int size) +static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); @@ -344,49 +358,49 @@ static void __warn_no_space_for_csum(struct inode *inode, const char *func, "No space for directory leaf checksum. Please run e2fsck -D."); } -int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) +int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return 1; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return 0; } - if (t->det_checksum != ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent)) + if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data)) return 0; return 1; } -static void ext4_dirent_csum_set(struct inode *inode, - struct ext4_dir_entry *dirent) +static void ext4_dirblock_csum_set(struct inode *inode, + struct buffer_head *bh) { struct ext4_dir_entry_tail *t; if (!ext4_has_metadata_csum(inode->i_sb)) return; - t = get_dirent_tail(inode, dirent); + t = get_dirent_tail(inode, bh); if (!t) { warn_no_space_for_csum(inode); return; } - t->det_checksum = ext4_dirent_csum(inode, dirent, - (void *)t - (void *)dirent); + t->det_checksum = ext4_dirblock_csum(inode, bh->b_data, + (char *)t - bh->b_data); } -int ext4_handle_dirty_dirent_node(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +int ext4_handle_dirty_dirblock(handle_t *handle, + struct inode *inode, + struct buffer_head *bh) { - ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); + ext4_dirblock_csum_set(inode, bh); return ext4_handle_dirty_metadata(handle, inode, bh); } @@ -980,7 +994,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1090,10 +1104,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - count = htree_inlinedir_to_tree(dir_file, dir, 0, - &hinfo, start_hash, - start_minor_hash, - &has_inline_data); + count = ext4_inlinedir_to_tree(dir_file, dir, 0, + &hinfo, start_hash, + start_minor_hash, + &has_inline_data); if (has_inline_data) { *next_hash = ~0; return count; @@ -1259,19 +1273,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) #ifdef CONFIG_UNICODE /* * Test whether a case-insensitive directory entry matches the filename - * being searched for. + * being searched for. If quick is set, assume the name being looked up + * is already in the casefolded form. * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry) + const struct qstr *entry, bool quick) { const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); const struct unicode_map *um = sbi->s_encoding; int ret; - ret = utf8_strncasecmp(um, name, entry); + if (quick) + ret = utf8_strncasecmp_folded(um, name, entry); + else + ret = utf8_strncasecmp(um, name, entry); + if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. @@ -1287,6 +1306,32 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, return ret; } + +void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, + struct fscrypt_str *cf_name) +{ + int len; + + if (!IS_CASEFOLDED(dir)) { + cf_name->name = NULL; + return; + } + + cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS); + if (!cf_name->name) + return; + + len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + iname, cf_name->name, + EXT4_NAME_LEN); + if (len <= 0) { + kfree(cf_name->name); + cf_name->name = NULL; + return; + } + cf_name->len = (unsigned) len; + +} #endif /* @@ -1313,8 +1358,15 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) - return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0); + if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (fname->cf_name.name) { + struct qstr cf = {.name = fname->cf_name.name, + .len = fname->cf_name.len}; + return !ext4_ci_compare(parent, &cf, &entry, true); + } + return !ext4_ci_compare(parent, fname->usr_fname, &entry, + false); + } #endif return fscrypt_match_name(&f, de->name, de->name_len); @@ -1484,8 +1536,7 @@ restart: if (!buffer_verified(bh) && !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { + !ext4_dirblock_csum_verify(dir, bh)) { EXT4_ERROR_INODE(dir, "checksumming directory " "block %lu", (unsigned long)block); brelse(bh); @@ -1586,7 +1637,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; @@ -1769,7 +1820,6 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, char *data1 = (*bh)->b_data, *data2; unsigned split, move, size; struct ext4_dir_entry_2 *de = NULL, *de2; - struct ext4_dir_entry_tail *t; int csum_size = 0; int err = 0, i; @@ -1830,11 +1880,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, (char *) de2, blocksize); if (csum_size) { - t = EXT4_DIRENT_TAIL(data2, blocksize); - initialize_dirent_tail(t, blocksize); - - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); + ext4_initialize_dirent_tail(*bh, blocksize); + ext4_initialize_dirent_tail(bh2, blocksize); } dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1, @@ -1848,7 +1895,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); + err = ext4_handle_dirty_dirblock(handle, dir, bh2); if (err) goto journal_error; err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); @@ -1976,7 +2023,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, inode_inc_iversion(dir); ext4_mark_inode_dirty(handle, dir); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); return 0; @@ -1995,8 +2042,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; struct dx_entry *entries; struct ext4_dir_entry_2 *de, *de2; - struct ext4_dir_entry_tail *t; - char *data1, *top; + char *data2, *top; unsigned len; int retval; unsigned blocksize; @@ -2036,21 +2082,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); - data1 = bh2->b_data; + data2 = bh2->b_data; - memcpy (data1, de, len); - de = (struct ext4_dir_entry_2 *) data1; - top = data1 + len; + memcpy(data2, de, len); + de = (struct ext4_dir_entry_2 *) data2; + top = data2 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) de = de2; - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, - blocksize); + de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - + (char *) de, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(data1, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh2, blocksize); /* Initialize the root; the dot dirents already exist */ de = (struct ext4_dir_entry_2 *) (&root->dotdot); @@ -2080,7 +2123,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (retval) goto out_frames; - retval = ext4_handle_dirty_dirent_node(handle, dir, bh2); + retval = ext4_handle_dirty_dirblock(handle, dir, bh2); if (retval) goto out_frames; @@ -2120,7 +2163,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; struct super_block *sb; struct ext4_sb_info *sbi; struct ext4_filename fname; @@ -2170,6 +2212,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2190,6 +2237,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2199,10 +2247,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); - if (csum_size) { - t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(bh, blocksize); retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); out: @@ -2234,7 +2280,7 @@ again: return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2460,7 +2506,7 @@ static int ext4_delete_entry(handle_t *handle, goto out; BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, dir, bh); + err = ext4_handle_dirty_dirblock(handle, dir, bh); if (unlikely(err)) goto out; @@ -2662,7 +2708,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, { struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; - struct ext4_dir_entry_tail *t; ext4_lblk_t block = 0; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; @@ -2686,13 +2731,11 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, de = (struct ext4_dir_entry_2 *)dir_block->b_data; ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); set_nlink(inode, 2); - if (csum_size) { - t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); - initialize_dirent_tail(t, blocksize); - } + if (csum_size) + ext4_initialize_dirent_tail(dir_block, blocksize); BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); + err = ext4_handle_dirty_dirblock(handle, inode, dir_block); if (err) goto out; set_buffer_verified(dir_block); @@ -2782,7 +2825,10 @@ bool ext4_empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "invalid size"); return true; } - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, + * so treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; @@ -2804,6 +2850,10 @@ bool ext4_empty_dir(struct inode *inode) brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; @@ -3369,7 +3419,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; @@ -3430,9 +3483,8 @@ static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, ent->inode, ent->dir_bh); } else { - retval = ext4_handle_dirty_dirent_node(handle, - ent->inode, - ent->dir_bh); + retval = ext4_handle_dirty_dirblock(handle, ent->inode, + ent->dir_bh); } } else { retval = ext4_mark_inode_dirty(handle, ent->inode); @@ -3462,8 +3514,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ext4_mark_inode_dirty(handle, ent->dir); BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); if (!ent->inlined) { - retval = ext4_handle_dirty_dirent_node(handle, - ent->dir, ent->bh); + retval = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh); if (unlikely(retval)) { ext4_std_error(ent->dir->i_sb, retval); return retval; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4690618a92e9..12ceadef32c5 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -66,9 +66,7 @@ static void ext4_finish_bio(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; -#ifdef CONFIG_FS_ENCRYPTION - struct page *data_page = NULL; -#endif + struct page *bounce_page = NULL; struct buffer_head *bh, *head; unsigned bio_start = bvec->bv_offset; unsigned bio_end = bio_start + bvec->bv_len; @@ -78,13 +76,10 @@ static void ext4_finish_bio(struct bio *bio) if (!page) continue; -#ifdef CONFIG_FS_ENCRYPTION - if (!page->mapping) { - /* The bounce data pages are unmapped. */ - data_page = page; - fscrypt_pullback_bio_page(&page, false); + if (fscrypt_is_bounce_page(page)) { + bounce_page = page; + page = fscrypt_pagecache_page(bounce_page); } -#endif if (bio->bi_status) { SetPageError(page); @@ -111,10 +106,7 @@ static void ext4_finish_bio(struct bio *bio) bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); if (!under_io) { -#ifdef CONFIG_FS_ENCRYPTION - if (data_page) - fscrypt_restore_control_page(data_page); -#endif + fscrypt_free_bounce_page(bounce_page); end_page_writeback(page); } } @@ -404,7 +396,7 @@ submit_and_retry: ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; - wbc_account_io(io->io_wbc, page, bh->b_size); + wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size); io->io_next_block++; return 0; } @@ -415,7 +407,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct writeback_control *wbc, bool keep_towrite) { - struct page *data_page = NULL; + struct page *bounce_page = NULL; struct inode *inode = page->mapping->host; unsigned block_start; struct buffer_head *bh, *head; @@ -475,14 +467,22 @@ int ext4_bio_write_page(struct ext4_io_submit *io, bh = head = page_buffers(page); + /* + * If any blocks are being written to an encrypted file, encrypt them + * into a bounce page. For simplicity, just encrypt until the last + * block which might be needed. This may cause some unneeded blocks + * (e.g. holes) to be unnecessarily encrypted, but this is rare and + * can't happen in the common case of blocksize == PAGE_SIZE. + */ if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; + unsigned int enc_bytes = round_up(len, i_blocksize(inode)); retry_encrypt: - data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, - page->index, gfp_flags); - if (IS_ERR(data_page)) { - ret = PTR_ERR(data_page); + bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, + 0, gfp_flags); + if (IS_ERR(bounce_page)) { + ret = PTR_ERR(bounce_page); if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { if (io->io_bio) { ext4_io_submit(io); @@ -491,7 +491,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_flags |= __GFP_NOFAIL; goto retry_encrypt; } - data_page = NULL; + bounce_page = NULL; goto out; } } @@ -500,8 +500,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - ret = io_submit_add_bh(io, inode, - data_page ? data_page : page, bh); + ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh); if (ret) { /* * We only get here on ENOMEM. Not much else @@ -517,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, /* Error stopped previous loop? Clean up buffers... */ if (ret) { out: - if (data_page) - fscrypt_restore_control_page(data_page); + fscrypt_free_bounce_page(bounce_page); printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); redirty_page_for_writepage(wbc, page); do { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 04b4f53f0659..b3cd7655a6ff 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -230,6 +230,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(journal_task), NULL, }; +ATTRIBUTE_GROUPS(ext4); /* Features this copy of ext4 supports */ EXT4_ATTR_FEATURE(lazy_itable_init); @@ -256,6 +257,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(metadata_csum_seed), NULL, }; +ATTRIBUTE_GROUPS(ext4_feat); static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) { @@ -374,13 +376,13 @@ static const struct sysfs_ops ext4_attr_ops = { }; static struct kobj_type ext4_sb_ktype = { - .default_attrs = ext4_attrs, + .default_groups = ext4_groups, .sysfs_ops = &ext4_attr_ops, .release = ext4_sb_release, }; static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, + .default_groups = ext4_feat_groups, .sysfs_ops = &ext4_attr_ops, .release = (void (*)(struct kobject *))kfree, }; diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index e57cc754d543..110a38ca5d53 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config F2FS_FS tristate "F2FS filesystem support" depends on BLOCK diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ed70b68b2b38..a0eef95b9e0e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, exist = f2fs_test_bit(offset, se->cur_valid_map); if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); } @@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); + f2fs_warn(sbi, "access invalid blkaddr:%u", + blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); return false; @@ -657,9 +657,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) err_out: set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); + f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); return err; } @@ -676,13 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) return 0; if (bdev_read_only(sbi->sb->s_bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping orphan cleanup"); + f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); return 0; } if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } @@ -827,26 +825,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, if (crc_offset < CP_MIN_CHKSUM_OFFSET || crc_offset > CP_CHKSUM_OFFSET) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid crc_offset: %zu", crc_offset); + f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) { - if (crc_offset != CP_MIN_CHKSUM_OFFSET) { - f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "layout of large_nat_bitmap is deprecated, " - "run fsck to repair, chksum_offset: %zu", - crc_offset); - return -EINVAL; - } - } - crc = f2fs_checkpoint_chksum(sbi, *cp_block); if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -869,9 +855,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (le32_to_cpu(cp_block->cp_pack_total_block_count) > sbi->blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid cp_pack_total_block_count:%u", - le32_to_cpu(cp_block->cp_pack_total_block_count)); + f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; @@ -905,6 +890,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; + int err; sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), GFP_KERNEL); @@ -932,6 +918,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) } else if (cp2) { cur_page = cp2; } else { + err = -EFSCORRUPTED; goto fail_no_cp; } @@ -944,8 +931,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) sbi->cur_cp_pack = 2; /* Sanity checking of checkpoint */ - if (f2fs_sanity_check_ckpt(sbi)) + if (f2fs_sanity_check_ckpt(sbi)) { + err = -EFSCORRUPTED; goto free_fail_no_cp; + } if (cp_blks <= 1) goto done; @@ -959,8 +948,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned char *ckpt = (unsigned char *)sbi->ckpt; cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); - if (IS_ERR(cur_page)) + if (IS_ERR(cur_page)) { + err = PTR_ERR(cur_page); goto free_fail_no_cp; + } sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -975,7 +966,7 @@ free_fail_no_cp: f2fs_put_page(cp2, 1); fail_no_cp: kvfree(sbi->ckpt); - return -EINVAL; + return err; } static void __add_dirty_inode(struct inode *inode, enum inode_type type) @@ -1142,17 +1133,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) static bool __need_flush_quota(struct f2fs_sb_info *sbi) { + bool ret = false; + if (!is_journalled_quota(sbi)) return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) - return true; - if (get_pages(sbi, F2FS_DIRTY_QDATA)) - return true; - return false; + + down_write(&sbi->quota_sem); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) { + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + ret = true; + } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { + ret = true; + } + up_write(&sbi->quota_sem); + return ret; } /* @@ -1171,26 +1169,22 @@ static int block_operations(struct f2fs_sb_info *sbi) blk_start_plug(&plug); retry_flush_quotas: + f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { int locked; if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); - f2fs_lock_all(sbi); + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); goto retry_flush_dents; } - clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + f2fs_unlock_all(sbi); /* only failed during mount/umount/freeze/quotactl */ locked = down_read_trylock(&sbi->sb->s_umount); f2fs_quota_sync(sbi->sb, -1); if (locked) up_read(&sbi->sb->s_umount); - } - - f2fs_lock_all(sbi); - if (__need_flush_quota(sbi)) { - f2fs_unlock_all(sbi); cond_resched(); goto retry_flush_quotas; } @@ -1212,12 +1206,6 @@ retry_flush_dents: */ down_write(&sbi->node_change); - if (__need_flush_quota(sbi)) { - up_write(&sbi->node_change); - f2fs_unlock_all(sbi); - goto retry_flush_quotas; - } - if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1313,7 +1301,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) @@ -1328,10 +1317,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); - /* - * TODO: we count on fsck.f2fs to clear this flag until we figure out - * missing cases which clear it incorrectly. - */ + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); @@ -1571,8 +1558,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (cpc->reason != CP_PAUSE) return 0; - f2fs_msg(sbi->sb, KERN_WARNING, - "Start checkpoint disabled!"); + f2fs_warn(sbi, "Start checkpoint disabled!"); } mutex_lock(&sbi->cp_mutex); @@ -1638,8 +1624,7 @@ stop: stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) - f2fs_msg(sbi->sb, KERN_NOTICE, - "checkpoint: version = %llx", ckpt_ver); + f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index eda4181d2092..abbf14e9bd72 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -14,6 +14,7 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/swap.h> #include <linux/prefetch.h> #include <linux/uio.h> #include <linux/cleancache.h> @@ -54,7 +55,7 @@ static bool __is_cp_guaranteed(struct page *page) static enum count_type __read_io_type(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_file_mapping(page); if (mapping) { struct inode *inode = mapping->host; @@ -185,7 +186,7 @@ static void f2fs_write_end_io(struct bio *bio) continue; } - fscrypt_pullback_bio_page(&page, true); + fscrypt_finalize_bounce_page(&page); if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); @@ -347,25 +348,24 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, +static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; struct bvec_iter_all iter_all; - if (!io->bio) + if (!bio) return false; if (!inode && !page && !ino) return true; - bio_for_each_segment_all(bvec, io->bio, iter_all) { + bio_for_each_segment_all(bvec, bio, iter_all) { - if (bvec->bv_page->mapping) - target = bvec->bv_page; - else - target = fscrypt_control_page(bvec->bv_page); + target = bvec->bv_page; + if (fscrypt_is_bounce_page(target)) + target = fscrypt_pagecache_page(target); if (inode && inode == target->mapping->host) return true; @@ -411,7 +411,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); + ret = __has_merged_page(io->bio, inode, page, ino); up_read(&io->io_rwsem); } if (ret) @@ -455,7 +455,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) - return -EFAULT; + return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -470,7 +470,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); bio_set_op_attrs(bio, fio->op, fio->op_flags); @@ -481,6 +481,61 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } +int f2fs_merge_page_bio(struct f2fs_io_info *fio) +{ + struct bio *bio = *fio->bio; + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFSCORRUPTED; + + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + + if (bio && (*fio->last_block + 1 != fio->new_blkaddr || + !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + } +alloc_new: + if (!bio) { + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + BIO_MAX_PAGES, false, fio->type, fio->temp); + bio_set_op_attrs(bio, fio->op, fio->op_flags); + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + goto alloc_new; + } + + if (fio->io_wbc) + wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); + + inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + + *fio->last_block = fio->new_blkaddr; + *fio->bio = bio; + + return 0; +} + +static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + if (!bio) + return; + + if (!__has_merged_page(*bio, NULL, page, 0)) + return; + + __submit_bio(sbi, *bio, DATA); + *bio = NULL; +} + void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -537,7 +592,7 @@ alloc_new: } if (fio->io_wbc) - wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); @@ -734,7 +789,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } goto got_it; @@ -754,7 +809,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } got_it: @@ -1100,7 +1155,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto sync_out; } @@ -1530,7 +1585,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, sector_t block_nr; int ret = 0; - block_in_file = (sector_t)page->index; + block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; @@ -1563,14 +1618,15 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; SetPageMappedToDisk(page); - if (!PageUptodate(page) && !cleancache_get_page(page)) { + if (!PageUptodate(page) && (!PageSwapCache(page) && + !cleancache_get_page(page))) { SetPageUptodate(page); goto confused; } if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { - ret = -EFAULT; + ret = -EFSCORRUPTED; goto out; } } else { @@ -1661,7 +1717,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, + page_index(page), readahead_gfp_mask(mapping))) goto next_page; } @@ -1685,7 +1741,7 @@ next_page: static int f2fs_read_data_page(struct file *file, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); @@ -1694,7 +1750,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false); + ret = f2fs_mpage_readpages(page_file_mapping(page), + NULL, page, 1, false); return ret; } @@ -1727,8 +1784,9 @@ static int encrypt_one_page(struct f2fs_io_info *fio) f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, fio->page->index, gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, + PAGE_SIZE, 0, + gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { @@ -1851,7 +1909,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; ipu_force = true; fio->need_lock = LOCK_DONE; @@ -1878,7 +1936,7 @@ got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto out_writepage; } /* @@ -1900,8 +1958,7 @@ got_it: err = f2fs_inplace_write_data(fio); if (err) { if (f2fs_encrypted_file(inode)) - fscrypt_pullback_bio_page(&fio->encrypted_page, - true); + fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); } else { @@ -1947,6 +2004,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, + struct bio **bio, + sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type) { @@ -1972,6 +2031,8 @@ static int __write_data_page(struct page *page, bool *submitted, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, + .bio = bio, + .last_block = last_block, }; trace_f2fs_writepage(page, DATA); @@ -2070,10 +2131,13 @@ out: unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) + !F2FS_I(inode)->cp_task) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_balance_fs(sbi, need_balance_fs); + } if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -2100,7 +2164,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc, FS_DATA_IO); + return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); } /* @@ -2116,6 +2180,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int done = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct bio *bio = NULL; + sector_t last_block; int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2192,17 +2258,20 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) + if (wbc->sync_mode != WB_SYNC_NONE) { f2fs_wait_on_page_writeback(page, DATA, true, true); - else + f2fs_submit_ipu_bio(sbi, &bio, page); + } else { goto continue_unlock; + } } if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc, io_type); + ret = __write_data_page(page, &submitted, &bio, + &last_block, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -2251,6 +2320,9 @@ continue_unlock: if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, NULL, 0, DATA); + /* submit cached bio of IPU write */ + if (bio) + __submit_bio(sbi, bio, DATA); return ret; } @@ -2262,6 +2334,9 @@ static inline bool __should_serialize_io(struct inode *inode, return false; if (IS_NOQUOTA(inode)) return false; + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -2533,7 +2608,7 @@ repeat: } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto fail; } err = f2fs_submit_page_read(inode, page, blkaddr); @@ -2778,13 +2853,14 @@ int f2fs_release_page(struct page *page, gfp_t wait) static int f2fs_set_data_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page_file_mapping(page)->host; trace_f2fs_set_page_dirty(page, DATA); if (!PageUptodate(page)) SetPageUptodate(page); + if (PageSwapCache(page)) + return __set_page_dirty_nobuffers(page); if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { @@ -2843,7 +2919,7 @@ int f2fs_migrate_page(struct address_space *mapping, /* one extra reference was held for atomic_write page */ extra_count = atomic_written ? 1 : 0; rc = migrate_page_move_mapping(mapping, newpage, - page, mode, extra_count); + page, extra_count); if (rc != MIGRATEPAGE_SUCCESS) { if (atomic_written) mutex_unlock(&fi->inmem_lock); @@ -2876,6 +2952,126 @@ int f2fs_migrate_page(struct address_space *mapping, } #endif +#ifdef CONFIG_SWAP +/* Copied from generic_swapfile_activate() to check any holes */ +static int check_swap_activate(struct file *swap_file, unsigned int max) +{ + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = mapping->host; + unsigned blocks_per_page; + unsigned long page_no; + unsigned blkbits; + sector_t probe_block; + sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; + + blkbits = inode->i_blkbits; + blocks_per_page = PAGE_SIZE >> blkbits; + + /* + * Map all the blocks into the extent list. This code doesn't try + * to be very smart. + */ + probe_block = 0; + page_no = 0; + last_block = i_size_read(inode) >> blkbits; + while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + unsigned block_in_page; + sector_t first_block; + + cond_resched(); + + first_block = bmap(inode, probe_block); + if (first_block == 0) + goto bad_bmap; + + /* + * It must be PAGE_SIZE aligned on-disk + */ + if (first_block & (blocks_per_page - 1)) { + probe_block++; + goto reprobe; + } + + for (block_in_page = 1; block_in_page < blocks_per_page; + block_in_page++) { + sector_t block; + + block = bmap(inode, probe_block + block_in_page); + if (block == 0) + goto bad_bmap; + if (block != first_block + block_in_page) { + /* Discontiguity */ + probe_block++; + goto reprobe; + } + } + + first_block >>= (PAGE_SHIFT - blkbits); + if (page_no) { /* exclude the header page */ + if (first_block < lowest_block) + lowest_block = first_block; + if (first_block > highest_block) + highest_block = first_block; + } + + page_no++; + probe_block += blocks_per_page; +reprobe: + continue; + } + return 0; + +bad_bmap: + pr_err("swapon: swapfile has holes\n"); + return -EINVAL; +} + +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + struct inode *inode = file_inode(file); + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + + ret = check_swap_activate(file, sis->max); + if (ret) + return ret; + + set_inode_flag(inode, FI_PIN_FILE); + f2fs_precache_extents(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; +} + +static void f2fs_swap_deactivate(struct file *file) +{ + struct inode *inode = file_inode(file); + + clear_inode_flag(inode, FI_PIN_FILE); +} +#else +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + return -EOPNOTSUPP; +} + +static void f2fs_swap_deactivate(struct file *file) +{ +} +#endif + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -2888,6 +3084,8 @@ const struct address_space_operations f2fs_dblock_aops = { .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, + .swap_activate = f2fs_swap_activate, + .swap_deactivate = f2fs_swap_deactivate, #ifdef CONFIG_MIGRATION .migratepage = f2fs_migrate_page, #endif diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 99e9a5c37b71..7706049d23bf 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); int i; + /* these will be changed if online resize is done */ + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + /* validation check of the segment numbers */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_cached = atomic64_read(&sbi->read_hit_cached); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 59bc46017855..85a1528f319f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -218,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { - f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, - "Corrupted max_depth of %lu: %u", - dir->i_ino, max_depth); + f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } @@ -816,11 +815,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted namelen=%d, run fsck to fix.", - __func__, le16_to_cpu(de->name_len)); + f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; goto out; } @@ -828,8 +826,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, int save_len = fstr->len; err = fscrypt_fname_disk_to_usr(d->inode, - (u32)de->hash_code, 0, - &de_name, fstr); + (u32)le32_to_cpu(de->hash_code), + 0, &de_name, fstr); if (err) goto out; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index caf77fe8ac07..e60078460ad1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -184,10 +184,9 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, next_re = rb_entry(next, struct rb_entry, rb_node); if (cur_re->ofs + cur_re->len > next_re->ofs) { - f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " - "cur(%u, %u) next(%u, %u)", - cur_re->ofs, cur_re->len, - next_re->ofs, next_re->len); + f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); return false; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 06b89a9862ab..17382da7f0bd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -136,6 +136,9 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ + block_t unusable_cap; /* Amount of space allowed to be + * unusable when disabling checkpoint + */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -412,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -476,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode); #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_DENTRY_BITMAP_SIZE(inode) \ + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE) #define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ NR_INLINE_DENTRY(inode) + \ @@ -1052,6 +1056,8 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ + struct bio **bio; /* bio for ipu */ + sector_t *last_block; /* last block number in bio */ unsigned char version; /* version of the node */ }; @@ -1111,6 +1117,7 @@ enum { SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ }; enum { @@ -1207,6 +1214,7 @@ struct f2fs_sb_info { /* for inode management */ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ + struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ @@ -1230,6 +1238,7 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ + struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ @@ -1247,6 +1256,7 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ + struct rw_semaphore quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1488,7 +1498,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page->mapping); + return F2FS_M_SB(page_file_mapping(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1766,8 +1776,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - avail_user_block_count -= sbi->unusable_block_count; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -1795,7 +1809,20 @@ enospc: return -ENOSPC; } -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +__printf(2, 3) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1811,11 +1838,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); if (unlikely(inode->i_blocks < sectors)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks, - (unsigned long long)sectors); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -1967,7 +1993,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count; + unsigned int valid_node_count, user_block_count; int err; if (is_inode) { @@ -1994,10 +2020,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - valid_block_count += sbi->unusable_block_count; + user_block_count -= sbi->unusable_block_count; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count > user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -2052,10 +2079,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -2191,6 +2217,9 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + if (sbi->gc_mode == GC_URGENT) + return true; + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || @@ -2198,7 +2227,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) get_pages(sbi, F2FS_DIO_WRITE)) return false; - if (SM_I(sbi) && SM_I(sbi)->dcc_info && + if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) return false; @@ -2320,57 +2349,23 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) } /* - * Inode flags + * On-disk inode flags (f2fs_inode::i_flags) */ -#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */ -#define F2FS_UNRM_FL 0x00000002 /* Undelete */ -#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define F2FS_DIRTY_FL 0x00000100 -#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */ -/* End compression flags --- maybe not all used */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */ -#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ -#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ -#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ -#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ -#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ - -#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */ -#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ - -/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ -#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \ - F2FS_IMMUTABLE_FL | \ - F2FS_APPEND_FL | \ - F2FS_NODUMP_FL | \ - F2FS_NOATIME_FL | \ - F2FS_PROJINHERIT_FL) /* Flags that should be inherited by new inodes from their parent. */ -#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\ - F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\ - F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\ - F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\ - F2FS_PROJINHERIT_FL) +#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL)) +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) @@ -2856,9 +2851,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { - f2fs_msg(sbi->sb, KERN_ERR, - "invalid blkaddr: %u, type: %d, run fsck to fix.", - blkaddr, type); + f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); f2fs_bug_on(sbi, 1); } } @@ -2989,8 +2983,6 @@ int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); -extern __printf(3, 4) -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* @@ -3074,9 +3066,12 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3169,6 +3164,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); +int f2fs_merge_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); @@ -3214,6 +3210,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); /* * recovery.c @@ -3686,7 +3683,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && + !(inode->i_flags & S_SWAPFILE)) return true; return false; @@ -3712,4 +3710,7 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_F2FS_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 45b45f37d347..f8d46df8fa9e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -707,11 +707,9 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } - flags = fi->i_flags & F2FS_FL_USER_VISIBLE; + flags = fi->i_flags; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & F2FS_COMPR_FL) - stat->attributes |= STATX_ATTR_COMPRESSED; if (IS_ENCRYPTED(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; if (flags & F2FS_IMMUTABLE_FL) @@ -720,7 +718,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | - STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); @@ -1026,7 +1023,7 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - return -EFAULT; + return -EFSCORRUPTED; } if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { @@ -1214,7 +1211,7 @@ roll_back: static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; @@ -1467,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1531,7 +1528,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_is_pinned_file(inode)) + map.m_seg_type = CURSEG_COLD_DATA; + + err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? + F2FS_GET_BLOCK_PRE_DIO : + F2FS_GET_BLOCK_PRE_AIO)); if (err) { pgoff_t last_off; @@ -1648,44 +1650,22 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) return 0; } -static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) +static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { - struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags; - - if (IS_ENCRYPTED(inode)) - flags |= F2FS_ENCRYPT_FL; - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - flags |= F2FS_INLINE_DATA_FL; - if (is_inode_flag_set(inode, FI_PIN_FILE)) - flags |= F2FS_NOCOW_FL; - - flags &= F2FS_FL_USER_VISIBLE; - - return put_user(flags, (int __user *)arg); -} - -static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int oldflags; + u32 oldflags; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - flags = f2fs_mask_flags(inode->i_mode, flags); - oldflags = fi->i_flags; - if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) + if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - flags = flags & F2FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; + fi->i_flags = iflags | (oldflags & ~mask); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1698,26 +1678,124 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) return 0; } +/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry + * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to + * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add + * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. + */ + +static const struct { + u32 iflag; + u32 fsflag; +} f2fs_fsflags_map[] = { + { F2FS_SYNC_FL, FS_SYNC_FL }, + { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, + { F2FS_APPEND_FL, FS_APPEND_FL }, + { F2FS_NODUMP_FL, FS_NODUMP_FL }, + { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_INDEX_FL, FS_INDEX_FL }, + { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, + { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, +}; + +#define F2FS_GETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_INDEX_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL | \ + FS_ENCRYPT_FL | \ + FS_INLINE_DATA_FL | \ + FS_NOCOW_FL) + +#define F2FS_SETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL) + +/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ +static inline u32 f2fs_iflags_to_fsflags(u32 iflags) +{ + u32 fsflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (iflags & f2fs_fsflags_map[i].iflag) + fsflags |= f2fs_fsflags_map[i].fsflag; + + return fsflags; +} + +/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ +static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) +{ + u32 iflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (fsflags & f2fs_fsflags_map[i].fsflag) + iflags |= f2fs_fsflags_map[i].iflag; + + return iflags; +} + +static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fsflags &= F2FS_GETTABLE_FS_FL; + + return put_user(fsflags, (int __user *)arg); +} + static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - unsigned int flags; + u32 fsflags; + u32 iflags; int ret; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *)arg)) + if (get_user(fsflags, (int __user *)arg)) return -EFAULT; + if (fsflags & ~F2FS_GETTABLE_FS_FL) + return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + + iflags = f2fs_fsflags_to_iflags(fsflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); - ret = __f2fs_ioc_setflags(inode, flags); - + ret = f2fs_setflags_common(inode, iflags, + f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1764,9 +1842,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) * f2fs_is_atomic_file. */ if (get_dirty_pages(inode)) - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%u", - inode->i_ino, get_dirty_pages(inode)); + f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u", + inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2201,8 +2278,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) return -EROFS; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - f2fs_msg(sbi->sb, KERN_INFO, - "Skipping Checkpoint. Checkpoints currently disabled."); + f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); return -EINVAL; } @@ -2291,7 +2367,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can @@ -2587,10 +2663,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, - sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", + range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; } @@ -2727,47 +2801,56 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) } #endif -/* Transfer internal flags to xflags */ -static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) -{ - __u32 xflags = 0; - - if (iflags & F2FS_SYNC_FL) - xflags |= FS_XFLAG_SYNC; - if (iflags & F2FS_IMMUTABLE_FL) - xflags |= FS_XFLAG_IMMUTABLE; - if (iflags & F2FS_APPEND_FL) - xflags |= FS_XFLAG_APPEND; - if (iflags & F2FS_NODUMP_FL) - xflags |= FS_XFLAG_NODUMP; - if (iflags & F2FS_NOATIME_FL) - xflags |= FS_XFLAG_NOATIME; - if (iflags & F2FS_PROJINHERIT_FL) - xflags |= FS_XFLAG_PROJINHERIT; +/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable + * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its + * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS. + */ + +static const struct { + u32 iflag; + u32 xflag; +} f2fs_xflags_map[] = { + { F2FS_SYNC_FL, FS_XFLAG_SYNC }, + { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE }, + { F2FS_APPEND_FL, FS_XFLAG_APPEND }, + { F2FS_NODUMP_FL, FS_XFLAG_NODUMP }, + { F2FS_NOATIME_FL, FS_XFLAG_NOATIME }, + { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT }, +}; + +#define F2FS_SUPPORTED_XFLAGS ( \ + FS_XFLAG_SYNC | \ + FS_XFLAG_IMMUTABLE | \ + FS_XFLAG_APPEND | \ + FS_XFLAG_NODUMP | \ + FS_XFLAG_NOATIME | \ + FS_XFLAG_PROJINHERIT) + +/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */ +static inline u32 f2fs_iflags_to_xflags(u32 iflags) +{ + u32 xflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (iflags & f2fs_xflags_map[i].iflag) + xflags |= f2fs_xflags_map[i].xflag; + return xflags; } -#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ - FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) - -/* Transfer xflags flags to internal */ -static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags) +/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */ +static inline u32 f2fs_xflags_to_iflags(u32 xflags) { - unsigned long iflags = 0; + u32 iflags = 0; + int i; - if (xflags & FS_XFLAG_SYNC) - iflags |= F2FS_SYNC_FL; - if (xflags & FS_XFLAG_IMMUTABLE) - iflags |= F2FS_IMMUTABLE_FL; - if (xflags & FS_XFLAG_APPEND) - iflags |= F2FS_APPEND_FL; - if (xflags & FS_XFLAG_NODUMP) - iflags |= F2FS_NODUMP_FL; - if (xflags & FS_XFLAG_NOATIME) - iflags |= F2FS_NOATIME_FL; - if (xflags & FS_XFLAG_PROJINHERIT) - iflags |= F2FS_PROJINHERIT_FL; + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (xflags & f2fs_xflags_map[i].xflag) + iflags |= f2fs_xflags_map[i].iflag; return iflags; } @@ -2779,8 +2862,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & - F2FS_FL_USER_VISIBLE); + fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, @@ -2818,9 +2900,8 @@ static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); struct fsxattr fa; - unsigned int flags; + u32 iflags; int err; if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa))) @@ -2830,11 +2911,11 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) if (!inode_owner_or_capable(inode)) return -EACCES; - if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS) + if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS) return -EOPNOTSUPP; - flags = f2fs_xflags_to_iflags(fa.fsx_xflags); - if (f2fs_mask_flags(inode->i_mode, flags) != flags) + iflags = f2fs_xflags_to_iflags(fa.fsx_xflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2845,9 +2926,8 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) err = f2fs_ioctl_check_project(inode, &fa); if (err) goto out; - flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | - (flags & F2FS_FL_XFLAG_VISIBLE); - err = __f2fs_ioc_setflags(inode, flags); + err = f2fs_setflags_common(inode, iflags, + f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); if (err) goto out; @@ -2869,10 +2949,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) fi->i_gc_failures[GC_FAILURE_PIN] + 1); if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } @@ -2885,9 +2964,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; @@ -2980,6 +3056,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3036,6 +3133,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -3149,6 +3248,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 963fb4571fd9..6691f526fa40 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; - unsigned int last_segment = MAIN_SEGS(sbi); + unsigned int last_segment; unsigned int nsearched = 0; mutex_lock(&dirty_i->seglist_lock); + last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); @@ -387,7 +388,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, goto next; /* Don't touch checkpointed data */ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && - get_ckpt_valid_blocks(sbi, segno))) + get_ckpt_valid_blocks(sbi, segno) && + p.alloc_mode != SSR)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; @@ -404,7 +406,8 @@ next: sm->last_victim[p.gc_mode] = last_victim + 1; else sm->last_victim[p.gc_mode] = segno + 1; - sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] %= + (MAIN_SECS(sbi) * sbi->segs_per_sec); break; } } @@ -615,9 +618,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (sum->version != dni->version) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: valid data with mismatched node version.", - __func__); + f2fs_warn(sbi, "%s: valid data with mismatched node version.", + __func__); set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -658,7 +660,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } goto got_it; @@ -676,7 +678,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } got_it: @@ -1180,9 +1182,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) " - "type [%d, %d] in SSA and SIT", - segno, type, GET_SUM_TYPE((&sum->footer))); + f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false); goto skip; @@ -1360,3 +1361,176 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } + +static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, + unsigned int end) +{ + int type; + unsigned int segno, next_inuse; + int err = 0; + + /* Move out cursegs from the target range */ + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) + allocate_segment_for_resize(sbi, type, start, end); + + /* do GC to move out valid blocks in the range */ + for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + mutex_lock(&sbi->gc_mutex); + do_garbage_collect(sbi, segno, &gc_list, FG_GC); + mutex_unlock(&sbi->gc_mutex); + put_gc_inode(&gc_list); + + if (get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + } + + err = f2fs_sync_fs(sbi->sb, 1); + if (err) + return err; + + next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); + if (next_inuse <= end) { + f2fs_err(sbi, "segno %u should be free but still inuse!", + next_inuse); + f2fs_bug_on(sbi, 1); + } + return err; +} + +static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) +{ + struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); + int section_count = le32_to_cpu(raw_sb->section_count); + int segment_count = le32_to_cpu(raw_sb->segment_count); + int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + long long block_count = le64_to_cpu(raw_sb->block_count); + int segs = secs * sbi->segs_per_sec; + + raw_sb->section_count = cpu_to_le32(section_count + secs); + raw_sb->segment_count = cpu_to_le32(segment_count + segs); + raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); + raw_sb->block_count = cpu_to_le64(block_count + + (long long)segs * sbi->blocks_per_seg); +} + +static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) +{ + int segs = secs * sbi->segs_per_sec; + long long user_block_count = + le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); + + SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; + MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; + FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + + (long long)segs * sbi->blocks_per_seg); +} + +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +{ + __u64 old_block_count, shrunk_blocks; + unsigned int secs; + int gc_mode, gc_type; + int err = 0; + __u32 rem; + + old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); + if (block_count > old_block_count) + return -EINVAL; + + /* new fs size should align to section size */ + div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); + if (rem) + return -EINVAL; + + if (block_count == old_block_count) + return 0; + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_err(sbi, "Should run fsck to repair first."); + return -EFSCORRUPTED; + } + + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + f2fs_err(sbi, "Checkpoint should be enabled."); + return -EINVAL; + } + + freeze_bdev(sbi->sb->s_bdev); + + shrunk_blocks = old_block_count - block_count; + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + else + sbi->user_block_count -= shrunk_blocks; + spin_unlock(&sbi->stat_lock); + if (err) { + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; + } + + mutex_lock(&sbi->resize_mutex); + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + + MAIN_SECS(sbi) -= secs; + + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + + err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, + MAIN_SEGS(sbi) - 1); + if (err) + goto out; + + update_sb_metadata(sbi, -secs); + + err = f2fs_commit_super(sbi, false); + if (err) { + update_sb_metadata(sbi, secs); + goto out; + } + + update_fs_metadata(sbi, -secs); + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = f2fs_sync_fs(sbi->sb, 1); + if (err) { + update_fs_metadata(sbi, secs); + update_sb_metadata(sbi, secs); + f2fs_commit_super(sbi, false); + } +out: + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); + + MAIN_SECS(sbi) += secs; + spin_lock(&sbi->stat_lock); + sbi->user_block_count += shrunk_blocks; + spin_unlock(&sbi->stat_lock); + } + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + mutex_unlock(&sbi->resize_mutex); + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 404d2462a0fe..3613efca8c00 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -140,11 +140,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (unlikely(dn->data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(dn); set_sbi_flag(fio.sbi, SBI_NEED_FSCK); - f2fs_msg(fio.sbi->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dn->inode->i_ino, dn->data_blkaddr); - return -EINVAL; + f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); + return -EFSCORRUPTED; } f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); @@ -383,11 +381,9 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (unlikely(dn.data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(&dn); set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); - f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dir->i_ino, dn.data_blkaddr); - err = -EINVAL; + f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); + err = -EFSCORRUPTED; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ccb02226dd2c..a33d7a849b2d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -74,7 +74,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; return 0; } @@ -176,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) calculated = f2fs_inode_chksum(sbi, page); if (provided != calculated) - f2fs_msg(sbi->sb, KERN_WARNING, - "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", + page->index, ino_of_node(page), provided, calculated); return provided == calculated; } @@ -202,50 +201,41 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); if (!iblocks) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " - "run fsck to fix.", - __func__, inode->i_ino, iblocks); + f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", + __func__, inode->i_ino, iblocks); return false; } if (ino_of_node(node_page) != nid_of_node(node_page)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode footer i_ino=%lx, ino,nid: " - "[%u, %u] run fsck to fix.", - __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); return false; } if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", + __func__, inode->i_ino); return false; } if (f2fs_has_extra_attr(inode) && !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) is with extra_attr, " - "but extra_attr feature is off", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", + __func__, inode->i_ino); return false; } if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || fi->i_extra_isize % sizeof(__le32)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " - "max: %zu", - __func__, inode->i_ino, fi->i_extra_isize, - F2FS_TOTAL_EXTRA_ATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } @@ -255,11 +245,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) (!fi->i_inline_xattr_size || fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted " - "i_inline_xattr_size: %d, max: %zu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); return false; } @@ -272,11 +260,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, DATA_GENERIC_ENHANCE))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) extent info [%u, %u, %u] " - "is incorrect, run fsck to fix", - __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); return false; } } @@ -284,19 +270,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_inline_data(inode) && (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_data, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_dentry, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } @@ -343,6 +325,8 @@ static int do_read_inode(struct inode *inode) le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); + if (S_ISREG(inode->i_mode)) + fi->i_flags &= ~F2FS_PROJINHERIT_FL; fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); @@ -374,7 +358,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); - return -EINVAL; + return -EFSCORRUPTED; } /* check data exist */ @@ -783,8 +767,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "May loss orphan inode, run fsck to fix."); + f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } @@ -792,8 +775,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "Too many orphan inodes, run fsck to fix."); + f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix."); } else { f2fs_add_orphan_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0f77f9242751..c5b99042e6f2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -385,9 +385,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) int err = 0; if (f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "skip recovering inline_dots inode (ino:%lu, pino:%u) " - "in readonly mountpoint", dir->i_ino, pino); + f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", + dir->i_ino, pino); return 0; } @@ -484,9 +483,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu", - dir->i_ino, inode->i_ino); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); err = -EPERM; goto out_iput; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 18a038a2a9fa..a18b2a895771 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -34,10 +34,9 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: out-of-range nid=%x, run fsck to fix.", - __func__, nid); - return -EINVAL; + f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", + __func__, nid); + return -EFSCORRUPTED; } return 0; } @@ -1189,10 +1188,8 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } @@ -1291,7 +1288,7 @@ static int read_node_page(struct page *page, int op_flags) if (PageUptodate(page)) { if (!f2fs_inode_chksum_verify(sbi, page)) { ClearPageUptodate(page); - return -EBADMSG; + return -EFSBADCRC; } return LOCKED_PAGE; } @@ -1375,16 +1372,15 @@ repeat: } if (!f2fs_inode_chksum_verify(sbi, page)) { - err = -EBADMSG; + err = -EFSBADCRC; goto out_err; } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " - "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); + f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1752,9 +1748,8 @@ continue_unlock: break; } if (!ret && atomic && !marked) { - f2fs_msg(sbi->sb, KERN_DEBUG, - "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", + ino, last_page->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -2304,8 +2299,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); - f2fs_msg(sbi->sb, KERN_ERR, - "NAT is corrupt, run fsck to fix it"); + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } } @@ -2725,7 +2719,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, i = 1; } for (; i < NAT_ENTRY_PER_BLOCK; i++) { - if (nat_blk->entries[i].block_addr != NULL_ADDR) + if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } if (valid == 0) { @@ -2915,7 +2909,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->full_nat_bits = nm_i->nat_bits + 8; nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e04f82b3f4fc..783773e4560d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -188,10 +188,9 @@ out: name = "<encrypted>"; else name = raw_inode->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, - IS_ERR(dir) ? 0 : dir->i_ino, err); + f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ipage), name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -292,9 +291,8 @@ static int recover_inode(struct inode *inode, struct page *page) else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); return 0; } @@ -371,10 +369,9 @@ next: /* sanity check in order to detect looped node chain */ if (++loop_cnt >= free_blocks || blkaddr == next_blkaddr_of_node(page)) { - f2fs_msg(sbi->sb, KERN_NOTICE, - "%s: detect looped node chain, " - "blkaddr:%u, next:%u", - __func__, blkaddr, next_blkaddr_of_node(page)); + f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u", + __func__, blkaddr, + next_blkaddr_of_node(page)); f2fs_put_page(page, 1); err = -EINVAL; break; @@ -553,11 +550,10 @@ retry_dn: f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); - err = -EFAULT; + f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_page), + ofs_of_node(page)); + err = -EFSCORRUPTED; goto err; } @@ -569,13 +565,13 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } @@ -642,11 +638,9 @@ retry_prev: err: f2fs_put_dnode(&dn); out: - f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, - file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", + inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", + recovered, err); return err; } @@ -734,8 +728,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) #endif if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, - "recover fsync data on readonly fs"); + f2fs_info(sbi, "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8dee063c833f..a661ac32e829 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; + mutex_lock(&sbi->flush_lock); + blk_start_plug(&plug); f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); @@ -869,11 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; block_t holes[2] = {0, 0}; /* DATA and NODE */ + block_t unusable; struct seg_entry *se; unsigned int segno; @@ -887,10 +894,20 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp || holes[NODE] > ovp) + unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + if (unusable > ovp_holes) + return unusable - ovp_holes; + return 0; +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && - dirty_segments(sbi) > overprovision_segments(sbi)) + dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; return 0; } @@ -1480,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; @@ -1740,8 +1761,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, devi = f2fs_target_device_index(sbi, blkstart); if (blkstart < FDEV(devi).start_blk || blkstart > FDEV(devi).end_blk) { - f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x", - blkstart); + f2fs_err(sbi, "Invalid block %x", blkstart); return -EIO; } blkstart -= FDEV(devi).start_blk; @@ -1754,10 +1774,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_ERR, - "(%d) %s: Unaligned zone reset attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); + f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path : "", + blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); @@ -2121,15 +2140,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; @@ -2150,15 +2168,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; @@ -2640,6 +2657,39 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -2772,9 +2822,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Found FS corruption, run fsck to fix."); - return -EIO; + f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); + return -EFSCORRUPTED; } /* start/end segment number in main_area */ @@ -3197,12 +3246,17 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EFAULT; + f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", + __func__, segno); + return -EFSCORRUPTED; } stat_inc_inplace_blocks(fio->sbi); - err = f2fs_submit_page_bio(fio); + if (fio->bio) + err = f2fs_merge_page_bio(fio); + else + err = f2fs_submit_page_bio(fio); if (!err) { update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); @@ -3393,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + if (blk_off > ENTRIES_IN_SUM) { + f2fs_bug_on(sbi, 1); + f2fs_put_page(page, 1); + return -EFAULT; + } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; @@ -3530,8 +3589,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || - sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { + f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; + } return 0; } @@ -3762,7 +3824,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3781,7 +3843,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* @@ -4096,11 +4159,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) start = le32_to_cpu(segno_in_journal(journal, i)); if (start >= MAIN_SEGS(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong journal entry on segno %u", - start); + f2fs_err(sbi, "Wrong journal entry on segno %u", + start); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; break; } @@ -4137,11 +4199,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) up_read(&curseg->journal_rwsem); if (!err && total_node_blocks != valid_node_count(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "SIT is corrupted node# %u vs %u", - total_node_blocks, valid_node_count(sbi)); + f2fs_err(sbi, "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; } return err; @@ -4232,6 +4293,39 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return init_victim_secmap(sbi); } +static int sanity_check_curseg(struct f2fs_sb_info *sbi) +{ + int i; + + /* + * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; + * In LFS curseg, all blkaddr after .next_blkoff should be unused. + */ + for (i = 0; i < NO_CHECK_TYPE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + struct seg_entry *se = get_seg_entry(sbi, curseg->segno); + unsigned int blkofs = curseg->next_blkoff; + + if (f2fs_test_bit(blkofs, se->cur_valid_map)) + goto out; + + if (curseg->alloc_type == SSR) + continue; + + for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + if (!f2fs_test_bit(blkofs, se->cur_valid_map)) + continue; +out: + f2fs_err(sbi, + "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); + return -EFSCORRUPTED; + } + } + return 0; +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -4327,6 +4421,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (err) return err; + err = sanity_check_curseg(sbi); + if (err) + return err; + init_min_max_mtime(sbi); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 429007b8036e..b74602813a05 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -109,7 +109,7 @@ #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ - ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) + DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) @@ -693,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, } while (cur_pos < sbi->blocks_per_seg); if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Mismatch valid blocks %d vs. %d", - GET_SIT_VBLOCKS(raw_sit), valid_blocks); + f2fs_err(sbi, "Mismatch valid blocks %d vs. %d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg || segno > TOTAL_SEGS(sbi) - 1)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid blocks %d or segno %u", - GET_SIT_VBLOCKS(raw_sit), segno); + f2fs_err(sbi, "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6b959bbb336a..6de6cda44031 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,7 +136,10 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, - Opt_checkpoint, + Opt_checkpoint_disable, + Opt_checkpoint_disable_cap, + Opt_checkpoint_disable_cap_perc, + Opt_checkpoint_enable, Opt_err, }; @@ -195,45 +198,52 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_checkpoint, "checkpoint=%s"}, + {Opt_checkpoint_disable, "checkpoint=disable"}, + {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, + {Opt_checkpoint_enable, "checkpoint=enable"}, {Opt_err, NULL}, }; -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + va_end(args); } static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = (sbi->user_block_count << 1) / 1000; + block_t limit = min((sbi->user_block_count << 1) / 1000, + sbi->user_block_count - sbi->reserved_blocks); /* limit is 0.2% */ if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; - f2fs_msg(sbi->sb, KERN_INFO, - "Reduce reserved blocks for root = %u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Reduce reserved blocks for root = %u", + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) - f2fs_msg(sbi->sb, KERN_INFO, - "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resuid), - from_kgid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resgid)); + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -254,35 +264,29 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, int ret = -EINVAL; if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_msg(sb, KERN_INFO, - "QUOTA feature is enabled, so ignore qf_name"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); return 0; } qname = match_strdup(args); if (!qname) { - f2fs_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); + f2fs_err(sbi, "Not enough memory for storing quotafile name"); return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else - f2fs_msg(sb, KERN_ERR, - "%s quota file already specified", + f2fs_err(sbi, "%s quota file already specified", QTYPE2NAME(qtype)); goto errout; } if (strchr(qname, '/')) { - f2fs_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); + f2fs_err(sbi, "quotafile must be on filesystem root"); goto errout; } F2FS_OPTION(sbi).s_qf_names[qtype] = qname; @@ -298,8 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); @@ -315,8 +318,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * to support legacy quotas in quota files. */ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " - "Cannot enable project quota enforcement."); + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); return -1; } if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || @@ -336,21 +338,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || test_opt(sbi, PRJQUOTA)) { - f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " - "format mixing"); + f2fs_err(sbi, "old and new quota format mixing"); return -1; } if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " - "not specified"); + f2fs_err(sbi, "journaled quota format not specified"); return -1; } } if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_INFO, - "QUOTA feature is enabled, so ignore jquota_fmt"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; } return 0; @@ -418,8 +417,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "discard is required for zoned block devices"); + f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); @@ -451,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_user_xattr: - f2fs_msg(sb, KERN_INFO, - "user_xattr options not supported"); + f2fs_info(sbi, "user_xattr options not supported"); break; case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); + f2fs_info(sbi, "nouser_xattr options not supported"); break; case Opt_inline_xattr: - f2fs_msg(sb, KERN_INFO, - "inline_xattr options not supported"); + f2fs_info(sbi, "inline_xattr options not supported"); break; case Opt_noinline_xattr: - f2fs_msg(sb, KERN_INFO, - "noinline_xattr options not supported"); + f2fs_info(sbi, "noinline_xattr options not supported"); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -476,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_acl: - f2fs_msg(sb, KERN_INFO, "acl options not supported"); + f2fs_info(sbi, "acl options not supported"); break; case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + f2fs_info(sbi, "noacl options not supported"); break; #endif case Opt_active_logs: @@ -529,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_msg(sb, KERN_INFO, - "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); @@ -542,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; uid = make_kuid(current_user_ns(), arg); if (!uid_valid(uid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid uid value %d", arg); + f2fs_err(sbi, "Invalid uid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resuid = uid; @@ -553,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; gid = make_kgid(current_user_ns(), arg); if (!gid_valid(gid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid gid value %d", arg); + f2fs_err(sbi, "Invalid gid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resgid = gid; @@ -567,9 +558,7 @@ static int parse_options(struct super_block *sb, char *options) if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "adaptive mode is not allowed with " - "zoned block device feature"); + f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); kvfree(name); return -EINVAL; } @@ -587,9 +576,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { - f2fs_msg(sb, KERN_WARNING, - "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + f2fs_warn(sbi, "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; @@ -610,13 +598,11 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_fault_injection: - f2fs_msg(sb, KERN_INFO, - "fault_injection options not supported"); + f2fs_info(sbi, "fault_injection options not supported"); break; case Opt_fault_type: - f2fs_msg(sb, KERN_INFO, - "fault_type options not supported"); + f2fs_info(sbi, "fault_type options not supported"); break; #endif case Opt_lazytime: @@ -696,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options) case Opt_jqfmt_vfsv0: case Opt_jqfmt_vfsv1: case Opt_noquota: - f2fs_msg(sb, KERN_INFO, - "quota operations not supported"); + f2fs_info(sbi, "quota operations not supported"); break; #endif case Opt_whint: @@ -759,39 +744,44 @@ static int parse_options(struct super_block *sb, char *options) case Opt_test_dummy_encryption: #ifdef CONFIG_FS_ENCRYPTION if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; } F2FS_OPTION(sbi).test_dummy_encryption = true; - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mode enabled"); + f2fs_info(sbi, "Test dummy encryption mode enabled"); #else - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mount option ignored"); + f2fs_info(sbi, "Test dummy encryption mount option ignored"); #endif break; - case Opt_checkpoint: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (strlen(name) == 6 && - !strncmp(name, "enable", 6)) { - clear_opt(sbi, DISABLE_CHECKPOINT); - } else if (strlen(name) == 7 && - !strncmp(name, "disable", 7)) { - set_opt(sbi, DISABLE_CHECKPOINT); - } else { - kvfree(name); + case Opt_checkpoint_disable_cap_perc: + if (args->from && match_int(args, &arg)) return -EINVAL; - } - kvfree(name); + if (arg < 0 || arg > 100) + return -EINVAL; + if (arg == 100) + F2FS_OPTION(sbi).unusable_cap = + sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = + (sbi->user_block_count / 100) * arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable_cap: + if (args->from && match_int(args, &arg)) + return -EINVAL; + F2FS_OPTION(sbi).unusable_cap = arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable: + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_enable: + clear_opt(sbi, DISABLE_CHECKPOINT); break; default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); + f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } } @@ -800,23 +790,18 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; #else if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be " - "mounted RDWR without CONFIG_QUOTA"); + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "Should set mode=lfs with %uKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); + f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } @@ -825,15 +810,11 @@ static int parse_options(struct super_block *sb, char *options) if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_msg(sb, KERN_ERR, - "extra_attr or flexible_inline_xattr " - "feature is off"); + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_msg(sb, KERN_ERR, - "inline_xattr_size option should be " - "set with inline_xattr option"); + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } @@ -842,16 +823,14 @@ static int parse_options(struct super_block *sb, char *options) if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range: %d ~ %d", - min_size, max_size); + f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "LFS not compatible with checkpoint=disable\n"); + f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1313,6 +1292,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); + else + seq_puts(seq, ",nodiscard"); if (test_opt(sbi, NOHEAP)) seq_puts(seq, ",no_heap"); else @@ -1409,8 +1390,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",alloc_mode=%s", "reuse"); if (test_opt(sbi, DISABLE_CHECKPOINT)) - seq_puts(seq, ",checkpoint=disable"); - + seq_printf(seq, ",checkpoint=disable:%u", + F2FS_OPTION(sbi).unusable_cap); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1439,6 +1420,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); + F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); @@ -1467,10 +1449,10 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) struct cp_control cpc; int err = 0; int ret; + block_t unusable; if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_ERR, - "checkpoint=disable on readonly fs"); + f2fs_err(sbi, "checkpoint=disable on readonly fs"); return -EINVAL; } sbi->sb->s_flags |= SB_ACTIVE; @@ -1494,7 +1476,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - if (f2fs_disable_cp_again(sbi)) { + unusable = f2fs_get_unusable_blocks(sbi); + if (f2fs_disable_cp_again(sbi, unusable)) { err = -EAGAIN; goto restore_flag; } @@ -1507,7 +1490,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto out_unlock; spin_lock(&sbi->stat_lock); - sbi->unusable_block_count = 0; + sbi->unusable_block_count = unusable; spin_unlock(&sbi->stat_lock); out_unlock: @@ -1572,8 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); - f2fs_msg(sb, KERN_INFO, - "Try to recover all the superblocks, ret: %d", err); + f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", + err); if (!err) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } @@ -1614,15 +1597,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "switch extent_cache option is not allowed"); + f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "disabling checkpoint not compatible with read-only"); + f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; } @@ -1692,8 +1673,7 @@ skip: restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) - f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread has stopped"); + f2fs_warn(sbi, "background gc thread has stopped"); } else if (need_stop_gc) { f2fs_stop_gc_thread(sbi); } @@ -1832,8 +1812,7 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sbi->sb, KERN_ERR, - "quota sysfile may be corrupted, skip loading it"); + f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it"); return 0; } @@ -1849,8 +1828,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quota_ino: %d", err); + f2fs_err(sbi, "Cannot turn on quota_ino: %d", err); return 0; } return 1; @@ -1863,8 +1841,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) enabled = 1; continue; } - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quotas: %d on %d", err, i); + f2fs_err(sbi, "Cannot turn on quotas: %d on %d", + err, i); } } return enabled; @@ -1885,8 +1863,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, qf_inode = f2fs_iget(sb, qf_inum); if (IS_ERR(qf_inode)) { - f2fs_msg(sb, KERN_ERR, - "Bad quota inode %u:%lu", type, qf_inum); + f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum); return PTR_ERR(qf_inode); } @@ -1899,17 +1876,17 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, static int f2fs_enable_quotas(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int type, err = 0; unsigned long qf_inum; bool quota_mopt[MAXQUOTAS] = { - test_opt(F2FS_SB(sb), USRQUOTA), - test_opt(F2FS_SB(sb), GRPQUOTA), - test_opt(F2FS_SB(sb), PRJQUOTA), + test_opt(sbi, USRQUOTA), + test_opt(sbi, GRPQUOTA), + test_opt(sbi, PRJQUOTA), }; if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sb, KERN_ERR, - "quota file may be corrupted, skip loading it"); + f2fs_err(sbi, "quota file may be corrupted, skip loading it"); return 0; } @@ -1922,10 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "fsck to fix.", type, err); + f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.", + type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); set_sbi_flag(F2FS_SB(sb), @@ -1944,6 +1919,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) int cnt; int ret; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + + down_read(&sbi->quota_sem); ret = dquot_writeback_dquots(sb, type); if (ret) goto out; @@ -1981,6 +1968,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) out: if (ret) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); return ret; } @@ -2045,10 +2034,8 @@ void f2fs_quota_off_umount(struct super_block *sb) if (err) { int ret = dquot_quota_off(sb, type); - f2fs_msg(sb, KERN_ERR, - "Fail to turn off disk quota " - "(type: %d, err: %d, ret:%d), Please " - "run fsck to fix it.", type, err, ret); + f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.", + type, err, ret); set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } @@ -2074,32 +2061,40 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) static int f2fs_dquot_commit(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_acquire(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); - + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_release(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_release(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -2109,22 +2104,27 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit_info(sb, type); if (ret < 0) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -2341,55 +2341,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, (segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Mismatch start address, segment0(%u) cp_blkaddr(%u)", - segment0_blkaddr, cp_blkaddr); + f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); return true; } if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != sit_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong CP boundary, start(%u) end(%u) blocks(%u)", - cp_blkaddr, sit_blkaddr, - segment_count_ckpt << log_blocks_per_seg); + f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); return true; } if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != nat_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", - sit_blkaddr, nat_blkaddr, - segment_count_sit << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); return true; } if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != ssa_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", - nat_blkaddr, ssa_blkaddr, - segment_count_nat << log_blocks_per_seg); + f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); return true; } if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != main_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", - ssa_blkaddr, main_blkaddr, - segment_count_ssa << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); return true; } if (main_end_blkaddr > seg_end_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", - main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", + main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); return true; } else if (main_end_blkaddr < seg_end_blkaddr) { int err = 0; @@ -2406,12 +2400,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, err = __f2fs_commit_super(bh, NULL); res = err ? "failed" : "done"; } - f2fs_msg(sb, KERN_INFO, - "Fix alignment : %s, start(%u) end(%u) block(%u)", - res, main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)", + res, main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); if (err) return true; } @@ -2425,7 +2418,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - struct super_block *sb = sbi->sb; unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2435,48 +2427,42 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum offset: %zu", - crc_offset); + f2fs_info(sbi, "Invalid SB checksum offset: %zu", + crc_offset); return 1; } crc = le32_to_cpu(raw_super->crc); if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum value: %u", crc); + f2fs_info(sbi, "Invalid SB checksum value: %u", crc); return 1; } } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_msg(sb, KERN_INFO, - "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); return 1; } /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB", - PAGE_SIZE); + f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", + PAGE_SIZE); return 1; } /* Currently, support only 4KB block size */ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB", - blocksize); + f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", + blocksize); return 1; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { - f2fs_msg(sb, KERN_INFO, - "Invalid log blocks per segment (%u)", - le32_to_cpu(raw_super->log_blocks_per_seg)); + f2fs_info(sbi, "Invalid log blocks per segment (%u)", + le32_to_cpu(raw_super->log_blocks_per_seg)); return 1; } @@ -2485,17 +2471,16 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < F2FS_MIN_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectorsize (%u)", + le32_to_cpu(raw_super->log_sectorsize)); return 1; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != F2FS_MAX_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid log sectors per block(%u) log sectorsize(%u)", - le32_to_cpu(raw_super->log_sectors_per_block), - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", + le32_to_cpu(raw_super->log_sectors_per_block), + le32_to_cpu(raw_super->log_sectorsize)); return 1; } @@ -2509,59 +2494,51 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", - segment_count); + f2fs_info(sbi, "Invalid segment count (%u)", segment_count); return 1; } if (total_sections > segment_count || total_sections < F2FS_MIN_SEGMENTS || segs_per_sec > segment_count || !segs_per_sec) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment/section count (%u, %u x %u)", - segment_count, total_sections, segs_per_sec); + f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); return 1; } if ((segment_count / segs_per_sec) < total_sections) { - f2fs_msg(sb, KERN_INFO, - "Small segment_count (%u < %u * %u)", - segment_count, segs_per_sec, total_sections); + f2fs_info(sbi, "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); return 1; } if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { - f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %llu)", - segment_count, le64_to_cpu(raw_super->block_count)); + f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } if (secs_per_zone > total_sections || !secs_per_zone) { - f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone / total_sections (%u, %u)", - secs_per_zone, total_sections); + f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", + secs_per_zone, total_sections); return 1; } if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || raw_super->hot_ext_count > F2FS_MAX_EXTENSION || (le32_to_cpu(raw_super->extension_count) + raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) { - f2fs_msg(sb, KERN_INFO, - "Corrupted extension count (%u + %u > %u)", - le32_to_cpu(raw_super->extension_count), - raw_super->hot_ext_count, - F2FS_MAX_EXTENSION); + f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)", + le32_to_cpu(raw_super->extension_count), + raw_super->hot_ext_count, + F2FS_MAX_EXTENSION); return 1; } if (le32_to_cpu(raw_super->cp_payload) > (blocks_per_seg - F2FS_CP_PACKS)) { - f2fs_msg(sb, KERN_INFO, - "Insane cp_payload (%u > %u)", - le32_to_cpu(raw_super->cp_payload), - blocks_per_seg - F2FS_CP_PACKS); + f2fs_info(sbi, "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); return 1; } @@ -2569,11 +2546,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || le32_to_cpu(raw_super->root_ino) != 3) { - f2fs_msg(sb, KERN_INFO, - "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", - le32_to_cpu(raw_super->node_ino), - le32_to_cpu(raw_super->meta_ino), - le32_to_cpu(raw_super->root_ino)); + f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); return 1; } @@ -2617,8 +2593,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || ovp_segments == 0 || reserved_segments == 0)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong layout: check mkfs.f2fs version"); + f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version"); return 1; } @@ -2627,16 +2602,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (!user_block_count || user_block_count >= segment_count_main << log_blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong user_block_count: %u", user_block_count); + f2fs_err(sbi, "Wrong user_block_count: %u", + user_block_count); return 1; } valid_user_blocks = le64_to_cpu(ckpt->valid_block_count); if (valid_user_blocks > user_block_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_user_blocks: %u, user_block_count: %u", - valid_user_blocks, user_block_count); + f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u", + valid_user_blocks, user_block_count); return 1; } @@ -2644,9 +2618,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) avail_node_count = sbi->total_node_count - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; if (valid_node_count > avail_node_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_node_count: %u, avail_node_count: %u", - valid_node_count, avail_node_count); + f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u", + valid_node_count, avail_node_count); return 1; } @@ -2660,10 +2633,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_node_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Node segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2675,10 +2647,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_data_segno[i])); + f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); return 1; } } @@ -2687,10 +2658,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u) and Data segment (%u)" - " has the same segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2701,9 +2671,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong bitmap size: sit: %u, nat:%u", - sit_bitmap_size, nat_bitmap_size); + f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); return 1; } @@ -2712,14 +2681,22 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (cp_pack_start_sum < cp_payload + 1 || cp_pack_start_sum > blocks_per_seg - 1 - NR_CURSEG_TYPE) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong cp_pack_start_sum: %u", - cp_pack_start_sum); + f2fs_err(sbi, "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + + if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && + le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { + f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, " + "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, " + "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"", + le32_to_cpu(ckpt->checksum_offset)); return 1; } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + f2fs_err(sbi, "A bug case: need to run fsck"); return 1; } return 0; @@ -2841,9 +2818,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) while (zones && sector < nr_sectors) { nr_zones = F2FS_REPORT_NR_ZONES; - err = blkdev_report_zones(bdev, sector, - zones, &nr_zones, - GFP_KERNEL); + err = blkdev_report_zones(bdev, sector, zones, &nr_zones); if (err) break; if (!nr_zones) { @@ -2888,18 +2863,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, for (block = 0; block < 2; block++) { bh = sb_bread(sb, block); if (!bh) { - f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", - block + 1); + f2fs_err(sbi, "Unable to read %dth superblock", + block + 1); err = -EIO; continue; } /* sanity checking of raw super */ if (sanity_check_raw_super(sbi, bh)) { - f2fs_msg(sb, KERN_ERR, - "Can't find valid F2FS filesystem in %dth superblock", - block + 1); - err = -EINVAL; + f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", + block + 1); + err = -EFSCORRUPTED; brelse(bh); continue; } @@ -3028,36 +3002,32 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Zoned block device feature not enabled\n"); + f2fs_err(sbi, "Zoned block device feature not enabled\n"); return -EINVAL; } if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { if (init_blkz_info(sbi, i)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Failed to initialize F2FS blkzone information"); + f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk, + bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? + "Host-aware" : "Host-managed"); continue; } #endif - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk); - } - f2fs_msg(sbi->sb, KERN_INFO, - "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk); + } + f2fs_info(sbi, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -3103,7 +3073,7 @@ try_onemore: /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver."); + f2fs_err(sbi, "Cannot load crc32 driver."); err = PTR_ERR(sbi->s_chksum_driver); sbi->s_chksum_driver = NULL; goto free_sbi; @@ -3111,7 +3081,7 @@ try_onemore: /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { - f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -3135,8 +3105,7 @@ try_onemore: */ #ifndef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_ERR, - "Zoned block device support is not enabled"); + f2fs_err(sbi, "Zoned block device support is not enabled"); err = -EOPNOTSUPP; goto free_sb_buf; } @@ -3160,10 +3129,7 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sbi)) - sb->s_qcop = &dquot_quotactl_sysfile_ops; - else - sb->s_qcop = &f2fs_quotactl_ops; + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; if (f2fs_sb_has_quota_ino(sbi)) { @@ -3192,6 +3158,7 @@ try_onemore: mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3227,6 +3194,7 @@ try_onemore: } init_rwsem(&sbi->cp_rwsem); + init_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -3246,14 +3214,14 @@ try_onemore: /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); goto free_io_dummy; } err = f2fs_get_valid_checkpoint(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + f2fs_err(sbi, "Failed to get valid F2FS checkpoint"); goto free_meta_inode; } @@ -3264,10 +3232,13 @@ try_onemore: sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to find devices"); + f2fs_err(sbi, "Failed to find devices"); goto free_devices; } @@ -3287,6 +3258,7 @@ try_onemore: INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } + mutex_init(&sbi->flush_lock); f2fs_init_extent_cache_info(sbi); @@ -3297,14 +3269,14 @@ try_onemore: /* setup f2fs internal modules */ err = f2fs_build_segment_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager"); + f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)", + err); goto free_sm; } err = f2fs_build_node_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager"); + f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)", + err); goto free_nm; } @@ -3329,7 +3301,7 @@ try_onemore: /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + f2fs_err(sbi, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); goto free_stats; } @@ -3337,7 +3309,7 @@ try_onemore: /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { - f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + f2fs_err(sbi, "Failed to read root inode"); err = PTR_ERR(root); goto free_node_inode; } @@ -3363,8 +3335,7 @@ try_onemore: if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) - f2fs_msg(sb, KERN_ERR, - "Cannot turn on quotas: error %d", err); + f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif /* if there are nt orphan nodes free them */ @@ -3384,13 +3355,10 @@ try_onemore: if (f2fs_hw_is_readonly(sbi)) { if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data, but " - "write access unavailable"); + f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); goto free_meta; } - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping recovery"); + f2fs_info(sbi, "write access unavailable, skipping recovery"); goto reset_checkpoint; } @@ -3405,8 +3373,8 @@ try_onemore: if (err != -ENOMEM) skip_recovery = true; need_fsck = true; - f2fs_msg(sb, KERN_ERR, - "Cannot recover all fsync data errno=%d", err); + f2fs_err(sbi, "Cannot recover all fsync data errno=%d", + err); goto free_meta; } } else { @@ -3414,8 +3382,7 @@ try_onemore: if (!f2fs_readonly(sb) && err > 0) { err = -EINVAL; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data"); + f2fs_err(sbi, "Need to recover fsync data"); goto free_meta; } } @@ -3446,17 +3413,16 @@ reset_checkpoint: /* recover broken superblock */ if (recovery) { err = f2fs_commit_super(sbi, true); - f2fs_msg(sb, KERN_INFO, - "Try to recover %dth superblock, ret: %d", - sbi->valid_super_block ? 1 : 2, err); + f2fs_info(sbi, "Try to recover %dth superblock, ret: %d", + sbi->valid_super_block ? 1 : 2, err); } f2fs_join_shrinker(sbi); f2fs_tuning_parameters(sbi); - f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", - cur_cp_version(F2FS_CKPT(sbi))); + f2fs_notice(sbi, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 729f46a3c9ee..3aeacd0aacfd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -68,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a, (unsigned long long)(dirty_segments(sbi))); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)unusable); +} + + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -440,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); +F2FS_GENERAL_RO_ATTR(unusable); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -495,12 +510,14 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), NULL, }; +ATTRIBUTE_GROUPS(f2fs); static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION @@ -520,6 +537,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(sb_checksum), NULL, }; +ATTRIBUTE_GROUPS(f2fs_feat); static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, @@ -527,7 +545,7 @@ static const struct sysfs_ops f2fs_attr_ops = { }; static struct kobj_type f2fs_sb_ktype = { - .default_attrs = f2fs_attrs, + .default_groups = f2fs_groups, .sysfs_ops = &f2fs_attr_ops, .release = f2fs_sb_release, }; @@ -541,7 +559,7 @@ static struct kset f2fs_kset = { }; static struct kobj_type f2fs_feat_ktype = { - .default_attrs = f2fs_feat_attrs, + .default_groups = f2fs_feat_groups, .sysfs_ops = &f2fs_attr_ops, }; @@ -566,8 +584,7 @@ static int __maybe_unused segment_info_seq_show(struct seq_file *seq, if ((i % 10) == 0) seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -593,8 +610,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, struct seg_entry *se = get_seg_entry(sbi, i); seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e791741d193b..b32c45621679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -346,7 +346,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); if (!*xe) { - err = -EFAULT; + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + err = -EFSCORRUPTED; goto out; } check: @@ -622,7 +625,10 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); if (!here) { - error = -EFAULT; + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; goto exit; } diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 3ff1772f612e..718163d0c621 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FAT_FS tristate select NLS diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 9d01db37183f..1bda2ab6745b 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/fat/dir.c * diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 495edeafd60a..265983635f2b 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -1,6 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2004, OGAWA Hirofumi - * Released under GPL v2. */ #include <linux/blkdev.h> diff --git a/fs/fat/file.c b/fs/fat/file.c index 0e3ed79fcc3f..4614c0ba5f1c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/fat/file.c * diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ba93d1373306..05689198f5af 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/fat/inode.c * diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 4fc950bb6433..1e08bd54c5fb 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/fat/misc.c * diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index f2cd365a4e86..9d062886fbc1 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/msdos/namei.c * diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 996c8c25e9c6..0cdd0fb9f742 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/vfat/namei.c * diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index eb192656fba2..af191371c352 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -1,14 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* fs/fat/nfs.c - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <linux/exportfs.h> diff --git a/fs/file_table.c b/fs/file_table.c index 3f9c1b452c1d..b07b53f24ff5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/file_table.c * diff --git a/fs/freevxfs/Kconfig b/fs/freevxfs/Kconfig index ce49df1020dd..c05c71d57291 100644 --- a/fs/freevxfs/Kconfig +++ b/fs/freevxfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config VXFS_FS tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" depends on BLOCK diff --git a/fs/freevxfs/Makefile b/fs/freevxfs/Makefile index 87ad097440d6..e6ee59291521 100644 --- a/fs/freevxfs/Makefile +++ b/fs/freevxfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # VxFS Makefile # diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b16645b417d9..542b02d170f8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/fs-writeback.c * @@ -269,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page) if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) wb_put(wb); } +EXPORT_SYMBOL_GPL(__inode_attach_wb); /** * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it @@ -581,6 +583,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, if (unlikely(wb_dying(wbc->wb))) inode_switch_wbs(inode, wbc->wb_id); } +EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode); /** * wbc_detach_inode - disassociate wbc from inode and perform foreign detection @@ -700,9 +703,10 @@ void wbc_detach_inode(struct writeback_control *wbc) wb_put(wbc->wb); wbc->wb = NULL; } +EXPORT_SYMBOL_GPL(wbc_detach_inode); /** - * wbc_account_io - account IO issued during writeback + * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership * @wbc: writeback_control of the writeback in progress * @page: page being written out * @bytes: number of bytes being written out @@ -711,9 +715,10 @@ void wbc_detach_inode(struct writeback_control *wbc) * controlled by @wbc. Keep the book for foreign inode detection. See * wbc_detach_inode(). */ -void wbc_account_io(struct writeback_control *wbc, struct page *page, - size_t bytes) +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, + size_t bytes) { + struct cgroup_subsys_state *css; int id; /* @@ -722,10 +727,15 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ - if (!wbc->wb) + if (!wbc->wb || wbc->no_cgroup_owner) return; - id = mem_cgroup_css_from_page(page)->id; + css = mem_cgroup_css_from_page(page); + /* dead cgroups shouldn't contribute to inode ownership arbitration */ + if (!(css->flags & CSS_ONLINE)) + return; + + id = css->id; if (id == wbc->wb_id) { wbc->wb_bytes += bytes; @@ -743,7 +753,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page, else wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); } -EXPORT_SYMBOL_GPL(wbc_account_io); +EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner); /** * inode_congested - test whether an inode is congested diff --git a/fs/fs_context.c b/fs/fs_context.c index a47ccd5a4a78..103643c68e3f 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Provide a way to create a superblock configuration context within the kernel * that allows a superblock to be set up prior to mounting. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 3a1697c2f72f..460ea4206fa2 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Filesystem parameter parser. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/export.h> diff --git a/fs/fs_struct.c b/fs/fs_struct.c index be0250788b73..ca639ed967b7 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 3f6dfa989881..506c5e643f0d 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FSCACHE tristate "General filesystem local caching manager" diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index cdcb376ef8df..f78793f3d21e 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache cache handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL CACHE diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index c550512ce335..0ce39658a620 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* netfs cookie management * * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * See Documentation/filesystems/caching/netfs-api.txt for more information on * the netfs API. */ diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index aa46e48d8c75..09ed8795ad86 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Filesystem index definition * * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL CACHE diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c index 9a13e9e15b69..4e5beeaaf454 100644 --- a/fs/fscache/histogram.c +++ b/fs/fscache/histogram.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache latency histogram * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL THREAD diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index d6209022e965..9616af3768e1 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal definitions for FS-Cache * * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 30ad89db1efc..59c2494efda3 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* General filesystem local caching manager * * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL CACHE diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index c2f605483cc5..cce92216fa28 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache netfs (client) registration * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL COOKIE diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 43e6e28c164f..72ebfe578f40 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Global fscache object list maintainer and viewer * * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL COOKIE diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 6d9cb1719de5..cfeba839a0f2 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache object state machine handler * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * See Documentation/filesystems/caching/object.txt for a description of the * object state machine and the in-kernel representations. */ diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 8d265790374c..1a22a55f75a0 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache worker operation management routines * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * See Documentation/filesystems/caching/operations.txt */ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 111349f67d98..26af6fdf1538 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Cache page management and data I/O routines * * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL PAGE diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c index 49a8c90414bc..5523446e2952 100644 --- a/fs/fscache/proc.c +++ b/fs/fscache/proc.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache statistics viewing interface * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL OPERATION diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 00564a1dfd76..a5aa93ece8c5 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* FS-Cache statistics * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #define FSCACHE_DEBUG_LEVEL THREAD diff --git a/fs/fsopen.c b/fs/fsopen.c index 390172772f55..043ffa8dc263 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* Filesystem access-by-fd. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/fs_context.h> diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 76f09ce7e5b2..24fc5a5c1b97 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FUSE_FS tristate "FUSE (Filesystem in Userspace) support" select FS_POSIX_ACL diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index f7b807bc1027..9485019c2a14 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the FUSE filesystem. # diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 4b41df1d4642..bab7a0db81dd 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * CUSE: Character device in Userspace * * Copyright (C) 2008-2009 SUSE Linux Products GmbH * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org> * - * This file is released under the GPLv2. - * * CUSE enables character devices to be implemented from userland much * like FUSE allows filesystems. On initialization /dev/cuse is * created. By opening the file and replying to the CUSE_INIT request diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 24ea19cfe07e..ea8237513dfa 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1317,16 +1317,6 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, unsigned reqsize; unsigned int hash; - /* - * Require sane minimum read buffer - that has capacity for fixed part - * of any request header + negotated max_write room for data. If the - * requirement is not satisfied return EINVAL to the filesystem server - * to indicate that it is not following FUSE server/client contract. - * Don't dequeue / abort any request. - */ - if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write)) - return -EINVAL; - restart: spin_lock(&fiq->waitq.lock); err = -EAGAIN; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3959f08279e6..5ae2828beb00 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1377,10 +1377,17 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, if (err && !nbytes) break; - if (write) + if (write) { + if (!capable(CAP_FSETID)) { + struct fuse_write_in *inarg; + + inarg = &req->misc.write.in; + inarg->write_flags |= FUSE_WRITE_KILL_PRIV; + } nres = fuse_send_write(req, io, pos, nbytes, owner); - else + } else { nres = fuse_send_read(req, io, pos, nbytes, owner); + } if (!io->async) fuse_release_user_pages(req, io->should_dirty); @@ -3014,6 +3021,16 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return ret; } +static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) +{ + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + + if (!err) + fuse_sync_writes(inode); + + return err; +} + static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { @@ -3042,12 +3059,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, inode_lock(inode); if (mode & FALLOC_FL_PUNCH_HOLE) { loff_t endbyte = offset + length - 1; - err = filemap_write_and_wait_range(inode->i_mapping, - offset, endbyte); + + err = fuse_writeback_range(inode, offset, endbyte); if (err) goto out; - - fuse_sync_writes(inode); } } @@ -3055,7 +3070,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, offset + length > i_size_read(inode)) { err = inode_newsize_ok(inode, offset + length); if (err) - return err; + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE)) @@ -3097,12 +3112,13 @@ out: return err; } -static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) +static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) { struct fuse_file *ff_in = file_in->private_data; struct fuse_file *ff_out = file_out->private_data; + struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); struct fuse_inode *fi_out = get_fuse_inode(inode_out); struct fuse_conn *fc = ff_in->fc; @@ -3126,15 +3142,27 @@ static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in, if (fc->no_copy_file_range) return -EOPNOTSUPP; + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; + + if (fc->writeback_cache) { + inode_lock(inode_in); + err = fuse_writeback_range(inode_in, pos_in, pos_in + len); + inode_unlock(inode_in); + if (err) + return err; + } + inode_lock(inode_out); + err = file_modified(file_out); + if (err) + goto out; + if (fc->writeback_cache) { - err = filemap_write_and_wait_range(inode_out->i_mapping, - pos_out, pos_out + len); + err = fuse_writeback_range(inode_out, pos_out, pos_out + len); if (err) goto out; - - fuse_sync_writes(inode_out); } if (is_unstable) @@ -3169,10 +3197,26 @@ out: clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); inode_unlock(inode_out); + file_accessed(file_in); return err; } +static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, + size_t len, unsigned int flags) +{ + ssize_t ret; + + ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, + len, flags); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_file, src_off, dst_file, + dst_off, len, flags); + return ret; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read_iter = fuse_file_read_iter, diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 6a1e499543f5..03c966840422 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config GFS2_FS tristate "GFS2 file system support" select FS_POSIX_ACL diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index af5f87a493d9..09e6be8aa036 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/sched.h> diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index f674fdd22337..61353a1501c5 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __ACL_DOT_H__ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6210d4429d84..f42048cc5454 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/sched.h> @@ -85,15 +82,11 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, } /** - * gfs2_writepage_common - Common bits of writepage - * @page: The page to be written + * gfs2_writepage - Write page for writeback mappings + * @page: The page * @wbc: The writeback control - * - * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. */ - -static int gfs2_writepage_common(struct page *page, - struct writeback_control *wbc) +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); @@ -112,7 +105,9 @@ static int gfs2_writepage_common(struct page *page, page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); goto out; } - return 1; + + return nobh_writepage(page, gfs2_get_block_noalloc, wbc); + redirty: redirty_page_for_writepage(wbc, page); out: @@ -120,24 +115,6 @@ out: return 0; } -/** - * gfs2_writepage - Write page for writeback mappings - * @page: The page - * @wbc: The writeback control - * - */ - -static int gfs2_writepage(struct page *page, struct writeback_control *wbc) -{ - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - return nobh_writepage(page, gfs2_get_block_noalloc, wbc); -} - /* This is the same as calling block_write_full_page, but it also * writes pages outside of i_size */ @@ -457,8 +434,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, * * Returns: errno */ - -int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) { struct buffer_head *dibh; u64 dsize = i_size_read(&ip->i_inode); @@ -521,7 +497,7 @@ static int __gfs2_readpage(void *file, struct page *page) error = mpage_readpage(page, gfs2_block_map); } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; return error; @@ -638,7 +614,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) ret = -EIO; return ret; } @@ -689,47 +665,6 @@ out: } /** - * gfs2_stuffed_write_end - Write end for stuffed files - * @inode: The inode - * @dibh: The buffer_head containing the on-disk inode - * @pos: The file position - * @copied: How much was actually copied by the VFS - * @page: The page - * - * This copies the data from the page into the inode block after - * the inode data structure itself. - * - * Returns: copied bytes or errno - */ -int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page) -{ - struct gfs2_inode *ip = GFS2_I(inode); - u64 to = pos + copied; - void *kaddr; - unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); - - BUG_ON(pos + copied > gfs2_max_stuffed_size(ip)); - - kaddr = kmap_atomic(page); - memcpy(buf + pos, kaddr + pos, copied); - flush_dcache_page(page); - kunmap_atomic(kaddr); - - WARN_ON(!PageUptodate(page)); - unlock_page(page); - put_page(page); - - if (copied) { - if (inode->i_size < to) - i_size_write(inode, to); - mark_inode_dirty(inode); - } - return copied; -} - -/** * jdata_set_page_dirty - Page dirtying function * @page: The page to dirty * @@ -762,7 +697,7 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) return 0; if (!gfs2_is_stuffed(ip)) - dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); + dblock = iomap_bmap(mapping, lblock, &gfs2_iomap_ops); gfs2_glock_dq_uninit(&i_gh); @@ -891,7 +826,7 @@ cannot_release: return 0; } -static const struct address_space_operations gfs2_writeback_aops = { +static const struct address_space_operations gfs2_aops = { .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, @@ -905,21 +840,6 @@ static const struct address_space_operations gfs2_writeback_aops = { .error_remove_page = generic_error_remove_page, }; -static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_writepage, - .writepages = gfs2_writepages, - .readpage = gfs2_readpage, - .readpages = gfs2_readpages, - .set_page_dirty = __set_page_dirty_buffers, - .bmap = gfs2_bmap, - .invalidatepage = gfs2_invalidatepage, - .releasepage = gfs2_releasepage, - .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; - static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, .writepages = gfs2_jdata_writepages, @@ -935,15 +855,8 @@ static const struct address_space_operations gfs2_jdata_aops = { void gfs2_set_aops(struct inode *inode) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - - if (gfs2_is_jdata(ip)) + if (gfs2_is_jdata(GFS2_I(inode))) inode->i_mapping->a_ops = &gfs2_jdata_aops; - else if (gfs2_is_writeback(sdp)) - inode->i_mapping->a_ops = &gfs2_writeback_aops; - else if (gfs2_is_ordered(sdp)) - inode->i_mapping->a_ops = &gfs2_ordered_aops; else - BUG(); + inode->i_mapping->a_ops = &gfs2_aops; } diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index fa8e5d0144dd..ff9877a68780 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -8,10 +8,6 @@ #include "incore.h" -extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page); -extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, - loff_t pos, unsigned copied, - struct page *page); extern void adjust_fs_space(struct inode *inode); extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, unsigned int from, unsigned int len); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index f42718dd292f..79581b9bdebb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/spinlock.h> @@ -598,7 +595,6 @@ enum alloc_state { * gfs2_iomap_alloc - Build a metadata tree of the requested height * @inode: The GFS2 inode * @iomap: The iomap structure - * @flags: iomap flags * @mp: The metapath, with proper height information calculated * * In this routine we may have to alloc: @@ -625,7 +621,7 @@ enum alloc_state { */ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, - unsigned flags, struct metapath *mp) + struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -994,9 +990,12 @@ static void gfs2_write_unlock(struct inode *inode) static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len, struct iomap *iomap) { + unsigned int blockmask = i_blocksize(inode) - 1; struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int blocks; - return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0); + blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; + return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); } static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, @@ -1088,7 +1087,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } if (iomap->type == IOMAP_HOLE) { - ret = gfs2_iomap_alloc(inode, iomap, flags, mp); + ret = gfs2_iomap_alloc(inode, iomap, mp); if (ret) { gfs2_trans_end(sdp); gfs2_inplace_release(ip); @@ -1182,6 +1181,8 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (ip->i_qadata && ip->i_qadata->qa_qd_num) gfs2_quota_unlock(ip); + if (iomap->flags & IOMAP_F_SIZE_CHANGED) + mark_inode_dirty(inode); gfs2_write_unlock(inode); out: @@ -1232,7 +1233,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, if (create) { ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp); if (!ret && iomap.type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp); + ret = gfs2_iomap_alloc(inode, &iomap, &mp); release_metapath(&mp); } else { ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); @@ -1462,7 +1463,7 @@ int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp); + ret = gfs2_iomap_alloc(inode, iomap, &mp); release_metapath(&mp); return ret; } @@ -1862,9 +1863,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) gfs2_assert_withdraw(sdp, bh); if (gfs2_assert_withdraw(sdp, prev_bnr != bh->b_blocknr)) { - printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, " - "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n", - sdp->sd_fsname, + fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u," + "s_h:%u, mp_h:%u\n", (unsigned long long)ip->i_no_addr, prev_bnr, ip->i_height, strip_h, mp_h); } diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 19a1fd772c61..b88fd45ab79f 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __BMAP_DOT_H__ diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 5173b98ca036..a7bb76e9a82b 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/spinlock.h> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index db9a05244a35..6f35d19eec25 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ /* @@ -753,7 +750,7 @@ static struct gfs2_dirent *gfs2_dirent_split_alloc(struct inode *inode, struct gfs2_dirent *dent; dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_find_offset, name, ptr); - if (!dent || IS_ERR(dent)) + if (IS_ERR_OR_NULL(dent)) return dent; return do_init_dirent(inode, dent, name, bh, (unsigned)(ptr - (void *)dent)); @@ -857,7 +854,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, return ERR_PTR(error); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); got_dent: - if (unlikely(dent == NULL || IS_ERR(dent))) { + if (IS_ERR_OR_NULL(dent)) { brelse(bh); bh = NULL; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index e1b309c24dab..0ac2dc8564df 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __DIR_DOT_H__ diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index a332f3cd925e..3f717285ee48 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/spinlock.h> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 58a768e59712..52fa1ef8400b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/slab.h> @@ -139,27 +136,36 @@ static struct { {FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA}, }; +static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags) +{ + int i; + u32 fsflags = 0; + + if (S_ISDIR(inode->i_mode)) + gfsflags &= ~GFS2_DIF_JDATA; + else + gfsflags &= ~GFS2_DIF_INHERIT_JDATA; + + for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) + if (gfsflags & fsflag_gfs2flag[i].gfsflag) + fsflags |= fsflag_gfs2flag[i].fsflag; + return fsflags; +} + static int gfs2_get_flags(struct file *filp, u32 __user *ptr) { struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; - int i, error; - u32 gfsflags, fsflags = 0; + int error; + u32 fsflags; gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); error = gfs2_glock_nq(&gh); if (error) goto out_uninit; - gfsflags = ip->i_diskflags; - if (S_ISDIR(inode->i_mode)) - gfsflags &= ~GFS2_DIF_JDATA; - else - gfsflags &= ~GFS2_DIF_INHERIT_JDATA; - for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) - if (gfsflags & fsflag_gfs2flag[i].gfsflag) - fsflags |= fsflag_gfs2flag[i].fsflag; + fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags); if (put_user(fsflags, ptr)) error = -EFAULT; @@ -203,9 +209,11 @@ void gfs2_set_inode_flags(struct inode *inode) * @filp: file pointer * @reqflags: The flags to set * @mask: Indicates which flags are valid + * @fsflags: The FS_* inode flags passed in * */ -static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) +static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask, + const u32 fsflags) { struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); @@ -213,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) struct buffer_head *bh; struct gfs2_holder gh; int error; - u32 new_flags, flags; + u32 new_flags, flags, oldflags; error = mnt_want_write_file(filp); if (error) @@ -223,6 +231,11 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) if (error) goto out_drop_write; + oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags); + error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags); + if (error) + goto out; + error = -EACCES; if (!inode_owner_or_capable(inode)) goto out; @@ -311,7 +324,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA); } - return do_gfs2_set_flags(filp, gfsflags, mask); + return do_gfs2_set_flags(filp, gfsflags, mask, fsflags); } static int gfs2_getlabel(struct file *filp, char __user *label) @@ -366,31 +379,30 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) } /** - * gfs2_allocate_page_backing - Use bmap to allocate blocks + * gfs2_allocate_page_backing - Allocate blocks for a write fault * @page: The (locked) page to allocate backing for * - * We try to allocate all the blocks required for the page in - * one go. This might fail for various reasons, so we keep - * trying until all the blocks to back this page are allocated. - * If some of the blocks are already allocated, thats ok too. + * We try to allocate all the blocks required for the page in one go. This + * might fail for various reasons, so we keep trying until all the blocks to + * back this page are allocated. If some of the blocks are already allocated, + * that is ok too. */ - static int gfs2_allocate_page_backing(struct page *page) { - struct inode *inode = page->mapping->host; - struct buffer_head bh; - unsigned long size = PAGE_SIZE; - u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits); + u64 pos = page_offset(page); + u64 size = PAGE_SIZE; do { - bh.b_state = 0; - bh.b_size = size; - gfs2_block_map(inode, lblock, &bh, 1); - if (!buffer_mapped(&bh)) + struct iomap iomap = { }; + + if (gfs2_iomap_get_alloc(page->mapping->host, pos, 1, &iomap)) return -EIO; - size -= bh.b_size; - lblock += (bh.b_size >> inode->i_blkbits); - } while(size > 0); + + iomap.length = min(iomap.length, size); + size -= iomap.length; + pos += iomap.length; + } while (size > 0); + return 0; } @@ -411,7 +423,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf) struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; - u64 pos = page->index << PAGE_SHIFT; + u64 pos = page_offset(page); unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; loff_t size; @@ -1169,7 +1181,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) { if (fl->fl_type == F_UNLCK) locks_lock_file_wait(file, fl); return -EIO; diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h index ef606e3a5cf4..ed78e5f20f41 100644 --- a/fs/gfs2/gfs2.h +++ b/fs/gfs2/gfs2.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __GFS2_DOT_H__ diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 15c605cfcfc8..e23fb8b7b020 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -140,7 +137,7 @@ void gfs2_glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - BUG_ON(test_bit(GLF_REVOKES, &gl->gl_flags)); + BUG_ON(atomic_read(&gl->gl_revokes)); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); @@ -547,7 +544,7 @@ __acquires(&gl->gl_lockref.lock) unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) && target != LM_ST_UNLOCKED) return; lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | @@ -584,7 +581,7 @@ __acquires(&gl->gl_lockref.lock) } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN, + GLOCK_BUG_ON(gl, !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } } else { /* lock_nolock */ @@ -684,7 +681,7 @@ static void delete_work_func(struct work_struct *work) goto out; inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); - if (inode && !IS_ERR(inode)) { + if (!IS_ERR_OR_NULL(inode)) { d_prune_aliases(inode); iput(inode); } @@ -1078,7 +1075,7 @@ trap_recursive: fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid)); fs_err(sdp, "lock type: %d req lock state : %d\n", gh->gh_gl->gl_name.ln_type, gh->gh_state); - gfs2_dump_glock(NULL, gl); + gfs2_dump_glock(NULL, gl, true); BUG(); } @@ -1097,7 +1094,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error = 0; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; if (test_bit(GLF_LRU, &gl->gl_flags)) @@ -1613,16 +1610,16 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) glock_hash_walk(thaw_glock, sdp); } -static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) { spin_lock(&gl->gl_lockref.lock); - gfs2_dump_glock(seq, gl); + gfs2_dump_glock(seq, gl, fsid); spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) { - dump_glock(NULL, gl); + dump_glock(NULL, gl, true); } /** @@ -1707,10 +1704,12 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) * dump_holder - print information about a glock holder * @seq: the seq_file struct * @gh: the glock holder + * @fs_id_buf: pointer to file system id (if requested) * */ -static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) +static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh, + const char *fs_id_buf) { struct task_struct *gh_owner = NULL; char flags_buf[32]; @@ -1718,8 +1717,8 @@ static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) rcu_read_lock(); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); - gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", - state2str(gh->gh_state), + gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n", + fs_id_buf, state2str(gh->gh_state), hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), gh->gh_error, gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, @@ -1769,6 +1768,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * gfs2_dump_glock - print information about a glock * @seq: The seq_file struct * @gl: the glock + * @fsid: If true, also dump the file system id * * The file format is as follows: * One line per object, capital letters are used to indicate objects @@ -1782,33 +1782,38 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * */ -void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; const struct gfs2_holder *gh; char gflags_buf[32]; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + memset(fs_id_buf, 0, sizeof(fs_id_buf)); + if (fsid && sdp) /* safety precaution */ + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); dtime = jiffies - gl->gl_demote_time; dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n", - state2str(gl->gl_state), + gfs2_print_dbg(seq, "%sG: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d " + "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, gflags2str(gflags_buf, gl), state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), - test_bit(GLF_REVOKES, &gl->gl_flags) ? 1 : 0, + atomic_read(&gl->gl_revokes), (int)gl->gl_lockref.count, gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) - dump_holder(seq, gh); + dump_holder(seq, gh, fs_id_buf); if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) - glops->go_dump(seq, gl); + glops->go_dump(seq, gl, fs_id_buf); } static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) @@ -2009,7 +2014,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - dump_glock(seq, iter_ptr); + dump_glock(seq, iter_ptr, false); return 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 936b3295839c..e4e0bed5257c 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __GLOCK_DOT_H__ @@ -202,8 +199,11 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, struct gfs2_holder *gh); extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl); -#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) +extern void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, + bool fsid); +#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { \ + gfs2_dump_glock(NULL, gl, true); \ + BUG(); } } while(0) extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); @@ -269,7 +269,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object) { spin_lock(&gl->gl_lockref.lock); if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL)) - gfs2_dump_glock(NULL, gl); + gfs2_dump_glock(NULL, gl, true); gl->gl_object = object; spin_unlock(&gl->gl_lockref.lock); } @@ -281,7 +281,7 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object) * * I'd love to similarly add this: * else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object)) - * gfs2_dump_glock(NULL, gl); + * gfs2_dump_glock(NULL, gl, true); * Unfortunately, that's not possible because as soon as gfs2_delete_inode * frees the block in the rgrp, another process can reassign it for an I_NEW * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget. diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 24ada3ccc525..ff213690e364 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/spinlock.h> @@ -464,10 +461,12 @@ static int inode_go_lock(struct gfs2_holder *gh) * inode_go_dump - print information about an inode * @seq: The iterator * @ip: the inode + * @fs_id_buf: file system id (may be empty) * */ -static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl) +static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf) { struct gfs2_inode *ip = gl->gl_object; struct inode *inode = &ip->i_inode; @@ -480,7 +479,8 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl) nrpages = inode->i_data.nrpages; xa_unlock_irq(&inode->i_data.i_pages); - gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu p:%lu\n", + gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu " + "p:%lu\n", fs_id_buf, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, IF2DT(ip->i_inode.i_mode), ip->i_flags, @@ -506,7 +506,8 @@ static void freeze_go_sync(struct gfs2_glock *gl) atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { - printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error); + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", + error); gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); @@ -539,7 +540,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) gfs2_consist(sdp); /* Initialize some head of the log stuff */ - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { sdp->sd_log_sequence = head.lh_sequence + 1; gfs2_log_pointers_init(sdp, head.lh_blkno); } diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 8ed1857c1a8d..2dd192e85618 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __GLOPS_DOT_H__ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b15755068593..7a993d7c022e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __INCORE_DOT_H__ @@ -243,7 +240,8 @@ struct gfs2_glock_operations { int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl); + void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); const int go_type; const unsigned long go_flags; @@ -345,7 +343,6 @@ enum { GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, GLF_INODE_CREATING = 16, /* Inode creation occurring */ - GLF_REVOKES = 17, /* Glock has revokes in queue */ }; struct gfs2_glock { @@ -375,6 +372,7 @@ struct gfs2_glock { struct list_head gl_lru; struct list_head gl_ail_list; atomic_t gl_ail_count; + atomic_t gl_revokes; struct delayed_work gl_work; union { /* For inode and iopen glocks only */ @@ -507,7 +505,6 @@ struct gfs2_trans { unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; - unsigned int tr_num_revoke_rm; struct list_head tr_list; struct list_head tr_databuf; @@ -612,7 +609,7 @@ struct gfs2_tune { enum { SDF_JOURNAL_CHECKED = 0, SDF_JOURNAL_LIVE = 1, - SDF_SHUTDOWN = 2, + SDF_WITHDRAWN = 2, SDF_NOBARRIERS = 3, SDF_NORECOVERY = 4, SDF_DEMOTE = 5, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 998051c4aea7..2e2a8a2fb51d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/slab.h> @@ -796,7 +793,7 @@ fail_free_acls: fail_gunlock: gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); - if (inode && !IS_ERR(inode)) { + if (!IS_ERR_OR_NULL(inode)) { clear_nlink(inode); if (!free_vfs_inode) mark_inode_dirty(inode); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 18d4af7417fa..580adbf0b5e1 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __INODE_DOT_H__ diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 31df26ed7854..4361804646d8 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright 2004-2011 Red Hat, Inc. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -31,9 +28,10 @@ * @delta is the difference between the current rtt sample and the * running average srtt. We add 1/8 of that to the srtt in order to * update the current srtt estimate. The variance estimate is a bit - * more complicated. We subtract the abs value of the @delta from - * the current variance estimate and add 1/4 of that to the running - * total. + * more complicated. We subtract the current variance estimate from + * the abs value of the @delta and add 1/4 of that to the running + * total. That's equivalent to 3/4 of the current variance + * estimate plus 1/4 of the abs of @delta. * * Note that the index points at the array entry containing the smoothed * mean value, and the variance is always in the following entry @@ -49,7 +47,7 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index, s64 delta = sample - s->stats[index]; s->stats[index] += (delta >> 3); index++; - s->stats[index] += ((abs(delta) - s->stats[index]) >> 2); + s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2; } /** diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index a2e1df488df0..58e237fba565 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/sched.h> @@ -606,10 +603,8 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) gfs2_remove_from_ail(bd); /* drops ref on bh */ bd->bd_bh = NULL; sdp->sd_log_num_revoke++; - if (!test_bit(GLF_REVOKES, &gl->gl_flags)) { - set_bit(GLF_REVOKES, &gl->gl_flags); + if (atomic_inc_return(&gl->gl_revokes) == 1) gfs2_glock_hold(gl); - } set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&bd->bd_list, &sdp->sd_log_revokes); } @@ -887,7 +882,6 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) old->tr_num_buf_rm += new->tr_num_buf_rm; old->tr_num_databuf_rm += new->tr_num_databuf_rm; old->tr_num_revoke += new->tr_num_revoke; - old->tr_num_revoke_rm += new->tr_num_revoke_rm; list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); @@ -909,7 +903,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) set_bit(TR_ATTACHED, &tr->tr_flags); } - sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; + sdp->sd_log_commited_revoke += tr->tr_num_revoke; reserved = calc_reserved(sdp); maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 7a34a3234266..2315fca47a2b 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __LOG_DOT_H__ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 33ab662c9aac..5b17979af539 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/sched.h> @@ -762,9 +759,27 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start, if (gfs2_meta_check(sdp, bh_ip)) error = -EIO; - else + else { + struct gfs2_meta_header *mh = + (struct gfs2_meta_header *)bh_ip->b_data; + + if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) { + struct gfs2_rgrpd *rgd; + + rgd = gfs2_blk2rgrpd(sdp, blkno, false); + if (rgd && rgd->rd_addr == blkno && + rgd->rd_bits && rgd->rd_bits->bi_bh) { + fs_info(sdp, "Replaying 0x%llx but we " + "already have a bh!\n", + (unsigned long long)blkno); + fs_info(sdp, "busy:%d, pinned:%d\n", + buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0, + buffer_pinned(rgd->rd_bits->bi_bh)); + gfs2_dump_glock(NULL, rgd->rd_gl, true); + } + } mark_buffer_dirty(bh_ip); - + } brelse(bh_log); brelse(bh_ip); @@ -860,34 +875,19 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct list_head *head = &sdp->sd_log_revokes; - struct gfs2_bufdata *bd, *tmp; - - /* - * Glocks can be referenced repeatedly on the revoke list, but the list - * only holds one reference. All glocks on the list will have the - * GLF_REVOKES flag set initially. - */ - - list_for_each_entry_safe(bd, tmp, head, bd_list) { - struct gfs2_glock *gl = bd->bd_gl; + struct gfs2_bufdata *bd; + struct gfs2_glock *gl; - if (test_bit(GLF_REVOKES, &gl->gl_flags)) { - /* Keep each glock on the list exactly once. */ - clear_bit(GLF_REVOKES, &gl->gl_flags); - continue; + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_list); + list_del_init(&bd->bd_list); + gl = bd->bd_gl; + if (atomic_dec_return(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + gfs2_glock_queue_put(gl); } - list_del(&bd->bd_list); - kmem_cache_free(gfs2_bufdata_cachep, bd); - } - list_for_each_entry_safe(bd, tmp, head, bd_list) { - struct gfs2_glock *gl = bd->bd_gl; - - list_del(&bd->bd_list); kmem_cache_free(gfs2_bufdata_cachep, bd); - clear_bit(GLF_LFLUSH, &gl->gl_flags); - gfs2_glock_queue_put(gl); } - /* the list is empty now */ } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index f195ffb435ac..9c059957a733 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __LOPS_DOT_H__ diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c700738de1f7..a1a295b739fb 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -59,6 +56,7 @@ static void gfs2_init_glock_once(void *foo) INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); atomic_set(&gl->gl_ail_count, 0); + atomic_set(&gl->gl_revokes, 0); } static void gfs2_init_gl_aspace_once(void *foo) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index ff86e1d4f8ff..662ef36c1874 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/sched.h> @@ -254,7 +251,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) { *bhp = NULL; return -EIO; } @@ -312,7 +309,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) { - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; wait_on_buffer(bh); @@ -323,7 +320,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) gfs2_io_error_bh_wd(sdp, bh); return -EIO; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return -EIO; return 0; diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index ffdf6aa3509d..eafb74e861c6 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __DIO_DOT_H__ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 46f6615eaf12..4a8e5a7310f0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -64,6 +61,13 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_complain_secs = 10; } +void free_sbd(struct gfs2_sbd *sdp) +{ + if (sdp->sd_lkstats) + free_percpu(sdp->sd_lkstats); + kfree(sdp); +} + static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; @@ -75,10 +79,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) sdp->sd_vfs = sb; sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats); - if (!sdp->sd_lkstats) { - kfree(sdp); - return NULL; - } + if (!sdp->sd_lkstats) + goto fail; sb->s_fs_info = sdp; set_bit(SDF_NOJOURNALID, &sdp->sd_flags); @@ -137,8 +139,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) mutex_init(&sdp->sd_freeze_mutex); return sdp; -} +fail: + free_sbd(sdp); + return NULL; +} /** * gfs2_check_sb - Check superblock @@ -571,7 +576,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) INIT_WORK(&jd->jd_work, gfs2_recover_func); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); - if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { + if (IS_ERR_OR_NULL(jd->jd_inode)) { if (!jd->jd_inode) error = -ENOENT; else @@ -999,7 +1004,7 @@ hostdata_error: void gfs2_lm_unmount(struct gfs2_sbd *sdp) { const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && + if (likely(!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) && lm->lm_unmount) lm->lm_unmount(sdp); } @@ -1089,8 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (error) { /* In this case, we haven't initialized sysfs, so we have to manually free the sdp. */ - free_percpu(sdp->sd_lkstats); - kfree(sdp); + free_sbd(sdp); sb->s_fs_info = NULL; return error; } @@ -1193,7 +1197,6 @@ fail_lm: gfs2_lm_unmount(sdp); fail_debug: gfs2_delete_debugfs_file(sdp); - free_percpu(sdp->sd_lkstats); /* gfs2_sys_fs_del must be the last thing we do, since it causes * sysfs to call function gfs2_sbd_release, which frees sdp. */ gfs2_sys_fs_del(sdp); @@ -1373,7 +1376,6 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); - free_percpu(sdp->sd_lkstats); kill_block_super(sb); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2ae5a109eea7..69c4b77f127b 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ /* @@ -1478,7 +1475,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) { if (error == 0 || error == -EROFS) return; - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); sdp->sd_log_error = error; wake_up(&sdp->sd_logd_waitq); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 836f29480be6..765627d9a91e 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __QUOTA_DOT_H__ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 389b3ef77e20..c529f8749a89 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/module.h> @@ -391,7 +388,8 @@ void gfs2_recover_func(struct work_struct *work) } t_tlck = ktime_get(); - fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); + fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n", + jd->jd_jid, head.lh_tail, head.lh_blkno); for (pass = 0; pass < 2; pass++) { lops_before_scan(jd, &head, pass); diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 1831a1974c8c..0d30f8e804f4 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __RECOVERY_DOT_H__ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 15d6e32de55f..49ac0a5e74ea 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -613,11 +610,12 @@ int gfs2_rsqa_alloc(struct gfs2_inode *ip) return gfs2_qa_alloc(ip); } -static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, + const char *fs_id_buf) { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); - gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf, (unsigned long long)ip->i_no_addr, (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), rs->rs_rbm.offset, rs->rs_free); @@ -1114,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + struct gfs2_sbd *sdp = rgd->rd_sbd; int valid = 1; if (rgl->rl_flags != str->rg_flags) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u", - (unsigned long long)rgd->rd_addr, + fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u", + (unsigned long long)rgd->rd_addr, be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags)); valid = 0; } if (rgl->rl_free != str->rg_free) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); + fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); valid = 0; } if (rgl->rl_dinodes != str->rg_dinodes) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_dinodes), - be32_to_cpu(str->rg_dinodes)); + fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_dinodes), + be32_to_cpu(str->rg_dinodes)); valid = 0; } if (rgl->rl_igeneration != str->rg_igeneration) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch " - "%llu/%llu", (unsigned long long)rgd->rd_addr, - (unsigned long long)be64_to_cpu(rgl->rl_igeneration), - (unsigned long long)be64_to_cpu(str->rg_igeneration)); + fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu", + (unsigned long long)rgd->rd_addr, + (unsigned long long)be64_to_cpu(rgl->rl_igeneration), + (unsigned long long)be64_to_cpu(str->rg_igeneration)); valid = 0; } return valid; @@ -2249,10 +2248,12 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, * gfs2_rgrp_dump - print out an rgrp * @seq: The iterator * @gl: The glock in question + * @fs_id_buf: pointer to file system id (if requested) * */ -void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf) { struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; @@ -2260,14 +2261,15 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) if (rgd == NULL) return; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; - gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n", + gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf, be32_to_cpu(rgl->rl_flags), be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); @@ -2275,7 +2277,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); - dump_rs(seq, trs); + dump_rs(seq, trs, fs_id_buf); } spin_unlock(&rgd->rd_rsspin); } @@ -2283,10 +2285,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", (unsigned long long)rgd->rd_addr); fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); - gfs2_rgrp_dump(NULL, rgd->rd_gl); + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); + gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); rgd->rd_flags |= GFS2_RDF_ERROR; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 499079a9dbbe..c14a673ae36f 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __RGRP_DOT_H__ @@ -72,7 +69,8 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl); +extern void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl, + const char *fs_id_buf); extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index fbf6b1fd330b..0acc5834f653 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -397,6 +394,7 @@ static int init_threads(struct gfs2_sbd *sdp) fail: kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; return error; } @@ -454,8 +452,12 @@ fail: freeze_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&freeze_gh); fail_threads: - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); + if (sdp->sd_quotad_process) + kthread_stop(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + if (sdp->sd_logd_process) + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; return error; } @@ -803,7 +805,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) if (!(flags & I_DIRTY_INODE)) return; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); @@ -852,12 +854,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, &freeze_gh); - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + if (error && !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return error; flush_workqueue(gfs2_delete_workqueue); - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); + if (sdp->sd_quotad_process) + kthread_stop(sdp->sd_quotad_process); + sdp->sd_quotad_process = NULL; + if (sdp->sd_logd_process) + kthread_stop(sdp->sd_logd_process); + sdp->sd_logd_process = NULL; gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); @@ -972,14 +978,14 @@ void gfs2_freeze_func(struct work_struct *work) error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, &freeze_gh); if (error) { - printk(KERN_INFO "GFS2: couldn't get freeze lock : %d\n", error); + fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error); gfs2_assert_withdraw(sdp, 0); } else { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); error = thaw_super(sb); if (error) { - printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n", - error); + fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", + error); gfs2_assert_withdraw(sdp, 0); } if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) @@ -1007,7 +1013,7 @@ static int gfs2_freeze(struct super_block *sb) if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) goto out; - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + if (test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { error = -EINVAL; goto out; } @@ -1017,20 +1023,14 @@ static int gfs2_freeze(struct super_block *sb) if (!error) break; - switch (error) { - case -EBUSY: + if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); - break; - - default: + else fs_err(sdp, "error freezing FS: %d\n", error); - break; - } fs_err(sdp, "retrying...\n"); msleep(1000); } - error = 0; set_bit(SDF_FS_FROZEN, &sdp->sd_flags); out: mutex_unlock(&sdp->sd_freeze_mutex); @@ -1276,8 +1276,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) error = gfs2_make_fs_ro(sdp); else error = gfs2_make_fs_rw(sdp); - if (error) - return error; } sdp->sd_args = args; @@ -1303,7 +1301,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_unlock(>->gt_spin); gfs2_online_uevent(sdp); - return 0; + return error; } /** @@ -1477,7 +1475,7 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); truncate_inode_pages(&inode->i_data, 0); - if (!test_bit(GLF_REVOKES, &gl->gl_flags)) { + if (atomic_read(&gl->gl_revokes) == 0) { clear_bit(GLF_LFLUSH, &gl->gl_flags); clear_bit(GLF_DIRTY, &gl->gl_flags); } diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 73c97dccae21..9d49eaadb9d9 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __SUPER_DOT_H__ @@ -47,6 +44,8 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, extern int gfs2_statfs_sync(struct super_block *sb, int type); extern void gfs2_freeze_func(struct work_struct *work); +extern void free_sbd(struct gfs2_sbd *sdp); + extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; extern const struct export_operations gfs2_export_ops; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 08e4996adc23..dd15b8e4af2c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -121,7 +118,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) { - unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags); + unsigned int b = test_bit(SDF_WITHDRAWN, &sdp->sd_flags); return snprintf(buf, PAGE_SIZE, "%u\n", b); } @@ -299,17 +296,18 @@ static struct attribute *gfs2_attrs[] = { &gfs2_attr_demote_rq.attr, NULL, }; +ATTRIBUTE_GROUPS(gfs2); static void gfs2_sbd_release(struct kobject *kobj) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - kfree(sdp); + free_sbd(sdp); } static struct kobj_type gfs2_ktype = { .release = gfs2_sbd_release, - .default_attrs = gfs2_attrs, + .default_groups = gfs2_groups, .sysfs_ops = &gfs2_attr_ops, }; @@ -682,7 +680,6 @@ fail_lock_module: fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_reg: - free_percpu(sdp->sd_lkstats); fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); sb->s_fs_info = NULL; diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h index 79182d6ad6ac..f8dacf20e1a4 100644 --- a/fs/gfs2/sys.h +++ b/fs/gfs2/sys.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __SYS_DOT_H__ diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index d328da7cde36..35e3059255fe 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -80,10 +77,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + tr->tr_num_revoke); } void gfs2_trans_end(struct gfs2_sbd *sdp) @@ -266,7 +263,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; kmem_cache_free(gfs2_bufdata_cachep, bd); - tr->tr_num_revoke_rm++; + tr->tr_num_revoke--; if (--n == 0) break; } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 1e39f056ccb7..6071334de035 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __TRANS_DOT_H__ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 0a814ccac41d..83f6c582773a 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -44,7 +41,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) struct va_format vaf; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && - test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return 0; if (fmt) { @@ -181,9 +178,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, const char *function, char *file, unsigned int line) { struct gfs2_sbd *sdp = rgd->rd_sbd; + char fs_id_buf[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; int rv; - gfs2_rgrp_dump(NULL, rgd->rd_gl); + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); + gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf); rv = gfs2_lm_withdraw(sdp, "fatal: filesystem consistency error\n" " RG = %llu\n" @@ -259,7 +258,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line, bool withdraw) { - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + if (!test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) fs_err(sdp, "fatal: I/O error\n" " block = %llu\n" diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 9278fecba632..4b68b2c1fe56 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __UTIL_DOT_H__ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 675e704830df..bbe593d16bea 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #include <linux/slab.h> diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index 2d887c88eb49..2aed9d7d483d 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #ifndef __EATTR_DOT_H__ diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig index 998e3a6decf3..44f6e89bcb75 100644 --- a/fs/hfs/Kconfig +++ b/fs/hfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config HFS_FS tristate "Apple Macintosh file system support" depends on BLOCK diff --git a/fs/hfs/Makefile b/fs/hfs/Makefile index c41f5a85f42d..b65459bf3dc4 100644 --- a/fs/hfs/Makefile +++ b/fs/hfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux hfs filesystem routines. # diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig index a63371815aab..7d4229aecec0 100644 --- a/fs/hfsplus/Kconfig +++ b/fs/hfsplus/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config HFSPLUS_FS tristate "Apple Extended HFS file system support" depends on BLOCK diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 5e6502ef7415..ce15b9496b77 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -57,9 +57,8 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) return 0; } -static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) +static inline unsigned int hfsplus_getflags(struct inode *inode) { - struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags = 0; @@ -69,6 +68,13 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) flags |= FS_APPEND_FL; if (hip->userflags & HFSPLUS_FLG_NODUMP) flags |= FS_NODUMP_FL; + return flags; +} + +static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) +{ + struct inode *inode = file_inode(file); + unsigned int flags = hfsplus_getflags(inode); return put_user(flags, user_flags); } @@ -78,6 +84,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags, new_fl = 0; + unsigned int oldflags = hfsplus_getflags(inode); int err = 0; err = mnt_want_write_file(file); @@ -96,13 +103,9 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) inode_lock(inode); - if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || - inode->i_flags & (S_IMMUTABLE|S_APPEND)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto out_unlock_inode; - } - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto out_unlock_inode; /* don't silently ignore unsupported ext2 flags */ if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 0cc5feff76cd..2b9e5743105e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/hfsplus/super.c * diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index d5403b4004c9..bb0b27d88e50 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -407,7 +407,7 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len) int offset = 0; if (!is_known_namespace(xattr_name)) { - strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN); + memcpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN); offset += XATTR_MAC_OSX_PREFIX_LEN; len += XATTR_MAC_OSX_PREFIX_LEN; } diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig index 56bd15c5bf6c..56aa0336254a 100644 --- a/fs/hpfs/Kconfig +++ b/fs/hpfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config HPFS_FS tristate "OS/2 HPFS file system support" depends on BLOCK diff --git a/fs/hpfs/Makefile b/fs/hpfs/Makefile index 57b786fb9827..153c17382c19 100644 --- a/fs/hpfs/Makefile +++ b/fs/hpfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux hpfs filesystem routines. # diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index ed4264bca790..9db6d84f0d62 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/hpfs/super.c * diff --git a/fs/hugetlbfs/Makefile b/fs/hugetlbfs/Makefile index 6adf870c63c6..d876ecfbe6bb 100644 --- a/fs/hugetlbfs/Makefile +++ b/fs/hugetlbfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux ramfs routines. # diff --git a/fs/inode.c b/fs/inode.c index e9d18b2c3f91..0f1e3b563c47 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * (C) 1997 Linus Torvalds * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) @@ -361,7 +362,7 @@ EXPORT_SYMBOL(inc_nlink); static void __address_space_init_once(struct address_space *mapping) { - xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ); + xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); @@ -1898,6 +1899,26 @@ int file_update_time(struct file *file) } EXPORT_SYMBOL(file_update_time); +/* Caller must hold the file's inode lock */ +int file_modified(struct file *file) +{ + int err; + + /* + * Clear the security bits if the process is not being run by root. + * This keeps people from modifying setuid and setgid binaries. + */ + err = file_remove_privs(file); + if (err) + return err; + + if (unlikely(file->f_mode & FMODE_NOCMTIME)) + return 0; + + return file_update_time(file); +} +EXPORT_SYMBOL(file_modified); + int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) @@ -2169,3 +2190,89 @@ struct timespec64 current_time(struct inode *inode) return timespec64_trunc(now, inode->i_sb->s_time_gran); } EXPORT_SYMBOL(current_time); + +/* + * Generic function to check FS_IOC_SETFLAGS values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_setflags_prepare); + +/* + * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, + struct fsxattr *fa) +{ + /* + * Can't modify an immutable/append-only file unless we have + * appropriate permission. + */ + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() != &init_user_ns) { + if (old_fa->fsx_projid != fa->fsx_projid) + return -EINVAL; + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + /* Check extent size hints. */ + if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && + !S_ISDIR(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) && + !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + return -EINVAL; + + /* + * It is only valid to set the DAX flag on regular files and + * directories on filesystems. + */ + if ((fa->fsx_xflags & FS_XFLAG_DAX) && + !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + return -EINVAL; + + /* Extent size hints of zero turn off the flags. */ + if (fa->fsx_extsize == 0) + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + if (fa->fsx_cowextsize == 0) + fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_fssetxattr_check); diff --git a/fs/internal.h b/fs/internal.h index b089a489da1f..b9bad2d30cef 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* fs/ internal definitions * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ struct super_block; @@ -45,8 +41,6 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) extern void guard_bio_eod(int rw, struct bio *bio); extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, get_block_t *get_block, struct iomap *iomap); -void __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); /* * char_dev.c diff --git a/fs/io_uring.c b/fs/io_uring.c index 310f8d17c53e..e2a66e12fbc6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,6 +231,7 @@ struct io_ring_ctx { struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; + struct completion sqo_thread_started; struct { /* CQ ring */ @@ -322,6 +323,7 @@ struct io_kiocb { struct io_ring_ctx *ctx; struct list_head list; + struct list_head link_list; unsigned int flags; refcount_t refs; #define REQ_F_NOWAIT 1 /* must not punt to workers */ @@ -330,8 +332,10 @@ struct io_kiocb { #define REQ_F_SEQ_PREV 8 /* sequential with previous */ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */ +#define REQ_F_LINK 64 /* linked sqes */ +#define REQ_F_FAIL_LINK 128 /* fail rest of links */ u64 user_data; - u32 error; /* iopoll result from callback */ + u32 result; u32 sequence; struct work_struct work; @@ -395,7 +399,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) if (!ctx) return NULL; - if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, 0, GFP_KERNEL)) { + if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { kfree(ctx); return NULL; } @@ -403,6 +408,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ctx->flags = p->flags; init_waitqueue_head(&ctx->cq_wait); init_completion(&ctx->ctx_done); + init_completion(&ctx->sqo_thread_started); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) { @@ -579,10 +585,12 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, state->cur_req++; } + req->file = NULL; req->ctx = ctx; req->flags = 0; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); + req->result = 0; return req; out: io_ring_drop_ctx_refs(ctx, 1); @@ -598,7 +606,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) } } -static void io_free_req(struct io_kiocb *req) +static void __io_free_req(struct io_kiocb *req) { if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); @@ -606,6 +614,63 @@ static void io_free_req(struct io_kiocb *req) kmem_cache_free(req_cachep, req); } +static void io_req_link_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt; + + /* + * The list should never be empty when we are called here. But could + * potentially happen if the chain is messed up, check to be on the + * safe side. + */ + nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list); + if (nxt) { + list_del(&nxt->list); + if (!list_empty(&req->link_list)) { + INIT_LIST_HEAD(&nxt->link_list); + list_splice(&req->link_list, &nxt->link_list); + nxt->flags |= REQ_F_LINK; + } + + INIT_WORK(&nxt->work, io_sq_wq_submit_work); + queue_work(req->ctx->sqo_wq, &nxt->work); + } +} + +/* + * Called if REQ_F_LINK is set, and we fail the head request + */ +static void io_fail_links(struct io_kiocb *req) +{ + struct io_kiocb *link; + + while (!list_empty(&req->link_list)) { + link = list_first_entry(&req->link_list, struct io_kiocb, list); + list_del(&link->list); + + io_cqring_add_event(req->ctx, link->user_data, -ECANCELED); + __io_free_req(link); + } +} + +static void io_free_req(struct io_kiocb *req) +{ + /* + * If LINK is set, we have dependent requests in this chain. If we + * didn't fail this request, queue the first one up, moving any other + * dependencies to the next request. In case of failure, fail the rest + * of the chain. + */ + if (req->flags & REQ_F_LINK) { + if (req->flags & REQ_F_FAIL_LINK) + io_fail_links(req); + else + io_req_link_next(req); + } + + __io_free_req(req); +} + static void io_put_req(struct io_kiocb *req) { if (refcount_dec_and_test(&req->refs)) @@ -627,16 +692,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); - io_cqring_fill_event(ctx, req->user_data, req->error); + io_cqring_fill_event(ctx, req->user_data, req->result); (*nr_events)++; if (refcount_dec_and_test(&req->refs)) { /* If we're not using fixed files, we have to pair the * completion part with the file put. Use regular * completions for those, only batch free for fixed - * file. + * file and non-linked commands. */ - if (req->flags & REQ_F_FIXED_FILE) { + if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == + REQ_F_FIXED_FILE) { reqs[to_free++] = req; if (to_free == ARRAY_SIZE(reqs)) io_free_req_many(ctx, reqs, &to_free); @@ -775,6 +841,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, req->user_data, res); io_put_req(req); } @@ -785,7 +853,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); - req->error = res; + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; + req->result = res; if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; } @@ -928,7 +998,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, !kiocb->ki_filp->f_op->iopoll) return -EOPNOTSUPP; - req->error = 0; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; } else { @@ -997,15 +1066,12 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); - - /* don't drop a reference to these pages */ - iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } -static int io_import_iovec(struct io_ring_ctx *ctx, int rw, - const struct sqe_submit *s, struct iovec **iovec, - struct iov_iter *iter) +static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, + const struct sqe_submit *s, struct iovec **iovec, + struct iov_iter *iter) { const struct io_uring_sqe *sqe = s->sqe; void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -1023,7 +1089,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - int ret = io_import_fixed(ctx, rw, sqe, iter); + ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -1089,7 +1155,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t read_size, ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1102,16 +1168,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + read_size = ret; + if (req->flags & REQ_F_LINK) + req->result = read_size; + iov_count = iov_iter_count(&iter); ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); if (!ret) { ssize_t ret2; - /* Catch -EAGAIN return for forced non-blocking submission */ ret2 = call_read_iter(file, kiocb, &iter); + /* + * In case of a short read, punt to async. This can happen + * if we have data partially cached. Alternatively we can + * return the short read, in which case the application will + * need to issue another SQE and wait for it. That SQE will + * need async punt anyway, so it's more efficient to do it + * here. + */ + if (force_nonblock && ret2 > 0 && ret2 < read_size) + ret2 = -EAGAIN; + /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { io_rw_done(kiocb, ret2); } else { @@ -1136,7 +1216,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1149,9 +1229,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + if (req->flags & REQ_F_LINK) + req->result = ret; + iov_count = iov_iter_count(&iter); ret = -EAGAIN; @@ -1255,6 +1338,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; @@ -1299,10 +1384,69 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; + io_cqring_add_event(req->ctx, sqe->user_data, ret); + io_put_req(req); + return 0; +} + +#if defined(CONFIG_NET) +static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock, + long (*fn)(struct socket *, struct user_msghdr __user *, + unsigned int)) +{ + struct socket *sock; + int ret; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + + sock = sock_from_file(req->file, &ret); + if (sock) { + struct user_msghdr __user *msg; + unsigned flags; + + flags = READ_ONCE(sqe->msg_flags); + if (flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + else if (force_nonblock) + flags |= MSG_DONTWAIT; + + msg = (struct user_msghdr __user *) (unsigned long) + READ_ONCE(sqe->addr); + + ret = fn(sock, msg, flags); + if (force_nonblock && ret == -EAGAIN) + return ret; + } + io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } +#endif + +static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} + +static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} static void io_poll_remove_one(struct io_kiocb *req) { @@ -1551,9 +1695,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret, opcode; + req->user_data = READ_ONCE(s->sqe->user_data); + if (unlikely(s->index >= ctx->sq_entries)) return -EINVAL; - req->user_data = READ_ONCE(s->sqe->user_data); opcode = READ_ONCE(s->sqe->opcode); switch (opcode) { @@ -1588,6 +1733,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_SYNC_FILE_RANGE: ret = io_sync_file_range(req, s->sqe, force_nonblock); break; + case IORING_OP_SENDMSG: + ret = io_sendmsg(req, s->sqe, force_nonblock); + break; + case IORING_OP_RECVMSG: + ret = io_recvmsg(req, s->sqe, force_nonblock); + break; default: ret = -EINVAL; break; @@ -1597,7 +1748,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, return ret; if (ctx->flags & IORING_SETUP_IOPOLL) { - if (req->error == -EAGAIN) + if (req->result == -EAGAIN) return -EAGAIN; /* workqueue context doesn't hold uring_lock, grab it now */ @@ -1801,10 +1952,8 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, req->sequence = ctx->cached_sq_head - 1; } - if (!io_op_needs_file(s->sqe)) { - req->file = NULL; + if (!io_op_needs_file(s->sqe)) return 0; - } if (flags & IOSQE_FIXED_FILE) { if (unlikely(!ctx->user_files || @@ -1823,31 +1972,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, return 0; } -static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, - struct io_submit_state *state) +static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, + struct sqe_submit *s) { - struct io_kiocb *req; int ret; - /* enforce forwards compatibility on users */ - if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN))) - return -EINVAL; - - req = io_get_req(ctx, state); - if (unlikely(!req)) - return -EAGAIN; - - ret = io_req_set_file(ctx, s, state, req); - if (unlikely(ret)) - goto out; - - ret = io_req_defer(ctx, req, s->sqe); - if (ret) { - if (ret == -EIOCBQUEUED) - ret = 0; - return ret; - } - ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -1870,24 +1999,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, /* * Queued up for async execution, worker will release - * submit reference when the iocb is actually - * submitted. + * submit reference when the iocb is actually submitted. */ return 0; } } -out: /* drop submission reference */ io_put_req(req); /* and drop final reference, if we failed */ - if (ret) + if (ret) { + io_cqring_add_event(ctx, req->user_data, ret); + if (req->flags & REQ_F_LINK) + req->flags |= REQ_F_FAIL_LINK; io_put_req(req); + } return ret; } +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) + +static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, + struct io_submit_state *state, struct io_kiocb **link) +{ + struct io_uring_sqe *sqe_copy; + struct io_kiocb *req; + int ret; + + /* enforce forwards compatibility on users */ + if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) { + ret = -EINVAL; + goto err; + } + + req = io_get_req(ctx, state); + if (unlikely(!req)) { + ret = -EAGAIN; + goto err; + } + + ret = io_req_set_file(ctx, s, state, req); + if (unlikely(ret)) { +err_req: + io_free_req(req); +err: + io_cqring_add_event(ctx, s->sqe->user_data, ret); + return; + } + + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret != -EIOCBQUEUED) + goto err_req; + return; + } + + /* + * If we already have a head request, queue this one for async + * submittal once the head completes. If we don't have a head but + * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be + * submitted sync once the chain is complete. If none of those + * conditions are true (normal request), then just queue it. + */ + if (*link) { + struct io_kiocb *prev = *link; + + sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL); + if (!sqe_copy) { + ret = -EAGAIN; + goto err_req; + } + + s->sqe = sqe_copy; + memcpy(&req->submit, s, sizeof(*s)); + list_add_tail(&req->list, &prev->link_list); + } else if (s->sqe->flags & IOSQE_IO_LINK) { + req->flags |= REQ_F_LINK; + + memcpy(&req->submit, s, sizeof(*s)); + INIT_LIST_HEAD(&req->link_list); + *link = req; + } else { + io_queue_sqe(ctx, req, s); + } +} + /* * Batched submission is done, ensure local IO is flushed out. */ @@ -1970,7 +2168,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, unsigned int nr, bool has_user, bool mm_fault) { struct io_submit_state state, *statep = NULL; - int ret, i, submitted = 0; + struct io_kiocb *link = NULL; + bool prev_was_link = false; + int i, submitted = 0; if (nr > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, nr); @@ -1978,22 +2178,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, } for (i = 0; i < nr; i++) { + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; + if (unlikely(mm_fault)) { - ret = -EFAULT; + io_cqring_add_event(ctx, sqes[i].sqe->user_data, + -EFAULT); } else { sqes[i].has_user = has_user; sqes[i].needs_lock = true; sqes[i].needs_fixed_file = true; - ret = io_submit_sqe(ctx, &sqes[i], statep); - } - if (!ret) { + io_submit_sqe(ctx, &sqes[i], statep, &link); submitted++; - continue; } - - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); } + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(&state); @@ -2010,6 +2218,8 @@ static int io_sq_thread(void *data) unsigned inflight; unsigned long timeout; + complete(&ctx->sqo_thread_started); + old_fs = get_fs(); set_fs(USER_DS); @@ -2134,6 +2344,8 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; + struct io_kiocb *link = NULL; + bool prev_was_link = false; int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { @@ -2143,22 +2355,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; - int ret; if (!io_get_sqring(ctx, &s)) break; + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; + s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; submit++; - - ret = io_submit_sqe(ctx, &s, statep); - if (ret) - io_cqring_add_event(ctx, s.sqe->user_data, ret); + io_submit_sqe(ctx, &s, statep, &link); } io_commit_sqring(ctx); + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(statep); @@ -2180,7 +2400,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { struct io_cq_ring *ring = ctx->cq_ring; - sigset_t ksigmask, sigsaved; int ret; if (io_cqring_events(ring) >= min_events) @@ -2190,23 +2409,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, #ifdef CONFIG_COMPAT if (in_compat_syscall()) ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - &ksigmask, &sigsaved, sigsz); + sigsz); else #endif - ret = set_user_sigmask(sig, &ksigmask, - &sigsaved, sigsz); + ret = set_user_sigmask(sig, sigsz); if (ret) return ret; } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); + restore_saved_sigmask_unless(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR; - if (sig) - restore_user_sigmask(sig, &sigsaved); - return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } @@ -2243,6 +2459,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { + wait_for_completion(&ctx->sqo_thread_started); /* * The park is a bit of a work-around, without it we get * warning spews on shutdown with SQPOLL set and affinity @@ -2616,7 +2833,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ret = io_copy_iov(ctx, &iov, arg, i); if (ret) - break; + goto err; /* * Don't impose further limits on the size and buffer @@ -2777,8 +2994,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_eventfd_unregister(ctx); #if defined(CONFIG_UNIX) - if (ctx->ring_sock) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ sock_release(ctx->ring_sock); + } #endif io_mem_free(ctx->sq_ring); diff --git a/fs/iomap.c b/fs/iomap.c index 23ef63fd1669..3e7f16a05653 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -287,7 +287,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap_readpage_ctx *ctx = data; struct page *page = ctx->cur_page; struct iomap_page *iop = iomap_page_create(inode, page); - bool is_contig = false; + bool same_page = false, is_contig = false; loff_t orig_pos = pos; unsigned poff, plen; sector_t sector; @@ -315,10 +315,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * Try to merge into a previous segment if we can. */ sector = iomap_sector(iomap, pos); - if (ctx->bio && bio_end_sector(ctx->bio) == sector) { - if (__bio_try_merge_page(ctx->bio, page, plen, poff, true)) - goto done; + if (ctx->bio && bio_end_sector(ctx->bio) == sector) is_contig = true; + + if (is_contig && + __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) { + if (!same_page && iop) + atomic_inc(&iop->read_count); + goto done; } /* @@ -329,7 +333,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (iop) atomic_inc(&iop->read_count); - if (!ctx->bio || !is_contig || bio_full(ctx->bio)) { + if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -562,7 +566,7 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, { int ret; - ret = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + ret = migrate_page_move_mapping(mapping, newpage, page, 0); if (ret != MIGRATEPAGE_SUCCESS) return ret; @@ -773,6 +777,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page, struct iomap *iomap) { const struct iomap_page_ops *page_ops = iomap->page_ops; + loff_t old_size = inode->i_size; int ret; if (iomap->type == IOMAP_INLINE) { @@ -784,9 +789,21 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, ret = __iomap_write_end(inode, pos, len, copied, page, iomap); } - __generic_write_end(inode, pos, ret, page); + /* + * Update the in-memory inode size after copying the data into the page + * cache. It's up to the file system to write the updated size to disk, + * preferably after I/O completion so that no stale data is exposed. + */ + if (pos + ret > old_size) { + i_size_write(inode, pos + ret); + iomap->flags |= IOMAP_F_SIZE_CHANGED; + } + unlock_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, copied, page, iomap); + page_ops->page_done(inode, pos, ret, page, iomap); put_page(page); if (ret < len) @@ -1595,13 +1612,7 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { - struct bvec_iter_all iter_all; - struct bio_vec *bvec; - - bio_for_each_segment_all(bvec, bio, iter_all) - put_page(bvec->bv_page); - } + bio_release_pages(bio, false); bio_put(bio); } } diff --git a/fs/isofs/Kconfig b/fs/isofs/Kconfig index 8ab9878e3671..5e7419599f50 100644 --- a/fs/isofs/Kconfig +++ b/fs/isofs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config ISO9660_FS tristate "ISO 9660 CDROM file system support" help diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 10205ececc27..bc12ac7e2312 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright 2001 H. Peter Anvin - All Rights Reserved * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * * ----------------------------------------------------------------------- */ /* diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 603b052a3c94..9e30d8703735 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/isofs/inode.c * diff --git a/fs/isofs/zisofs.h b/fs/isofs/zisofs.h index 273795709155..70f1a218fc92 100644 --- a/fs/isofs/zisofs.h +++ b/fs/isofs/zisofs.h @@ -1,13 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* ----------------------------------------------------------------------- * * * Copyright 2001 H. Peter Anvin - All Rights Reserved * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * * ----------------------------------------------------------------------- */ /* diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 5a9f5534d57b..4ad2c67f93f1 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config JBD2 tristate select CRC32 diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile index 802a3413872a..126b4da6c7de 100644 --- a/fs/jbd2/Makefile +++ b/fs/jbd2/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux journaling routines. # diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index efd0ce9489ae..132fb92098c7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -184,17 +184,18 @@ static int journal_wait_on_commit_record(journal_t *journal, /* * write the filemap data using writepage() address_space_operations. * We don't do block allocation here even for delalloc. We don't - * use writepages() because with dealyed allocation we may be doing + * use writepages() because with delayed allocation we may be doing * block allocation in writepages(). */ -static int journal_submit_inode_data_buffers(struct address_space *mapping) +static int journal_submit_inode_data_buffers(struct address_space *mapping, + loff_t dirty_start, loff_t dirty_end) { int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = mapping->nrpages * 2, - .range_start = 0, - .range_end = i_size_read(mapping->host), + .range_start = dirty_start, + .range_end = dirty_end, }; ret = generic_writepages(mapping, &wbc); @@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WRITE_DATA)) continue; mapping = jinode->i_vfs_inode->i_mapping; @@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, * only allocated blocks here. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = journal_submit_inode_data_buffers(mapping); + err = journal_submit_inode_data_buffers(mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal, /* For locking, see the comment in journal_submit_data_buffers() */ spin_lock(&journal->j_list_lock); list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + if (!(jinode->i_flags & JI_WAIT_DATA)) continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = filemap_fdatawait_keep_errors( - jinode->i_vfs_inode->i_mapping); + err = filemap_fdatawait_range_keep_errors( + jinode->i_vfs_inode->i_mapping, dirty_start, + dirty_end); if (!ret) ret = err; spin_lock(&journal->j_list_lock); @@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, &jinode->i_transaction->t_inode_list); } else { jinode->i_transaction = NULL; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; } } spin_unlock(&journal->j_list_lock); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 43df0c943229..953990eb70a9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -66,9 +66,6 @@ EXPORT_SYMBOL(jbd2_journal_get_undo_access); EXPORT_SYMBOL(jbd2_journal_set_triggers); EXPORT_SYMBOL(jbd2_journal_dirty_metadata); EXPORT_SYMBOL(jbd2_journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif EXPORT_SYMBOL(jbd2_journal_flush); EXPORT_SYMBOL(jbd2_journal_revoke); @@ -94,6 +91,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); EXPORT_SYMBOL(jbd2_journal_inode_add_write); EXPORT_SYMBOL(jbd2_journal_inode_add_wait); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); +EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); @@ -203,7 +202,7 @@ loop: if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", + jbd_debug(1, "commit_sequence=%u, commit_request=%u\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { @@ -324,7 +323,7 @@ static void journal_kill_thread(journal_t *journal) * IO is in progress. do_get_write_access() handles this. * * The function returns a pointer to the buffer_head to be used for IO. - * + * * * Return value: * <0: Error @@ -500,7 +499,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) */ journal->j_commit_request = target; - jbd_debug(1, "JBD2: requesting commit %d/%d\n", + jbd_debug(1, "JBD2: requesting commit %u/%u\n", journal->j_commit_request, journal->j_commit_sequence); journal->j_running_transaction->t_requested = jiffies; @@ -513,7 +512,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", journal->j_commit_request, journal->j_commit_sequence, - target, journal->j_running_transaction ? + target, journal->j_running_transaction ? journal->j_running_transaction->t_tid : 0); return 0; } @@ -698,12 +697,12 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR - "%s: error: j_commit_request=%d, tid=%d\n", + "%s: error: j_commit_request=%u, tid=%u\n", __func__, journal->j_commit_request, tid); } #endif while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", + jbd_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", tid, journal->j_commit_sequence); read_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_commit); @@ -944,7 +943,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) trace_jbd2_update_log_tail(journal, tid, block, freed); jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %lu), " + "Cleaning journal tail from %u to %u (offset %lu), " "freeing %lu\n", journal->j_tail_sequence, tid, block, freed); @@ -1318,7 +1317,7 @@ static int journal_reset(journal_t *journal) */ if (sb->s_start == 0) { jbd_debug(1, "JBD2: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", + "(start %ld, seq %u, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_flags |= JBD2_FLUSHED; @@ -1453,7 +1452,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) return; } - jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", + jbd_debug(1, "JBD2: Marking journal as empty (seq %u)\n", journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); @@ -2574,6 +2573,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) jinode->i_next_transaction = NULL; jinode->i_vfs_inode = inode; jinode->i_flags = 0; + jinode->i_dirty_start = 0; + jinode->i_dirty_end = 0; INIT_LIST_HEAD(&jinode->i_list); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8ca4fddc705f..990e7b5062e7 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) * File inode in the inode list of the handle's transaction */ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, - unsigned long flags) + unsigned long flags, loff_t start_byte, loff_t end_byte) { transaction_t *transaction = handle->h_transaction; journal_t *journal; @@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); - /* - * First check whether inode isn't already on the transaction's - * lists without taking the lock. Note that this check is safe - * without the lock as we cannot race with somebody removing inode - * from the transaction. The reason is that we remove inode from the - * transaction only in journal_release_jbd_inode() and when we commit - * the transaction. We are guarded from the first case by holding - * a reference to the inode. We are safe against the second case - * because if jinode->i_transaction == transaction, commit code - * cannot touch the transaction because we hold reference to it, - * and if jinode->i_next_transaction == transaction, commit code - * will only file the inode where we want it. - */ - if ((jinode->i_transaction == transaction || - jinode->i_next_transaction == transaction) && - (jinode->i_flags & flags) == flags) - return 0; - spin_lock(&journal->j_list_lock); jinode->i_flags |= flags; + + if (jinode->i_dirty_end) { + jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); + jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); + } else { + jinode->i_dirty_start = start_byte; + jinode->i_dirty_end = end_byte; + } + /* Is inode already attached where we need it? */ if (jinode->i_transaction == transaction || jinode->i_next_transaction == transaction) @@ -2631,12 +2622,28 @@ done: int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) { return jbd2_journal_file_inode(handle, jinode, - JI_WRITE_DATA | JI_WAIT_DATA); + JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); } int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) { - return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, + LLONG_MAX); +} + +int jbd2_journal_inode_ranged_write(handle_t *handle, + struct jbd2_inode *jinode, loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, + JI_WRITE_DATA | JI_WAIT_DATA, start_byte, + start_byte + length - 1); +} + +int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, + loff_t start_byte, loff_t length) +{ + return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, + start_byte, start_byte + length - 1); } /* diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig index ad850c5bf2ca..7c96bc107218 100644 --- a/fs/jffs2/Kconfig +++ b/fs/jffs2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config JFFS2_FS tristate "Journalling Flash File System v2 (JFFS2) support" select CRC32 diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 7d8654a1472e..f8fb89b10227 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -109,9 +109,9 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) return ret; } -int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) +int jffs2_do_readpage_unlock(void *data, struct page *pg) { - int ret = jffs2_do_readpage_nolock(inode, pg); + int ret = jffs2_do_readpage_nolock(data, pg); unlock_page(pg); return ret; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 112d85849db1..8a20ddd25f2d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct page *pg; pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT, - (void *)jffs2_do_readpage_unlock, inode); + jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index a2dbbb3f4c74..bd3d5f0ddc34 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations; extern const struct inode_operations jffs2_file_inode_operations; extern const struct address_space_operations jffs2_file_address_operations; int jffs2_fsync(struct file *, loff_t, loff_t, int); -int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); +int jffs2_do_readpage_unlock(void *data, struct page *pg); /* ioctl.c */ long jffs2_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig index 851de78fdabb..22a273bd4648 100644 --- a/fs/jfs/Kconfig +++ b/fs/jfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config JFS_FS tristate "JFS filesystem support" select NLS diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index ebb299003a5b..92cc0ac2d1fc 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2002-2004 * Copyright (C) Andreas Gruenbacher, 2001 * Copyright (C) Linus Torvalds, 1991, 1992 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/sched.h> diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 36665fd37095..930d2701f206 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/mm.h> diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index f2b92b292abe..9486afcdac76 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index ba34dae8bd9f..10ee0ecca1a8 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -98,24 +98,16 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Lock against other parallel changes of flags */ inode_lock(inode); - oldflags = jfs_inode->mode2; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ - if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); - err = -EPERM; - goto setflags_out; - } + oldflags = jfs_map_ext2(jfs_inode->mode2 & JFS_FL_USER_VISIBLE, + 0); + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) { + inode_unlock(inode); + goto setflags_out; } flags = flags & JFS_FL_USER_MODIFIABLE; - flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + flags |= jfs_inode->mode2 & ~JFS_FL_USER_MODIFIABLE; jfs_inode->mode2 = flags; jfs_set_inode_flags(inode); diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 489f993b7b13..9f8f92dd6f84 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_ACL #define _H_JFS_ACL diff --git a/fs/jfs/jfs_btree.h b/fs/jfs/jfs_btree.h index 79c61805bd33..ce055ef50cd3 100644 --- a/fs/jfs/jfs_btree.h +++ b/fs/jfs/jfs_btree.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_BTREE #define _H_JFS_BTREE diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 35a5b2a81ae0..888cdd685a1e 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index 0d9e35da8462..48e2150c092e 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DEBUG #define _H_JFS_DEBUG diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 1682a87c00b2..5fa9fd594115 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DINODE #define _H_JFS_DINODE diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c index f76ff0a46444..575cb2ba74fc 100644 --- a/fs/jfs/jfs_discard.c +++ b/fs/jfs/jfs_discard.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) Tino Reichardt, 2012 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h index 40d1ee6081a0..05215fa27ebb 100644 --- a/fs/jfs/jfs_discard.h +++ b/fs/jfs/jfs_discard.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) Tino Reichardt, 2012 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DISCARD #define _H_JFS_DISCARD diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 49263e220dbc..caade185e568 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Tino Reichardt, 2012 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 562b9a7e4311..29891fad3f09 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DMAP #define _H_JFS_DMAP diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 52bae3f5c914..3acc954f7c04 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index fd4169e6e698..1758289647a0 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DTREE #define _H_JFS_DTREE diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 2ae7d59ab10a..f65bd6b35412 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_extent.h b/fs/jfs/jfs_extent.h index b567e12c52d3..dd635a8a0f8c 100644 --- a/fs/jfs/jfs_extent.h +++ b/fs/jfs/jfs_extent.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_EXTENT #define _H_JFS_EXTENT diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b67d64671bb4..1e899298f7f0 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2003 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_FILSYS #define _H_JFS_FILSYS diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 93e8c590ff5c..937ca07b58b1 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 610a0e9d8941..dd7409febe28 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_IMAP #define _H_JFS_IMAP diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 340eb8e4f716..a466ec41cfbb 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_INCORE #define _H_JFS_INCORE diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 4572b7cf183d..4cef170630db 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 7b0b3a40788f..70a0d12e427e 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2001 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_INODE #define _H_JFS_INODE diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index ecf04882265e..feb37dd9debf 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2001 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_LOCK #define _H_JFS_LOCK diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 4c77b808020b..9330eff210e0 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index 870fc22360e7..7fd125c8dd19 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_LOGMGR #define _H_JFS_LOGMGR diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index fa2c6824c7f2..a2f5338a5ea1 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index 8b0ee514eb84..4179f9df4deb 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_METAPAGE #define _H_JFS_METAPAGE diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index c9c1f16b93df..eb8b9e233d73 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index eb03de7fa925..93402c42070f 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2003 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_SUPERBLOCK #define _H_JFS_SUPERBLOCK diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 78789c5ed36b..c8ce7f1bc594 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index ab7288937019..ba71eb5ced56 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_TXNMGR #define _H_JFS_TXNMGR diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h index 8f602dcb51fa..3ff9f26bc3e6 100644 --- a/fs/jfs/jfs_types.h +++ b/fs/jfs/jfs_types.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_TYPES #define _H_JFS_TYPES diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 7971f37534a3..3e8b13e6aa01 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c index 0148e2e4d97a..0c1e9027245a 100644 --- a/fs/jfs/jfs_unicode.c +++ b/fs/jfs/jfs_unicode.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h index 8f0f02cb6ca6..9db62d047daa 100644 --- a/fs/jfs/jfs_unicode.h +++ b/fs/jfs/jfs_unicode.h @@ -1,20 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_UNICODE #define _H_JFS_UNICODE diff --git a/fs/jfs/jfs_uniupr.c b/fs/jfs/jfs_uniupr.c index cfe50666d312..d0b18c7befb8 100644 --- a/fs/jfs/jfs_uniupr.c +++ b/fs/jfs/jfs_uniupr.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index 561f6af46288..f0558b3348da 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef H_JFS_XATTR diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 2c200b5256a6..16ad920f6fb1 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * jfs_xtree.c: extent allocation descriptor B+-tree manager diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index 1e0987986d5f..5f51be8596b3 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) International Business Machines Corp., 2000-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_XTREE #define _H_JFS_XTREE diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index fa719a1553b6..7a55d14cc1af 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 7ddcb445a3d9..66acea9d878b 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 8f78fa374242..f4e10cb9f734 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 38320607993e..a040719aafc4 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) Christoph Hellwig, 2001-2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index a6797986b625..db41e7803163 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Copyright (C) Christoph Hellwig, 2002 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/capability.h> diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig index 397b5f7a7a16..e7f09105f6e9 100644 --- a/fs/kernfs/Kconfig +++ b/fs/kernfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # KERNFS should be selected by its users # diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile index 674337c76673..4ca54ff54c98 100644 --- a/fs/kernfs/Makefile +++ b/fs/kernfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the kernfs pseudo filesystem # diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 016ba88f7335..a387534c9577 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/dir.c - kernfs directory implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/sched.h> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 553ce0a92b05..e8c792b49616 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/file.c - kernfs file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/fs.h> diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index f89a0f13840e..f3f3984cce80 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/inode.c - kernfs inode implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/pagemap.h> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 3c437990f39a..02ce570a9a3c 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * fs/kernfs/kernfs-internal.h - kernfs internal header file * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de> - * - * This file is released under the GPLv2. */ #ifndef __KERNFS_INTERNAL_H diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 9a4646eecb71..6c12fac2c287 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/mount.c - kernfs mount implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/fs.h> diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index eb46c3a16e2f..5432883d819f 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/symlink.c - kernfs symlink implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> - * - * This file is released under the GPLv2. */ #include <linux/fs.h> diff --git a/fs/libfs.c b/fs/libfs.c index c9463dc6a5d4..c9b2850c0f7c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/libfs.c * Library for filesystems writers. diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 5fb4f8910aab..a5bb3f721a9d 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/clntlock.c * diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index d9c32d1a20c0..b11f2afa84f1 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/clntproc.c * @@ -46,13 +47,14 @@ void nlmclnt_next_cookie(struct nlm_cookie *c) c->len=4; } -static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) +static struct nlm_lockowner * +nlmclnt_get_lockowner(struct nlm_lockowner *lockowner) { refcount_inc(&lockowner->count); return lockowner; } -static void nlm_put_lockowner(struct nlm_lockowner *lockowner) +static void nlmclnt_put_lockowner(struct nlm_lockowner *lockowner) { if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) return; @@ -81,28 +83,28 @@ static inline uint32_t __nlm_alloc_pid(struct nlm_host *host) return res; } -static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *__nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *lockowner; list_for_each_entry(lockowner, &host->h_lockowners, list) { if (lockowner->owner != owner) continue; - return nlm_get_lockowner(lockowner); + return nlmclnt_get_lockowner(lockowner); } return NULL; } -static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +static struct nlm_lockowner *nlmclnt_find_lockowner(struct nlm_host *host, fl_owner_t owner) { struct nlm_lockowner *res, *new = NULL; spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL) { spin_unlock(&host->h_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); spin_lock(&host->h_lock); - res = __nlm_find_lockowner(host, owner); + res = __nlmclnt_find_lockowner(host, owner); if (res == NULL && new != NULL) { res = new; refcount_set(&new->count, 1); @@ -456,7 +458,7 @@ static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) { spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; - new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); + new->fl_u.nfs_fl.owner = nlmclnt_get_lockowner(fl->fl_u.nfs_fl.owner); list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); } @@ -466,7 +468,7 @@ static void nlmclnt_locks_release_private(struct file_lock *fl) spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); list_del(&fl->fl_u.nfs_fl.list); spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); - nlm_put_lockowner(fl->fl_u.nfs_fl.owner); + nlmclnt_put_lockowner(fl->fl_u.nfs_fl.owner); } static const struct file_lock_operations nlmclnt_lock_ops = { @@ -477,7 +479,7 @@ static const struct file_lock_operations nlmclnt_lock_ops = { static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host) { fl->fl_u.nfs_fl.state = 0; - fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + fl->fl_u.nfs_fl.owner = nlmclnt_find_lockowner(host, fl->fl_owner); INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list); fl->fl_ops = &nlmclnt_lock_ops; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 3056f3a0c270..1a639e34847d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/svc.c * diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1bddf70d9656..e4d3f783e06a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -46,8 +46,14 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -94,6 +100,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -142,6 +149,7 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -178,6 +186,7 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -217,6 +226,7 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -365,6 +375,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp) resp->status = nlmsvc_share_file(host, file, argp); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -399,6 +410,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp) resp->status = nlmsvc_unshare_file(host, file, argp); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ea719cdd6a36..61d3cc2283dc 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -332,6 +332,93 @@ restart: mutex_unlock(&file->f_mutex); } +static struct nlm_lockowner * +nlmsvc_get_lockowner(struct nlm_lockowner *lockowner) +{ + refcount_inc(&lockowner->count); + return lockowner; +} + +static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner) +{ + if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) + return; + list_del(&lockowner->list); + spin_unlock(&lockowner->host->h_lock); + nlmsvc_release_host(lockowner->host); + kfree(lockowner); +} + +static struct nlm_lockowner *__nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *lockowner; + list_for_each_entry(lockowner, &host->h_lockowners, list) { + if (lockowner->pid != pid) + continue; + return nlmsvc_get_lockowner(lockowner); + } + return NULL; +} + +static struct nlm_lockowner *nlmsvc_find_lockowner(struct nlm_host *host, pid_t pid) +{ + struct nlm_lockowner *res, *new = NULL; + + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + + if (res == NULL) { + spin_unlock(&host->h_lock); + new = kmalloc(sizeof(*res), GFP_KERNEL); + spin_lock(&host->h_lock); + res = __nlmsvc_find_lockowner(host, pid); + if (res == NULL && new != NULL) { + res = new; + /* fs/locks.c will manage the refcount through lock_ops */ + refcount_set(&new->count, 1); + new->pid = pid; + new->host = nlm_get_host(host); + list_add(&new->list, &host->h_lockowners); + new = NULL; + } + } + + spin_unlock(&host->h_lock); + kfree(new); + return res; +} + +void +nlmsvc_release_lockowner(struct nlm_lock *lock) +{ + if (lock->fl.fl_owner) + nlmsvc_put_lockowner(lock->fl.fl_owner); +} + +static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner; + new->fl_owner = nlmsvc_get_lockowner(nlm_lo); +} + +static void nlmsvc_locks_release_private(struct file_lock *fl) +{ + nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner); +} + +static const struct file_lock_operations nlmsvc_lock_ops = { + .fl_copy_lock = nlmsvc_locks_copy_lock, + .fl_release_private = nlmsvc_locks_release_private, +}; + +void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host, + pid_t pid) +{ + fl->fl_owner = nlmsvc_find_lockowner(host, pid); + if (fl->fl_owner != NULL) + fl->fl_ops = &nlmsvc_lock_ops; +} + /* * Initialize arguments for GRANTED call. The nlm_rqst structure * has been cleared already. @@ -345,7 +432,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock) /* set default data area */ call->a_args.lock.oh.data = call->a_owner; - call->a_args.lock.svid = lock->fl.fl_pid; + call->a_args.lock.svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); @@ -509,6 +596,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, { int error; __be32 ret; + struct nlm_lockowner *test_owner; dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", locks_inode(file->f_file)->i_sb->s_id, @@ -522,6 +610,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + /* If there's a conflicting lock, remember to clean up the test lock */ + test_owner = (struct nlm_lockowner *)lock->fl.fl_owner; + error = vfs_test_lock(file->f_file, &lock->fl); if (error) { /* We can't currently deal with deferred test requests */ @@ -543,11 +634,16 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, conflock->caller = "somehost"; /* FIXME */ conflock->len = strlen(conflock->caller); conflock->oh.len = 0; /* don't return OH info */ - conflock->svid = lock->fl.fl_pid; + conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid; conflock->fl.fl_type = lock->fl.fl_type; conflock->fl.fl_start = lock->fl.fl_start; conflock->fl.fl_end = lock->fl.fl_end; locks_release_private(&lock->fl); + + /* Clean up the test lock */ + lock->fl.fl_owner = NULL; + nlmsvc_put_lockowner(test_owner); + ret = nlm_lck_denied; out: return ret; @@ -692,25 +788,7 @@ nlmsvc_notify_blocked(struct file_lock *fl) printk(KERN_WARNING "lockd: notification for unknown block!\n"); } -static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) -{ - return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; -} - -/* - * Since NLM uses two "keys" for tracking locks, we need to hash them down - * to one for the blocked_hash. Here, we're just xor'ing the host address - * with the pid in order to create a key value for picking a hash bucket. - */ -static unsigned long -nlmsvc_owner_key(struct file_lock *fl) -{ - return (unsigned long)fl->fl_owner ^ (unsigned long)fl->fl_pid; -} - const struct lock_manager_operations nlmsvc_lock_operations = { - .lm_compare_owner = nlmsvc_same_owner, - .lm_owner_key = nlmsvc_owner_key, .lm_notify = nlmsvc_notify_blocked, .lm_grant = nlmsvc_grant_deferred, }; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index ea77c66d3cc3..d0bb7a6bf005 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -76,8 +76,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file; - lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_pid = current->tgid; lock->fl.fl_lmops = &nlmsvc_lock_operations; + nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); + if (!lock->fl.fl_owner) { + /* lockowner allocation has failed */ + nlmsvc_release_host(host); + return nlm_lck_denied_nolocks; + } } return 0; @@ -125,6 +131,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) dprintk("lockd: TEST status %d vers %d\n", ntohl(resp->status), rqstp->rq_vers); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -173,6 +180,7 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rc; @@ -210,6 +218,7 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock)); dprintk("lockd: CANCEL status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -250,6 +259,7 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock)); dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -408,6 +418,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_share_file(host, file, argp)); dprintk("lockd: SHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; @@ -442,6 +453,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp) resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status)); + nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); nlm_release_file(file); return rpc_success; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 899360ba3b84..028fc152da22 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/lockd/svcsubs.c * @@ -179,7 +180,7 @@ again: /* update current lock count */ file->f_locks++; - lockhost = (struct nlm_host *) fl->fl_owner; + lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host; if (match(lockhost, host)) { struct file_lock lock = *fl; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 9846f7e95282..982629f7b120 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -126,8 +126,6 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ start = ntohl(*p++); @@ -269,7 +267,6 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = current->tgid; if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 70154f376695..5fa9f48a9dba 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -118,8 +118,6 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) lock->svid = ntohl(*p++); locks_init_lock(fl); - fl->fl_owner = current->files; - fl->fl_pid = current->tgid; fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as anything else */ p = xdr_decode_hyper(p, &start); @@ -266,7 +264,6 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p) memset(lock, 0, sizeof(*lock)); locks_init_lock(&lock->fl); lock->svid = ~(u32) 0; - lock->fl.fl_pid = current->tgid; if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = xdr_decode_string_inplace(p, &lock->caller, diff --git a/fs/locks.c b/fs/locks.c index 8af49f89ac2f..686eae21daf6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/locks.c * @@ -657,9 +658,6 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) */ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { - if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) - return fl2->fl_lmops == fl1->fl_lmops && - fl1->fl_lmops->lm_compare_owner(fl1, fl2); return fl1->fl_owner == fl2->fl_owner; } @@ -700,8 +698,6 @@ static void locks_delete_global_locks(struct file_lock *fl) static unsigned long posix_owner_key(struct file_lock *fl) { - if (fl->fl_lmops && fl->fl_lmops->lm_owner_key) - return fl->fl_lmops->lm_owner_key(fl); return (unsigned long)fl->fl_owner; } @@ -1533,11 +1529,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { - if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) - return false; - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); + bool rc; + + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { + rc = false; + goto trace; + } + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) { + rc = false; + goto trace; + } + + rc = locks_conflict(breaker, lease); +trace: + trace_leases_conflict(rc, lease, breaker); + return rc; } static bool @@ -1752,10 +1758,10 @@ int fcntl_getlease(struct file *filp) } /** - * check_conflicting_open - see if the given dentry points to a file that has + * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. - * @dentry: dentry to check + * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * @@ -1763,30 +1769,42 @@ int fcntl_getlease(struct file *filp) * conflict with the lease we're trying to set. */ static int -check_conflicting_open(const struct dentry *dentry, const long arg, int flags) +check_conflicting_open(struct file *filp, const long arg, int flags) { - int ret = 0; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); + int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && inode_is_open_for_write(inode)) - return -EAGAIN; + if (arg == F_RDLCK) + return inode_is_open_for_write(inode) ? -EAGAIN : 0; + else if (arg != F_WRLCK) + return 0; - if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || - (atomic_read(&inode->i_count) > 1))) - ret = -EAGAIN; + /* + * Make sure that only read/write count is from lease requestor. + * Note that this will result in denying write leases when i_writecount + * is negative, which is what we want. (We shouldn't grant write leases + * on files open for execution.) + */ + if (filp->f_mode & FMODE_WRITE) + self_wcount = 1; + else if (filp->f_mode & FMODE_READ) + self_rcount = 1; - return ret; + if (atomic_read(&inode->i_writecount) != self_wcount || + atomic_read(&inode->i_readcount) != self_rcount) + return -EAGAIN; + + return 0; } static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv) { struct file_lock *fl, *my_fl = NULL, *lease; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1821,7 +1839,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) goto out; @@ -1878,7 +1896,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr * precedes these checks. */ smp_mb(); - error = check_conflicting_open(dentry, arg, lease->fl_flags); + error = check_conflicting_open(filp, arg, lease->fl_flags); if (error) { locks_unlink_lock_ctx(lease); goto out; diff --git a/fs/mbcache.c b/fs/mbcache.c index 081ccf0caee3..97c54d3a2227 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/list.h> diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig index bcd53a79156f..de2003974ff0 100644 --- a/fs/minix/Kconfig +++ b/fs/minix/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config MINIX_FS tristate "Minix file system support" depends on BLOCK diff --git a/fs/minix/Makefile b/fs/minix/Makefile index 3063015abfd0..a2d3ab58d187 100644 --- a/fs/minix/Makefile +++ b/fs/minix/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux minix filesystem routines. # diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 101200761f61..f96073f25432 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/minix/inode.c * diff --git a/fs/mpage.c b/fs/mpage.c index 436a85260394..a63620cdb73a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -647,7 +647,7 @@ alloc_new: * the confused fail path above (OOM) will be very confused when * it finds all bh marked clean (i.e. it will not write anything) */ - wbc_account_io(wbc, page, PAGE_SIZE); + wbc_account_cgroup_owner(wbc, page, PAGE_SIZE); length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio); diff --git a/fs/namei.c b/fs/namei.c index 20831c2fbb34..209c51a5226c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3883,6 +3883,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); + fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); @@ -3999,6 +4000,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!error) { dont_mount(dentry); detach_mounts(dentry); + fsnotify_unlink(dir, dentry); } } } diff --git a/fs/namespace.c b/fs/namespace.c index e6990f3d526d..f0d664adb9ba 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1,8 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/namespace.c * * (C) Copyright Al Viro 2000, 2001 - * Released under GPL v2. * * Based on code from fs/super.c, copyright Linus Torvalds and others. * Heavily rewritten. @@ -2106,6 +2106,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; commit_tree(child); } put_mountpoint(smp); @@ -2596,11 +2597,12 @@ static int do_move_mount(struct path *old_path, struct path *new_path) if (!check_mnt(p)) goto out; - /* The thing moved should be either ours or completely unattached. */ - if (attached && !check_mnt(old)) + /* The thing moved must be mounted... */ + if (!is_mounted(&old->mnt)) goto out; - if (!attached && !(ns && is_anon_ns(ns))) + /* ... and either ours or the root of anon namespace */ + if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; if (old->mnt.mnt_flags & MNT_LOCKED) @@ -3448,6 +3450,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, ns->root = mnt; ns->mounts = 1; list_add(&mnt->mnt_list, &ns->list); + mntget(newmount.mnt); /* Attach to an apparent O_PATH fd with a note that we need to unmount * it, not just simply put it. diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 69d02cf8cf37..295a7a21b774 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config NFS_FS tristate "NFS client support" depends on INET && FILE_LOCKING && MULTIUSER diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index c587e3c4c6a6..34cdeaecccf6 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,7 +8,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ - write.o namespace.o mount_clnt.o nfstrace.o export.o + write.o namespace.o mount_clnt.o nfstrace.o \ + export.o sysfs.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index 3ca14c36d08b..7668a1bfb5fa 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the pNFS block layout driver kernel module # diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 315967354954..f39924ba050b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -414,27 +414,39 @@ static __be32 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, const struct cb_sequenceargs * args) { + __be32 ret; + + ret = cpu_to_be32(NFS4ERR_BADSLOT); if (args->csa_slotid > tbl->server_highest_slotid) - return htonl(NFS4ERR_BADSLOT); + goto out_err; /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { + ret = cpu_to_be32(NFS4ERR_DELAY); if (nfs4_test_locked_slot(tbl, slot->slot_nr)) - return htonl(NFS4ERR_DELAY); + goto out_err; + /* Signal process_op to set this error on next op */ + ret = cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP); if (args->csa_cachethis == 0) - return htonl(NFS4ERR_RETRY_UNCACHED_REP); + goto out_err; /* Liar! We never allowed you to set csa_cachethis != 0 */ - return htonl(NFS4ERR_SEQ_FALSE_RETRY); + ret = cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY); + goto out_err; } /* Note: wraparound relies on seq_nr being of type u32 */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) - return htonl(NFS4_OK); - /* Misordered request */ - return htonl(NFS4ERR_SEQ_MISORDERED); + ret = cpu_to_be32(NFS4ERR_SEQ_MISORDERED); + if (args->csa_sequenceid != slot->seq_nr + 1) + goto out_err; + + return cpu_to_be32(NFS4_OK); + +out_err: + trace_nfs4_cb_seqid_err(args, ret); + return ret; } /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 3d04cb0b839e..30838304a0bf 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* client.c: NFS client sharing and management code * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ @@ -53,6 +49,7 @@ #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -179,6 +176,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; + clp->cl_nconnect = cl_init->nconnect; clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; @@ -196,7 +194,7 @@ error_0: EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) -void nfs_cleanup_cb_ident_idr(struct net *net) +static void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); @@ -218,7 +216,7 @@ static void pnfs_init_server(struct nfs_server *server) } #else -void nfs_cleanup_cb_ident_idr(struct net *net) +static void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -410,10 +408,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) clp = nfs_match_client(cl_init); if (clp) { spin_unlock(&nn->nfs_client_lock); - if (IS_ERR(clp)) - return clp; if (new) new->rpc_ops->free_client(new); + if (IS_ERR(clp)) + return clp; return nfs_found_client(cl_init, clp); } if (new) { @@ -497,6 +495,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, struct rpc_create_args args = { .net = clp->cl_net, .protocol = clp->cl_proto, + .nconnect = clp->cl_nconnect, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, .timeout = cl_init->timeparms, @@ -662,6 +661,7 @@ static int nfs_init_server(struct nfs_server *server, .net = data->net, .timeparms = &timeparms, .cred = server->cred, + .nconnect = data->nfs_server.nconnect, }; struct nfs_client *clp; int error; @@ -1076,6 +1076,18 @@ void nfs_clients_init(struct net *net) #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); + + nfs_netns_sysfs_setup(nn, net); +} + +void nfs_clients_exit(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + nfs_netns_sysfs_destroy(nn); + nfs_cleanup_cb_ident_idr(net); + WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); + WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 8b78274e3e56..0ff3facf81da 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/delegation.c * diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 47d445bec8c9..8d501093660f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/dir.c * @@ -79,6 +80,10 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->dup_cookie = 0; ctx->cred = get_cred(cred); spin_lock(&dir->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED; list_add(&ctx->list, &nfsi->open_files); spin_unlock(&dir->i_lock); return ctx; @@ -139,19 +144,12 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[0]; }; -struct readdirvec { - unsigned long nr; - unsigned long index; - struct page *pages[NFS_MAX_READDIR_RAPAGES]; -}; - typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool); typedef struct { struct file *file; struct page *page; struct dir_context *ctx; unsigned long page_index; - struct readdirvec pvec; u64 *dir_cookie; u64 last_cookie; loff_t current_index; @@ -531,10 +529,6 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct nfs_cache_array *array; unsigned int count = 0; int status; - int max_rapages = NFS_MAX_READDIR_RAPAGES; - - desc->pvec.index = desc->page_index; - desc->pvec.nr = 0; scratch = alloc_page(GFP_KERNEL); if (scratch == NULL) @@ -559,40 +553,20 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (desc->plus) nfs_prime_dcache(file_dentry(desc->file), entry); - status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); - if (status == -ENOSPC) { - desc->pvec.nr++; - if (desc->pvec.nr == max_rapages) - break; - status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); - } + status = nfs_readdir_add_to_array(entry, page); if (status != 0) break; } while (!entry->eof); - /* - * page and desc->pvec.pages[0] are valid, don't need to check - * whether or not to be NULL. - */ - copy_highpage(page, desc->pvec.pages[0]); - out_nopages: if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]); + array = kmap(page); array->eof_index = array->size; status = 0; - kunmap_atomic(array); + kunmap(page); } put_page(scratch); - - /* - * desc->pvec.nr > 0 means at least one page was completely filled, - * we should return -ENOSPC. Otherwise function - * nfs_readdir_xdr_to_array will enter infinite loop. - */ - if (desc->pvec.nr > 0) - return -ENOSPC; return status; } @@ -626,24 +600,6 @@ out_freepages: return -ENOMEM; } -/* - * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure. - */ -static -void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc) -{ - struct nfs_cache_array *array; - int max_rapages = NFS_MAX_READDIR_RAPAGES; - int index; - - for (index = 0; index < max_rapages; index++) { - array = kmap_atomic(desc->pvec.pages[index]); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; - kunmap_atomic(array); - } -} - static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { @@ -654,12 +610,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); - /* - * This means we hit readdir rdpages miss, the preallocated rdpages - * are useless, the preallocate rdpages should be reinitialized. - */ - nfs_readdir_rapages_init(desc); - entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -720,24 +670,9 @@ int nfs_readdir_filler(void *data, struct page* page) struct inode *inode = file_inode(desc->file); int ret; - /* - * If desc->page_index in range desc->pvec.index and - * desc->pvec.index + desc->pvec.nr, we get readdir cache hit. - */ - if (desc->page_index >= desc->pvec.index && - desc->page_index < (desc->pvec.index + desc->pvec.nr)) { - /* - * page and desc->pvec.pages[x] are valid, don't need to check - * whether or not to be NULL. - */ - copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]); - ret = 0; - } else { - ret = nfs_readdir_xdr_to_array(desc, page, inode); - if (ret < 0) - goto error; - } - + ret = nfs_readdir_xdr_to_array(desc, page, inode); + if (ret < 0) + goto error; SetPageUptodate(page); if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { @@ -902,7 +837,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) *desc = &my_desc; struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; - int max_rapages = NFS_MAX_READDIR_RAPAGES; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); @@ -922,12 +856,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx); - res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages); - if (res < 0) - return -ENOMEM; - - nfs_readdir_rapages_init(desc); - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -963,7 +891,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: - nfs_readdir_free_pages(desc->pvec.pages, max_rapages); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2436bd92bc00..0cb442406168 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/direct.c * diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index e6a700f01452..aec769a500a1 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -22,7 +22,8 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, char *ip_addr = NULL; int ip_len; - ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false); + ip_len = dns_query(net, NULL, name, namelen, NULL, &ip_addr, NULL, + false); if (ip_len > 0) ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 144e183250c3..95dc90570786 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/file.c * diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile index 8516cdffb9e9..de056312d374 100644 --- a/fs/nfs/filelayout/Makefile +++ b/fs/nfs/filelayout/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the pNFS Files Layout Driver kernel module # diff --git a/fs/nfs/flexfilelayout/Makefile b/fs/nfs/flexfilelayout/Makefile index 1d2c9f6bbcd4..49f03422b6ad 100644 --- a/fs/nfs/flexfilelayout/Makefile +++ b/fs/nfs/flexfilelayout/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the pNFS Flexfile Layout Driver kernel module # diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9920c52bd0cd..b04e20d28162 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Module for pnfs flexfile layout driver. * @@ -933,6 +934,10 @@ out_nolseg: if (pgio->pg_error < 0) return; out_mds: + trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_READ, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_read_mds(pgio); @@ -999,6 +1004,10 @@ retry: return; out_mds: + trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_RW, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_write_mds(pgio); @@ -1025,6 +1034,10 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg) return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); + trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_RW, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); /* no lseg means that pnfs is not in use, so no mirroring here */ nfs_pageio_reset_write_mds(pgio); out: @@ -1074,6 +1087,10 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) hdr->args.count, (unsigned long long)hdr->args.offset); + trace_pnfs_mds_fallback_write_done(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_RW, NFS_I(hdr->inode)->layout, + hdr->lseg); task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } @@ -1093,6 +1110,10 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); + trace_pnfs_mds_fallback_read_done(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_READ, NFS_I(hdr->inode)->layout, + hdr->lseg); task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } @@ -1826,6 +1847,9 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) out_failed: if (ff_layout_avoid_mds_available_ds(lseg)) return PNFS_TRY_AGAIN; + trace_pnfs_mds_fallback_read_pagelist(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_READ, NFS_I(hdr->inode)->layout, lseg); return PNFS_NOT_ATTEMPTED; } @@ -1891,6 +1915,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) out_failed: if (ff_layout_avoid_mds_available_ds(lseg)) return PNFS_TRY_AGAIN; + trace_pnfs_mds_fallback_write_pagelist(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_RW, NFS_I(hdr->inode)->layout, lseg); return PNFS_NOT_ATTEMPTED; } diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index a809989807d6..3eda40a320a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -18,7 +18,7 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; +static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO; static unsigned int dataserver_retrans; static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); @@ -257,7 +257,7 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, if (status == 0) return 0; - if (mirror->mirror_ds == NULL) + if (IS_ERR_OR_NULL(mirror->mirror_ds)) return -EINVAL; dserr = kmalloc(sizeof(*dserr), gfp_flags); diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 666415d13d52..15f271401dcc 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* NFS FS-Cache index structure definition * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/init.h> diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 4dc887813c71..53507aa96b0b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* NFS filesystem cache interface * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #include <linux/init.h> diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 161ba2edb9d0..25a75e40d91d 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* NFS filesystem cache interface definitions * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. */ #ifndef _NFS_FSCACHE_H diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 391dafaf9182..878c4c5982d9 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* getroot.c: get the root dentry for an NFS mount * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3bc2550cfe4e..8a1758200b57 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/inode.c * @@ -50,6 +51,7 @@ #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "sysfs.h" #include "nfstrace.h" @@ -207,7 +209,7 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) } if (inode->i_mapping->nrpages == 0) - flags &= ~NFS_INO_INVALID_DATA; + flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); nfsi->cache_validity |= flags; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode); @@ -651,7 +653,8 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) i_size_write(inode, offset); /* Optimisation */ if (offset == 0) - NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA | + NFS_INO_DATA_INVAL_DEFER); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); @@ -1031,6 +1034,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED; list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } @@ -1099,6 +1106,7 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } +EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that @@ -1311,7 +1319,8 @@ int nfs_revalidate_mapping(struct inode *inode, set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA| + NFS_INO_DATA_INVAL_DEFER); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1869,7 +1878,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - } + } else if (!have_delegation) + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; inode_set_iversion_raw(inode, fattr->change_attr); attr_changed = true; } @@ -2158,12 +2168,8 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - nfs_fs_proc_net_exit(net); - nfs_cleanup_cb_ident_idr(net); - WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); - WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); + nfs_clients_exit(net); } static struct pernet_operations nfs_net_ops = { @@ -2180,6 +2186,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_sysfs_init(); + if (err < 0) + goto out10; + err = register_pernet_subsys(&nfs_net_ops); if (err < 0) goto out9; @@ -2243,6 +2253,8 @@ out7: out8: unregister_pernet_subsys(&nfs_net_ops); out9: + nfs_sysfs_exit(); +out10: return err; } @@ -2259,6 +2271,7 @@ static void __exit exit_nfs_fs(void) unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); + nfs_sysfs_exit(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 498fab72f70b..a2346a2f8361 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -69,8 +69,7 @@ struct nfs_clone_mount { * Maximum number of pages that readdir can use for creating * a vmapped array of pages. */ -#define NFS_MAX_READDIR_PAGES 64 -#define NFS_MAX_READDIR_RAPAGES 8 +#define NFS_MAX_READDIR_PAGES 8 struct nfs_client_initdata { unsigned long init_flags; @@ -82,6 +81,7 @@ struct nfs_client_initdata { struct nfs_subversion *nfs_mod; int proto; u32 minorversion; + unsigned int nconnect; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; @@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { char *export_path; int port; unsigned short protocol; + unsigned short nconnect; } nfs_server; void *lsm_opts; @@ -158,6 +159,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); +extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); @@ -170,7 +172,6 @@ int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, struct nfs_server *nfs_alloc_server(void); void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); -extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 15f099a24c29..9287eb666322 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/namespace.c * diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fc9978c58265..c8374f74dce1 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -15,6 +15,8 @@ struct bl_dev_msg { uint32_t major, minor; }; +struct nfs_netns_client; + struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; @@ -29,6 +31,7 @@ struct nfs_net { unsigned short nfs_callback_tcpport6; int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif + struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c index 0a9782c9171a..467f21ee6a35 100644 --- a/fs/nfs/nfs2super.c +++ b/fs/nfs/nfs2super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 Netapp, Inc. All rights reserved. */ diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 572794dab4b1..cbc17a203248 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -151,7 +151,7 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status) return 0; out_status: *status = be32_to_cpup(p); - trace_nfs_xdr_status((int)*status); + trace_nfs_xdr_status(xdr, (int)*status); return 0; } diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 1afdb0f7473f..148ceb74d27c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/sunrpc/addr.h> @@ -101,6 +102,9 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = mds_clp->cl_nconnect; + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 5c4394e4656b..7c5809431e61 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 Netapp, Inc. All rights reserved. */ diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index abbbdde97e31..602767850b36 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -343,7 +343,7 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status) return 0; out_status: *status = be32_to_cpup(p); - trace_nfs_xdr_status((int)*status); + trace_nfs_xdr_status(xdr, (int)*status); return 0; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8a38a254f516..d778dad9a75e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -312,12 +312,12 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode); +extern int nfs4_proc_get_lease_time(struct nfs_client *clp, + struct nfs_fsinfo *fsinfo); #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, const struct cred *); -extern int nfs4_proc_get_lease_time(struct nfs_client *clp, - struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); extern int nfs4_detect_session_trunking(struct nfs_client *clp, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 3ce246346f02..616393a01c06 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -858,7 +859,8 @@ static int nfs4_set_client(struct nfs_server *server, const size_t addrlen, const char *ip_addr, int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) + u32 minorversion, unsigned int nconnect, + struct net *net) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -874,6 +876,8 @@ static int nfs4_set_client(struct nfs_server *server, }; struct nfs_client *clp; + if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = nconnect; if (server->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); if (server->options & NFS_OPTION_MIGRATION) @@ -940,6 +944,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = mds_clp->cl_nconnect; + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -1073,6 +1080,7 @@ static int nfs4_init_server(struct nfs_server *server, data->nfs_server.protocol, &timeparms, data->minorversion, + data->nfs_server.nconnect, data->net); if (error < 0) return error; @@ -1162,6 +1170,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, XPRT_TRANSPORT_RDMA, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, + parent_client->cl_nconnect, parent_client->cl_net); if (!error) goto init_server; @@ -1175,6 +1184,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, XPRT_TRANSPORT_TCP, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, + parent_client->cl_nconnect, parent_client->cl_net); if (error < 0) goto error; @@ -1270,7 +1280,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, net); + clp->cl_minorversion, + clp->cl_nconnect, net); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index cf42a8b939e3..96db471ca2e5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -49,7 +49,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) return err; if ((openflags & O_ACCMODE) == 3) - openflags--; + return nfs_open(inode, filp); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); @@ -129,10 +129,13 @@ nfs4_file_flush(struct file *file, fl_owner_t id) } #ifdef CONFIG_NFS_V4_2 -static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t count, unsigned int flags) +static ssize_t __nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) { + /* Only offload copy if superblock is the same */ + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) + return -EXDEV; if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) @@ -140,6 +143,20 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } +static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t count, unsigned int flags) +{ + ssize_t ret; + + ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count, + flags); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(file_in, pos_in, file_out, + pos_out, count, flags); + return ret; +} + static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) { loff_t ret; @@ -187,7 +204,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, bool same_inode = false; int ret; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + /* NFS does not support deduplication. */ + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + + if (remap_flags & ~REMAP_FILE_ADVISORY) return -EINVAL; /* check alignment w.r.t. clone_blksize */ diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 4884fdae28fb..1e7296395d71 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -291,7 +291,7 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen, if (IS_ERR(rkey)) { mutex_lock(&idmap->idmap_mutex); rkey = request_key_with_auxdata(&key_type_id_resolver_legacy, - desc, "", 0, idmap); + desc, NULL, "", 0, idmap); mutex_unlock(&idmap->idmap_mutex); } if (!IS_ERR(rkey)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c29cbef6b53f..39896afc6edf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -428,6 +428,22 @@ static int nfs4_delay(long *timeout, bool interruptible) return nfs4_delay_killable(timeout); } +static const nfs4_stateid * +nfs4_recoverable_stateid(const nfs4_stateid *stateid) +{ + if (!stateid) + return NULL; + switch (stateid->type) { + case NFS4_OPEN_STATEID_TYPE: + case NFS4_LOCK_STATEID_TYPE: + case NFS4_DELEGATION_STATEID_TYPE: + return stateid; + default: + break; + } + return NULL; +} + /* This is the error handling routine for processes that are allowed * to sleep. */ @@ -436,7 +452,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; - const nfs4_stateid *stateid = exception->stateid; + const nfs4_stateid *stateid; struct inode *inode = exception->inode; int ret = errorcode; @@ -444,8 +460,9 @@ static int nfs4_do_handle_exception(struct nfs_server *server, exception->recovering = 0; exception->retry = 0; + stateid = nfs4_recoverable_stateid(exception->stateid); if (stateid == NULL && state != NULL) - stateid = &state->stateid; + stateid = nfs4_recoverable_stateid(&state->stateid); switch(errorcode) { case 0: @@ -1165,6 +1182,18 @@ static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, return true; } +static fmode_t _nfs4_ctx_to_accessmode(const struct nfs_open_context *ctx) +{ + return ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); +} + +static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) +{ + fmode_t ret = ctx->mode & (FMODE_READ|FMODE_WRITE); + + return (ctx->mode & FMODE_EXEC) ? FMODE_READ | ret : ret; +} + static u32 nfs4_map_atomic_open_share(struct nfs_server *server, fmode_t fmode, int openflags) @@ -1256,10 +1285,20 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - p->o_arg.umask = current_umask(); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); + if (flags & O_CREAT) { + p->o_arg.umask = current_umask(); + p->o_arg.label = nfs4_label_copy(p->a_label, label); + if (c->sattr != NULL && c->sattr->ia_valid != 0) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); + + memcpy(p->o_arg.u.verifier.data, c->verf, + sizeof(p->o_arg.u.verifier.data)); + } + } /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS * will return permission denied for all bits until close */ if (!(flags & O_EXCL)) { @@ -1283,7 +1322,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = nfs4_label_copy(p->a_label, label); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: @@ -1296,13 +1334,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { - p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - - memcpy(p->o_arg.u.verifier.data, c->verf, - sizeof(p->o_arg.u.verifier.data)); - } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; p->c_arg.seqid = p->o_arg.seqid; @@ -2898,14 +2929,13 @@ static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, - fmode_t fmode, - int flags, - struct nfs_open_context *ctx) + int flags, struct nfs_open_context *ctx) { struct nfs4_state_owner *sp = opendata->owner; struct nfs_server *server = sp->so_server; struct dentry *dentry; struct nfs4_state *state; + fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); unsigned int seq; int ret; @@ -2944,7 +2974,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, /* Parse layoutget results before we check for access */ pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); - ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); + ret = nfs4_opendata_access(sp->so_cred, opendata, state, + acc_mode, flags); if (ret != 0) goto out; @@ -2976,7 +3007,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry = ctx->dentry; const struct cred *cred = ctx->cred; struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; - fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); + fmode_t fmode = _nfs4_ctx_to_openmode(ctx); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; struct iattr *sattr = c->sattr; struct nfs4_label *label = c->label; @@ -3022,7 +3053,7 @@ static int _nfs4_do_open(struct inode *dir, if (d_really_is_positive(dentry)) opendata->state = nfs4_get_open_state(d_inode(dentry), sp); - status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); + status = _nfs4_open_and_get_state(opendata, flags, ctx); if (status != 0) goto err_free_label; state = ctx->state; @@ -3592,9 +3623,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) if (ctx->state == NULL) return; if (is_sync) - nfs4_close_sync(ctx->state, ctx->mode); + nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else - nfs4_close_state(ctx->state, ctx->mode); + nfs4_close_state(ctx->state, _nfs4_ctx_to_openmode(ctx)); } #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) @@ -5978,7 +6009,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_message = &msg, .callback_ops = &nfs4_setclientid_ops, .callback_data = &setclientid, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; int status; @@ -6044,7 +6075,8 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; @@ -6932,7 +6964,6 @@ struct nfs4_lock_waiter { struct task_struct *task; struct inode *inode; struct nfs_lowner *owner; - bool notified; }; static int @@ -6954,13 +6985,13 @@ nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, vo /* Make sure it's for the right inode */ if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) return 0; - - waiter->notified = true; } /* override "private" so we can use default_wake_function */ wait->private = waiter->task; - ret = autoremove_wake_function(wait, mode, flags, key); + ret = woken_wake_function(wait, mode, flags, key); + if (ret) + list_del_init(&wait->entry); wait->private = waiter; return ret; } @@ -6969,7 +7000,6 @@ static int nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { int status = -ERESTARTSYS; - unsigned long flags; struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; struct nfs_server *server = NFS_SERVER(state->inode); struct nfs_client *clp = server->nfs_client; @@ -6979,8 +7009,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) .s_dev = server->s_dev }; struct nfs4_lock_waiter waiter = { .task = current, .inode = state->inode, - .owner = &owner, - .notified = false }; + .owner = &owner}; wait_queue_entry_t wait; /* Don't bother with waitqueue if we don't expect a callback */ @@ -6990,27 +7019,22 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) init_wait(&wait); wait.private = &waiter; wait.func = nfs4_wake_lock_waiter; - add_wait_queue(q, &wait); while(!signalled()) { - waiter.notified = false; + add_wait_queue(q, &wait); status = nfs4_proc_setlk(state, cmd, request); - if ((status != -EAGAIN) || IS_SETLK(cmd)) + if ((status != -EAGAIN) || IS_SETLK(cmd)) { + finish_wait(q, &wait); break; - - status = -ERESTARTSYS; - spin_lock_irqsave(&q->lock, flags); - if (waiter.notified) { - spin_unlock_irqrestore(&q->lock, flags); - continue; } - set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irqrestore(&q->lock, flags); - freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT); + status = -ERESTARTSYS; + freezer_do_not_count(); + wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT); + freezer_count(); + finish_wait(q, &wait); } - finish_wait(q, &wait); return status; } #else /* !CONFIG_NFS_V4_1 */ @@ -7633,7 +7657,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("NFS reply secinfo: %d\n", status); put_cred(cred); @@ -7971,7 +7995,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, .rpc_client = clp->cl_rpcclient, .callback_ops = &nfs4_exchange_id_call_ops, .rpc_message = &msg, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; struct nfs41_exchange_id_data *calldata; int status; @@ -8196,7 +8220,8 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, }; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " @@ -8247,6 +8272,8 @@ out: return ret; } +#endif /* CONFIG_NFS_V4_1 */ + struct nfs4_get_lease_time_data { struct nfs4_get_lease_time_args *args; struct nfs4_get_lease_time_res *res; @@ -8279,7 +8306,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) (struct nfs4_get_lease_time_data *)calldata; dprintk("--> %s\n", __func__); - if (!nfs41_sequence_done(task, &data->res->lr_seq_res)) + if (!nfs4_sequence_done(task, &data->res->lr_seq_res)) return; switch (task->tk_status) { case -NFS4ERR_DELAY: @@ -8337,6 +8364,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } +#ifdef CONFIG_NFS_V4_1 + /* * Initialize the values to be used by the client in CREATE_SESSION * If nfs4_init_session set the fore channel request and response sizes, @@ -8351,6 +8380,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, { unsigned int max_rqst_sz, max_resp_sz; unsigned int max_bc_payload = rpc_max_bc_payload(clnt); + unsigned int max_bc_slots = rpc_num_bc_slots(clnt); max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; @@ -8373,6 +8403,8 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1); + if (args->bc_attrs.max_reqs > max_bc_slots) + args->bc_attrs.max_reqs = max_bc_slots; dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", @@ -8475,7 +8507,8 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, nfs4_init_channel_attrs(&args, clp->cl_rpcclient); args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_create_session(clp, status); switch (status) { @@ -8551,7 +8584,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) return 0; - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_session(session->clp, status); if (status) @@ -8805,7 +8839,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs4_reclaim_complete_call_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN, }; int status = -ENOMEM; @@ -9324,7 +9358,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("--> %s\n", __func__); status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("<-- %s status=%d\n", __func__, status); put_cred(cred); diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index bcb532def9e2..4145a0138907 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/nfs/nfs4session.c * diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e2e3c4f04d3e..9afd051a4876 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -87,6 +87,27 @@ const nfs4_stateid current_stateid = { static DEFINE_MUTEX(nfs_clid_init_mutex); +static int nfs4_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + unsigned long now; + + if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { + nfs4_schedule_state_renewal(clp); + return 0; + } + + now = jiffies; + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_schedule_state_renewal(clp); + } + + return status; +} + int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred) { struct nfs4_setclientid_res clid = { @@ -114,7 +135,7 @@ do_confirm: if (status != 0) goto out; clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs4_schedule_state_renewal(clp); + nfs4_setup_state_renewal(clp); out: return status; } @@ -286,34 +307,13 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) #if defined(CONFIG_NFS_V4_1) -static int nfs41_setup_state_renewal(struct nfs_client *clp) -{ - int status; - struct nfs_fsinfo fsinfo; - unsigned long now; - - if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { - nfs4_schedule_state_renewal(clp); - return 0; - } - - now = jiffies; - status = nfs4_proc_get_lease_time(clp, &fsinfo); - if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); - nfs4_schedule_state_renewal(clp); - } - - return status; -} - static void nfs41_finish_session_reset(struct nfs_client *clp) { clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); - nfs41_setup_state_renewal(clp); + nfs4_setup_state_renewal(clp); } int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred) @@ -1064,8 +1064,7 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, * choose to use. */ goto out; - nfs4_copy_open_stateid(dst, state); - ret = 0; + ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 689977e148cb..04c57066a11a 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 Bryan Schumaker <bjschuma@netapp.com> */ diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index e9fb3e50a999..1a8f376b3f73 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -16,4 +16,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read); EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write); EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds); + +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_get_mirror_count); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index cd1a5c08da9a..b2f395fa7350 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -156,7 +156,7 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); TRACE_DEFINE_ENUM(NFS4ERR_XDEV); #define show_nfsv4_errors(error) \ - __print_symbolic(-(error), \ + __print_symbolic(error, \ { NFS4_OK, "OK" }, \ /* Mapped by nfs4_stat_to_errno() */ \ { EPERM, "EPERM" }, \ @@ -348,7 +348,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_STRUCT__entry( __string(dstaddr, clp->cl_hostname) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -357,8 +357,8 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, ), TP_printk( - "error=%d (%s) dstaddr=%s", - __entry->error, + "error=%ld (%s) dstaddr=%s", + -__entry->error, show_nfsv4_errors(__entry->error), __get_str(dstaddr) ) @@ -420,7 +420,7 @@ TRACE_EVENT(nfs4_sequence_done, __field(unsigned int, highest_slotid) __field(unsigned int, target_highest_slotid) __field(unsigned int, status_flags) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -435,10 +435,10 @@ TRACE_EVENT(nfs4_sequence_done, __entry->error = res->sr_status; ), TP_printk( - "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u target_highest_slotid=%u " "status_flags=%u (%s)", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->session, __entry->slot_nr, @@ -467,7 +467,7 @@ TRACE_EVENT(nfs4_cb_sequence, __field(unsigned int, seq_nr) __field(unsigned int, highest_slotid) __field(unsigned int, cachethis) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -476,13 +476,13 @@ TRACE_EVENT(nfs4_cb_sequence, __entry->seq_nr = args->csa_sequenceid; __entry->highest_slotid = args->csa_highestslotid; __entry->cachethis = args->csa_cachethis; - __entry->error = -be32_to_cpu(status); + __entry->error = be32_to_cpu(status); ), TP_printk( - "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->session, __entry->slot_nr, @@ -490,6 +490,44 @@ TRACE_EVENT(nfs4_cb_sequence, __entry->highest_slotid ) ); + +TRACE_EVENT(nfs4_cb_seqid_err, + TP_PROTO( + const struct cb_sequenceargs *args, + __be32 status + ), + TP_ARGS(args, status), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, cachethis) + __field(unsigned long, error) + ), + + TP_fast_assign( + __entry->session = nfs_session_id_hash(&args->csa_sessionid); + __entry->slot_nr = args->csa_slotid; + __entry->seq_nr = args->csa_sequenceid; + __entry->highest_slotid = args->csa_highestslotid; + __entry->cachethis = args->csa_cachethis; + __entry->error = be32_to_cpu(status); + ), + + TP_printk( + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u", + -__entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid + ) +); + #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, @@ -526,26 +564,37 @@ TRACE_EVENT(nfs4_setup_sequence, TRACE_EVENT(nfs4_xdr_status, TP_PROTO( + const struct xdr_stream *xdr, u32 op, int error ), - TP_ARGS(op, error), + TP_ARGS(xdr, op, error), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) __field(u32, op) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( + const struct rpc_rqst *rqstp = xdr->rqst; + const struct rpc_task *task = rqstp->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->op = op; - __entry->error = -error; + __entry->error = error; ), TP_printk( - "operation %d: nfs status %d (%s)", - __entry->op, - __entry->error, show_nfsv4_errors(__entry->error) + "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u", + __entry->task_id, __entry->client_id, __entry->xid, + -__entry->error, show_nfsv4_errors(__entry->error), + __entry->op ) ); @@ -559,7 +608,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, TP_ARGS(ctx, flags, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(unsigned int, flags) __field(unsigned int, fmode) __field(dev_t, dev) @@ -577,7 +626,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, const struct nfs4_state *state = ctx->state; const struct inode *inode = NULL; - __entry->error = error; + __entry->error = -error; __entry->flags = flags; __entry->fmode = (__force unsigned int)ctx->mode; __entry->dev = ctx->dentry->d_sb->s_dev; @@ -609,11 +658,11 @@ DECLARE_EVENT_CLASS(nfs4_open_event, ), TP_printk( - "error=%d (%s) flags=%d (%s) fmode=%s " + "error=%ld (%s) flags=%d (%s) fmode=%s " "fileid=%02x:%02x:%llu fhandle=0x%08x " "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->flags, show_open_flags(__entry->flags), @@ -695,7 +744,7 @@ TRACE_EVENT(nfs4_close, __field(u32, fhandle) __field(u64, fileid) __field(unsigned int, fmode) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -715,9 +764,9 @@ TRACE_EVENT(nfs4_close, ), TP_printk( - "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " + "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x openstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->fmode ? show_fmode_flags(__entry->fmode) : "closed", @@ -757,7 +806,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_ARGS(request, state, cmd, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(int, cmd) __field(char, type) __field(loff_t, start) @@ -787,10 +836,10 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, ), TP_printk( - "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "error=%ld (%s) cmd=%s:%s range=%lld:%lld " "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), show_lock_cmd(__entry->cmd), show_lock_type(__entry->type), @@ -827,7 +876,7 @@ TRACE_EVENT(nfs4_set_lock, TP_ARGS(request, state, lockstateid, cmd, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(int, cmd) __field(char, type) __field(loff_t, start) @@ -863,10 +912,10 @@ TRACE_EVENT(nfs4_set_lock, ), TP_printk( - "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "error=%ld (%s) cmd=%s:%s range=%lld:%lld " "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x lockstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), show_lock_cmd(__entry->cmd), show_lock_type(__entry->type), @@ -932,7 +981,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -948,9 +997,9 @@ TRACE_EVENT(nfs4_delegreturn_exit, ), TP_printk( - "error=%d (%s) dev=%02x:%02x fhandle=0x%08x " + "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fhandle, @@ -969,7 +1018,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_ARGS(state, lsp, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -991,9 +1040,9 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1026,7 +1075,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, dir) __string(name, name->name) ), @@ -1034,13 +1083,13 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = -error; __assign_str(name, name->name); ), TP_printk( - "error=%d (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, @@ -1076,7 +1125,7 @@ TRACE_EVENT(nfs4_lookupp, TP_STRUCT__entry( __field(dev_t, dev) __field(u64, ino) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1086,8 +1135,8 @@ TRACE_EVENT(nfs4_lookupp, ), TP_printk( - "error=%d (%s) inode=%02x:%02x:%llu", - __entry->error, + "error=%ld (%s) inode=%02x:%02x:%llu", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->ino @@ -1107,7 +1156,7 @@ TRACE_EVENT(nfs4_rename, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, olddir) __string(oldname, oldname->name) __field(u64, newdir) @@ -1124,9 +1173,9 @@ TRACE_EVENT(nfs4_rename, ), TP_printk( - "error=%d (%s) oldname=%02x:%02x:%llu/%s " + "error=%ld (%s) oldname=%02x:%02x:%llu/%s " "newname=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->olddir, @@ -1149,19 +1198,19 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", - __entry->error, + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1200,7 +1249,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1217,9 +1266,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1257,7 +1306,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __field(u32, fhandle) __field(u64, fileid) __field(unsigned int, valid) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1269,9 +1318,9 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "valid=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1304,7 +1353,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, TP_ARGS(clp, fhandle, inode, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -1325,9 +1374,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "dstaddr=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1359,7 +1408,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, TP_ARGS(clp, fhandle, inode, stateid, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -1386,9 +1435,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x dstaddr=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1422,7 +1471,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_ARGS(name, len, id, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(u32, id) __dynamic_array(char, name, len > 0 ? len + 1 : 1) ), @@ -1437,8 +1486,8 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, ), TP_printk( - "error=%d id=%u name=%s", - __entry->error, + "error=%ld (%s) id=%u name=%s", + -__entry->error, show_nfsv4_errors(__entry->error), __entry->id, __get_str(name) ) @@ -1471,7 +1520,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1485,7 +1534,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -1493,9 +1542,9 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1531,7 +1580,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1545,7 +1594,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -1553,9 +1602,9 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1592,7 +1641,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1606,9 +1655,9 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1656,7 +1705,7 @@ TRACE_EVENT(nfs4_layoutget, __field(u32, iomode) __field(u64, offset) __field(u64, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) __field(int, layoutstateid_seq) @@ -1689,10 +1738,10 @@ TRACE_EVENT(nfs4_layoutget, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1722,6 +1771,7 @@ TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BLOCKED); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_INVALID_OPEN); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETRY); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_EXIT); #define show_pnfs_update_layout_reason(reason) \ __print_symbolic(reason, \ @@ -1737,7 +1787,8 @@ TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ { PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" }, \ { PNFS_UPDATE_LAYOUT_RETRY, "retrying" }, \ - { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) + { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }, \ + { PNFS_UPDATE_LAYOUT_EXIT, "exit" }) TRACE_EVENT(pnfs_update_layout, TP_PROTO(struct inode *inode, @@ -1796,6 +1847,78 @@ TRACE_EVENT(pnfs_update_layout, ) ); +DECLARE_EVENT_CLASS(pnfs_layout_event, + TP_PROTO(struct inode *inode, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg + ), + TP_ARGS(inode, pos, count, iomode, lo, lseg), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u32, fhandle) + __field(loff_t, pos) + __field(u64, count) + __field(enum pnfs_iomode, iomode) + __field(int, layoutstateid_seq) + __field(u32, layoutstateid_hash) + __field(long, lseg) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->pos = pos; + __entry->count = count; + __entry->iomode = iomode; + if (lo != NULL) { + __entry->layoutstateid_seq = + be32_to_cpu(lo->plh_stateid.seqid); + __entry->layoutstateid_hash = + nfs_stateid_hash(&lo->plh_stateid); + } else { + __entry->layoutstateid_seq = 0; + __entry->layoutstateid_hash = 0; + } + __entry->lseg = (long)lseg; + ), + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "iomode=%s pos=%llu count=%llu " + "layoutstateid=%d:0x%08x lseg=0x%lx", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_pnfs_iomode(__entry->iomode), + (unsigned long long)__entry->pos, + (unsigned long long)__entry->count, + __entry->layoutstateid_seq, __entry->layoutstateid_hash, + __entry->lseg + ) +); + +#define DEFINE_PNFS_LAYOUT_EVENT(name) \ + DEFINE_EVENT(pnfs_layout_event, name, \ + TP_PROTO(struct inode *inode, \ + loff_t pos, \ + u64 count, \ + enum pnfs_iomode iomode, \ + struct pnfs_layout_hdr *lo, \ + struct pnfs_layout_segment *lseg \ + ), \ + TP_ARGS(inode, pos, count, iomode, lo, lseg)) + +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_read); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_write); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_get_mirror_count); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_done); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 602446158bfb..46a8d636d151 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -837,6 +837,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define NFS4_dec_sequence_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz) +#endif #define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putrootfh_maxsz + \ @@ -845,6 +846,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) +#if defined(CONFIG_NFS_V4_1) #define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_reclaim_complete_maxsz) @@ -2957,6 +2959,8 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, encode_nops(&hdr); } +#endif + /* * a GET_LEASE_TIME request */ @@ -2977,6 +2981,8 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, encode_nops(&hdr); } +#ifdef CONFIG_NFS_V4_1 + /* * a RECLAIM_COMPLETE request */ @@ -3187,7 +3193,7 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, return true; out_status: nfserr = be32_to_cpup(p); - trace_nfs4_xdr_status(opnum, nfserr); + trace_nfs4_xdr_status(xdr, opnum, nfserr); *nfs_retval = nfs4_stat_to_errno(nfserr); return true; out_bad_operation: @@ -3427,7 +3433,7 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; } - dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); + dprintk("%s: lease time=%u\n", __func__, (unsigned int)*res); return 0; } @@ -7122,6 +7128,8 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, return status; } +#endif + /* * Decode GET_LEASE_TIME response */ @@ -7143,6 +7151,8 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, return status; } +#ifdef CONFIG_NFS_V4_1 + /* * Decode RECLAIM_COMPLETE response */ @@ -7551,7 +7561,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC41(CREATE_SESSION, enc_create_session, dec_create_session), PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), PROC41(SEQUENCE, enc_sequence, dec_sequence), - PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete), PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC41(LAYOUTGET, enc_layoutget, dec_layoutget), diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a0d6910aa03a..976d4089e267 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,6 +11,16 @@ #include <linux/tracepoint.h> #include <linux/iversion.h> +TRACE_DEFINE_ENUM(DT_UNKNOWN); +TRACE_DEFINE_ENUM(DT_FIFO); +TRACE_DEFINE_ENUM(DT_CHR); +TRACE_DEFINE_ENUM(DT_DIR); +TRACE_DEFINE_ENUM(DT_BLK); +TRACE_DEFINE_ENUM(DT_REG); +TRACE_DEFINE_ENUM(DT_LNK); +TRACE_DEFINE_ENUM(DT_SOCK); +TRACE_DEFINE_ENUM(DT_WHT); + #define nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ { DT_UNKNOWN, "UNKNOWN" }, \ @@ -23,25 +33,57 @@ { DT_SOCK, "SOCK" }, \ { DT_WHT, "WHT" }) +TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); +TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); +TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); + #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ - { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ - { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }) + { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ + { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ + { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ + { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ + { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) + +TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); +TRACE_DEFINE_ENUM(NFS_INO_STALE); +TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); +TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); +TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); +TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); +TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ - { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ - { 1 << NFS_INO_STALE, "STALE" }, \ - { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ - { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ - { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ - { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) + { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ + { BIT(NFS_INO_STALE), "STALE" }, \ + { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ + { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ + { BIT(NFS_INO_FSCACHE), "FSCACHE" }, \ + { BIT(NFS_INO_FSCACHE_LOCK), "FSCACHE_LOCK" }, \ + { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ + { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ + { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ + { BIT(NFS_INO_ODIRECT), "ODIRECT" }) DECLARE_EVENT_CLASS(nfs_inode_event, TP_PROTO( @@ -83,7 +125,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_ARGS(inode, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) @@ -96,7 +138,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); @@ -108,10 +150,10 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, ), TP_printk( - "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " - "cache_validity=%lu (%s) nfs_flags=%ld (%s)", - __entry->error, + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -158,13 +200,41 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); +TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); +TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); +TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); +TRACE_DEFINE_ENUM(LOOKUP_PARENT); +TRACE_DEFINE_ENUM(LOOKUP_REVAL); +TRACE_DEFINE_ENUM(LOOKUP_RCU); +TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL); +TRACE_DEFINE_ENUM(LOOKUP_NO_EVAL); +TRACE_DEFINE_ENUM(LOOKUP_OPEN); +TRACE_DEFINE_ENUM(LOOKUP_CREATE); +TRACE_DEFINE_ENUM(LOOKUP_EXCL); +TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); +TRACE_DEFINE_ENUM(LOOKUP_JUMPED); +TRACE_DEFINE_ENUM(LOOKUP_ROOT); +TRACE_DEFINE_ENUM(LOOKUP_EMPTY); +TRACE_DEFINE_ENUM(LOOKUP_DOWN); + #define show_lookup_flags(flags) \ - __print_flags((unsigned long)flags, "|", \ - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + __print_flags(flags, "|", \ + { LOOKUP_FOLLOW, "FOLLOW" }, \ { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_PARENT, "PARENT" }, \ + { LOOKUP_REVAL, "REVAL" }, \ + { LOOKUP_RCU, "RCU" }, \ + { LOOKUP_NO_REVAL, "NO_REVAL" }, \ + { LOOKUP_NO_EVAL, "NO_EVAL" }, \ { LOOKUP_OPEN, "OPEN" }, \ { LOOKUP_CREATE, "CREATE" }, \ - { LOOKUP_EXCL, "EXCL" }) + { LOOKUP_EXCL, "EXCL" }, \ + { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ + { LOOKUP_JUMPED, "JUMPED" }, \ + { LOOKUP_ROOT, "ROOT" }, \ + { LOOKUP_EMPTY, "EMPTY" }, \ + { LOOKUP_DOWN, "DOWN" }) DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( @@ -176,7 +246,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -190,7 +260,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, ), TP_printk( - "flags=%u (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -219,8 +289,8 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( - __field(int, error) - __field(unsigned int, flags) + __field(unsigned long, error) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -229,14 +299,14 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->flags = flags; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -260,15 +330,43 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); +TRACE_DEFINE_ENUM(O_WRONLY); +TRACE_DEFINE_ENUM(O_RDWR); +TRACE_DEFINE_ENUM(O_CREAT); +TRACE_DEFINE_ENUM(O_EXCL); +TRACE_DEFINE_ENUM(O_NOCTTY); +TRACE_DEFINE_ENUM(O_TRUNC); +TRACE_DEFINE_ENUM(O_APPEND); +TRACE_DEFINE_ENUM(O_NONBLOCK); +TRACE_DEFINE_ENUM(O_DSYNC); +TRACE_DEFINE_ENUM(O_DIRECT); +TRACE_DEFINE_ENUM(O_LARGEFILE); +TRACE_DEFINE_ENUM(O_DIRECTORY); +TRACE_DEFINE_ENUM(O_NOFOLLOW); +TRACE_DEFINE_ENUM(O_NOATIME); +TRACE_DEFINE_ENUM(O_CLOEXEC); + #define show_open_flags(flags) \ - __print_flags((unsigned long)flags, "|", \ + __print_flags(flags, "|", \ + { O_WRONLY, "O_WRONLY" }, \ + { O_RDWR, "O_RDWR" }, \ { O_CREAT, "O_CREAT" }, \ { O_EXCL, "O_EXCL" }, \ + { O_NOCTTY, "O_NOCTTY" }, \ { O_TRUNC, "O_TRUNC" }, \ { O_APPEND, "O_APPEND" }, \ + { O_NONBLOCK, "O_NONBLOCK" }, \ { O_DSYNC, "O_DSYNC" }, \ { O_DIRECT, "O_DIRECT" }, \ - { O_DIRECTORY, "O_DIRECTORY" }) + { O_LARGEFILE, "O_LARGEFILE" }, \ + { O_DIRECTORY, "O_DIRECTORY" }, \ + { O_NOFOLLOW, "O_NOFOLLOW" }, \ + { O_NOATIME, "O_NOATIME" }, \ + { O_CLOEXEC, "O_CLOEXEC" }) + +TRACE_DEFINE_ENUM(FMODE_READ); +TRACE_DEFINE_ENUM(FMODE_WRITE); +TRACE_DEFINE_ENUM(FMODE_EXEC); #define show_fmode_flags(mode) \ __print_flags(mode, "|", \ @@ -286,7 +384,7 @@ TRACE_EVENT(nfs_atomic_open_enter, TP_ARGS(dir, ctx, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(unsigned int, fmode) __field(dev_t, dev) __field(u64, dir) @@ -302,7 +400,7 @@ TRACE_EVENT(nfs_atomic_open_enter, ), TP_printk( - "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, show_open_flags(__entry->flags), show_fmode_flags(__entry->fmode), @@ -323,8 +421,8 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_ARGS(dir, ctx, flags, error), TP_STRUCT__entry( - __field(int, error) - __field(unsigned int, flags) + __field(unsigned long, error) + __field(unsigned long, flags) __field(unsigned int, fmode) __field(dev_t, dev) __field(u64, dir) @@ -332,7 +430,7 @@ TRACE_EVENT(nfs_atomic_open_exit, ), TP_fast_assign( - __entry->error = error; + __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; @@ -341,9 +439,9 @@ TRACE_EVENT(nfs_atomic_open_exit, ), TP_printk( - "error=%d flags=%u (%s) fmode=%s " + "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_open_flags(__entry->flags), show_fmode_flags(__entry->fmode), @@ -363,7 +461,7 @@ TRACE_EVENT(nfs_create_enter, TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -377,7 +475,7 @@ TRACE_EVENT(nfs_create_enter, ), TP_printk( - "flags=%u (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -397,15 +495,15 @@ TRACE_EVENT(nfs_create_exit, TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( - __field(int, error) - __field(unsigned int, flags) + __field(unsigned long, error) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) ), TP_fast_assign( - __entry->error = error; + __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; @@ -413,8 +511,8 @@ TRACE_EVENT(nfs_create_exit, ), TP_printk( - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -469,7 +567,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_ARGS(dir, dentry, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -478,13 +576,13 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -557,7 +655,7 @@ TRACE_EVENT(nfs_link_exit, TP_ARGS(inode, dir, dentry, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) @@ -568,13 +666,13 @@ TRACE_EVENT(nfs_link_exit, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -642,7 +740,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, old_dir) __string(old_name, old_dentry->d_name.name) __field(u64, new_dir) @@ -651,17 +749,17 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; + __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __entry->error = error; __assign_str(old_name, old_dentry->d_name.name); __assign_str(new_name, new_dentry->d_name.name); ), TP_printk( - "error=%d old_name=%02x:%02x:%llu/%s " + "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), @@ -697,7 +795,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, dir) __dynamic_array(char, name, data->args.name.len + 1) ), @@ -707,15 +805,15 @@ TRACE_EVENT(nfs_sillyrename_unlink, size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = -error; memcpy(__get_str(name), data->args.name.name, len); __get_str(name)[len] = 0; ), TP_printk( - "error=%d name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -974,6 +1072,8 @@ TRACE_DEFINE_ENUM(NFSERR_PERM); TRACE_DEFINE_ENUM(NFSERR_NOENT); TRACE_DEFINE_ENUM(NFSERR_IO); TRACE_DEFINE_ENUM(NFSERR_NXIO); +TRACE_DEFINE_ENUM(ECHILD); +TRACE_DEFINE_ENUM(NFSERR_EAGAIN); TRACE_DEFINE_ENUM(NFSERR_ACCES); TRACE_DEFINE_ENUM(NFSERR_EXIST); TRACE_DEFINE_ENUM(NFSERR_XDEV); @@ -985,6 +1085,7 @@ TRACE_DEFINE_ENUM(NFSERR_FBIG); TRACE_DEFINE_ENUM(NFSERR_NOSPC); TRACE_DEFINE_ENUM(NFSERR_ROFS); TRACE_DEFINE_ENUM(NFSERR_MLINK); +TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); TRACE_DEFINE_ENUM(NFSERR_DQUOT); @@ -1007,6 +1108,8 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_NOENT, "NOENT" }, \ { NFSERR_IO, "IO" }, \ { NFSERR_NXIO, "NXIO" }, \ + { ECHILD, "CHILD" }, \ + { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ { NFSERR_XDEV, "XDEV" }, \ @@ -1018,6 +1121,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_NOSPC, "NOSPC" }, \ { NFSERR_ROFS, "ROFS" }, \ { NFSERR_MLINK, "MLINK" }, \ + { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ { NFSERR_DQUOT, "DQUOT" }, \ @@ -1035,22 +1139,33 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); TRACE_EVENT(nfs_xdr_status, TP_PROTO( + const struct xdr_stream *xdr, int error ), - TP_ARGS(error), + TP_ARGS(xdr, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) + __field(unsigned long, error) ), TP_fast_assign( + const struct rpc_rqst *rqstp = xdr->rqst; + const struct rpc_task *task = rqstp->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->error = error; ), TP_printk( - "error=%d (%s)", - __entry->error, nfs_show_status(__entry->error) + "task:%u@%d xid=0x%08x error=%ld (%s)", + __entry->task_id, __entry->client_id, __entry->xid, + -__entry->error, nfs_show_status(__entry->error) ) ); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6ec30014a439..ed4e1b07447b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/pagelist.c * @@ -76,7 +77,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -774,8 +775,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, if (pagecount <= ARRAY_SIZE(pg_array->page_array)) pg_array->pagevec = pg_array->page_array; else { - if (hdr->rw_mode == FMODE_WRITE) - gfp_flags = GFP_NOIO; pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags); if (!pg_array->pagevec) { pg_array->npages = 0; @@ -850,7 +849,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; if (mirror_count == 1) return desc->pg_mirrors_static; - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_NOFS); + ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); if (ret != NULL) { for (i = 0; i < mirror_count; i++) nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 83722e936b4a..75bd5b552ba4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1890,7 +1890,7 @@ lookup_again: spin_unlock(&ino->i_lock); lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, !atomic_read(&lo->plh_outstanding))); - if (IS_ERR(lseg) || !list_empty(&lo->plh_segs)) + if (IS_ERR(lseg)) goto out_put_layout_hdr; pnfs_put_layout_hdr(lo); goto lookup_again; @@ -1915,6 +1915,7 @@ lookup_again: * stateid. */ if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + int status; /* * The first layoutget for the file. Need to serialize per @@ -1934,13 +1935,20 @@ lookup_again: } first = true; - if (nfs4_select_rw_stateid(ctx->state, + status = nfs4_select_rw_stateid(ctx->state, iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, - NULL, &stateid, NULL) != 0) { + NULL, &stateid, NULL); + if (status != 0) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - goto out_unlock; + if (status != -EAGAIN) + goto out_unlock; + spin_unlock(&ino->i_lock); + nfs4_schedule_stateid_recovery(server, ctx->state); + pnfs_clear_first_layoutget(lo); + pnfs_put_layout_hdr(lo); + goto lookup_again; } } else { nfs4_stateid_copy(&stateid, &lo->plh_stateid); @@ -2029,6 +2037,8 @@ lookup_again: out_put_layout_hdr: if (first) pnfs_clear_first_layoutget(lo); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_EXIT); pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " @@ -2468,7 +2478,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, wb_size, IOMODE_RW, false, - GFP_NOFS); + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f5ad75fafc3c..c0046c348910 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Common NFS I/O operations for the pnfs file based * layout drivers. diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c799e540ed1e..c19841c82b6a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/read.c * diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d6c687419a81..3683d2b1cc8e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/super.c * @@ -76,6 +77,8 @@ #define NFS_DEFAULT_VERSION 2 #endif +#define NFS_MAX_CONNECTIONS 16 + enum { /* Mount options that take no arguments */ Opt_soft, Opt_softerr, Opt_hard, @@ -107,6 +110,7 @@ enum { Opt_nfsvers, Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_nconnect, Opt_lookupcache, Opt_fscache_uniq, Opt_local_lock, @@ -180,6 +184,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_mounthost, "mounthost=%s" }, { Opt_mountaddr, "mountaddr=%s" }, + { Opt_nconnect, "nconnect=%s" }, + { Opt_lookupcache, "lookupcache=%s" }, { Opt_fscache_uniq, "fsc=%s" }, { Opt_local_lock, "local_lock=%s" }, @@ -581,7 +587,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, } default: if (showdefaults) - seq_printf(m, ",mountaddr=unspecified"); + seq_puts(m, ",mountaddr=unspecified"); } if (nfss->mountd_version || showdefaults) @@ -672,6 +678,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); rcu_read_unlock(); + if (clp->cl_nconnect > 0) + seq_printf(m, ",nconnect=%u", clp->cl_nconnect); if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); @@ -689,29 +697,29 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_nfsv4_options(m, nfss, showdefaults); if (nfss->options & NFS_OPTION_FSCACHE) - seq_printf(m, ",fsc"); + seq_puts(m, ",fsc"); if (nfss->options & NFS_OPTION_MIGRATION) - seq_printf(m, ",migration"); + seq_puts(m, ",migration"); if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) - seq_printf(m, ",lookupcache=none"); + seq_puts(m, ",lookupcache=none"); else - seq_printf(m, ",lookupcache=pos"); + seq_puts(m, ",lookupcache=pos"); } local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK; local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL; if (!local_flock && !local_fcntl) - seq_printf(m, ",local_lock=none"); + seq_puts(m, ",local_lock=none"); else if (local_flock && local_fcntl) - seq_printf(m, ",local_lock=all"); + seq_puts(m, ",local_lock=all"); else if (local_flock) - seq_printf(m, ",local_lock=flock"); + seq_puts(m, ",local_lock=flock"); else - seq_printf(m, ",local_lock=posix"); + seq_puts(m, ",local_lock=posix"); } /* @@ -734,11 +742,21 @@ int nfs_show_options(struct seq_file *m, struct dentry *root) EXPORT_SYMBOL_GPL(nfs_show_options); #if IS_ENABLED(CONFIG_NFS_V4) +static void show_lease(struct seq_file *m, struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + unsigned long expire; + + seq_printf(m, ",lease_time=%ld", clp->cl_lease_time / HZ); + expire = clp->cl_last_renewal + clp->cl_lease_time; + seq_printf(m, ",lease_expired=%ld", + time_after(expire, jiffies) ? 0 : (jiffies - expire) / HZ); +} #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { if (nfs4_has_session(server->nfs_client)) - seq_printf(m, ",sessions"); + seq_puts(m, ",sessions"); } #else static void show_sessions(struct seq_file *m, struct nfs_server *server) {} @@ -815,7 +833,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) /* * Display all mount option settings */ - seq_printf(m, "\n\topts:\t"); + seq_puts(m, "\n\topts:\t"); seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : ""); seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); @@ -826,7 +844,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) show_implementation_id(m, nfss); - seq_printf(m, "\n\tcaps:\t"); + seq_puts(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%u", nfss->wtmult); seq_printf(m, ",dtsize=%u", nfss->dtsize); @@ -835,13 +853,14 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) #if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { - seq_printf(m, "\n\tnfsv4:\t"); + seq_puts(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); show_sessions(m, nfss); show_pnfs(m, nfss); + show_lease(m, nfss); } #endif @@ -873,20 +892,20 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) preempt_enable(); } - seq_printf(m, "\n\tevents:\t"); + seq_puts(m, "\n\tevents:\t"); for (i = 0; i < __NFSIOS_COUNTSMAX; i++) seq_printf(m, "%lu ", totals.events[i]); - seq_printf(m, "\n\tbytes:\t"); + seq_puts(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); #ifdef CONFIG_NFS_FSCACHE if (nfss->options & NFS_OPTION_FSCACHE) { - seq_printf(m, "\n\tfsc:\t"); + seq_puts(m, "\n\tfsc:\t"); for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) seq_printf(m, "%Lu ", totals.fscache[i]); } #endif - seq_printf(m, "\n"); + seq_putc(m, '\n'); rpc_clnt_show_stats(m, nfss->client); @@ -1548,6 +1567,11 @@ static int nfs_parse_mount_options(char *raw, if (mnt->mount_server.addrlen == 0) goto out_invalid_address; break; + case Opt_nconnect: + if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) + goto out_invalid_value; + mnt->nfs_server.nconnect = option; + break; case Opt_lookupcache: string = match_strdup(args); if (string == NULL) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c new file mode 100644 index 000000000000..4f3390b20239 --- /dev/null +++ b/fs/nfs/sysfs.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Hammerspace Inc + */ + +#include <linux/module.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/nfs_fs.h> +#include <linux/rcupdate.h> + +#include "nfs4_fs.h" +#include "netns.h" +#include "sysfs.h" + +struct kobject *nfs_client_kobj; +static struct kset *nfs_client_kset; + +static void nfs_netns_object_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( + struct kobject *kobj) +{ + return &net_ns_type_operations; +} + +static struct kobj_type nfs_netns_object_type = { + .release = nfs_netns_object_release, + .sysfs_ops = &kobj_sysfs_ops, + .child_ns_type = nfs_netns_object_child_ns_type, +}; + +static struct kobject *nfs_netns_object_alloc(const char *name, + struct kset *kset, struct kobject *parent) +{ + struct kobject *kobj; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (kobj) { + kobj->kset = kset; + if (kobject_init_and_add(kobj, &nfs_netns_object_type, + parent, "%s", name) == 0) + return kobj; + kobject_put(kobj); + } + return NULL; +} + +int nfs_sysfs_init(void) +{ + nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); + if (!nfs_client_kset) + return -ENOMEM; + nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); + if (!nfs_client_kobj) { + kset_unregister(nfs_client_kset); + nfs_client_kset = NULL; + return -ENOMEM; + } + return 0; +} + +void nfs_sysfs_exit(void) +{ + kobject_put(nfs_client_kobj); + kset_unregister(nfs_client_kset); +} + +static ssize_t nfs_netns_identifier_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); +} + +/* Strip trailing '\n' */ +static size_t nfs_string_strip(const char *c, size_t len) +{ + while (len > 0 && c[len-1] == '\n') + --len; + return len; +} + +static ssize_t nfs_netns_identifier_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + const char *old; + char *p; + size_t len; + + len = nfs_string_strip(buf, min_t(size_t, count, CONTAINER_ID_MAXLEN)); + if (!len) + return 0; + p = kmemdup_nul(buf, len, GFP_KERNEL); + if (!p) + return -ENOMEM; + old = xchg(&c->identifier, p); + if (old) { + synchronize_rcu(); + kfree(old); + } + return count; +} + +static void nfs_netns_client_release(struct kobject *kobj) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + + if (c->identifier) + kfree(c->identifier); + kfree(c); +} + +static const void *nfs_netns_client_namespace(struct kobject *kobj) +{ + return container_of(kobj, struct nfs_netns_client, kobject)->net; +} + +static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, + 0644, nfs_netns_identifier_show, nfs_netns_identifier_store); + +static struct attribute *nfs_netns_client_attrs[] = { + &nfs_netns_client_id.attr, + NULL, +}; + +static struct kobj_type nfs_netns_client_type = { + .release = nfs_netns_client_release, + .default_attrs = nfs_netns_client_attrs, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_client_namespace, +}; + +static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, + struct net *net) +{ + struct nfs_netns_client *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + p->net = net; + p->kobject.kset = nfs_client_kset; + if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, + parent, "nfs_client") == 0) + return p; + kobject_put(&p->kobject); + } + return NULL; +} + +void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) +{ + struct nfs_netns_client *clp; + + clp = nfs_netns_client_alloc(nfs_client_kobj, net); + if (clp) { + netns->nfs_client = clp; + kobject_uevent(&clp->kobject, KOBJ_ADD); + } +} + +void nfs_netns_sysfs_destroy(struct nfs_net *netns) +{ + struct nfs_netns_client *clp = netns->nfs_client; + + if (clp) { + kobject_uevent(&clp->kobject, KOBJ_REMOVE); + kobject_del(&clp->kobject); + kobject_put(&clp->kobject); + netns->nfs_client = NULL; + } +} diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h new file mode 100644 index 000000000000..f1b27411dcc0 --- /dev/null +++ b/fs/nfs/sysfs.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Hammerspace Inc + */ + +#ifndef __NFS_SYSFS_H +#define __NFS_SYSFS_H + +#define CONTAINER_ID_MAXLEN (64) + +struct nfs_netns_client { + struct kobject kobject; + struct net *net; + const char *identifier; +}; + +extern struct kobject *nfs_client_kobj; + +extern int nfs_sysfs_init(void); +extern void nfs_sysfs_exit(void); + +void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); +void nfs_netns_sysfs_destroy(struct nfs_net *netns); + +#endif diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 52d533967485..0effeee28352 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -396,12 +396,6 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) nfs_cancel_async_unlink(dentry); return; } - - /* - * vfs_unlink and the like do not issue this when a file is - * sillyrenamed, so do it here. - */ - fsnotify_nameremove(dentry, 0); } #define SILLYNAME_PREFIX ".nfs" diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bc5bb9323412..92d9cadc6102 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/write.c * @@ -102,7 +103,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); memset(p, 0, sizeof(*p)); p->rw_mode = FMODE_WRITE; @@ -720,12 +721,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc; - unsigned int pflags = memalloc_nofs_save(); int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - ioc = nfs_io_completion_alloc(GFP_NOFS); + ioc = nfs_io_completion_alloc(GFP_KERNEL); if (ioc) nfs_io_completion_init(ioc, nfs_io_completion_commit, inode); @@ -736,8 +736,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_pageio_complete(&pgio); nfs_io_completion_put(ioc); - memalloc_nofs_restore(pflags); - if (err < 0) goto out_err; err = pgio.pg_error; diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index d153ca3ea577..4bebe834c009 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for Linux filesystem routines that are shared by client and server. # diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 5be08f02a76b..b73d9dd37f73 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Common code for control of lockd and nfsv4 grace periods. * diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 538f142935ea..8ceb6425e01a 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/nfs_common/nfsacl.c * diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 20b1c17320d5..d25f6bbe7006 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config NFSD tristate "NFS server support" depends on INET diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 4fb1f72a25fb..66d4c55eb48e 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -121,15 +121,13 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, { loff_t new_size = lcp->lc_last_wr + 1; struct iattr iattr = { .ia_valid = 0 }; - struct timespec ts; int error; - ts = timespec64_to_timespec(inode->i_mtime); if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec_compare(&lcp->lc_mtime, &ts) < 0) - lcp->lc_mtime = timespec64_to_timespec(current_time(inode)); + timespec64_compare(&lcp->lc_mtime, &inode->i_mtime) < 0) + lcp->lc_mtime = current_time(inode); iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; - iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime); + iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; if (new_size > i_size_read(inode)) { iattr.ia_valid |= ATTR_SIZE; diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 4a98537efb0f..10ec5ecdf117 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -10,6 +10,7 @@ #define NFSCACHE_H #include <linux/sunrpc/svc.h> +#include "netns.h" /* * Representation of a reply cache entry. @@ -77,8 +78,8 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) -int nfsd_reply_cache_init(void); -void nfsd_reply_cache_shutdown(void); +int nfsd_reply_cache_init(struct nfsd_net *); +void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); int nfsd_reply_cache_stats_open(struct inode *, struct file *); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 84831253203d..76bee0a0d308 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -127,24 +127,16 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, }; -int nfsd_fault_inject_init(void) +void nfsd_fault_inject_init(void) { unsigned int i; struct nfsd_fault_inject_op *op; umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; debug_dir = debugfs_create_dir("nfsd", NULL); - if (!debug_dir) - goto fail; for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { op = &inject_ops[i]; - if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd)) - goto fail; + debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd); } - return 0; - -fail: - nfsd_fault_inject_cleanup(); - return -ENOMEM; } diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 789abc4dd1d2..bdfe5bcb3dcd 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * per net namespace data structures for nfsd * * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __NFSD_NETNS_H__ @@ -55,6 +42,11 @@ struct nfsd_net { bool grace_ended; time_t boot_time; + /* internal mount of the "nfsd" pseudofilesystem: */ + struct vfsmount *nfsd_mnt; + + struct dentry *nfsd_client_dir; + /* * reclaim_str_hashtbl[] holds known client info from previous reset/reboot * used in reboot/reset lease grace period processing @@ -119,6 +111,7 @@ struct nfsd_net { */ unsigned int max_connections; + u32 clientid_base; u32 clientid_counter; u32 clverifier_counter; @@ -140,6 +133,44 @@ struct nfsd_net { */ bool *nfsd_versions; bool *nfsd4_minorversions; + + /* + * Duplicate reply cache + */ + struct nfsd_drc_bucket *drc_hashtbl; + struct kmem_cache *drc_slab; + + /* max number of entries allowed in the cache */ + unsigned int max_drc_entries; + + /* number of significant bits in the hash value */ + unsigned int maskbits; + unsigned int drc_hashsize; + + /* + * Stats and other tracking of on the duplicate reply cache. + * These fields and the "rc" fields in nfsdstats are modified + * with only the per-bucket cache lock, which isn't really safe + * and should be fixed if we want the statistics to be + * completely accurate. + */ + + /* total number of entries */ + atomic_t num_drc_entries; + + /* cache misses due only to checksum comparison failures */ + unsigned int payload_misses; + + /* amount of memory (in bytes) currently consumed by the DRC */ + unsigned int drc_mem_usage; + + /* longest hash chain seen */ + unsigned int longest_chain; + + /* size of cache when we saw the longest hash chain */ + unsigned int longest_chain_cachesize; + + struct shrinker nfsd_reply_cache_shrinker; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 2961016097ac..d1f285245af8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -83,7 +83,7 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->type = itm->type; strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->authname)); } static void diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618e66078ee5..7857942c5ca6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> #include <linux/jhash.h> +#include <linux/string_helpers.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -99,6 +100,13 @@ enum nfsd4_st_mutex_lock_subclass { */ static DECLARE_WAIT_QUEUE_HEAD(close_wq); +/* + * A waitqueue where a writer to clients/#/ctl destroying a client can + * wait for cl_rpc_users to drop to 0 and then for the client to be + * unhashed. + */ +static DECLARE_WAIT_QUEUE_HEAD(expiry_wq); + static struct kmem_cache *client_slab; static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; @@ -138,7 +146,7 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); return nfs_ok; } @@ -170,20 +178,24 @@ static void put_client_renew_locked(struct nfs4_client *clp) lockdep_assert_held(&nn->client_lock); - if (!atomic_dec_and_test(&clp->cl_refcount)) + if (!atomic_dec_and_test(&clp->cl_rpc_users)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); } static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock)) return; if (!is_client_expired(clp)) renew_client_locked(clp); + else + wake_up_all(&expiry_wq); spin_unlock(&nn->client_lock); } @@ -694,7 +706,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla idr_preload(GFP_KERNEL); spin_lock(&cl->cl_lock); - new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + /* Reserving 0 for start of file in nfsdfs "states" file: */ + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT); spin_unlock(&cl->cl_lock); idr_preload_end(); if (new_id < 0) @@ -1563,7 +1576,7 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) * Never use more than a third of the remaining memory, * unless it's the only way to give this client a slot: */ - avail = clamp_t(int, avail, slotsize, total_avail/3); + avail = clamp_t(unsigned long, avail, slotsize, total_avail/3); num = min_t(int, num, avail / slotsize); nfsd_drc_mem_used += num * slotsize; spin_unlock(&nfsd_drc_lock); @@ -1844,7 +1857,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; - clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); + xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL); if (clp->cl_name.data == NULL) goto err_no_name; clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE, @@ -1854,10 +1867,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) goto err_no_hashtbl; for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); - clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); + atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -1879,6 +1891,25 @@ err_no_name: return NULL; } +static void __free_client(struct kref *k) +{ + struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref); + struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); + + free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); + kfree(clp->cl_name.data); + kfree(clp->cl_nii_domain.data); + kfree(clp->cl_nii_name.data); + idr_destroy(&clp->cl_stateids); + kmem_cache_free(client_slab, clp); +} + +static void drop_client(struct nfs4_client *clp) +{ + kref_put(&clp->cl_nfsdfs.cl_ref, __free_client); +} + static void free_client(struct nfs4_client *clp) { @@ -1891,11 +1922,12 @@ free_client(struct nfs4_client *clp) free_session(ses); } rpc_destroy_wait_queue(&clp->cl_cb_waitq); - free_svc_cred(&clp->cl_cred); - kfree(clp->cl_ownerstr_hashtbl); - kfree(clp->cl_name.data); - idr_destroy(&clp->cl_stateids); - kmem_cache_free(client_slab, clp); + if (clp->cl_nfsd_dentry) { + nfsd_client_rmdir(clp->cl_nfsd_dentry); + clp->cl_nfsd_dentry = NULL; + wake_up_all(&expiry_wq); + } + drop_client(clp); } /* must be called under the client_lock */ @@ -1936,7 +1968,7 @@ unhash_client(struct nfs4_client *clp) static __be32 mark_client_expired_locked(struct nfs4_client *clp) { - if (atomic_read(&clp->cl_refcount)) + if (atomic_read(&clp->cl_rpc_users)) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; @@ -1989,6 +2021,7 @@ __destroy_client(struct nfs4_client *clp) if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); free_client(clp); + wake_up_all(&expiry_wq); } static void @@ -2199,6 +2232,342 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) return s; } +static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) +{ + struct nfsdfs_client *nc; + nc = get_nfsdfs_client(inode); + if (!nc) + return NULL; + return container_of(nc, struct nfs4_client, cl_nfsdfs); +} + +static void seq_quote_mem(struct seq_file *m, char *data, int len) +{ + seq_printf(m, "\""); + seq_escape_mem_ascii(m, data, len); + seq_printf(m, "\""); +} + +static int client_info_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct nfs4_client *clp; + u64 clid; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + memcpy(&clid, &clp->cl_clientid, sizeof(clid)); + seq_printf(m, "clientid: 0x%llx\n", clid); + seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + seq_printf(m, "name: "); + seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); + seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); + if (clp->cl_nii_domain.data) { + seq_printf(m, "Implementation domain: "); + seq_quote_mem(m, clp->cl_nii_domain.data, + clp->cl_nii_domain.len); + seq_printf(m, "\nImplementation name: "); + seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); + seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); + } + drop_client(clp); + + return 0; +} + +static int client_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, client_info_show, inode); +} + +static const struct file_operations client_info_fops = { + .open = client_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void *states_start(struct seq_file *s, loff_t *pos) + __acquires(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + spin_lock(&clp->cl_lock); + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void *states_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct nfs4_client *clp = s->private; + unsigned long id = *pos; + void *ret; + + id = *pos; + id++; + ret = idr_get_next_ul(&clp->cl_stateids, &id); + *pos = id; + return ret; +} + +static void states_stop(struct seq_file *s, void *v) + __releases(&clp->cl_lock) +{ + struct nfs4_client *clp = s->private; + + spin_unlock(&clp->cl_lock); +} + +static void nfs4_show_superblock(struct seq_file *s, struct file *f) +{ + struct inode *inode = file_inode(f); + + seq_printf(s, "superblock: \"%02x:%02x:%ld\"", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), + inode->i_ino); +} + +static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) +{ + seq_printf(s, "owner: "); + seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); +} + +static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + unsigned int access, deny; + + if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID) + return 0; /* XXX: or SEQ_SKIP? */ + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + + access = bmap_to_share_mode(ols->st_access_bmap); + deny = bmap_to_share_mode(ols->st_deny_bmap); + + seq_printf(s, "access: \%s\%s, ", + access & NFS4_SHARE_ACCESS_READ ? "r" : "-", + access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); + seq_printf(s, "deny: \%s\%s, ", + deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", + deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); + + nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_ol_stateid *ols; + struct nfs4_file *nf; + struct file *file; + struct nfs4_stateowner *oo; + + ols = openlockstateid(st); + oo = ols->st_stateowner; + nf = st->sc_file; + file = find_any_file(nf); + + seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + + /* + * Note: a lock stateid isn't really the same thing as a lock, + * it's the locking state held by one owner on a file, and there + * may be multiple (or no) lock ranges associated with it. + * (Same for the matter is true of open stateids.) + */ + + nfs4_show_superblock(s, file); + /* XXX: open stateid? */ + seq_printf(s, ", "); + nfs4_show_owner(s, oo); + seq_printf(s, " }\n"); + fput(file); + + return 0; +} + +static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_delegation *ds; + struct nfs4_file *nf; + struct file *file; + + ds = delegstateid(st); + nf = st->sc_file; + file = nf->fi_deleg_file; + + seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + + /* Kinda dead code as long as we only support read delegs: */ + seq_printf(s, "access: %s, ", + ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + + /* XXX: lease time, whether it's being recalled. */ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) +{ + struct nfs4_layout_stateid *ls; + struct file *file; + + ls = container_of(st, struct nfs4_layout_stateid, ls_stid); + file = ls->ls_file; + + seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + + /* XXX: What else would be useful? */ + + nfs4_show_superblock(s, file); + seq_printf(s, " }\n"); + + return 0; +} + +static int states_show(struct seq_file *s, void *v) +{ + struct nfs4_stid *st = v; + + switch (st->sc_type) { + case NFS4_OPEN_STID: + return nfs4_show_open(s, st); + case NFS4_LOCK_STID: + return nfs4_show_lock(s, st); + case NFS4_DELEG_STID: + return nfs4_show_deleg(s, st); + case NFS4_LAYOUT_STID: + return nfs4_show_layout(s, st); + default: + return 0; /* XXX: or SEQ_SKIP? */ + } + /* XXX: copy stateids? */ +} + +static struct seq_operations states_seq_ops = { + .start = states_start, + .next = states_next, + .stop = states_stop, + .show = states_show +}; + +static int client_states_open(struct inode *inode, struct file *file) +{ + struct seq_file *s; + struct nfs4_client *clp; + int ret; + + clp = get_nfsdfs_clp(inode); + if (!clp) + return -ENXIO; + + ret = seq_open(file, &states_seq_ops); + if (ret) + return ret; + s = file->private_data; + s->private = clp; + return 0; +} + +static int client_opens_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct nfs4_client *clp = m->private; + + /* XXX: alternatively, we could get/drop in seq start/stop */ + drop_client(clp); + return 0; +} + +static const struct file_operations client_states_fops = { + .open = client_states_open, + .read = seq_read, + .llseek = seq_lseek, + .release = client_opens_release, +}; + +/* + * Normally we refuse to destroy clients that are in use, but here the + * administrator is telling us to just do it. We also want to wait + * so the caller has a guarantee that the client's locks are gone by + * the time the write returns: + */ +static void force_expire_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + bool already_expired; + + spin_lock(&clp->cl_lock); + clp->cl_time = 0; + spin_unlock(&clp->cl_lock); + + wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); + spin_lock(&nn->client_lock); + already_expired = list_empty(&clp->cl_lru); + if (!already_expired) + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); + + if (!already_expired) + expire_client(clp); + else + wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL); +} + +static ssize_t client_ctl_write(struct file *file, const char __user *buf, + size_t size, loff_t *pos) +{ + char *data; + struct nfs4_client *clp; + + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + if (size != 7 || 0 != memcmp(data, "expire\n", 7)) + return -EINVAL; + clp = get_nfsdfs_clp(file_inode(file)); + if (!clp) + return -ENXIO; + force_expire_client(clp); + drop_client(clp); + return 7; +} + +static const struct file_operations client_ctl_fops = { + .write = client_ctl_write, + .release = simple_transaction_release, +}; + +static const struct tree_descr client_files[] = { + [0] = {"info", &client_info_fops, S_IRUSR}, + [1] = {"states", &client_states_fops, S_IRUSR}, + [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, + [3] = {""}, +}; + static struct nfs4_client *create_client(struct xdr_netobj name, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -2206,6 +2575,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -2216,13 +2586,22 @@ static struct nfs4_client *create_client(struct xdr_netobj name, free_client(clp); return NULL; } + gen_clid(clp, nn); + kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); - rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); + memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; + clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files); + if (!clp->cl_nfsd_dentry) { + free_client(clp); + return NULL; + } return clp; } @@ -2533,6 +2912,22 @@ static bool client_has_state(struct nfs4_client *clp) || !list_empty(&clp->async_copies); } +static __be32 copy_impl_id(struct nfs4_client *clp, + struct nfsd4_exchange_id *exid) +{ + if (!exid->nii_domain.data) + return 0; + xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL); + if (!clp->cl_nii_domain.data) + return nfserr_jukebox; + xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); + if (!clp->cl_nii_name.data) + return nfserr_jukebox; + clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; + clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + return 0; +} + __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2559,6 +2954,9 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = create_client(exid->clname, rqstp, &verf); if (new == NULL) return nfserr_jukebox; + status = copy_impl_id(new, exid); + if (status) + goto out_nolock; switch (exid->spa_how) { case SP4_MACH_CRED: @@ -2667,7 +3065,6 @@ out_new: new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1]; - gen_clid(new, nn); add_to_unconfirmed(new); swap(new, conf); out_copy: @@ -3411,7 +3808,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy_clid(new, conf); gen_confirm(new, nn); } else /* case 4 (new client) or cases 2, 3 (client reboot): */ - gen_clid(new, nn); + ; new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -3632,12 +4029,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj if (!sop) return NULL; - sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL); if (!sop->so_owner.data) { kmem_cache_free(slab, sop); return NULL; } - sop->so_owner.len = owner->len; INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; @@ -4092,7 +4488,7 @@ static __be32 lookup_clientid(clientid_t *clid, spin_unlock(&nn->client_lock); return nfserr_expired; } - atomic_inc(&found->cl_refcount); + atomic_inc(&found->cl_rpc_users); spin_unlock(&nn->client_lock); /* Cache the nfs4_client in cstate! */ @@ -5725,12 +6121,11 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) if (fl->fl_lmops == &nfsd_posix_mng_ops) { lo = (struct nfs4_lockowner *) fl->fl_owner; - deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, - lo->lo_owner.so_owner.len, GFP_KERNEL); + xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner, + GFP_KERNEL); if (!deny->ld_owner.data) /* We just don't care that much */ goto nevermind; - deny->ld_owner.len = lo->lo_owner.so_owner.len; deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { nevermind: @@ -6584,7 +6979,7 @@ nfs4_check_open_reclaim(clientid_t *clid, static inline void put_client(struct nfs4_client *clp) { - atomic_dec(&clp->cl_refcount); + atomic_dec(&clp->cl_rpc_users); } static struct nfs4_client * @@ -6702,7 +7097,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, return; lockdep_assert_held(&nn->client_lock); - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&lst->st_locks, collect); } @@ -6731,7 +7126,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, * Despite the fact that these functions deal * with 64-bit integers for "count", we must * ensure that it doesn't blow up the - * clp->cl_refcount. Throw a warning if we + * clp->cl_rpc_users. Throw a warning if we * start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6855,7 +7250,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, if (func) { func(oop); if (collect) { - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); list_add(&oop->oo_perclient, collect); } } @@ -6863,7 +7258,7 @@ nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); @@ -6993,7 +7388,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, if (dp->dl_time != 0) continue; - atomic_inc(&clp->cl_refcount); + atomic_inc(&clp->cl_rpc_users); WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, victims); } @@ -7001,7 +7396,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, /* * Despite the fact that these functions deal with * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_refcount. Throw a + * it doesn't blow up the clp->cl_rpc_users. Throw a * warning if we start to approach INT_MAX here. */ WARN_ON_ONCE(count == (INT_MAX / 2)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 52c4f6daa649..442811809f3d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -269,19 +269,13 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) return ret; } -/* - * We require the high 32 bits of 'seconds' to be 0, and - * we ignore all 32 bits of 'nseconds'. - */ static __be32 -nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv) +nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { DECODE_HEAD; - u64 sec; READ_BUF(12); - p = xdr_decode_hyper(p, &sec); - tv->tv_sec = sec; + p = xdr_decode_hyper(p, &tv->tv_sec); tv->tv_nsec = be32_to_cpup(p++); if (tv->tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -320,7 +314,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl, struct xdr_netobj *label, int *umask) { - struct timespec ts; int expected_len, len = 0; u32 dummy32; char *buf; @@ -422,8 +415,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_atime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -442,8 +434,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, switch (dummy32) { case NFS4_SET_TO_CLIENT_TIME: len += 12; - status = nfsd4_decode_time(argp, &ts); - iattr->ia_mtime = timespec_to_timespec64(ts); + status = nfsd4_decode_time(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -1398,7 +1389,6 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; } - /* Ignore Implementation ID */ READ_BUF(4); /* nfs_impl_id4 array length */ dummy = be32_to_cpup(p++); @@ -1406,21 +1396,19 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, goto xdr_error; if (dummy == 1) { - /* nii_domain */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = nfsd4_decode_opaque(argp, &exid->nii_domain); + if (status) + goto xdr_error; /* nii_name */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); + status = nfsd4_decode_opaque(argp, &exid->nii_name); + if (status) + goto xdr_error; /* nii_date */ - READ_BUF(12); - p += 3; + status = nfsd4_decode_time(argp, &exid->nii_time); + if (status) + goto xdr_error; } DECODE_TAIL; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index da52b594362a..26ad75ae2be0 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,6 +9,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */ +#include <linux/sunrpc/svc_xprt.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/sunrpc/addr.h> @@ -35,48 +36,12 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; -static struct nfsd_drc_bucket *drc_hashtbl; -static struct kmem_cache *drc_slab; - -/* max number of entries allowed in the cache */ -static unsigned int max_drc_entries; - -/* number of significant bits in the hash value */ -static unsigned int maskbits; -static unsigned int drc_hashsize; - -/* - * Stats and other tracking of on the duplicate reply cache. All of these and - * the "rc" fields in nfsdstats are protected by the cache_lock - */ - -/* total number of entries */ -static atomic_t num_drc_entries; - -/* cache misses due only to checksum comparison failures */ -static unsigned int payload_misses; - -/* amount of memory (in bytes) currently consumed by the DRC */ -static unsigned int drc_mem_usage; - -/* longest hash chain seen */ -static unsigned int longest_chain; - -/* size of cache when we saw the longest hash chain */ -static unsigned int longest_chain_cachesize; - static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc); -static struct shrinker nfsd_reply_cache_shrinker = { - .scan_objects = nfsd_reply_cache_scan, - .count_objects = nfsd_reply_cache_count, - .seeks = 1, -}; - /* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. @@ -94,6 +59,9 @@ static struct shrinker nfsd_reply_cache_shrinker = { * ...with a hard cap of 256k entries. In the worst case, each entry will be * ~1k, so the above numbers should give a rough max of the amount of memory * used in k. + * + * XXX: these limits are per-container, so memory used will increase + * linearly with number of containers. Maybe that's OK. */ static unsigned int nfsd_cache_size_limit(void) @@ -116,17 +84,18 @@ nfsd_hashsize(unsigned int limit) } static u32 -nfsd_cache_hash(__be32 xid) +nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) { - return hash_32(be32_to_cpu(xid), maskbits); + return hash_32(be32_to_cpu(xid), nn->maskbits); } static struct svc_cacherep * -nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) +nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, + struct nfsd_net *nn) { struct svc_cacherep *rp; - rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -147,91 +116,101 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) } static void -nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, + struct nfsd_net *nn) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - drc_mem_usage -= rp->c_replvec.iov_len; + nn->drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); - atomic_dec(&num_drc_entries); - drc_mem_usage -= sizeof(*rp); + atomic_dec(&nn->num_drc_entries); + nn->drc_mem_usage -= sizeof(*rp); } - kmem_cache_free(drc_slab, rp); + kmem_cache_free(nn->drc_slab, rp); } static void -nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, + struct nfsd_net *nn) { spin_lock(&b->cache_lock); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); spin_unlock(&b->cache_lock); } -int nfsd_reply_cache_init(void) +int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; unsigned int i; int status = 0; - max_drc_entries = nfsd_cache_size_limit(); - atomic_set(&num_drc_entries, 0); - hashsize = nfsd_hashsize(max_drc_entries); - maskbits = ilog2(hashsize); + nn->max_drc_entries = nfsd_cache_size_limit(); + atomic_set(&nn->num_drc_entries, 0); + hashsize = nfsd_hashsize(nn->max_drc_entries); + nn->maskbits = ilog2(hashsize); - status = register_shrinker(&nfsd_reply_cache_shrinker); + nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; + nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; + nn->nfsd_reply_cache_shrinker.seeks = 1; + status = register_shrinker(&nn->nfsd_reply_cache_shrinker); if (status) - return status; - - drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), - 0, 0, NULL); - if (!drc_slab) goto out_nomem; - drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); - if (!drc_hashtbl) { - drc_hashtbl = vzalloc(array_size(hashsize, - sizeof(*drc_hashtbl))); - if (!drc_hashtbl) - goto out_nomem; + nn->drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + if (!nn->drc_slab) + goto out_shrinker; + + nn->drc_hashtbl = kcalloc(hashsize, + sizeof(*nn->drc_hashtbl), GFP_KERNEL); + if (!nn->drc_hashtbl) { + nn->drc_hashtbl = vzalloc(array_size(hashsize, + sizeof(*nn->drc_hashtbl))); + if (!nn->drc_hashtbl) + goto out_slab; } for (i = 0; i < hashsize; i++) { - INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); - spin_lock_init(&drc_hashtbl[i].cache_lock); + INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); + spin_lock_init(&nn->drc_hashtbl[i].cache_lock); } - drc_hashsize = hashsize; + nn->drc_hashsize = hashsize; return 0; +out_slab: + kmem_cache_destroy(nn->drc_slab); +out_shrinker: + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); - nfsd_reply_cache_shutdown(); return -ENOMEM; } -void nfsd_reply_cache_shutdown(void) +void nfsd_reply_cache_shutdown(struct nfsd_net *nn) { struct svc_cacherep *rp; unsigned int i; - unregister_shrinker(&nfsd_reply_cache_shrinker); + unregister_shrinker(&nn->nfsd_reply_cache_shrinker); - for (i = 0; i < drc_hashsize; i++) { - struct list_head *head = &drc_hashtbl[i].lru_head; + for (i = 0; i < nn->drc_hashsize; i++) { + struct list_head *head = &nn->drc_hashtbl[i].lru_head; while (!list_empty(head)) { rp = list_first_entry(head, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp); + nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i], + rp, nn); } } - kvfree(drc_hashtbl); - drc_hashtbl = NULL; - drc_hashsize = 0; + kvfree(nn->drc_hashtbl); + nn->drc_hashtbl = NULL; + nn->drc_hashsize = 0; - kmem_cache_destroy(drc_slab); - drc_slab = NULL; + kmem_cache_destroy(nn->drc_slab); + nn->drc_slab = NULL; } /* @@ -246,7 +225,7 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) } static long -prune_bucket(struct nfsd_drc_bucket *b) +prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -258,10 +237,10 @@ prune_bucket(struct nfsd_drc_bucket *b) */ if (rp->c_state == RC_INPROG) continue; - if (atomic_read(&num_drc_entries) <= max_drc_entries && + if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); freed++; } return freed; @@ -272,18 +251,18 @@ prune_bucket(struct nfsd_drc_bucket *b) * Also prune the oldest ones when the total exceeds the max number of entries. */ static long -prune_cache_entries(void) +prune_cache_entries(struct nfsd_net *nn) { unsigned int i; long freed = 0; - for (i = 0; i < drc_hashsize; i++) { - struct nfsd_drc_bucket *b = &drc_hashtbl[i]; + for (i = 0; i < nn->drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i]; if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b); + freed += prune_bucket(b, nn); spin_unlock(&b->cache_lock); } return freed; @@ -292,13 +271,19 @@ prune_cache_entries(void) static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) { - return atomic_read(&num_drc_entries); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return atomic_read(&nn->num_drc_entries); } static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { - return prune_cache_entries(); + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_reply_cache_shrinker); + + return prune_cache_entries(nn); } /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes @@ -334,11 +319,12 @@ nfsd_cache_csum(struct svc_rqst *rqstp) } static int -nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp) +nfsd_cache_key_cmp(const struct svc_cacherep *key, + const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && key->c_key.k_csum != rp->c_key.k_csum) - ++payload_misses; + ++nn->payload_misses; return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -349,7 +335,8 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp * inserts an empty key on failure. */ static struct svc_cacherep * -nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key, + struct nfsd_net *nn) { struct svc_cacherep *rp, *ret = key; struct rb_node **p = &b->rb_head.rb_node, @@ -362,7 +349,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) parent = *p; rp = rb_entry(parent, struct svc_cacherep, c_node); - cmp = nfsd_cache_key_cmp(key, rp); + cmp = nfsd_cache_key_cmp(key, rp, nn); if (cmp < 0) p = &parent->rb_left; else if (cmp > 0) @@ -376,14 +363,14 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) rb_insert_color(&key->c_node, &b->rb_head); out: /* tally hash chain length stats */ - if (entries > longest_chain) { - longest_chain = entries; - longest_chain_cachesize = atomic_read(&num_drc_entries); - } else if (entries == longest_chain) { + if (entries > nn->longest_chain) { + nn->longest_chain = entries; + nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries); + } else if (entries == nn->longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min_t(unsigned int, - longest_chain_cachesize, - atomic_read(&num_drc_entries)); + nn->longest_chain_cachesize = min_t(unsigned int, + nn->longest_chain_cachesize, + atomic_read(&nn->num_drc_entries)); } lru_put_end(b, ret); @@ -400,11 +387,12 @@ out: int nfsd_cache_lookup(struct svc_rqst *rqstp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid); - struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; + u32 hash = nfsd_cache_hash(xid, nn); + struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -420,16 +408,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ - rp = nfsd_reply_cache_alloc(rqstp, csum); + rp = nfsd_reply_cache_alloc(rqstp, csum, nn); if (!rp) { dprintk("nfsd: unable to allocate DRC entry!\n"); return rtn; } spin_lock(&b->cache_lock); - found = nfsd_cache_insert(b, rp); + found = nfsd_cache_insert(b, rp, nn); if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp); + nfsd_reply_cache_free_locked(NULL, rp, nn); rp = found; goto found_entry; } @@ -438,11 +426,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; - atomic_inc(&num_drc_entries); - drc_mem_usage += sizeof(*rp); + atomic_inc(&nn->num_drc_entries); + nn->drc_mem_usage += sizeof(*rp); /* go ahead and prune the cache */ - prune_bucket(b); + prune_bucket(b, nn); out: spin_unlock(&b->cache_lock); return rtn; @@ -477,7 +465,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp); + nfsd_reply_cache_free_locked(b, rp, nn); } goto out; @@ -502,6 +490,7 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; u32 hash; @@ -512,15 +501,15 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid); - b = &drc_hashtbl[hash]; + hash = nfsd_cache_hash(rp->c_key.k_xid, nn); + b = &nn->drc_hashtbl[hash]; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } @@ -535,18 +524,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(b, rp); + nfsd_reply_cache_free(b, rp, nn); return; } spin_lock(&b->cache_lock); - drc_mem_usage += bufsize; + nn->drc_mem_usage += bufsize; lru_put_end(b, rp); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; @@ -582,21 +571,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - seq_printf(m, "max entries: %u\n", max_drc_entries); + struct nfsd_net *nn = v; + + seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", - atomic_read(&num_drc_entries)); - seq_printf(m, "hash buckets: %u\n", 1 << maskbits); - seq_printf(m, "mem usage: %u\n", drc_mem_usage); + atomic_read(&nn->num_drc_entries)); + seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); + seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); - seq_printf(m, "payload misses: %u\n", payload_misses); - seq_printf(m, "longest chain len: %u\n", longest_chain); - seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); + seq_printf(m, "payload misses: %u\n", nn->payload_misses); + seq_printf(m, "longest chain len: %u\n", nn->longest_chain); + seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) { - return single_open(file, nfsd_reply_cache_stats_show, NULL); + struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, + nfsd_net_id); + + return single_open(file, nfsd_reply_cache_stats_show, nn); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0f154b0e1c8a..13c548733860 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Syscall interface to knfsd. * @@ -16,6 +17,7 @@ #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> +#include <linux/fsnotify.h> #include "idmap.h" #include "nfsd.h" @@ -53,6 +55,7 @@ enum { NFSD_RecoveryDir, NFSD_V4EndGrace, #endif + NFSD_MaxReserved }; /* @@ -1147,8 +1150,201 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size) * populating the filesystem. */ +/* Basically copying rpc_get_inode. */ +static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode = new_inode(sb); + if (!inode) + return NULL; + /* Following advice from simple_fill_super documentation: */ + inode->i_ino = iunique(sb, NFSD_MaxReserved); + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + default: + break; + } + return inode; +} + +static int __nfsd_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + inode = nfsd_get_inode(dir->i_sb, mode); + if (!inode) + return -ENOMEM; + d_add(dentry, inode); + inc_nlink(dir); + fsnotify_mkdir(dir, dentry); + return 0; +} + +static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int ret = -ENOMEM; + + inode_lock(dir); + dentry = d_alloc_name(parent, name); + if (!dentry) + goto out_err; + ret = __nfsd_mkdir(d_inode(parent), dentry, S_IFDIR | 0600); + if (ret) + goto out_err; + if (ncl) { + d_inode(dentry)->i_private = ncl; + kref_get(&ncl->cl_ref); + } +out: + inode_unlock(dir); + return dentry; +out_err: + dentry = ERR_PTR(ret); + goto out; +} + +static void clear_ncl(struct inode *inode) +{ + struct nfsdfs_client *ncl = inode->i_private; + + inode->i_private = NULL; + synchronize_rcu(); + kref_put(&ncl->cl_ref, ncl->cl_release); +} + + +static struct nfsdfs_client *__get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc = inode->i_private; + + if (nc) + kref_get(&nc->cl_ref); + return nc; +} + +struct nfsdfs_client *get_nfsdfs_client(struct inode *inode) +{ + struct nfsdfs_client *nc; + + rcu_read_lock(); + nc = __get_nfsdfs_client(inode); + rcu_read_unlock(); + return nc; +} +/* from __rpc_unlink */ +static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) +{ + int ret; + + clear_ncl(d_inode(dentry)); + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + WARN_ON_ONCE(ret); +} + +static void nfsdfs_remove_files(struct dentry *root) +{ + struct dentry *dentry, *tmp; + + list_for_each_entry_safe(dentry, tmp, &root->d_subdirs, d_child) { + if (!simple_positive(dentry)) { + WARN_ON_ONCE(1); /* I think this can't happen? */ + continue; + } + nfsdfs_remove_file(d_inode(root), dentry); + } +} + +/* XXX: cut'n'paste from simple_fill_super; figure out if we could share + * code instead. */ +static int nfsdfs_create_files(struct dentry *root, + const struct tree_descr *files) +{ + struct inode *dir = d_inode(root); + struct inode *inode; + struct dentry *dentry; + int i; + + inode_lock(dir); + for (i = 0; files->name && files->name[0]; i++, files++) { + if (!files->name) + continue; + dentry = d_alloc_name(root, files->name); + if (!dentry) + goto out; + inode = nfsd_get_inode(d_inode(root)->i_sb, + S_IFREG | files->mode); + if (!inode) { + dput(dentry); + goto out; + } + inode->i_fop = files->ops; + inode->i_private = __get_nfsdfs_client(dir); + d_add(dentry, inode); + fsnotify_create(dir, dentry); + } + inode_unlock(dir); + return 0; +out: + nfsdfs_remove_files(root); + inode_unlock(dir); + return -ENOMEM; +} + +/* on success, returns positive number unique to that client. */ +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files) +{ + struct dentry *dentry; + char name[11]; + int ret; + + sprintf(name, "%u", id); + + dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); + if (IS_ERR(dentry)) /* XXX: tossing errors? */ + return NULL; + ret = nfsdfs_create_files(dentry, files); + if (ret) { + nfsd_client_rmdir(dentry); + return NULL; + } + return dentry; +} + +/* Taken from __rpc_rmdir: */ +void nfsd_client_rmdir(struct dentry *dentry) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct inode *inode = d_inode(dentry); + int ret; + + inode_lock(dir); + nfsdfs_remove_files(dentry); + clear_ncl(inode); + dget(dentry); + ret = simple_rmdir(dir, dentry); + WARN_ON_ONCE(ret); + d_delete(dentry); + inode_unlock(dir); +} + static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) { + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + struct dentry *dentry; + int ret; + static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", @@ -1178,7 +1374,14 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + nn->nfsd_client_dir = dentry; + return 0; } static int nfsd_fs_get_tree(struct fs_context *fc) @@ -1250,6 +1453,7 @@ unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; + struct vfsmount *mnt; struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); @@ -1260,18 +1464,33 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; + retval = nfsd_reply_cache_init(nn); + if (retval) + goto out_drc_error; nn->nfsd4_lease = 90; /* default lease time */ nn->nfsd4_grace = 90; nn->somebody_reclaimed = false; nn->track_reclaim_completes = false; nn->clverifier_counter = prandom_u32(); - nn->clientid_counter = prandom_u32(); + nn->clientid_base = prandom_u32(); + nn->clientid_counter = nn->clientid_base + 1; nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->ntf_refcnt, 0); init_waitqueue_head(&nn->ntf_wq); + + mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); + if (IS_ERR(mnt)) { + retval = PTR_ERR(mnt); + goto out_mount_err; + } + nn->nfsd_mnt = mnt; return 0; +out_mount_err: + nfsd_reply_cache_shutdown(nn); +out_drc_error: + nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1280,6 +1499,10 @@ out_export_error: static __net_exit void nfsd_exit_net(struct net *net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + mntput(nn->nfsd_mnt); + nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); @@ -1309,13 +1532,8 @@ static int __init init_nfsd(void) retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ - if (retval) - goto out_exit_pnfs; + nfsd_fault_inject_init(); /* nfsd fault injection controls */ nfsd_stat_init(); /* Statistics */ - retval = nfsd_reply_cache_init(); - if (retval) - goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1329,11 +1547,8 @@ out_free_all: remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); - nfsd_reply_cache_shutdown(); -out_free_stat: nfsd_stat_shutdown(); nfsd_fault_inject_cleanup(); -out_exit_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); @@ -1346,7 +1561,6 @@ out_unregister_pernet: static void __exit exit_nfsd(void) { - nfsd_reply_cache_shutdown(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 24187b5dd638..af2947551e9c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -22,6 +22,7 @@ #include <uapi/linux/nfsd/debug.h> +#include "netns.h" #include "stats.h" #include "export.h" @@ -86,6 +87,16 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +struct nfsdfs_client { + struct kref cl_ref; + void (*cl_release)(struct kref *kref); +}; + +struct nfsdfs_client *get_nfsdfs_client(struct inode *); +struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, + struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); +void nfsd_client_rmdir(struct dentry *dentry); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern const struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0b74d371ed67..5dbd16946e8e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -39,6 +39,7 @@ #include <linux/refcount.h> #include <linux/sunrpc/svc_xprt.h> #include "nfsfh.h" +#include "nfsd.h" typedef struct { u32 cl_boot; @@ -316,6 +317,10 @@ struct nfs4_client { clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ u32 cl_minorversion; + /* NFSv4.1 client implementation id: */ + struct xdr_netobj cl_nii_domain; + struct xdr_netobj cl_nii_name; + struct timespec cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; @@ -347,9 +352,13 @@ struct nfs4_client { struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; /* number of rpc's in progress over an associated session: */ - atomic_t cl_refcount; + atomic_t cl_rpc_users; + struct nfsdfs_client cl_nfsdfs; struct nfs4_op_map cl_spo_must_allow; + /* debugging info directory under nfsd/clients/ : */ + struct dentry *cl_nfsd_dentry; + /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ unsigned long cl_cb_slot_busy; @@ -663,7 +672,7 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn); /* nfs fault injection functions */ #ifdef CONFIG_NFSD_FAULT_INJECTION -int nfsd_fault_inject_init(void); +void nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); u64 nfsd_inject_print_clients(void); @@ -684,7 +693,7 @@ u64 nfsd_inject_forget_delegations(u64); u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); u64 nfsd_inject_recall_delegations(u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline int nfsd_fault_inject_init(void) { return 0; } +static inline void nfsd_fault_inject_init(void) {} static inline void nfsd_fault_inject_cleanup(void) {} #endif /* CONFIG_NFSD_FAULT_INJECTION */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fc24ee47eab5..c85783e536d5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -404,7 +404,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * If utimes(2) and friends are called with times not NULL, we should * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission - * will return EACCESS, when the caller's effective UID does not match + * will return EACCES, when the caller's effective UID does not match * the owner of the file, and the caller is not privileged. In this * situation, we should return EPERM(notify_change will return this). */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index feeb6d4bdffd..d64c870f998a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -410,6 +410,9 @@ struct nfsd4_exchange_id { int spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; + struct xdr_netobj nii_domain; + struct xdr_netobj nii_name; + struct timespec64 nii_time; }; struct nfsd4_sequence { @@ -472,7 +475,7 @@ struct nfsd4_layoutcommit { u32 lc_reclaim; /* request */ u32 lc_newoffset; /* request */ u64 lc_last_wr; /* request */ - struct timespec lc_mtime; /* request */ + struct timespec64 lc_mtime; /* request */ u32 lc_layout_type; /* request */ u32 lc_up_len; /* layout length */ void *lc_up_layout; /* decoded by callback */ diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig index 80da8eb27393..254d102e79c9 100644 --- a/fs/nilfs2/Kconfig +++ b/fs/nilfs2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config NILFS2_FS tristate "NILFS2 file system support" select CRC32 diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9b96d79eea6c..91b9dac6b2cc 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -148,13 +148,8 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, oldflags = NILFS_I(inode)->i_flags; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by the - * relevant capability. - */ - ret = -EPERM; - if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && - !capable(CAP_LINUX_IMMUTABLE)) + ret = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (ret) goto out; ret = nilfs_transaction_begin(inode->i_sb, &ti, 0); diff --git a/fs/nls/Kconfig b/fs/nls/Kconfig index e2ce79ef48c4..5a63303298e6 100644 --- a/fs/nls/Kconfig +++ b/fs/nls/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Native language support configuration # diff --git a/fs/no-block.c b/fs/no-block.c index 6e40e42a43de..481c0f0ab4bd 100644 --- a/fs/no-block.c +++ b/fs/no-block.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* no-block.c: implementation of routines required for non-BLOCK configuration * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 2a24249b30af..c020d26ba223 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FSNOTIFY def_bool n select SRCU diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig index f9c1ca139d8f..3df7def17eea 100644 --- a/fs/notify/dnotify/Kconfig +++ b/fs/notify/dnotify/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config DNOTIFY bool "Dnotify support" select FSNOTIFY diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile index f145251dcadb..121b4dd6b1fe 100644 --- a/fs/notify/dnotify/Makefile +++ b/fs/notify/dnotify/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_DNOTIFY) += dnotify.o diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 250369d6901d..c03758c91481 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Directory notifications for Linux. * @@ -5,16 +6,6 @@ * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * dnotify was largly rewritten to use the new fsnotify infrastructure - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/fs.h> #include <linux/module.h> diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index 521dc91d2cb5..8b9103f126ad 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config FANOTIFY bool "Filesystem wide access notification" select FSNOTIFY diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile index 0999213e7e6e..25ef222915e5 100644 --- a/fs/notify/fanotify/Makefile +++ b/fs/notify/fanotify/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e6fde1a5c072..5778d1347b35 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -288,10 +288,13 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group, /* * For queues with unlimited length lost events are not expected and * can possibly have security implications. Avoid losing events when - * memory is short. + * memory is short. For the limited size queues, avoid OOM killer in the + * target monitoring memcg as it may have security repercussion. */ if (group->max_events == UINT_MAX) gfp |= __GFP_NOFAIL; + else + gfp |= __GFP_RETRY_MAYFAIL; /* Whoever is interested in the event, pays for the allocation. */ memalloc_use_memcg(group->memcg); @@ -355,6 +358,10 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) /* Mark is just getting destroyed or created? */ if (!conn) continue; + if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) + continue; + /* Pairs with smp_wmb() in fsnotify_add_mark_list() */ + smp_rmb(); fsid = conn->fsid; if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) continue; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index a90bb19dcfa2..91006f47e420 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -920,6 +920,22 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return 0; } +static int fanotify_events_supported(struct path *path, __u64 mask) +{ + /* + * Some filesystems such as 'proc' acquire unusual locks when opening + * files. For them fanotify permission events have high chances of + * deadlocking the system - open done when reporting fanotify event + * blocks on this "unusual" lock while another process holding the lock + * waits for fanotify permission event to be answered. Just disallow + * permission events for such filesystems. + */ + if (mask & FANOTIFY_PERM_EVENTS && + path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) + return -EINVAL; + return 0; +} + static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { @@ -1018,6 +1034,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (ret) goto fput_and_out; + if (flags & FAN_MARK_ADD) { + ret = fanotify_events_supported(&path, mask); + if (ret) + goto path_put_and_out; + } + if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) { ret = fanotify_test_fid(&path, &__fsid); if (ret) diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8c7cbac7183c..2ecef6155fc0 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/dcache.h> @@ -108,47 +95,6 @@ void fsnotify_sb_delete(struct super_block *sb) } /* - * fsnotify_nameremove - a filename was removed from a directory - * - * This is mostly called under parent vfs inode lock so name and - * dentry->d_parent should be stable. However there are some corner cases where - * inode lock is not held. So to be on the safe side and be reselient to future - * callers and out of tree users of d_delete(), we do not assume that d_parent - * and d_name are stable and we use dget_parent() and - * take_dentry_name_snapshot() to grab stable references. - */ -void fsnotify_nameremove(struct dentry *dentry, int isdir) -{ - struct dentry *parent; - struct name_snapshot name; - __u32 mask = FS_DELETE; - - /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */ - if (IS_ROOT(dentry)) - return; - - if (isdir) - mask |= FS_ISDIR; - - parent = dget_parent(dentry); - /* Avoid unneeded take_dentry_name_snapshot() */ - if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) && - !(dentry->d_sb->s_fsnotify_mask & FS_DELETE)) - goto out_dput; - - take_dentry_name_snapshot(&name, dentry); - - fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE, - &name.name, 0); - - release_dentry_name_snapshot(&name); - -out_dput: - dput(parent); -} -EXPORT_SYMBOL(fsnotify_nameremove); - -/* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. If we care about any event * on a child we run all of our children and set a dentry flag saying that the diff --git a/fs/notify/group.c b/fs/notify/group.c index c03b83662876..0391190305cc 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/list.h> diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 0161c74e76e2..6736e47d94d8 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config INOTIFY_USER bool "Inotify support for userspace" select FSNOTIFY diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile index a380dabe09de..812237eecf3a 100644 --- a/fs/notify/inotify/Makefile +++ b/fs/notify/inotify/Makefile @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 7e8b131029f8..d510223d302c 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * @@ -10,16 +11,6 @@ * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/dcache.h> /* d_unlinked */ @@ -99,9 +90,13 @@ int inotify_handle_event(struct fsnotify_group *group, i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); - /* Whoever is interested in the event, pays for the allocation. */ + /* + * Whoever is interested in the event, pays for the allocation. Do not + * trigger OOM killer in the target monitoring memcg as it may have + * security repercussion. + */ memalloc_use_memcg(group->memcg); - event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT); + event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); memalloc_unuse_memcg(); if (unlikely(!event)) { diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 7b53598c8804..0b815178126e 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * @@ -10,16 +11,6 @@ * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewriten to make use of the fsnotify infrastructure - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/file.h> @@ -54,8 +45,6 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include <linux/sysctl.h> -static int zero; - struct ctl_table inotify_table[] = { { .procname = "max_user_instances", @@ -63,7 +52,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "max_user_watches", @@ -71,7 +60,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = SYSCTL_ZERO, }, { .procname = "max_queued_events", @@ -79,7 +68,7 @@ struct ctl_table inotify_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero + .extra1 = SYSCTL_ZERO }, { } }; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index b251105f646f..99ddd126f6f0 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ /* @@ -495,10 +482,13 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, conn->type = type; conn->obj = connp; /* Cache fsid of filesystem containing the object */ - if (fsid) + if (fsid) { conn->fsid = *fsid; - else + conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID; + } else { conn->fsid.val[0] = conn->fsid.val[1] = 0; + conn->flags = 0; + } if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) inode = igrab(fsnotify_conn_inode(conn)); /* @@ -573,7 +563,12 @@ restart: if (err) return err; goto restart; - } else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) && + } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) { + conn->fsid = *fsid; + /* Pairs with smp_rmb() in fanotify_get_fsid() */ + smp_wmb(); + conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID; + } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) && (fsid->val[0] != conn->fsid.val[0] || fsid->val[1] != conn->fsid.val[1])) { /* diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 5f3a54d444b5..75d79d6d3ef0 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -1,19 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ /* diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig index f5a868cc9152..de9fb5cff226 100644 --- a/fs/ntfs/Kconfig +++ b/fs/ntfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config NTFS_FS tristate "NTFS file system support" select NLS diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 8946130c87ad..7202a1e39d70 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * aops.c - NTFS kernel address space operations and page cache handling. * * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/errno.h> diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index 820d6eabf60f..f0962d46bd67 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /** * aops.h - Defines for NTFS kernel address space operations and page cache * handling. Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_AOPS_H diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 44a39a099b54..d563abc3e136 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/buffer_head.h> diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h index 3c8b74c99b80..fe0890d3d072 100644 --- a/fs/ntfs/attrib.h +++ b/fs/ntfs/attrib.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_ATTRIB_H diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c index ec130c588d2b..0675b2400873 100644 --- a/fs/ntfs/bitmap.c +++ b/fs/ntfs/bitmap.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * bitmap.c - NTFS kernel bitmap handling. Part of the Linux-NTFS project. * * Copyright (c) 2004-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef NTFS_RW diff --git a/fs/ntfs/bitmap.h b/fs/ntfs/bitmap.h index 72c9ad8be70d..9dd2224ca9c4 100644 --- a/fs/ntfs/bitmap.h +++ b/fs/ntfs/bitmap.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * bitmap.h - Defines for NTFS kernel bitmap handling. Part of the Linux-NTFS * project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_BITMAP_H diff --git a/fs/ntfs/collate.c b/fs/ntfs/collate.c index 4a28ab3898ef..3ab6ec96abfe 100644 --- a/fs/ntfs/collate.c +++ b/fs/ntfs/collate.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * collate.c - NTFS kernel collation handling. Part of the Linux-NTFS project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "collate.h" diff --git a/fs/ntfs/collate.h b/fs/ntfs/collate.h index aba83347e5fe..f2255619b4f4 100644 --- a/fs/ntfs/collate.h +++ b/fs/ntfs/collate.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * collate.h - Defines for NTFS kernel collation handling. Part of the * Linux-NTFS project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_COLLATE_H diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index df7c32b5fac7..d2f9d6a0ee32 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * compress.c - NTFS kernel compressed attributes handling. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c index 825a54e8f490..a3c1c5656f8f 100644 --- a/fs/ntfs/debug.c +++ b/fs/ntfs/debug.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "debug.h" diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 61bf091e32a8..6fdef388f129 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_DEBUG_H diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 1a24be9e8405..3c4811469ae8 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/buffer_head.h> diff --git a/fs/ntfs/dir.h b/fs/ntfs/dir.h index aea7582d561f..0e326753df40 100644 --- a/fs/ntfs/dir.h +++ b/fs/ntfs/dir.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of * the Linux-NTFS project. * * Copyright (c) 2002-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_DIR_H diff --git a/fs/ntfs/endian.h b/fs/ntfs/endian.h index 927b5bf04b4f..f30c139bf9ae 100644 --- a/fs/ntfs/endian.h +++ b/fs/ntfs/endian.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * endian.h - Defines for endianness handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_ENDIAN_H diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 331910fa8442..f42967b738eb 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc. - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/backing-dev.h> diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 0d645f357930..d46c2c03a032 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * index.c - NTFS kernel index handling. Part of the Linux-NTFS project. * * Copyright (c) 2004-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/slab.h> diff --git a/fs/ntfs/index.h b/fs/ntfs/index.h index 8745469c3989..bb3c3ae55138 100644 --- a/fs/ntfs/index.h +++ b/fs/ntfs/index.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * index.h - Defines for NTFS kernel index handling. Part of the Linux-NTFS * project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_INDEX_H diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index fb1a2b49a5da..8baa34baf548 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * inode.c - NTFS kernel inode handling. * * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/buffer_head.h> diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 58c8fd2948d3..98e670fbdd31 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of * the Linux-NTFS project. * * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_INODE_H diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h index 809c0e6d8e09..85422761ff43 100644 --- a/fs/ntfs/layout.h +++ b/fs/ntfs/layout.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS * project. * * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_LAYOUT_H diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c index 27a24a42f712..eda9972e6159 100644 --- a/fs/ntfs/lcnalloc.c +++ b/fs/ntfs/lcnalloc.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. * * Copyright (c) 2004-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef NTFS_RW diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h index 2adb04316941..1589a6d8434b 100644 --- a/fs/ntfs/lcnalloc.h +++ b/fs/ntfs/lcnalloc.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * lcnalloc.h - Exports for NTFS kernel cluster (de)allocation. Part of the * Linux-NTFS project. * * Copyright (c) 2004-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_LCNALLOC_H diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 353379ff6057..a0c40f1be7ac 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. * * Copyright (c) 2002-2007 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef NTFS_RW diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h index aa2b6ac3f0a4..429d4909cc72 100644 --- a/fs/ntfs/logfile.h +++ b/fs/ntfs/logfile.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of * the Linux-NTFS project. * * Copyright (c) 2000-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_LOGFILE_H diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 5becc8acc8f4..842b0bfc3ac9 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_MALLOC_H diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index fb14d17666c8..20c841a906f2 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/buffer_head.h> diff --git a/fs/ntfs/mft.h b/fs/ntfs/mft.h index b52bf87b99de..17bfefc30271 100644 --- a/fs/ntfs/mft.h +++ b/fs/ntfs/mft.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * mft.h - Defines for mft record handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_MFT_H diff --git a/fs/ntfs/mst.c b/fs/ntfs/mst.c index 5a858d839b65..16b3c884abfc 100644 --- a/fs/ntfs/mst.c +++ b/fs/ntfs/mst.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * mst.c - NTFS multi sector transfer protection handling code. Part of the * Linux-NTFS project. * * Copyright (c) 2001-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "ntfs.h" diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 3986c7a1f6a8..2d3cc9e3395d 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS * project. * * Copyright (c) 2001-2006 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/dcache.h> diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h index 12de47b96ca9..e81376ea9152 100644 --- a/fs/ntfs/ntfs.h +++ b/fs/ntfs/ntfs.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * ntfs.h - Defines for NTFS Linux kernel driver. * * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. * Copyright (C) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_H diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c index 9793e68ba1dd..9160480222fd 100644 --- a/fs/ntfs/quota.c +++ b/fs/ntfs/quota.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * quota.c - NTFS kernel quota ($Quota) handling. Part of the Linux-NTFS * project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef NTFS_RW diff --git a/fs/ntfs/quota.h b/fs/ntfs/quota.h index 4cbe5594c0b0..fe3132a3d6d2 100644 --- a/fs/ntfs/quota.h +++ b/fs/ntfs/quota.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * quota.h - Defines for NTFS kernel quota ($Quota) handling. Part of the * Linux-NTFS project. * * Copyright (c) 2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_QUOTA_H diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index eac7d6788a10..508744a93180 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /** * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002-2005 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "debug.h" diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h index 47728fbb610b..38de0a375f59 100644 --- a/fs/ntfs/runlist.h +++ b/fs/ntfs/runlist.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * runlist.h - Defines for runlist handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_RUNLIST_H diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 887ea8b3b000..29621d40f448 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. * Copyright (c) 2001,2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c index a503156ec15f..a030d00af90c 100644 --- a/fs/ntfs/sysctl.c +++ b/fs/ntfs/sysctl.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of * the Linux-NTFS project. Adapted from the old NTFS driver, * Copyright (C) 1997 Martin von Löwis, Régis Duchesne * * Copyright (c) 2002-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef DEBUG diff --git a/fs/ntfs/sysctl.h b/fs/ntfs/sysctl.h index d4f8ce920d95..96bb2299d2d5 100644 --- a/fs/ntfs/sysctl.h +++ b/fs/ntfs/sysctl.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of * the Linux-NTFS project. Adapted from the old NTFS driver, * Copyright (C) 1997 Martin von Löwis, Régis Duchesne * * Copyright (c) 2002-2004 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_SYSCTL_H diff --git a/fs/ntfs/time.h b/fs/ntfs/time.h index 24cd719f1fd2..6b63261300cc 100644 --- a/fs/ntfs/time.h +++ b/fs/ntfs/time.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * time.h - NTFS time conversion functions. Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_TIME_H diff --git a/fs/ntfs/types.h b/fs/ntfs/types.h index 8c8053b66984..9a47859e7a06 100644 --- a/fs/ntfs/types.h +++ b/fs/ntfs/types.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * types.h - Defines for NTFS Linux kernel driver specific types. * Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_TYPES_H diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 005ca4b0f132..a6b6c64f14a9 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. * * Copyright (c) 2001-2006 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/slab.h> diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c index e2f72ca98037..4ebe84a78dea 100644 --- a/fs/ntfs/upcase.c +++ b/fs/ntfs/upcase.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * upcase.c - Generate the full NTFS Unicode upcase table in little endian. * Part of the Linux-NTFS project. * * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org> * Copyright (c) 2001-2006 Anton Altaparmakov - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS source - * in the file COPYING); if not, write to the Free Software Foundation, - * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "malloc.h" diff --git a/fs/ntfs/usnjrnl.c b/fs/ntfs/usnjrnl.c index b2bc0d55b036..9097a0b4ef25 100644 --- a/fs/ntfs/usnjrnl.c +++ b/fs/ntfs/usnjrnl.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * usnjrnl.h - NTFS kernel transaction log ($UsnJrnl) handling. Part of the * Linux-NTFS project. * * Copyright (c) 2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef NTFS_RW diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 00d8e6bd7c36..85f531b59395 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h @@ -1,23 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * usnjrnl.h - Defines for NTFS kernel transaction log ($UsnJrnl) handling. * Part of the Linux-NTFS project. * * Copyright (c) 2005 Anton Altaparmakov - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_USNJRNL_H diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h index 4f579b02bc76..930a9ae8a053 100644 --- a/fs/ntfs/volume.h +++ b/fs/ntfs/volume.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part * of the Linux-NTFS project. * * Copyright (c) 2001-2006 Anton Altaparmakov * Copyright (c) 2002 Richard Russon - * - * This program/include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program/include file is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program (in the main directory of the Linux-NTFS - * distribution in the file COPYING); if not, write to the Free Software - * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_NTFS_VOLUME_H diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index 77a8de5f7119..46bba20da6b5 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config OCFS2_FS tristate "OCFS2 file system support" depends on NET && SYSFS && CONFIGFS_FS diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 917fadca8a7b..3e7da392aa6f 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -8,15 +9,6 @@ * CREDITS: * Lots of code in this file is copy from linux/fs/ext3/acl.c. * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/init.h> diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 7be0bb756286..127b13432146 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * acl.h * * Copyright (C) 2004, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_ACL_H diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 6f0999015a44..0c335b51043d 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Extent allocs and frees * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> @@ -6205,17 +6191,17 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, if (le16_to_cpu(tl->tl_used)) { trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used)); - *tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL); + /* + * Assuming the write-out below goes well, this copy will be + * passed back to recovery for processing. + */ + *tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL); if (!(*tl_copy)) { status = -ENOMEM; mlog_errno(status); goto bail; } - /* Assuming the write-out below goes well, this copy - * will be passed back to recovery for processing. */ - memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size); - /* All we need to do to clear the truncate log is set * tl_used. */ tl->tl_used = 0; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 250bcacdf9e9..7f973dd76dbc 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_ALLOC_H diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 832c1759a09a..a4c905d6b575 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 3494a62ed749..70ed4382750d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_AOPS_H diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 0725e6054650..429e6a8359a5 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Checksum and ECC codes for the OCFS2 userspace library. * * Copyright (C) 2006, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/kernel.h> @@ -250,57 +242,29 @@ static struct dentry *blockcheck_debugfs_create(const char *name, static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) { if (stats) { - debugfs_remove(stats->b_debug_check); - stats->b_debug_check = NULL; - debugfs_remove(stats->b_debug_failure); - stats->b_debug_failure = NULL; - debugfs_remove(stats->b_debug_recover); - stats->b_debug_recover = NULL; - debugfs_remove(stats->b_debug_dir); + debugfs_remove_recursive(stats->b_debug_dir); stats->b_debug_dir = NULL; } } -static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - int rc = -EINVAL; - - if (!stats) - goto out; - stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); - if (!stats->b_debug_dir) - goto out; - - stats->b_debug_check = - blockcheck_debugfs_create("blocks_checked", - stats->b_debug_dir, - &stats->b_check_count); - stats->b_debug_failure = - blockcheck_debugfs_create("checksums_failed", - stats->b_debug_dir, - &stats->b_failure_count); + blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir, + &stats->b_check_count); - stats->b_debug_recover = - blockcheck_debugfs_create("ecc_recoveries", - stats->b_debug_dir, - &stats->b_recover_count); - if (stats->b_debug_check && stats->b_debug_failure && - stats->b_debug_recover) - rc = 0; + blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir, + &stats->b_failure_count); -out: - if (rc) - ocfs2_blockcheck_debug_remove(stats); - return rc; + blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir, + &stats->b_recover_count); } #else -static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - return 0; } static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) @@ -309,10 +273,10 @@ static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats * #endif /* CONFIG_DEBUG_FS */ /* Always-called wrappers for starting and stopping the debugfs files */ -int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent) +void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent) { - return ocfs2_blockcheck_debug_install(stats, parent); + ocfs2_blockcheck_debug_install(stats, parent); } void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats) diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h index d4b69febf70a..8f17d2c85f40 100644 --- a/fs/ocfs2/blockcheck.h +++ b/fs/ocfs2/blockcheck.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Checksum and ECC codes for the OCFS2 userspace library. * * Copyright (C) 2004, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_BLOCKCHECK_H @@ -33,9 +25,6 @@ struct ocfs2_blockcheck_stats { * ocfs2_blockcheck_stats_debugfs_install() */ struct dentry *b_debug_dir; /* Parent of the debugfs files */ - struct dentry *b_debug_check; /* Exposes b_check_count */ - struct dentry *b_debug_failure; /* Exposes b_failure_count */ - struct dentry *b_debug_recover; /* Exposes b_recover_count */ }; @@ -64,8 +53,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, struct ocfs2_blockcheck_stats *stats); /* Debug Initialization */ -int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, - struct dentry *parent); +void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats, + struct dentry *parent); void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats); /* diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index f9b84f7a3e4b..f0b104e483d8 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Buffer cache handling * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index b1bb70c8ca4d..1c5e533fba04 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Buffer cache handling functions defined * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_BUFFER_HEAD_IO_H diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile index 1aefc0350ec3..0e5b73215819 100644 --- a/fs/ocfs2/cluster/Makefile +++ b/fs/ocfs2/cluster/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f3c20b279eb2..f1b613327ac8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2004, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/kernel.h> @@ -106,10 +92,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_REGION_PINNED "pinned" static struct dentry *o2hb_debug_dir; -static struct dentry *o2hb_debug_livenodes; -static struct dentry *o2hb_debug_liveregions; -static struct dentry *o2hb_debug_quorumregions; -static struct dentry *o2hb_debug_failedregions; static LIST_HEAD(o2hb_all_regions); @@ -1198,7 +1180,7 @@ bail: if (atomic_read(®->hr_steady_iterations) != 0) { if (atomic_dec_and_test(®->hr_unsteady_iterations)) { printk(KERN_NOTICE "o2hb: Unable to stabilize " - "heartbeart on region %s (%s)\n", + "heartbeat on region %s (%s)\n", config_item_name(®->hr_item), reg->hr_dev_name); atomic_set(®->hr_steady_iterations, 0); @@ -1405,11 +1387,7 @@ static const struct file_operations o2hb_debug_fops = { void o2hb_exit(void) { - debugfs_remove(o2hb_debug_failedregions); - debugfs_remove(o2hb_debug_quorumregions); - debugfs_remove(o2hb_debug_liveregions); - debugfs_remove(o2hb_debug_livenodes); - debugfs_remove(o2hb_debug_dir); + debugfs_remove_recursive(o2hb_debug_dir); kfree(o2hb_db_livenodes); kfree(o2hb_db_liveregions); kfree(o2hb_db_quorumregions); @@ -1433,79 +1411,37 @@ static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, &o2hb_debug_fops); } -static int o2hb_debug_init(void) +static void o2hb_debug_init(void) { - int ret = -ENOMEM; - o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { - mlog_errno(ret); - goto bail; - } - - o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, - o2hb_debug_dir, - &o2hb_db_livenodes, - sizeof(*o2hb_db_livenodes), - O2HB_DB_TYPE_LIVENODES, - sizeof(o2hb_live_node_bitmap), - O2NM_MAX_NODES, - o2hb_live_node_bitmap); - if (!o2hb_debug_livenodes) { - mlog_errno(ret); - goto bail; - } - - o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, - o2hb_debug_dir, - &o2hb_db_liveregions, - sizeof(*o2hb_db_liveregions), - O2HB_DB_TYPE_LIVEREGIONS, - sizeof(o2hb_live_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_live_region_bitmap); - if (!o2hb_debug_liveregions) { - mlog_errno(ret); - goto bail; - } - o2hb_debug_quorumregions = - o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, - o2hb_debug_dir, - &o2hb_db_quorumregions, - sizeof(*o2hb_db_quorumregions), - O2HB_DB_TYPE_QUORUMREGIONS, - sizeof(o2hb_quorum_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_quorum_region_bitmap); - if (!o2hb_debug_quorumregions) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir, + &o2hb_db_livenodes, sizeof(*o2hb_db_livenodes), + O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap), + O2NM_MAX_NODES, o2hb_live_node_bitmap); - o2hb_debug_failedregions = - o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, - o2hb_debug_dir, - &o2hb_db_failedregions, - sizeof(*o2hb_db_failedregions), - O2HB_DB_TYPE_FAILEDREGIONS, - sizeof(o2hb_failed_region_bitmap), - O2NM_MAX_REGIONS, - o2hb_failed_region_bitmap); - if (!o2hb_debug_failedregions) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir, + &o2hb_db_liveregions, sizeof(*o2hb_db_liveregions), + O2HB_DB_TYPE_LIVEREGIONS, + sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS, + o2hb_live_region_bitmap); - ret = 0; -bail: - if (ret) - o2hb_exit(); + o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir, + &o2hb_db_quorumregions, + sizeof(*o2hb_db_quorumregions), + O2HB_DB_TYPE_QUORUMREGIONS, + sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS, + o2hb_quorum_region_bitmap); - return ret; + o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir, + &o2hb_db_failedregions, + sizeof(*o2hb_db_failedregions), + O2HB_DB_TYPE_FAILEDREGIONS, + sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS, + o2hb_failed_region_bitmap); } -int o2hb_init(void) +void o2hb_init(void) { int i; @@ -1525,7 +1461,7 @@ int o2hb_init(void) o2hb_dependent_users = 0; - return o2hb_debug_init(); + o2hb_debug_init(); } /* if we're already in a callback then we're already serialized by the sem */ diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index a9e67efc0004..beed31ea86cf 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * Function prototypes * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef O2CLUSTER_HEARTBEAT_H @@ -78,7 +63,7 @@ void o2hb_unregister_callback(const char *region_uuid, void o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_exit(void); -int o2hb_init(void); +void o2hb_init(void); int o2hb_check_node_heartbeating_no_sem(u8 node_num); int o2hb_check_node_heartbeating_from_callback(u8 node_num); void o2hb_stop_all_regions(void); diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index d331c2386b94..1d696c96b8b2 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2004, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/module.h> diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index a396096a5099..446e452ac7a6 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef O2CLUSTER_MASKLOG_H diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 74a21f6695c8..02bf4a1774cc 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * debug functionality for o2net * * Copyright (C) 2005, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifdef CONFIG_DEBUG_FS @@ -53,10 +38,6 @@ #define SHOW_SOCK_STATS 1 static struct dentry *o2net_dentry; -static struct dentry *sc_dentry; -static struct dentry *nst_dentry; -static struct dentry *stats_dentry; -static struct dentry *nodes_dentry; static DEFINE_SPINLOCK(o2net_debug_lock); @@ -505,36 +486,23 @@ static const struct file_operations nodes_fops = { void o2net_debugfs_exit(void) { - debugfs_remove(nodes_dentry); - debugfs_remove(stats_dentry); - debugfs_remove(sc_dentry); - debugfs_remove(nst_dentry); - debugfs_remove(o2net_dentry); + debugfs_remove_recursive(o2net_dentry); } -int o2net_debugfs_init(void) +void o2net_debugfs_init(void) { umode_t mode = S_IFREG|S_IRUSR; o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); - if (o2net_dentry) - nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode, - o2net_dentry, NULL, &nst_seq_fops); - if (nst_dentry) - sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode, - o2net_dentry, NULL, &sc_seq_fops); - if (sc_dentry) - stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode, - o2net_dentry, NULL, &stats_seq_fops); - if (stats_dentry) - nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode, - o2net_dentry, NULL, &nodes_fops); - if (nodes_dentry) - return 0; - - o2net_debugfs_exit(); - mlog_errno(-ENOMEM); - return -ENOMEM; + + debugfs_create_file(NST_DEBUG_NAME, mode, o2net_dentry, NULL, + &nst_seq_fops); + debugfs_create_file(SC_DEBUG_NAME, mode, o2net_dentry, NULL, + &sc_seq_fops); + debugfs_create_file(STATS_DEBUG_NAME, mode, o2net_dentry, NULL, + &stats_seq_fops); + debugfs_create_file(NODES_DEBUG_NAME, mode, o2net_dentry, NULL, + &nodes_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 4ac775e32240..7a7640c59f3c 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -1,22 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2004, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/slab.h> @@ -842,9 +828,7 @@ static int __init init_o2nm(void) { int ret = -1; - ret = o2hb_init(); - if (ret) - goto out; + o2hb_init(); ret = o2net_init(); if (ret) diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h index 09ea2d388bbb..3e0006631cc4 100644 --- a/fs/ocfs2/cluster/nodemanager.h +++ b/fs/ocfs2/cluster/nodemanager.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * Function prototypes * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef O2CLUSTER_NODEMANAGER_H diff --git a/fs/ocfs2/cluster/ocfs2_heartbeat.h b/fs/ocfs2/cluster/ocfs2_heartbeat.h index 3f4151da9709..760d850be11e 100644 --- a/fs/ocfs2/cluster/ocfs2_heartbeat.h +++ b/fs/ocfs2/cluster/ocfs2_heartbeat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * On-disk structures for ocfs2_heartbeat * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef _OCFS2_HEARTBEAT_H diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 49b594325bec..21ad307419a8 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -7,22 +8,6 @@ * for the ocfs2_nodemanager module. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef _OCFS2_NODEMANAGER_H diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 67dcee65fe50..5c424a099280 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ /* This quorum hack is only here until we transition to some more rational @@ -90,7 +76,7 @@ static void o2quo_fence_self(void) }; } -/* Indicate that a timeout occurred on a hearbeat region write. The +/* Indicate that a timeout occurred on a heartbeat region write. The * other nodes in the cluster may consider us dead at that time so we * want to "fence" ourselves so that we don't scribble on the disk * after they think they've recovered us. This can't solve all diff --git a/fs/ocfs2/cluster/quorum.h b/fs/ocfs2/cluster/quorum.h index 6649cc6f67c9..6d45ce8b18a1 100644 --- a/fs/ocfs2/cluster/quorum.h +++ b/fs/ocfs2/cluster/quorum.h @@ -1,23 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef O2CLUSTER_QUORUM_H diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index b7f57271d49c..d6067c3d84c1 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * OCFS2 cluster sysfs interface * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, - * version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #include <linux/kernel.h> diff --git a/fs/ocfs2/cluster/sys.h b/fs/ocfs2/cluster/sys.h index d66b8ab0045e..ce380517cf17 100644 --- a/fs/ocfs2/cluster/sys.h +++ b/fs/ocfs2/cluster/sys.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * Function prototypes for o2cb sysfs interface * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, - * version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef O2CLUSTER_SYS_H diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index e9f236af1927..48a3398f0bf5 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2004 Oracle. All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * ---- * * Callers for this were originally written against a very simple synchronus @@ -1776,7 +1762,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, (msecs_to_jiffies(o2net_reconnect_delay()) + 1); if (node_num != o2nm_this_node()) { - /* believe it or not, accept and node hearbeating testing + /* believe it or not, accept and node heartbeating testing * can succeed for this node before we got here.. so * only use set_nn_state to clear the persistent error * if that hasn't already happened */ @@ -2143,8 +2129,7 @@ int o2net_init(void) o2quo_init(); - if (o2net_debugfs_init()) - goto out; + o2net_debugfs_init(); o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index c571e849fda4..de87cbffd175 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * Function prototypes * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef O2CLUSTER_TCP_H @@ -124,16 +109,15 @@ struct o2net_send_tracking; struct o2net_sock_container; #ifdef CONFIG_DEBUG_FS -int o2net_debugfs_init(void); +void o2net_debugfs_init(void); void o2net_debugfs_exit(void); void o2net_debug_add_nst(struct o2net_send_tracking *nst); void o2net_debug_del_nst(struct o2net_send_tracking *nst); void o2net_debug_add_sc(struct o2net_sock_container *sc); void o2net_debug_del_sc(struct o2net_sock_container *sc); #else -static inline int o2net_debugfs_init(void) +static inline void o2net_debugfs_init(void) { - return 0; } static inline void o2net_debugfs_exit(void) { diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 0276f7f8d5e6..e6a2b9dfcd16 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * Copyright (C) 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef O2CLUSTER_TCP_INTERNAL_H diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 290373024d9d..42a61eecdacd 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * dentry cache handling code * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> @@ -310,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, out_attach: spin_lock(&dentry_attach_lock); + if (unlikely(dentry->d_fsdata && !alias)) { + /* d_fsdata is set by a racing thread which is doing + * the same thing as this thread is doing. Leave the racing + * thread going ahead and we return here. + */ + spin_unlock(&dentry_attach_lock); + iput(dl->dl_inode); + ocfs2_lock_res_free(&dl->dl_lockres); + kfree(dl); + return 0; + } + dentry->d_fsdata = dl; dl->dl_count++; spin_unlock(&dentry_attach_lock); diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index 55f58892b153..3686a52ba143 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_DCACHE_H diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 85f21caaa6ec..784426dee56c 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -19,21 +20,6 @@ * linux/fs/minix/dir.c * * Copyright (C) 1991, 1992 Linus Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index 3d8639f38973..e3e7d5dd29e8 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_DIR_H diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index 3d4041f0431e..38b224372776 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ccflags-y := -I $(srctree)/$(src)/.. obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index 3cfa114aa391..6456c0fbcbb2 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * externally exported dlm interfaces * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef DLMAPI_H diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 39831fc2fd52..4de89af96abf 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * AST and BAST functionality for local and remote nodes * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d06e27ec4be4..69a429b625cc 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1,25 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmcommon.h * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef DLMCOMMON_H diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 0bb128659d4b..965f45dbe17b 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * underlying calls for lock conversion * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ diff --git a/fs/ocfs2/dlm/dlmconvert.h b/fs/ocfs2/dlm/dlmconvert.h index b2e3677df878..12d9c28bc52f 100644 --- a/fs/ocfs2/dlm/dlmconvert.h +++ b/fs/ocfs2/dlm/dlmconvert.h @@ -1,25 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmconvert.h * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef DLMCONVERT_H diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 1d6dc8422899..a4b58ba99927 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * debug functionality for the dlm * * Copyright (C) 2004, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #include <linux/types.h> @@ -866,7 +851,7 @@ static const struct file_operations debug_state_fops = { /* end - debug state funcs */ /* files in subroot */ -int dlm_debug_init(struct dlm_ctxt *dlm) +void dlm_debug_init(struct dlm_ctxt *dlm) { struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; @@ -875,10 +860,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_state_fops); - if (!dc->debug_state_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping lockres */ dc->debug_lockres_dentry = @@ -886,20 +867,12 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops); - if (!dc->debug_lockres_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping mles */ dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops); - if (!dc->debug_mle_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } /* for dumping lockres on the purge list */ dc->debug_purgelist_dentry = @@ -907,15 +880,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm) S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_purgelist_fops); - if (!dc->debug_purgelist_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } - - return 0; - -bail: - return -ENOMEM; } void dlm_debug_shutdown(struct dlm_ctxt *dlm) @@ -935,24 +899,16 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm) /* subroot - domain dir */ int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { - dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, - dlm_debugfs_root); - if (!dlm->dlm_debugfs_subroot) { - mlog_errno(-ENOMEM); - goto bail; - } - dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), GFP_KERNEL); if (!dlm->dlm_debug_ctxt) { mlog_errno(-ENOMEM); - goto bail; + return -ENOMEM; } + dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, + dlm_debugfs_root); return 0; -bail: - dlm_destroy_debugfs_subroot(dlm); - return -ENOMEM; } void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) @@ -961,14 +917,9 @@ void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) } /* debugfs root */ -int dlm_create_debugfs_root(void) +void dlm_create_debugfs_root(void) { dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); - if (!dlm_debugfs_root) { - mlog_errno(-ENOMEM); - return -ENOMEM; - } - return 0; } void dlm_destroy_debugfs_root(void) diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index 5ced5482e7d3..7d0c7c9013ce 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -1,25 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmdebug.h * * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef DLMDEBUG_H @@ -43,20 +28,19 @@ struct debug_lockres { struct dlm_lock_resource *dl_res; }; -int dlm_debug_init(struct dlm_ctxt *dlm); +void dlm_debug_init(struct dlm_ctxt *dlm); void dlm_debug_shutdown(struct dlm_ctxt *dlm); int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); -int dlm_create_debugfs_root(void); +void dlm_create_debugfs_root(void); void dlm_destroy_debugfs_root(void); #else -static inline int dlm_debug_init(struct dlm_ctxt *dlm) +static inline void dlm_debug_init(struct dlm_ctxt *dlm) { - return 0; } static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) { @@ -68,9 +52,8 @@ static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { } -static inline int dlm_create_debugfs_root(void) +static inline void dlm_create_debugfs_root(void) { - return 0; } static inline void dlm_destroy_debugfs_root(void) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 2acd58ba9b7b..7338b5d4647c 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * defines domain join / leave apis * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #include <linux/module.h> @@ -1896,11 +1881,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) goto bail; } - status = dlm_debug_init(dlm); - if (status < 0) { - mlog_errno(status); - goto bail; - } + dlm_debug_init(dlm); snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name); dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0); @@ -2361,9 +2342,7 @@ static int __init dlm_init(void) goto error; } - status = dlm_create_debugfs_root(); - if (status) - goto error; + dlm_create_debugfs_root(); return 0; error: diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h index 8a9281411c18..7c21664d23d0 100644 --- a/fs/ocfs2/dlm/dlmdomain.h +++ b/fs/ocfs2/dlm/dlmdomain.h @@ -1,25 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmdomain.h * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ #ifndef DLMDOMAIN_H diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 74962315794e..baff087f3863 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * underlying calls for lock creation * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 826f0567ec43..74b768ca1cd8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * standalone DLM module * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ @@ -2176,7 +2161,7 @@ put: * think that $RECOVERY is currently mastered by a dead node. If so, * we wait a short time to allow that node to get notified by its own * heartbeat stack, then check again. All $RECOVERY lock resources - * mastered by dead nodes are purged when the hearbeat callback is + * mastered by dead nodes are purged when the heartbeat callback is * fired, so we can know for sure that it is safe to continue once * the node returns a live node or no node. */ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 802636d50365..064ce5bbc3f6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * recovery stuff * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ @@ -1124,7 +1109,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, { u64 mig_cookie = be64_to_cpu(mres->mig_cookie); int mres_total_locks = be32_to_cpu(mres->total_locks); - int sz, ret = 0, status = 0; + int ret = 0, status = 0; u8 orig_flags = mres->flags, orig_master = mres->master; @@ -1132,9 +1117,6 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, if (!mres->num_locks) return 0; - sz = sizeof(struct dlm_migratable_lockres) + - (mres->num_locks * sizeof(struct dlm_migratable_lock)); - /* add an all-done flag if we reached the last lock */ orig_flags = mres->flags; BUG_ON(total_locks > mres_total_locks); @@ -1148,7 +1130,8 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, /* send it */ ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres, - sz, send_to, &status); + struct_size(mres, ml, mres->num_locks), + send_to, &status); if (ret < 0) { /* XXX: negative status is not handled. * this will end up killing this node. */ diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 074d5de17bb2..61c51c268460 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * standalone DLM module * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 63d701cd1e2e..e78657742bd8 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,22 +7,6 @@ * underlying calls for unlocking locks * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * */ diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile index 0a0b93d940fe..a9874e441bd4 100644 --- a/fs/ocfs2/dlmfs/Makefile +++ b/fs/ocfs2/dlmfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only ccflags-y := -I $(srctree)/$(src)/.. obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 98885181e1fe..4f1668c81e1f 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -9,21 +10,6 @@ * which was a template for the fs side of this module. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ /* Simple VFS hooks based on: */ diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index 9cecf4857195..525b14ddfba5 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -10,21 +11,6 @@ * functions. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/signal.h> diff --git a/fs/ocfs2/dlmfs/userdlm.h b/fs/ocfs2/dlmfs/userdlm.h index ede94a6e7fd3..4bef7cdef859 100644 --- a/fs/ocfs2/dlmfs/userdlm.h +++ b/fs/ocfs2/dlmfs/userdlm.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Userspace dlm defines * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index af405586c5b1..14207234fa3d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Code which implements an OCFS2 specific interface to our DLM. * * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> @@ -440,6 +426,7 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res) static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res) { res->l_lock_refresh = 0; + res->l_lock_wait = 0; memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats)); memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats)); } @@ -474,6 +461,8 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, if (ret) stats->ls_fail++; + + stats->ls_last = ktime_to_us(ktime_get_real()); } static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) @@ -481,6 +470,21 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) lockres->l_lock_refresh++; } +static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_mask_waiter *mw; + + if (list_empty(&lockres->l_mask_waiters)) { + lockres->l_lock_wait = 0; + return; + } + + mw = list_first_entry(&lockres->l_mask_waiters, + struct ocfs2_mask_waiter, mw_item); + lockres->l_lock_wait = + ktime_to_us(ktime_mono_to_real(mw->mw_lock_start)); +} + static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { mw->mw_lock_start = ktime_get(); @@ -496,6 +500,9 @@ static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres) { } +static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres) +{ +} static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) { } @@ -889,6 +896,7 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres, list_del_init(&mw->mw_item); mw->mw_status = 0; complete(&mw->mw_complete); + ocfs2_track_lock_wait(lockres); } } static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or) @@ -1400,6 +1408,7 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres, list_add_tail(&mw->mw_item, &lockres->l_mask_waiters); mw->mw_mask = mask; mw->mw_goal = goal; + ocfs2_track_lock_wait(lockres); } /* returns 0 if the mw that was removed was already satisfied, -EBUSY @@ -1416,6 +1425,7 @@ static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, list_del_init(&mw->mw_item); init_completion(&mw->mw_complete); + ocfs2_track_lock_wait(lockres); } return ret; @@ -3003,6 +3013,8 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) kref_init(&dlm_debug->d_refcnt); INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); dlm_debug->d_locking_state = NULL; + dlm_debug->d_locking_filter = NULL; + dlm_debug->d_filter_secs = 0; out: return dlm_debug; } @@ -3093,17 +3105,43 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos) * - Lock stats printed * New in version 3 * - Max time in lock stats is in usecs (instead of nsecs) + * New in version 4 + * - Add last pr/ex unlock times and first lock wait time in usecs */ -#define OCFS2_DLM_DEBUG_STR_VERSION 3 +#define OCFS2_DLM_DEBUG_STR_VERSION 4 static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) { int i; char *lvb; struct ocfs2_lock_res *lockres = v; +#ifdef CONFIG_OCFS2_FS_STATS + u64 now, last; + struct ocfs2_dlm_debug *dlm_debug = + ((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug; +#endif if (!lockres) return -EINVAL; +#ifdef CONFIG_OCFS2_FS_STATS + if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) { + now = ktime_to_us(ktime_get_real()); + if (lockres->l_lock_prmode.ls_last > + lockres->l_lock_exmode.ls_last) + last = lockres->l_lock_prmode.ls_last; + else + last = lockres->l_lock_exmode.ls_last; + /* + * Use d_filter_secs field to filter lock resources dump, + * the default d_filter_secs(0) value filters nothing, + * otherwise, only dump the last N seconds active lock + * resources. + */ + if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs) + return 0; + } +#endif + seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION); if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY) @@ -3145,6 +3183,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) # define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max) # define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max) # define lock_refresh(_l) ((_l)->l_lock_refresh) +# define lock_last_prmode(_l) ((_l)->l_lock_prmode.ls_last) +# define lock_last_exmode(_l) ((_l)->l_lock_exmode.ls_last) +# define lock_wait(_l) ((_l)->l_lock_wait) #else # define lock_num_prmode(_l) (0) # define lock_num_exmode(_l) (0) @@ -3155,6 +3196,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) # define lock_max_prmode(_l) (0) # define lock_max_exmode(_l) (0) # define lock_refresh(_l) (0) +# define lock_last_prmode(_l) (0ULL) +# define lock_last_exmode(_l) (0ULL) +# define lock_wait(_l) (0ULL) #endif /* The following seq_print was added in version 2 of this output */ seq_printf(m, "%u\t" @@ -3165,7 +3209,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) "%llu\t" "%u\t" "%u\t" - "%u\t", + "%u\t" + "%llu\t" + "%llu\t" + "%llu\t", lock_num_prmode(lockres), lock_num_exmode(lockres), lock_num_prmode_failed(lockres), @@ -3174,7 +3221,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) lock_total_exmode(lockres), lock_max_prmode(lockres), lock_max_exmode(lockres), - lock_refresh(lockres)); + lock_refresh(lockres), + lock_last_prmode(lockres), + lock_last_exmode(lockres), + lock_wait(lockres)); /* End the line */ seq_printf(m, "\n"); @@ -3228,9 +3278,8 @@ static const struct file_operations ocfs2_dlm_debug_fops = { .llseek = seq_lseek, }; -static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) +static void ocfs2_dlm_init_debug(struct ocfs2_super *osb) { - int ret = 0; struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; dlm_debug->d_locking_state = debugfs_create_file("locking_state", @@ -3238,16 +3287,11 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb) osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - if (!dlm_debug->d_locking_state) { - ret = -EINVAL; - mlog(ML_ERROR, - "Unable to create locking state debugfs file.\n"); - goto out; - } - ocfs2_get_dlm_debug(dlm_debug); -out: - return ret; + dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter", + 0600, + osb->osb_debug_root, + &dlm_debug->d_filter_secs); } static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) @@ -3256,6 +3300,7 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) if (dlm_debug) { debugfs_remove(dlm_debug->d_locking_state); + debugfs_remove(dlm_debug->d_locking_filter); ocfs2_put_dlm_debug(dlm_debug); } } @@ -3270,11 +3315,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) goto local; } - status = ocfs2_dlm_init_debug(osb); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_dlm_init_debug(osb); /* launch downconvert thread */ osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s", @@ -4366,7 +4407,6 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) static int ocfs2_downconvert_thread(void *arg) { - int status = 0; struct ocfs2_super *osb = arg; /* only quit once we've been asked to stop and there is no more @@ -4384,7 +4424,7 @@ static int ocfs2_downconvert_thread(void *arg) } osb->dc_task = NULL; - return status; + return 0; } void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 4ec1c828f6e0..b8fbed25df89 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * description here * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index af2888d23de3..69ed278dd84d 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Functions to facilitate NFS exporting * * Copyright (C) 2002, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/export.h b/fs/ocfs2/export.h index 41a738678c37..d485da0c3439 100644 --- a/fs/ocfs2/export.h +++ b/fs/ocfs2/export.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_EXPORT_H diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 06cb96462bf9..e66a249fe07c 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,20 +7,6 @@ * Block/Cluster mapping functions * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index 1057586ec19f..e5464f6cee8a 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,20 +7,6 @@ * In-memory file extent mappings for OCFS2. * * Copyright (C) 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef _EXTENT_MAP_H diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d640c5f8a85d..4435df3e5adb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * File open, close, extend, truncate * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/capability.h> diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 7eb7f03531f6..4832cbceba5b 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_FILE_H diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index f65f2b2f594d..50f11bfdc8c2 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Code which implements online file check. * * Copyright (C) 2016 SuSE. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/list.h> @@ -193,6 +185,7 @@ int ocfs2_filecheck_create_sysfs(struct ocfs2_super *osb) ret = kobject_init_and_add(&entry->fs_kobj, &ocfs2_ktype_filecheck, NULL, "filecheck"); if (ret) { + kobject_put(&entry->fs_kobj); kfree(fcheck); return ret; } diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h index 6a22ee79e8d0..4d006777ac54 100644 --- a/fs/ocfs2/filecheck.h +++ b/fs/ocfs2/filecheck.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Online file check. * * Copyright (C) 2016 SuSE. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index d8208b20dc53..60c5f995d30c 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -7,21 +8,6 @@ * up to date, and fire off recovery when needed. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index 74b9c5dda28d..5fedb2d35dc0 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_HEARTBEAT_H diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 79279240fb6e..7ad9d6590818 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * vfs' aops, fops, dops and iops * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 9b955f732bca..51a4f7197987 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_INODE_H diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 994726ada857..d6f7b299eb23 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -106,16 +106,9 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, flags = flags & mask; flags |= oldflags & ~mask; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ - status = -EPERM; - if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & - (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto bail_unlock; - } + status = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (status) + goto bail_unlock; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 46fd3ef2cf21..930e3d388579 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Defines functions of journalling api * * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 497a4171ef61..c0fe6ed08ab1 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Defines journalling api and structures. * * Copyright (C) 2003, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_JOURNAL_H diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 58973e4d2471..158e5af767fd 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Node local data allocation * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> @@ -438,12 +424,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) bh = osb->local_alloc_bh; alloc = (struct ocfs2_dinode *) bh->b_data; - alloc_copy = kmalloc(bh->b_size, GFP_NOFS); + alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; goto out_commit; } - memcpy(alloc_copy, alloc, bh->b_size); status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1286,13 +1271,12 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, * local alloc shutdown won't try to double free main bitmap * bits. Make a copy so the sync function knows which bits to * free. */ - alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); + alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); if (!alloc_copy) { status = -ENOMEM; mlog_errno(status); goto bail; } - memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 44a7d1fb2dec..e8a5cea48639 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_LOCALALLOC_H diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index b11acd34001a..7edc4e5c7c2c 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Userspace file locking support * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h index 496d488b271f..389fe1fce3a5 100644 --- a/fs/ocfs2/locks.h +++ b/fs/ocfs2/locks.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes for Userspace file locking support * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_LOCKS_H diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 05220b365fb9..3a44e461828a 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Code to deal with the mess that is clustered mmap. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 1565dd8e8856..758d9661ef1e 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1,18 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * move_extents.c * * Copyright (C) 2011 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/fs.h> #include <linux/types.h> diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h index 4e143e811441..28cac43892c5 100644 --- a/fs/ocfs2/move_extents.h +++ b/fs/ocfs2/move_extents.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * move_extents.h * * Copyright (C) 2011 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_MOVE_EXTENTS_H #define OCFS2_MOVE_EXTENTS_H diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b7ca84bc3df7..6f8e1c4fdb9c 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -19,21 +20,6 @@ * linux/fs/minix/dir.c * * Copyright (C) 1991, 1992 Linux Torvalds - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h index 1155918d6784..cc091ed02b4a 100644 --- a/fs/ocfs2/namei.h +++ b/fs/ocfs2/namei.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_NAMEI_H diff --git a/fs/ocfs2/ocfs1_fs_compat.h b/fs/ocfs2/ocfs1_fs_compat.h index dfb313bda5dd..01ae48c4834d 100644 --- a/fs/ocfs2/ocfs1_fs_compat.h +++ b/fs/ocfs2/ocfs1_fs_compat.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -9,20 +10,6 @@ * mount it. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef _OCFS1_FS_COMPAT_H diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1f029fbe8b8d..fddbbd60f434 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Defines macros and structures used in OCFS2 * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_H @@ -164,6 +150,7 @@ struct ocfs2_lock_stats { /* Storing max wait in usecs saves 24 bytes per inode */ u32 ls_max; /* Max wait in USEC */ + u64 ls_last; /* Last unlock time in USEC */ }; #endif @@ -205,6 +192,7 @@ struct ocfs2_lock_res { #ifdef CONFIG_OCFS2_FS_STATS struct ocfs2_lock_stats l_lock_prmode; /* PR mode stats */ u32 l_lock_refresh; /* Disk refreshes */ + u64 l_lock_wait; /* First lock wait time */ struct ocfs2_lock_stats l_lock_exmode; /* EX mode stats */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -236,6 +224,8 @@ struct ocfs2_orphan_scan { struct ocfs2_dlm_debug { struct kref d_refcnt; struct dentry *d_locking_state; + struct dentry *d_locking_filter; + u32 d_filter_secs; struct list_head d_lockres_tracking; }; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index b86bf5e74348..0db4a7ec58a2 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,20 +7,6 @@ * On-disk structures for OCFS2. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef _OCFS2_FS_H diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 5b27ff1fa577..d7b31734f6be 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Defines OCFS2 ioctls. * * Copyright (C) 2010 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_IOCTL_H diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 7051b994c776..b4be84956bc1 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Defines OCFS2 lockid bits. * * Copyright (C) 2002, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_LOCKID_H diff --git a/fs/ocfs2/ocfs2_lockingver.h b/fs/ocfs2/ocfs2_lockingver.h index 2e45c8d2ea7e..5c9c105b33ee 100644 --- a/fs/ocfs2/ocfs2_lockingver.h +++ b/fs/ocfs2/ocfs2_lockingver.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Defines OCFS2 Locking version values. * * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License, version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_LOCKINGVER_H diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 1dc9a08e8bdc..ee43e51188be 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -1,18 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * refcounttree.c * * Copyright (C) 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/sort.h> diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index e9e862be4a1e..0b9014495726 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * refcounttree.h * * Copyright (C) 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_REFCOUNTTREE_H #define OCFS2_REFCOUNTTREE_H diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index 6a348b0294ab..0249e8ca1028 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -13,15 +14,6 @@ * Universite Pierre et Marie Curie (Paris VI) * * The rest is copyright (C) 2010 Novell. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/fs.h> diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h index 42c2b804f3fd..6ac88122896d 100644 --- a/fs/ocfs2/reservations.h +++ b/fs/ocfs2/reservations.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Allocation reservations function prototypes and structures. * * Copyright (C) 2010 Novell. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_RESERVATIONS_H diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 18451e0fab81..24eb52f9059c 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -7,21 +8,6 @@ * Inspired by ext3/resize.c. * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/resize.h b/fs/ocfs2/resize.h index f38841abf10b..0af0c023042c 100644 --- a/fs/ocfs2/resize.h +++ b/fs/ocfs2/resize.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_RESIZE_H diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index ea0756d83250..8caeceeaeda7 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -1,26 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * slot_map.c * - * - * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/types.h> diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index 601c95fd7003..93b53e73f0f7 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * description here * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 220cae7bbdbc..dbf8b5735808 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Code which interfaces ocfs2 with the o2cb stack. * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/kernel.h> diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index d2fb97b173da..7397064c3f35 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Code which interfaces ocfs2 with fs/dlm and a userspace stack. * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/module.h> diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index c4b029c43464..8aa6a667860c 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -7,15 +8,6 @@ * cluster stacks. * * Copyright (C) 2007, 2009 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/list.h> diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index f2dce10fae54..e9d26cbeb3b8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,15 +7,6 @@ * Glue to the underlying cluster stack. * * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7c972fbed6a..69c21a3843af 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -7,21 +8,6 @@ * Inspired by ext3 block groups. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 2d2501767c0c..f0a5d30a175d 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Defines sub allocator api * * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef _CHAINALLOC_H_ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8821bc7b9c72..8b2f39506648 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * load/unload driver, mount/dismount volumes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/module.h> @@ -1093,33 +1079,15 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - if (!osb->osb_debug_root) { - status = -EINVAL; - mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n"); - goto read_super_error; - } osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (!osb->osb_ctxt) { - status = -EINVAL; - mlog_errno(status); - goto read_super_error; - } - if (ocfs2_meta_ecc(osb)) { - status = ocfs2_blockcheck_stats_debugfs_install( - &osb->osb_ecc_stats, - osb->osb_debug_root); - if (status) { - mlog(ML_ERROR, - "Unable to create blockcheck statistics " - "files\n"); - goto read_super_error; - } - } + if (ocfs2_meta_ecc(osb)) + ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, + osb->osb_debug_root); status = ocfs2_mount_volume(sb); if (status < 0) @@ -1606,11 +1574,6 @@ static int __init ocfs2_init(void) goto out2; ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL); - if (!ocfs2_debugfs_root) { - status = -ENOMEM; - mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); - goto out3; - } ocfs2_set_locking_protocol(); diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index d4550c8bbc41..76facaf63336 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_SUPER_H diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h index 71ee4245e919..167094d1e5aa 100644 --- a/fs/ocfs2/symlink.h +++ b/fs/ocfs2/symlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_SYMLINK_H diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index 5965f3878d49..bb701c4e449f 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Initialize, read, write, etc. system files. * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #include <linux/fs.h> diff --git a/fs/ocfs2/sysfile.h b/fs/ocfs2/sysfile.h index cc9ea661ffc1..a83dd962fccb 100644 --- a/fs/ocfs2/sysfile.h +++ b/fs/ocfs2/sysfile.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Function prototypes * * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_SYSFILE_H diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 78f09c76ab3c..580852ba05c4 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -8,21 +9,6 @@ * * Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - * * Standard buffer head caching flags (uptodate, etc) are insufficient * in a clustered environment - a buffer may be marked up to date on * our local node but could have been modified by another cluster diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 0d826fe2da0d..77a30cae4879 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -6,21 +7,6 @@ * Cluster uptodate tracking * * Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ #ifndef OCFS2_UPTODATE_H diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 3a24ce3deb01..385f3aaa2448 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * @@ -8,15 +9,6 @@ * CREDITS: * Lots of code in this file is copy from linux/fs/ext3/xattr.c. * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/capability.h> diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 1633cc15ea1f..9c80382da1f5 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * xattr.h * * Copyright (C) 2004, 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #ifndef OCFS2_XATTR_H diff --git a/fs/omfs/Kconfig b/fs/omfs/Kconfig index b1b9a0aba6fd..42b2ec35a05b 100644 --- a/fs/omfs/Kconfig +++ b/fs/omfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config OMFS_FS tristate "SonicBlue Optimized MPEG File System support" depends on BLOCK diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile index 8b82b63f1129..ae16fc3d34a3 100644 --- a/fs/omfs/Makefile +++ b/fs/omfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_OMFS_FS) += omfs.o diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 4bee3a72b9f3..a0f45651f3b7 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * OMFS (as used by RIO Karma) directory operations. * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com> - * Released under GPL v2. */ #include <linux/fs.h> diff --git a/fs/omfs/file.c b/fs/omfs/file.c index bf83e6644333..d640b9388238 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * OMFS (as used by RIO Karma) file operations. * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com> - * Released under GPL v2. */ #include <linux/module.h> diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index ee14af9e26f2..08226a835ec3 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -1,7 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Optimized MPEG FS - inode and super operations. * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com> - * Released under GPL v2. */ #include <linux/module.h> #include <linux/sched.h> diff --git a/fs/open.c b/fs/open.c index 9c7d724a6f67..b5b80469b93d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/open.c * diff --git a/fs/openpromfs/Makefile b/fs/openpromfs/Makefile index 4563199b8fe6..843c270cd06f 100644 --- a/fs/openpromfs/Makefile +++ b/fs/openpromfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux Sun Openprom filesystem routines. # diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 74502de3f8d9..40c8c2e32fa3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* inode.c: /proc/openprom handling routines * * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig index 1554c02489de..890c0ae9a453 100644 --- a/fs/orangefs/Kconfig +++ b/fs/orangefs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config ORANGEFS_FS tristate "ORANGEFS (Powered by PVFS) support" select FS_POSIX_ACL diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index a35c17017210..960f9a3c012d 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -52,7 +52,7 @@ ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; struct orangefs_kernel_op_s *new_op = NULL; - int buffer_index = -1; + int buffer_index; ssize_t ret; size_t copy_amount; @@ -134,7 +134,6 @@ populate_shared_memory: */ if (ret == -EAGAIN && op_state_purged(new_op)) { orangefs_bufmap_put(buffer_index); - buffer_index = -1; if (type == ORANGEFS_IO_WRITE) iov_iter_revert(iter, total_size); gossip_debug(GOSSIP_FILE_DEBUG, @@ -262,7 +261,6 @@ out: "%s(%pU): PUT buffer_index %d\n", __func__, handle, buffer_index); } - buffer_index = -1; } op_release(new_op); return ret; @@ -357,11 +355,28 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, return ret; } +static int orangefs_getflags(struct inode *inode, unsigned long *uval) +{ + __u64 val = 0; + int ret; + + ret = orangefs_inode_getxattr(inode, + "user.pvfs2.meta_hint", + &val, sizeof(val)); + if (ret < 0 && ret != -ENODATA) + return ret; + else if (ret == -ENODATA) + val = 0; + *uval = val; + return 0; +} + /* * Perform a miscellaneous operation on a file. */ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(file); int ret = -ENOTTY; __u64 val = 0; unsigned long uval; @@ -375,20 +390,16 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar * and append flags */ if (cmd == FS_IOC_GETFLAGS) { - val = 0; - ret = orangefs_inode_getxattr(file_inode(file), - "user.pvfs2.meta_hint", - &val, sizeof(val)); - if (ret < 0 && ret != -ENODATA) + ret = orangefs_getflags(inode, &uval); + if (ret) return ret; - else if (ret == -ENODATA) - val = 0; - uval = val; gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", (unsigned long long)uval); return put_user(uval, (int __user *)arg); } else if (cmd == FS_IOC_SETFLAGS) { + unsigned long old_uval; + ret = 0; if (get_user(uval, (int __user *)arg)) return -EFAULT; @@ -404,11 +415,17 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); return -EINVAL; } + ret = orangefs_getflags(inode, &old_uval); + if (ret) + return ret; + ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); + if (ret) + return ret; val = uval; gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", (unsigned long long)val); - ret = orangefs_inode_setxattr(file_inode(file), + ret = orangefs_inode_setxattr(inode, "user.pvfs2.meta_hint", &val, sizeof(val), 0); } diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 87b1a6fce628..25543a966c48 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -64,7 +64,7 @@ struct client_debug_mask { __u64 mask2; }; -static int orangefs_kernel_debug_init(void); +static void orangefs_kernel_debug_init(void); static int orangefs_debug_help_open(struct inode *, struct file *); static void *help_start(struct seq_file *, loff_t *); @@ -99,7 +99,6 @@ static char *debug_help_string; static char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; static char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN]; -static struct dentry *help_file_dentry; static struct dentry *client_debug_dentry; static struct dentry *debug_dir; @@ -151,10 +150,8 @@ static DEFINE_MUTEX(orangefs_help_file_lock); * initialize kmod debug operations, create orangefs debugfs dir and * ORANGEFS_KMOD_DEBUG_HELP_FILE. */ -int orangefs_debugfs_init(int debug_mask) +void orangefs_debugfs_init(int debug_mask) { - int rc = -ENOMEM; - /* convert input debug mask to a 64-bit unsigned integer */ orangefs_gossip_debug_mask = (unsigned long long)debug_mask; @@ -183,37 +180,21 @@ int orangefs_debugfs_init(int debug_mask) (unsigned long long)orangefs_gossip_debug_mask); debug_dir = debugfs_create_dir("orangefs", NULL); - if (!debug_dir) { - pr_info("%s: debugfs_create_dir failed.\n", __func__); - goto out; - } - help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, - 0444, - debug_dir, - debug_help_string, - &debug_help_fops); - if (!help_file_dentry) { - pr_info("%s: debugfs_create_file failed.\n", __func__); - goto out; - } + debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE, 0444, debug_dir, + debug_help_string, &debug_help_fops); orangefs_debug_disabled = 0; - rc = orangefs_kernel_debug_init(); - -out: - - return rc; + orangefs_kernel_debug_init(); } /* * initialize the kernel-debug file. */ -static int orangefs_kernel_debug_init(void) +static void orangefs_kernel_debug_init(void) { int rc = -ENOMEM; - struct dentry *ret; char *k_buffer = NULL; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__); @@ -230,24 +211,11 @@ static int orangefs_kernel_debug_init(void) pr_info("%s: overflow 1!\n", __func__); } - ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, - 0444, - debug_dir, - k_buffer, - &kernel_debug_fops); - if (!ret) { - pr_info("%s: failed to create %s.\n", - __func__, - ORANGEFS_KMOD_DEBUG_FILE); - goto out; - } - - rc = 0; + debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer, + &kernel_debug_fops); out: - gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc); - return rc; } @@ -353,12 +321,6 @@ static int orangefs_client_debug_init(void) debug_dir, c_buffer, &kernel_debug_fops); - if (!client_debug_dentry) { - pr_info("%s: failed to create updated %s.\n", - __func__, - ORANGEFS_CLIENT_DEBUG_FILE); - goto out; - } rc = 0; diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h index 51147f9ce3d6..502f6dedccde 100644 --- a/fs/orangefs/orangefs-debugfs.h +++ b/fs/orangefs/orangefs-debugfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -int orangefs_debugfs_init(int); +void orangefs_debugfs_init(int); void orangefs_debugfs_cleanup(void); int orangefs_prepare_debugfs_help_string(int); int orangefs_debugfs_new_client_mask(void __user *); diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 82cf8b3e568b..c010c1fddafc 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * (C) 2001 Clemson University and The University of Chicago * @@ -128,9 +129,7 @@ static int __init orangefs_init(void) if (ret) goto cleanup_key_table; - ret = orangefs_debugfs_init(module_parm_debug_mask); - if (ret) - goto debugfs_init_failed; + orangefs_debugfs_init(module_parm_debug_mask); ret = orangefs_sysfs_init(); if (ret) @@ -160,8 +159,6 @@ cleanup_device: orangefs_dev_cleanup(); sysfs_init_failed: - -debugfs_init_failed: orangefs_debugfs_cleanup(); cleanup_key_table: diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 2ef91be2a04e..444e2da4f60e 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config OVERLAY_FS tristate "Overlay filesystem support" select EXPORTFS diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 46e1ff8ac056..9164c585eb2f 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the overlay filesystem. # diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 56feaa739979..b801c6353100 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/module.h> @@ -37,7 +34,7 @@ static int ovl_ccup_get(char *buf, const struct kernel_param *param) } module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644); -MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing"); +MODULE_PARM_DESC(check_copy_up, "Obsolete; does nothing"); int ovl_copy_xattr(struct dentry *old, struct dentry *new) { diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93872bb50230..702aa63f6774 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -21,7 +18,7 @@ static unsigned short ovl_redirect_max = 256; module_param_named(redirect_max, ovl_redirect_max, ushort, 0644); -MODULE_PARM_DESC(ovl_redirect_max, +MODULE_PARM_DESC(redirect_max, "Maximum length of absolute redirect xattr value"); static int ovl_set_redirect(struct dentry *dentry, bool samedir); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index cc1c9e5606ba..cb8ec1f65c03 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Overlayfs NFS export support. * * Amir Goldstein <amir73il@gmail.com> * * Copyright (C) 2017-2018 CTERA Networks. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 540a8b845145..e235a635d9ec 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -1,9 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/cred.h> @@ -409,36 +406,16 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd, return ret; } -static unsigned int ovl_get_inode_flags(struct inode *inode) -{ - unsigned int flags = READ_ONCE(inode->i_flags); - unsigned int ovl_iflags = 0; - - if (flags & S_SYNC) - ovl_iflags |= FS_SYNC_FL; - if (flags & S_APPEND) - ovl_iflags |= FS_APPEND_FL; - if (flags & S_IMMUTABLE) - ovl_iflags |= FS_IMMUTABLE_FL; - if (flags & S_NOATIME) - ovl_iflags |= FS_NOATIME_FL; - - return ovl_iflags; -} - -static long ovl_ioctl_set_flags(struct file *file, unsigned long arg) +static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int iflags) { long ret; struct inode *inode = file_inode(file); - unsigned int flags; - unsigned int old_flags; + unsigned int old_iflags; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - ret = mnt_want_write_file(file); if (ret) return ret; @@ -447,8 +424,8 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned long arg) /* Check the capability before cred override */ ret = -EPERM; - old_flags = ovl_get_inode_flags(inode); - if (((flags ^ old_flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && + old_iflags = READ_ONCE(inode->i_flags); + if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) && !capable(CAP_LINUX_IMMUTABLE)) goto unlock; @@ -456,7 +433,7 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned long arg) if (ret) goto unlock; - ret = ovl_real_ioctl(file, FS_IOC_SETFLAGS, arg); + ret = ovl_real_ioctl(file, cmd, arg); ovl_copyflags(ovl_inode_real(inode), inode); unlock: @@ -468,17 +445,79 @@ unlock: } +static unsigned int ovl_fsflags_to_iflags(unsigned int flags) +{ + unsigned int iflags = 0; + + if (flags & FS_SYNC_FL) + iflags |= S_SYNC; + if (flags & FS_APPEND_FL) + iflags |= S_APPEND; + if (flags & FS_IMMUTABLE_FL) + iflags |= S_IMMUTABLE; + if (flags & FS_NOATIME_FL) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ + unsigned int flags; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsflags_to_iflags(flags)); +} + +static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags) +{ + unsigned int iflags = 0; + + if (xflags & FS_XFLAG_SYNC) + iflags |= S_SYNC; + if (xflags & FS_XFLAG_APPEND) + iflags |= S_APPEND; + if (xflags & FS_XFLAG_IMMUTABLE) + iflags |= S_IMMUTABLE; + if (xflags & FS_XFLAG_NOATIME) + iflags |= S_NOATIME; + + return iflags; +} + +static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct fsxattr fa; + + memset(&fa, 0, sizeof(fa)); + if (copy_from_user(&fa, (void __user *) arg, sizeof(fa))) + return -EFAULT; + + return ovl_ioctl_set_flags(file, cmd, arg, + ovl_fsxflags_to_iflags(fa.fsx_xflags)); +} + static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long ret; switch (cmd) { case FS_IOC_GETFLAGS: + case FS_IOC_FSGETXATTR: ret = ovl_real_ioctl(file, cmd, arg); break; case FS_IOC_SETFLAGS: - ret = ovl_ioctl_set_flags(file, arg); + ret = ovl_ioctl_set_fsflags(file, cmd, arg); + break; + + case FS_IOC_FSSETXATTR: + ret = ovl_ioctl_set_fsxflags(file, cmd, arg); break; default: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b48273e846ad..7663aeb85fa3 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -553,15 +550,15 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, int xinobits = ovl_xino_bits(inode->i_sb); /* - * When NFS export is enabled and d_ino is consistent with st_ino - * (samefs or i_ino has enough bits to encode layer), set the same - * value used for d_ino to i_ino, because nfsd readdirplus compares - * d_ino values to i_ino values of child entries. When called from + * When d_ino is consistent with st_ino (samefs or i_ino has enough + * bits to encode layer), set the same value used for st_ino to i_ino, + * so inode number exposed via /proc/locks and a like will be + * consistent with d_ino and st_ino values. An i_ino value inconsistent + * with d_ino also causes nfsd readdirplus to fail. When called from * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). */ - if (inode->i_sb->s_export_op && - (ovl_same_sb(inode->i_sb) || xinobits)) { + if (ovl_same_sb(inode->i_sb) || xinobits) { inode->i_ino = ino; if (xinobits && fsid && !(ino >> (64 - xinobits))) inode->i_ino |= (unsigned long)fsid << (64 - xinobits); @@ -777,6 +774,54 @@ struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, return inode; } +bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) +{ + struct inode *key = d_inode(dir); + struct inode *trap; + bool res; + + trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); + if (!trap) + return false; + + res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && + !ovl_inode_lower(trap); + + iput(trap); + return res; +} + +/* + * Create an inode cache entry for layer root dir, that will intentionally + * fail ovl_verify_inode(), so any lookup that will find some layer root + * will fail. + */ +struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) +{ + struct inode *key = d_inode(dir); + struct inode *trap; + + if (!d_is_dir(dir)) + return ERR_PTR(-ENOTDIR); + + trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, + ovl_inode_set, key); + if (!trap) + return ERR_PTR(-ENOMEM); + + if (!(trap->i_state & I_NEW)) { + /* Conflicting layer roots? */ + iput(trap); + return ERR_PTR(-ELOOP); + } + + trap->i_mode = S_IFDIR; + trap->i_flags = S_DEAD; + unlock_new_inode(trap); + + return trap; +} + /* * Does overlay inode need to be hashed by lower inode? */ diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index efd372312ef1..e9717c2f7d45 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -18,6 +15,7 @@ #include "overlayfs.h" struct ovl_lookup_data { + struct super_block *sb; struct qstr name; bool is_dir; bool opaque; @@ -244,6 +242,12 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, if (!d->metacopy || d->last) goto out; } else { + if (ovl_lookup_trap_inode(d->sb, this)) { + /* Caught in a trap of overlapping layers */ + err = -ELOOP; + goto out_err; + } + if (last_element) d->is_dir = true; if (d->last) @@ -819,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, int err; bool metacopy = false; struct ovl_lookup_data d = { + .sb = dentry->d_sb, .name = dentry->d_name, .is_dir = false, .opaque = false, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d26efed9f80a..6934bcf030f0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -1,10 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/kernel.h> @@ -270,6 +267,7 @@ void ovl_clear_flag(unsigned long flag, struct inode *inode); bool ovl_test_flag(unsigned long flag, struct inode *inode); bool ovl_inuse_trylock(struct dentry *dentry); void ovl_inuse_unlock(struct dentry *dentry); +bool ovl_is_inuse(struct dentry *dentry); bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); @@ -376,6 +374,8 @@ struct ovl_inode_params { struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev); struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper); +bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir); +struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir); struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip); static inline void ovl_copyattr(struct inode *from, struct inode *to) diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index ec237035333a..28a2d12a1029 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -1,11 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ struct ovl_config { @@ -29,6 +26,8 @@ struct ovl_sb { struct ovl_layer { struct vfsmount *mnt; + /* Trap in ovl inode cache */ + struct inode *trap; struct ovl_sb *fs; /* Index of this layer in fs root (upper idx == 0) */ int idx; @@ -65,6 +64,10 @@ struct ovl_fs { /* Did we take the inuse lock? */ bool upperdir_locked; bool workdir_locked; + /* Traps in ovl inode cache */ + struct inode *upperdir_trap; + struct inode *workdir_trap; + struct inode *indexdir_trap; /* Inode numbers in all layers do not use the high xino_bits */ unsigned int xino_bits; }; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index cc8303a806b4..47a91c9733a5 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5ec4fc2f5d7e..b368e2e102fa 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <uapi/linux/magic.h> @@ -31,29 +28,29 @@ struct ovl_dir_cache; static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_dir_def, +MODULE_PARM_DESC(redirect_dir, "Default to on or off for the redirect_dir feature"); static bool ovl_redirect_always_follow = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); module_param_named(redirect_always_follow, ovl_redirect_always_follow, bool, 0644); -MODULE_PARM_DESC(ovl_redirect_always_follow, +MODULE_PARM_DESC(redirect_always_follow, "Follow redirects even if redirect_dir feature is turned off"); static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); module_param_named(index, ovl_index_def, bool, 0644); -MODULE_PARM_DESC(ovl_index_def, +MODULE_PARM_DESC(index, "Default to on or off for the inodes index feature"); static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); -MODULE_PARM_DESC(ovl_nfs_export_def, +MODULE_PARM_DESC(nfs_export, "Default to on or off for the NFS export feature"); static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); -MODULE_PARM_DESC(ovl_xino_auto_def, +MODULE_PARM_DESC(xino_auto, "Auto enable xino feature"); static void ovl_entry_stack_free(struct ovl_entry *oe) @@ -66,7 +63,7 @@ static void ovl_entry_stack_free(struct ovl_entry *oe) static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); module_param_named(metacopy, ovl_metacopy_def, bool, 0644); -MODULE_PARM_DESC(ovl_metacopy_def, +MODULE_PARM_DESC(metacopy, "Default to on or off for the metadata only copy up feature"); static void ovl_dentry_release(struct dentry *dentry) @@ -215,6 +212,9 @@ static void ovl_free_fs(struct ovl_fs *ofs) { unsigned i; + iput(ofs->indexdir_trap); + iput(ofs->workdir_trap); + iput(ofs->upperdir_trap); dput(ofs->indexdir); dput(ofs->workdir); if (ofs->workdir_locked) @@ -223,8 +223,10 @@ static void ovl_free_fs(struct ovl_fs *ofs) if (ofs->upperdir_locked) ovl_inuse_unlock(ofs->upper_mnt->mnt_root); mntput(ofs->upper_mnt); - for (i = 0; i < ofs->numlower; i++) + for (i = 0; i < ofs->numlower; i++) { + iput(ofs->lower_layers[i].trap); mntput(ofs->lower_layers[i].mnt); + } for (i = 0; i < ofs->numlowerfs; i++) free_anon_bdev(ofs->lower_fs[i].pseudo_dev); kfree(ofs->lower_layers); @@ -983,7 +985,26 @@ static const struct xattr_handler *ovl_xattr_handlers[] = { NULL }; -static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath) +static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, + struct inode **ptrap, const char *name) +{ + struct inode *trap; + int err; + + trap = ovl_get_trap_inode(sb, dir); + err = PTR_ERR_OR_ZERO(trap); + if (err) { + if (err == -ELOOP) + pr_err("overlayfs: conflicting %s path\n", name); + return err; + } + + *ptrap = trap; + return 0; +} + +static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, + struct path *upperpath) { struct vfsmount *upper_mnt; int err; @@ -1003,6 +1024,11 @@ static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath) if (err) goto out; + err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap, + "upperdir"); + if (err) + goto out; + upper_mnt = clone_private_mount(upperpath); err = PTR_ERR(upper_mnt); if (IS_ERR(upper_mnt)) { @@ -1029,7 +1055,8 @@ out: return err; } -static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) +static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, + struct path *workpath) { struct vfsmount *mnt = ofs->upper_mnt; struct dentry *temp; @@ -1044,6 +1071,10 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) if (!ofs->workdir) goto out; + err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); + if (err) + goto out; + /* * Upper should support d_type, else whiteouts are visible. Given * workdir and upper are on same fs, we can do iterate_dir() on @@ -1104,7 +1135,8 @@ out: return err; } -static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) +static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, + struct path *upperpath) { int err; struct path workpath = { }; @@ -1135,19 +1167,16 @@ static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); } - err = ovl_make_workdir(ofs, &workpath); - if (err) - goto out; + err = ovl_make_workdir(sb, ofs, &workpath); - err = 0; out: path_put(&workpath); return err; } -static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, - struct path *upperpath) +static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, + struct ovl_entry *oe, struct path *upperpath) { struct vfsmount *mnt = ofs->upper_mnt; int err; @@ -1166,6 +1195,11 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); if (ofs->indexdir) { + err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, + "indexdir"); + if (err) + goto out; + /* * Verify upper root is exclusively associated with index dir. * Older kernels stored upper fh in "trusted.overlay.origin" @@ -1253,8 +1287,8 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) return ofs->numlowerfs; } -static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, - unsigned int numlower) +static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, + struct path *stack, unsigned int numlower) { int err; unsigned int i; @@ -1272,16 +1306,28 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, for (i = 0; i < numlower; i++) { struct vfsmount *mnt; + struct inode *trap; int fsid; err = fsid = ovl_get_fsid(ofs, &stack[i]); if (err < 0) goto out; + err = -EBUSY; + if (ovl_is_inuse(stack[i].dentry)) { + pr_err("overlayfs: lowerdir is in-use as upperdir/workdir\n"); + goto out; + } + + err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); + if (err) + goto out; + mnt = clone_private_mount(&stack[i]); err = PTR_ERR(mnt); if (IS_ERR(mnt)) { pr_err("overlayfs: failed to clone lowerpath\n"); + iput(trap); goto out; } @@ -1291,6 +1337,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, */ mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; + ofs->lower_layers[ofs->numlower].trap = trap; ofs->lower_layers[ofs->numlower].mnt = mnt; ofs->lower_layers[ofs->numlower].idx = i + 1; ofs->lower_layers[ofs->numlower].fsid = fsid; @@ -1385,7 +1432,7 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, goto out_err; } - err = ovl_get_lower_layers(ofs, stack, numlower); + err = ovl_get_lower_layers(sb, ofs, stack, numlower); if (err) goto out_err; @@ -1417,6 +1464,77 @@ out_err: goto out; } +/* + * Check if this layer root is a descendant of: + * - another layer of this overlayfs instance + * - upper/work dir of any overlayfs instance + */ +static int ovl_check_layer(struct super_block *sb, struct dentry *dentry, + const char *name) +{ + struct dentry *next = dentry, *parent; + int err = 0; + + if (!dentry) + return 0; + + parent = dget_parent(next); + + /* Walk back ancestors to root (inclusive) looking for traps */ + while (!err && parent != next) { + if (ovl_is_inuse(parent)) { + err = -EBUSY; + pr_err("overlayfs: %s path overlapping in-use upperdir/workdir\n", + name); + } else if (ovl_lookup_trap_inode(sb, parent)) { + err = -ELOOP; + pr_err("overlayfs: overlapping %s path\n", name); + } + next = parent; + parent = dget_parent(next); + dput(next); + } + + dput(parent); + + return err; +} + +/* + * Check if any of the layers or work dirs overlap. + */ +static int ovl_check_overlapping_layers(struct super_block *sb, + struct ovl_fs *ofs) +{ + int i, err; + + if (ofs->upper_mnt) { + err = ovl_check_layer(sb, ofs->upper_mnt->mnt_root, "upperdir"); + if (err) + return err; + + /* + * Checking workbasedir avoids hitting ovl_is_inuse(parent) of + * this instance and covers overlapping work and index dirs, + * unless work or index dir have been moved since created inside + * workbasedir. In that case, we already have their traps in + * inode cache and we will catch that case on lookup. + */ + err = ovl_check_layer(sb, ofs->workbasedir, "workdir"); + if (err) + return err; + } + + for (i = 0; i < ofs->numlower; i++) { + err = ovl_check_layer(sb, ofs->lower_layers[i].mnt->mnt_root, + "lowerdir"); + if (err) + return err; + } + + return 0; +} + static int ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { }; @@ -1456,17 +1574,20 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (ofs->config.xino != OVL_XINO_OFF) ofs->xino_bits = BITS_PER_LONG - 32; + /* alloc/destroy_inode needed for setting up traps in inode cache */ + sb->s_op = &ovl_super_operations; + if (ofs->config.upperdir) { if (!ofs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); goto out_err; } - err = ovl_get_upper(ofs, &upperpath); + err = ovl_get_upper(sb, ofs, &upperpath); if (err) goto out_err; - err = ovl_get_workdir(ofs, &upperpath); + err = ovl_get_workdir(sb, ofs, &upperpath); if (err) goto out_err; @@ -1487,7 +1608,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= SB_RDONLY; if (!(ovl_force_readonly(ofs)) && ofs->config.index) { - err = ovl_get_indexdir(ofs, oe, &upperpath); + err = ovl_get_indexdir(sb, ofs, oe, &upperpath); if (err) goto out_free_oe; @@ -1500,6 +1621,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } + err = ovl_check_overlapping_layers(sb, ofs); + if (err) + goto out_free_oe; + /* Show index=off in /proc/mounts for forced r/o mount */ if (!ofs->indexdir) { ofs->config.index = false; @@ -1521,7 +1646,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); sb->s_magic = OVERLAYFS_SUPER_MAGIC; - sb->s_op = &ovl_super_operations; sb->s_xattr = ovl_xattr_handlers; sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 4035e640f402..f5678a3f8350 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 Novell Inc. * Copyright (C) 2016 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. */ #include <linux/fs.h> @@ -652,6 +649,18 @@ void ovl_inuse_unlock(struct dentry *dentry) } } +bool ovl_is_inuse(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + bool inuse; + + spin_lock(&inode->i_lock); + inuse = (inode->i_state & I_OVL_INUSE); + spin_unlock(&inode->i_lock); + + return inuse; +} + /* * Does this overlay dentry need to be indexed on copy up? */ diff --git a/fs/pnode.c b/fs/pnode.c index 7ea6cfb65077..49f6d7ff2139 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/pnode.c * * (C) Copyright IBM Corporation 2005. - * Released under GPL v2. * Author : Ram Pai (linuxram@us.ibm.com) - * */ #include <linux/mnt_namespace.h> #include <linux/mount.h> @@ -262,7 +261,6 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); - child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; diff --git a/fs/pnode.h b/fs/pnode.h index 3960a83666cf..49a058c73e4c 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -1,9 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * linux/fs/pnode.h * * (C) Copyright IBM Corporation 2005. - * Released under GPL v2. - * */ #ifndef _LINUX_PNODE_H #define _LINUX_PNODE_H diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 2fd0fde16fe1..84ad1c90d535 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2002,2003 by Andreas Gruenbacher <a.gruenbacher@computer.org> * diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 817c02b13b1d..cb5629bd5fff 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config PROC_FS bool "/proc file system support" if EXPERT default y @@ -57,7 +58,8 @@ config PROC_VMCORE_DEVICE_DUMP snapshot. If you say Y here, the collected device dumps will be added - as ELF notes to /proc/vmcore. + as ELF notes to /proc/vmcore. You can still disable device + dump using the kernel command line option 'novmcoredd'. config PROC_SYSCTL bool "Sysctl support (/proc/sys)" if EXPERT @@ -71,7 +73,7 @@ config PROC_SYSCTL interface is through /proc/sys. If you say Y here a tree of modifiable sysctl entries will be generated beneath the /proc/sys directory. They are explained in the files - in <file:Documentation/sysctl/>. Note that enabling this + in <file:Documentation/admin-guide/sysctl/>. Note that enabling this option will enlarge the kernel by at least 8 KB. As it is generally a good thing, you should say Y here unless @@ -97,3 +99,7 @@ config PROC_CHILDREN Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. + +config PROC_PID_ARCH_STATUS + def_bool n + depends on PROC_FS diff --git a/fs/proc/array.c b/fs/proc/array.c index 2edbb657f859..46dcb6f0eccf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { seq_printf(m, "Cpus_allowed:\t%*pb\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", - cpumask_pr_args(&task->cpus_allowed)); + cpumask_pr_args(task->cpus_ptr)); } static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) @@ -462,7 +462,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * a program is not able to use ptrace(2) in that case. It is * safe because the task has stopped executing permanently. */ - if (permitted && (task->flags & PF_DUMPCORE)) { + if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) { if (try_get_task_stack(task)) { eip = KSTK_EIP(task); esp = KSTK_ESP(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index 9c8ca6cd3ce4..ebea9501afb8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } +/* + * If the user used setproctitle(), we just get the string from + * user space at arg_start, and limit it to a maximum of one page. + */ +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, + size_t count, unsigned long pos, + unsigned long arg_start) +{ + char *page; + int ret, got; + + if (pos >= PAGE_SIZE) + return 0; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + ret = 0; + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); + if (got > 0) { + int len = strnlen(page, got); + + /* Include the NUL character if it was found */ + if (len < got) + len++; + + if (len > pos) { + len -= pos; + if (len > count) + len = count; + len -= copy_to_user(buf, page+pos, len); + if (!len) + len = -EFAULT; + ret = len; + } + } + free_page((unsigned long)page); + return ret; +} + static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; - char *page; + char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) @@ -231,28 +272,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, return 0; /* - * We have traditionally allowed the user to re-write - * the argument strings and overflow the end result - * into the environment section. But only do that if - * the environment area is contiguous to the arguments. + * We allow setproctitle() to overwrite the argument + * strings, and overflow past the original end. But + * only when it overflows into the environment area. */ - if (env_start != arg_end || env_start >= env_end) + if (env_start != arg_end || env_end < env_start) env_start = env_end = arg_end; - - /* .. and limit it to a maximum of one page of slop */ - if (env_end >= arg_end + PAGE_SIZE) - env_end = arg_end + PAGE_SIZE - 1; + len = env_end - arg_start; /* We're not going to care if "*ppos" has high bits set */ - pos = arg_start + *ppos; - - /* .. but we do check the result is in the proper range */ - if (pos < arg_start || pos >= env_end) + pos = *ppos; + if (pos >= len) + return 0; + if (count > len - pos) + count = len - pos; + if (!count) return 0; - /* .. and we never go past env_end */ - if (env_end - pos < count) - count = env_end - pos; + /* + * Magical special case: if the argv[] end byte is not + * zero, the user has overwritten it with setproctitle(3). + * + * Possible future enhancement: do this only once when + * pos is 0, and set a flag in the 'struct file'. + */ + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) + return get_mm_proctitle(mm, buf, count, pos, arg_start); + + /* + * For the non-setproctitle() case we limit things strictly + * to the [arg_start, arg_end[ range. + */ + pos += arg_start; + if (pos < arg_start || pos >= arg_end) + return 0; + if (count > arg_end - pos) + count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) @@ -262,48 +317,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); - long offset; - - /* - * Are we already starting past the official end? - * We always include the last byte that is *supposed* - * to be NUL - */ - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); - if (got <= offset) + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); + if (got <= 0) break; - got -= offset; - - /* Don't walk past a NUL character once you hit arg_end */ - if (pos + got >= arg_end) { - int n = 0; - - /* - * If we started before 'arg_end' but ended up - * at or after it, we start the NUL character - * check at arg_end-1 (where we expect the normal - * EOF to be). - * - * NOTE! This is smaller than 'got', because - * pos + got >= arg_end - */ - if (pos < arg_end) - n = arg_end - pos - 1; - - /* Cut off at first NUL after 'n' */ - got = n + strnlen(page+n, offset+got-n); - if (got < offset) - break; - got -= offset; - - /* Include the NUL if it existed */ - if (got < size) - got++; - } - - got -= copy_to_user(buf, page+offset, got); + got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; @@ -532,8 +550,7 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; - points = oom_badness(task, NULL, NULL, totalpages) * - 1000 / totalpages; + points = oom_badness(task, totalpages) * 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; @@ -1962,9 +1979,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { - down_read(&mm->mmap_sem); - exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); - up_read(&mm->mmap_sem); + status = down_read_killable(&mm->mmap_sem); + if (!status) { + exact_vma_exists = !!find_exact_vma(mm, vm_start, + vm_end); + up_read(&mm->mmap_sem); + } } mmput(mm); @@ -2010,8 +2030,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = down_read_killable(&mm->mmap_sem); + if (rc) + goto out_mmput; + rc = -ENOENT; - down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = vma->vm_file->f_path; @@ -2107,7 +2130,11 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + result = ERR_PTR(-EINTR); + if (down_read_killable(&mm->mmap_sem)) + goto out_put_mm; + + result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; @@ -2118,6 +2145,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, out_no_vma: up_read(&mm->mmap_sem); +out_put_mm: mmput(mm); out_put_task: put_task_struct(task); @@ -2160,7 +2188,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) mm = get_task_mm(task); if (!mm) goto out_put_task; - down_read(&mm->mmap_sem); + + ret = down_read_killable(&mm->mmap_sem); + if (ret) { + mmput(mm); + goto out_put_task; + } nr_files = 0; @@ -3061,6 +3094,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3077,8 +3113,7 @@ static const struct file_operations proc_tgid_base_operations = { struct pid *tgid_pidfd_to_pid(const struct file *file) { - if (!d_is_dir(file->f_path.dentry) || - (file->f_op != &proc_tgid_base_operations)) + if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); @@ -3449,6 +3484,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif +#ifdef CONFIG_PROC_PID_ARCH_STATUS + ONE("arch_status", S_IRUGO, proc_pid_arch_status), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 954caf0b7fee..dfe6ce3505ce 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2010 Werner Fink, Jiri Slaby - * - * Licensed under GPLv2 */ #include <linux/console.h> diff --git a/fs/proc/generic.c b/fs/proc/generic.c index e39bac94dead..64e9ee1b129e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * proc/fs/generic.c --- generic routines for the proc-fs * diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 5f8d215b3fd0..dbe43a50caf2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -200,7 +200,8 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; if (use_pde(pde)) { - loff_t (*llseek)(struct file *, loff_t, int); + typeof_member(struct file_operations, llseek) llseek; + llseek = pde->proc_fops->llseek; if (!llseek) llseek = default_llseek; @@ -212,10 +213,11 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (use_pde(pde)) { + typeof_member(struct file_operations, read) read; + read = pde->proc_fops->read; if (read) rv = read(file, buf, count, ppos); @@ -226,10 +228,11 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; if (use_pde(pde)) { + typeof_member(struct file_operations, write) write; + write = pde->proc_fops->write; if (write) rv = write(file, buf, count, ppos); @@ -242,8 +245,9 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file_inode(file)); __poll_t rv = DEFAULT_POLLMASK; - __poll_t (*poll)(struct file *, struct poll_table_struct *); if (use_pde(pde)) { + typeof_member(struct file_operations, poll) poll; + poll = pde->proc_fops->poll; if (poll) rv = poll(file, pts); @@ -256,8 +260,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; - long (*ioctl)(struct file *, unsigned int, unsigned long); if (use_pde(pde)) { + typeof_member(struct file_operations, unlocked_ioctl) ioctl; + ioctl = pde->proc_fops->unlocked_ioctl; if (ioctl) rv = ioctl(file, cmd, arg); @@ -271,8 +276,9 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; - long (*compat_ioctl)(struct file *, unsigned int, unsigned long); if (use_pde(pde)) { + typeof_member(struct file_operations, compat_ioctl) compat_ioctl; + compat_ioctl = pde->proc_fops->compat_ioctl; if (compat_ioctl) rv = compat_ioctl(file, cmd, arg); @@ -286,8 +292,9 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; - int (*mmap)(struct file *, struct vm_area_struct *); if (use_pde(pde)) { + typeof_member(struct file_operations, mmap) mmap; + mmap = pde->proc_fops->mmap; if (mmap) rv = mmap(file, vma); @@ -305,7 +312,7 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long rv = -EIO; if (use_pde(pde)) { - typeof(proc_reg_get_unmapped_area) *get_area; + typeof_member(struct file_operations, get_unmapped_area) get_area; get_area = pde->proc_fops->get_unmapped_area; #ifdef CONFIG_MMU @@ -326,8 +333,8 @@ static int proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; - int (*open)(struct inode *, struct file *); - int (*release)(struct inode *, struct file *); + typeof_member(struct file_operations, open) open; + typeof_member(struct file_operations, release) release; struct pde_opener *pdeo; /* diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d1671e97f7fe..cd0c8d5ce9a1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/proc_fs.h> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 568d90e17c17..465ea0153b2a 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Committed_AS: ", committed); seq_printf(m, "VmallocTotal: %8lu kB\n", (unsigned long)VMALLOC_TOTAL >> 10); - show_val_kb(m, "VmallocUsed: ", 0ul); + show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages()); show_val_kb(m, "VmallocChunk: ", 0ul); show_val_kb(m, "Percpu: ", pcpu_nr_pages()); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 3b63be64e436..14c2badb8fd9 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* nommu.c: mmu-less memory info files * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/init.h> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a7b12435519e..76ae278df1c4 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/proc/net.c * diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c74570736b24..d80989b6c344 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -22,6 +22,10 @@ static const struct inode_operations proc_sys_inode_operations; static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; +/* shared constants to be used in various sysctls */ +const int sysctl_vals[] = { 0, 1, INT_MAX }; +EXPORT_SYMBOL(sysctl_vals); + /* Support for permanently empty directories */ struct ctl_table sysctl_mount_point[] = { @@ -499,6 +503,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (root->set_ownership) root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); + else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + } return inode; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 74c2c6ab10bd..33f72d1b92cc 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -210,7 +210,7 @@ static struct file_system_type proc_fs_type = { .init_fs_context = proc_init_fs_context, .parameters = &proc_fs_parameters, .kill_sb = proc_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; void __init proc_root_init(void) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 01d4eb0e6bd1..731642e0f5a0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -166,7 +166,11 @@ static void *m_start(struct seq_file *m, loff_t *ppos) if (!mm || !mmget_not_zero(mm)) return NULL; - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + mmput(mm); + return ERR_PTR(-EINTR); + } + hold_task_mempolicy(priv); priv->tail_vma = get_gate_vma(mm); @@ -417,17 +421,53 @@ struct mem_size_stats { unsigned long shared_hugetlb; unsigned long private_hugetlb; u64 pss; + u64 pss_anon; + u64 pss_file; + u64 pss_shmem; u64 pss_locked; u64 swap_pss; bool check_shmem_swap; }; +static void smaps_page_accumulate(struct mem_size_stats *mss, + struct page *page, unsigned long size, unsigned long pss, + bool dirty, bool locked, bool private) +{ + mss->pss += pss; + + if (PageAnon(page)) + mss->pss_anon += pss; + else if (PageSwapBacked(page)) + mss->pss_shmem += pss; + else + mss->pss_file += pss; + + if (locked) + mss->pss_locked += pss; + + if (dirty || PageDirty(page)) { + if (private) + mss->private_dirty += size; + else + mss->shared_dirty += size; + } else { + if (private) + mss->private_clean += size; + else + mss->shared_clean += size; + } +} + static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked) { int i, nr = compound ? 1 << compound_order(page) : 1; unsigned long size = nr * PAGE_SIZE; + /* + * First accumulate quantities that depend only on |size| and the type + * of the compound page. + */ if (PageAnon(page)) { mss->anonymous += size; if (!PageSwapBacked(page) && !dirty && !PageDirty(page)) @@ -440,42 +480,25 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, mss->referenced += size; /* + * Then accumulate quantities that may depend on sharing, or that may + * differ page-by-page. + * * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). */ if (page_count(page) == 1) { - if (dirty || PageDirty(page)) - mss->private_dirty += size; - else - mss->private_clean += size; - mss->pss += (u64)size << PSS_SHIFT; - if (locked) - mss->pss_locked += (u64)size << PSS_SHIFT; + smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, + locked, true); return; } - for (i = 0; i < nr; i++, page++) { int mapcount = page_mapcount(page); - unsigned long pss = (PAGE_SIZE << PSS_SHIFT); - - if (mapcount >= 2) { - if (dirty || PageDirty(page)) - mss->shared_dirty += PAGE_SIZE; - else - mss->shared_clean += PAGE_SIZE; - mss->pss += pss / mapcount; - if (locked) - mss->pss_locked += pss / mapcount; - } else { - if (dirty || PageDirty(page)) - mss->private_dirty += PAGE_SIZE; - else - mss->private_clean += PAGE_SIZE; - mss->pss += pss; - if (locked) - mss->pss_locked += pss; - } + unsigned long pss = PAGE_SIZE << PSS_SHIFT; + if (mapcount >= 2) + pss /= mapcount; + smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked, + mapcount < 2); } } @@ -754,10 +777,23 @@ static void smap_gather_stats(struct vm_area_struct *vma, seq_put_decimal_ull_width(m, str, (val) >> 10, 8) /* Show the contents common for smaps and smaps_rollup */ -static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss) +static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss, + bool rollup_mode) { SEQ_PUT_DEC("Rss: ", mss->resident); SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); + if (rollup_mode) { + /* + * These are meaningful only for smaps_rollup, otherwise two of + * them are zero, and the other one is the same as Pss. + */ + SEQ_PUT_DEC(" kB\nPss_Anon: ", + mss->pss_anon >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_File: ", + mss->pss_file >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nPss_Shmem: ", + mss->pss_shmem >> PSS_SHIFT); + } SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); @@ -794,9 +830,10 @@ static int show_smap(struct seq_file *m, void *v) SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); seq_puts(m, " kB\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, false); - seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma)); + seq_printf(m, "THPeligible: %d\n", + transparent_hugepage_enabled(vma)); if (arch_pkeys_enabled()) seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); @@ -828,7 +865,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); - down_read(&mm->mmap_sem); + ret = down_read_killable(&mm->mmap_sem); + if (ret) + goto out_put_mm; + hold_task_mempolicy(priv); for (vma = priv->mm->mmap; vma; vma = vma->vm_next) { @@ -841,12 +881,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v) seq_pad(m, ' '); seq_puts(m, "[rollup]\n"); - __show_smap(m, &mss); + __show_smap(m, &mss, true); release_task_mempolicy(priv); up_read(&mm->mmap_sem); - mmput(mm); +out_put_mm: + mmput(mm); out_put_task: put_task_struct(priv->task); priv->task = NULL; @@ -1132,7 +1173,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, goto out_mm; } - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + count = -EINTR; + goto out_mm; + } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { @@ -1279,7 +1323,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = _vm_normal_page(vma, addr, pte, true); + page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { @@ -1539,7 +1583,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; - down_read(&mm->mmap_sem); + ret = down_read_killable(&mm->mmap_sem); + if (ret) + goto out_free; ret = walk_page_range(start_vaddr, end, &pagemap_walk); up_read(&mm->mmap_sem); start_vaddr = end; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 36bf0f2e102e..7907e6419e57 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -211,7 +211,11 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm || !mmget_not_zero(mm)) return NULL; - down_read(&mm->mmap_sem); + if (down_read_killable(&mm->mmap_sem)) { + mmput(mm); + return ERR_PTR(-EINTR); + } + /* start from the Nth VMA */ for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 3fe90443c1bb..7bcc92add72c 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/proc/vmcore.c Interface for accessing the crash * dump from the system's previous life. @@ -20,6 +21,7 @@ #include <linux/init.h> #include <linux/crash_dump.h> #include <linux/list.h> +#include <linux/moduleparam.h> #include <linux/mutex.h> #include <linux/vmalloc.h> #include <linux/pagemap.h> @@ -53,6 +55,9 @@ static struct proc_dir_entry *proc_vmcore; /* Device Dump list and mutex to synchronize access to list */ static LIST_HEAD(vmcoredd_list); static DEFINE_MUTEX(vmcoredd_mutex); + +static bool vmcoredd_disabled; +core_param(novmcoredd, vmcoredd_disabled, bool, 0); #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */ /* Device Dump Size */ @@ -165,7 +170,7 @@ void __weak elfcorehdr_free(unsigned long long addr) */ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, false); + return read_from_oldmem(buf, count, ppos, 0, sev_active()); } /* @@ -173,7 +178,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos) */ ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) { - return read_from_oldmem(buf, count, ppos, 0, sme_active()); + return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active()); } /* @@ -373,7 +378,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos, buflen); start = m->paddr + *fpos - m->offset; tmp = read_from_oldmem(buffer, tsz, &start, - userbuf, sme_active()); + userbuf, mem_encrypt_active()); if (tmp < 0) return tmp; buflen -= tsz; @@ -1451,6 +1456,11 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) size_t data_size; int ret; + if (vmcoredd_disabled) { + pr_err_once("Device dump is disabled\n"); + return -EINVAL; + } + if (!data || !strlen(data->dump_name) || !data->vmcoredd_callback || !data->size) return -EINVAL; diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 0d19d191ae70..8f0369aad22a 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config PSTORE tristate "Persistent store support" select CRYPTO if PSTORE_COMPRESS diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index b8a0931568f8..bfbfc2698070 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2012 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/kernel.h> @@ -120,27 +112,13 @@ static struct dentry *pstore_ftrace_dir; void pstore_register_ftrace(void) { - struct dentry *file; - if (!psinfo->write) return; pstore_ftrace_dir = debugfs_create_dir("pstore", NULL); - if (!pstore_ftrace_dir) { - pr_err("%s: unable to create pstore directory\n", __func__); - return; - } - - file = debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, - NULL, &pstore_knob_fops); - if (!file) { - pr_err("%s: unable to create record_ftrace file\n", __func__); - goto err_file; - } - return; -err_file: - debugfs_remove(pstore_ftrace_dir); + debugfs_create_file("record_ftrace", 0600, pstore_ftrace_dir, NULL, + &pstore_knob_fops); } void pstore_unregister_ftrace(void) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 29e94e0b6d73..7fbe8f058220 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Persistent Storage - ramfs parts. * * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/module.h> @@ -330,22 +318,21 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) goto fail; inode->i_mode = S_IFREG | 0444; inode->i_fop = &pstore_file_operations; - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) - goto fail_alloc; - private->record = record; - scnprintf(name, sizeof(name), "%s-%s-%llu%s", pstore_type_to_name(record->type), record->psi->name, record->id, record->compressed ? ".enc.z" : ""); + private = kzalloc(sizeof(*private), GFP_KERNEL); + if (!private) + goto fail_inode; + dentry = d_alloc_name(root, name); if (!dentry) goto fail_private; + private->record = record; inode->i_size = private->total_size = size; - inode->i_private = private; if (record->time.tv_sec) @@ -361,7 +348,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) fail_private: free_pstore_private(private); -fail_alloc: +fail_inode: iput(inode); fail: diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 75887a269b64..3d7024662d29 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Persistent Storage - platform driver interface parts. * * Copyright (C) 2007-2008 Google, Inc. * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define pr_fmt(fmt) "pstore: " fmt @@ -347,8 +335,10 @@ static void allocate_buf_for_compression(void) static void free_buf_for_compression(void) { - if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) + if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) { crypto_free_comp(tfm); + tfm = NULL; + } kfree(big_oops_buf); big_oops_buf = NULL; big_oops_buf_sz = 0; @@ -606,7 +596,8 @@ int pstore_register(struct pstore_info *psi) return -EINVAL; } - allocate_buf_for_compression(); + if (psi->flags & PSTORE_FLAGS_DMESG) + allocate_buf_for_compression(); if (pstore_is_mounted()) pstore_get_records(0); diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index 97fcef74e5af..d8542ec2f38c 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2014 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/cdev.h> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index c5c685589e36..2bb3468fc93a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -1,23 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * RAM Oops/Panic logger * * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com> * Copyright (C) 2011 Kees Cook <keescook@chromium.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -669,6 +655,7 @@ static int ramoops_parse_dt(struct platform_device *pdev, struct ramoops_platform_data *pdata) { struct device_node *of_node = pdev->dev.of_node; + struct device_node *parent_node; struct resource *res; u32 value; int ret; @@ -703,6 +690,26 @@ static int ramoops_parse_dt(struct platform_device *pdev, #undef parse_size + /* + * Some old Chromebooks relied on the kernel setting the + * console_size and pmsg_size to the record size since that's + * what the downstream kernel did. These same Chromebooks had + * "ramoops" straight under the root node which isn't + * according to the current upstream bindings (though it was + * arguably acceptable under a prior version of the bindings). + * Let's make those old Chromebooks work by detecting that + * we're not a child of "reserved-memory" and mimicking the + * expected behavior. + */ + parent_node = of_get_parent(of_node); + if (!of_node_name_eq(parent_node, "reserved-memory") && + !pdata->console_size && !pdata->ftrace_size && + !pdata->pmsg_size && !pdata->ecc_info.ecc_size) { + pdata->console_size = pdata->record_size; + pdata->pmsg_size = pdata->record_size; + } + of_node_put(parent_node); + return 0; } @@ -800,26 +807,36 @@ static int ramoops_probe(struct platform_device *pdev) cxt->pstore.data = cxt; /* - * Since bufsize is only used for dmesg crash dumps, it - * must match the size of the dprz record (after PRZ header - * and ECC bytes have been accounted for). + * Prepare frontend flags based on which areas are initialized. + * For ramoops_init_przs() cases, the "max count" variable tells + * if there are regions present. For ramoops_init_prz() cases, + * the single region size is how to check. */ - cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size; - cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); - if (!cxt->pstore.buf) { - pr_err("cannot allocate pstore crash dump buffer\n"); - err = -ENOMEM; - goto fail_clear; - } - - cxt->pstore.flags = PSTORE_FLAGS_DMESG; + cxt->pstore.flags = 0; + if (cxt->max_dump_cnt) + cxt->pstore.flags |= PSTORE_FLAGS_DMESG; if (cxt->console_size) cxt->pstore.flags |= PSTORE_FLAGS_CONSOLE; - if (cxt->ftrace_size) + if (cxt->max_ftrace_cnt) cxt->pstore.flags |= PSTORE_FLAGS_FTRACE; if (cxt->pmsg_size) cxt->pstore.flags |= PSTORE_FLAGS_PMSG; + /* + * Since bufsize is only used for dmesg crash dumps, it + * must match the size of the dprz record (after PRZ header + * and ECC bytes have been accounted for). + */ + if (cxt->pstore.flags & PSTORE_FLAGS_DMESG) { + cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size; + cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL); + if (!cxt->pstore.buf) { + pr_err("cannot allocate pstore crash dump buffer\n"); + err = -ENOMEM; + goto fail_clear; + } + } + err = pstore_register(&cxt->pstore); if (err) { pr_err("registering with pstore failed\n"); diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index f375c0735351..8823f65888f0 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -1,15 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/qnx4/Kconfig b/fs/qnx4/Kconfig index 5f6089994042..45b5b98376c4 100644 --- a/fs/qnx4/Kconfig +++ b/fs/qnx4/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config QNX4FS_FS tristate "QNX4 file system support (read only)" depends on BLOCK diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile index 4a283b3f87f8..42d4fc263a3d 100644 --- a/fs/qnx4/Makefile +++ b/fs/qnx4/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux qnx4-filesystem routines. # diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 48c70aa4a3ec..922d083bbc7c 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * QNX4 file system, Linux implementation. * diff --git a/fs/qnx6/Kconfig b/fs/qnx6/Kconfig index edbba5c17cc8..6a9d6bce1586 100644 --- a/fs/qnx6/Kconfig +++ b/fs/qnx6/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config QNX6FS_FS tristate "QNX6 file system support (read only)" depends on BLOCK && CRC32 diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile index 5e6bae6fae50..bd4e6c8efbc6 100644 --- a/fs/qnx6/Makefile +++ b/fs/qnx6/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux qnx4-filesystem routines. # diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 59cf45f6be49..0f8b0ff1ba43 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * QNX6 file system, Linux implementation. * diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 4a09975aac90..7218314ca13f 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Quota configuration # diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9ad72ea7f71f..be9c471cdbc8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -223,9 +223,9 @@ static void put_quota_format(struct quota_format_type *fmt) /* * Dquot List Management: - * The quota code uses three lists for dquot management: the inuse_list, - * free_dquots, and dquot_hash[] array. A single dquot structure may be - * on all three lists, depending on its current state. + * The quota code uses four lists for dquot management: the inuse_list, + * free_dquots, dqi_dirty_list, and dquot_hash[] array. A single dquot + * structure may be on some of those lists, depending on its current state. * * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. @@ -236,6 +236,11 @@ static void put_quota_format(struct quota_format_type *fmt) * dqstats.free_dquots gives the number of dquots on the list. When * dquot is invalidated it's completely released from memory. * + * Dirty dquots are added to the dqi_dirty_list of quota_info when mark + * dirtied, and this list is searched when writing dirty dquots back to + * quota file. Note that some filesystems do dirty dquot tracking on their + * own (e.g. in a journal) and thus don't use dqi_dirty_list. + * * Dquots with a specific identity (device, type and id) are placed on * one of the dquot_hash[] hash chains. The provides an efficient search * mechanism to locate a specific dquot. @@ -1996,8 +2001,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) &warn_to[cnt]); if (ret) goto over_quota; - ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, - &warn_to[cnt]); + ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, + DQUOT_SPACE_WARN, &warn_to[cnt]); if (ret) { spin_lock(&transfer_to[cnt]->dq_dqb_lock); dquot_decr_inodes(transfer_to[cnt], inode_usage); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index fd5dd806f1b9..cb13fb76dbee 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -331,9 +331,9 @@ static int quota_state_to_flags(struct qc_state *state) return flags; } -static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) +static int quota_getstate(struct super_block *sb, int type, + struct fs_quota_stat *fqs) { - int type; struct qc_state state; int ret; @@ -349,14 +349,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATE quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -391,22 +384,22 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs) return 0; } -static int quota_getxstate(struct super_block *sb, void __user *addr) +static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; - ret = quota_getstate(sb, &fqs); + ret = quota_getstate(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } -static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) +static int quota_getstatev(struct super_block *sb, int type, + struct fs_quota_statv *fqs) { - int type; struct qc_state state; int ret; @@ -422,14 +415,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; - /* - * GETXSTATV quotactl has space for just one set of time limits so - * report them for the first enabled quota type - */ - for (type = 0; type < MAXQUOTAS; type++) - if (state.s_state[type].flags & QCI_ACCT_ENABLED) - break; - BUG_ON(type == MAXQUOTAS); + fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; @@ -455,7 +441,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs) return 0; } -static int quota_getxstatev(struct super_block *sb, void __user *addr) +static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; @@ -474,7 +460,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) default: return -EINVAL; } - ret = quota_getstatev(sb, &fqs); + ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; @@ -744,9 +730,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XQUOTARM: return quota_rmxquota(sb, addr); case Q_XGETQSTAT: - return quota_getxstate(sb, addr); + return quota_getxstate(sb, type, addr); case Q_XGETQSTATV: - return quota_getxstatev(sb, addr); + return quota_getxstatev(sb, type, addr); case Q_XSETQLIM: return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index bb3f59bcfcf5..a6f856f341dc 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * vfsv0 quota IO operations on file */ diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 9f2b2573b83c..c740e5572eb8 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/errno.h> #include <linux/fs.h> #include <linux/quota.h> diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 3c30034e733f..53429c29c784 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * vfsv0 quota IO operations on file */ diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile index c71e65dcad25..d1198adb562e 100644 --- a/fs/ramfs/Makefile +++ b/fs/ramfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux ramfs routines. # diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 3ac1f2387083..414695454956 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* file-nommu.c: no-MMU version of ramfs * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index a9d8ae88fa15..3257fc1809d9 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* internal.h: ramfs internal definitions * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ diff --git a/fs/read_write.c b/fs/read_write.c index c543d965e288..1f5088dec566 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1565,6 +1565,58 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, } #endif +/** + * generic_copy_file_range - copy data between two files + * @file_in: file structure to read from + * @pos_in: file offset to read from + * @file_out: file structure to write data to + * @pos_out: file offset to write data to + * @len: amount of data to copy + * @flags: copy flags + * + * This is a generic filesystem helper to copy data from one file to another. + * It has no constraints on the source or destination file owners - the files + * can belong to different superblocks and different filesystem types. Short + * copies are allowed. + * + * This should be called from the @file_out filesystem, as per the + * ->copy_file_range() method. + * + * Returns the number of bytes copied or a negative error indicating the + * failure. + */ + +ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, + len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); +} +EXPORT_SYMBOL(generic_copy_file_range); + +static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags) +{ + /* + * Although we now allow filesystems to handle cross sb copy, passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. NFS defines + * several different file_system_type structures, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range && + file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) + return file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); +} + /* * copy_file_range() differs from regular file read and write in that it * specifically allows return partial success. When it does so is up to @@ -1574,17 +1626,15 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); ssize_t ret; if (flags != 0) return -EINVAL; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, + flags); + if (unlikely(ret)) + return ret; ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) @@ -1594,15 +1644,6 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (unlikely(ret)) return ret; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; - - /* this could be relaxed once a method supports cross-fs copies */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; - if (len == 0) return 0; @@ -1612,7 +1653,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). */ - if (file_in->f_op->remap_file_range) { + if (file_in->f_op->remap_file_range && + file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { loff_t cloned; cloned = file_in->f_op->remap_file_range(file_in, pos_in, @@ -1625,16 +1667,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, } } - if (file_out->f_op->copy_file_range) { - ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, - pos_out, len, flags); - if (ret != -EOPNOTSUPP) - goto done; - } - - ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out, - len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); - + ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + WARN_ON_ONCE(ret == -EOPNOTSUPP); done: if (ret > 0) { fsnotify_access(file_in); @@ -1951,25 +1986,10 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; /* If can't alter the file contents, we're done. */ - if (!(remap_flags & REMAP_FILE_DEDUP)) { - /* Update the timestamps, since we can alter file contents. */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - return ret; - } - - /* - * Clear the security bits if the process is not being run by - * root. This keeps people from modifying setuid and setgid - * binaries. - */ - ret = file_remove_privs(file_out); - if (ret) - return ret; - } + if (!(remap_flags & REMAP_FILE_DEDUP)) + ret = file_modified(file_out); - return 0; + return ret; } EXPORT_SYMBOL(generic_remap_file_range_prep); @@ -1977,29 +1997,21 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags) { - struct inode *inode_in = file_inode(file_in); - struct inode *inode_out = file_inode(file_out); loff_t ret; WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP); - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; - /* * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on * the same mount. Practically, they only need to be on the same file * system. */ - if (inode_in->i_sb != inode_out->i_sb) + if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) return -EXDEV; - if (!(file_in->f_mode & FMODE_READ) || - !(file_out->f_mode & FMODE_WRITE) || - (file_out->f_flags & O_APPEND)) - return -EBADF; + ret = generic_file_rw_checks(file_in, file_out); + if (ret < 0) + return ret; if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig index 86e71c0caf48..8fd54ed8f844 100644 --- a/fs/reiserfs/Kconfig +++ b/fs/reiserfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config REISERFS_FS tristate "Reiserfs support" select CRC32 diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index acbbaf7a0bb2..45e1a5d11af3 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -74,13 +74,11 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = -EPERM; goto setflags_out; } - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; + err = vfs_ioc_setflags_prepare(inode, + REISERFS_I(inode)->i_attrs, + flags); + if (err) goto setflags_out; - } if ((flags & REISERFS_NOTAIL_FL) && S_ISREG(inode->i_mode)) { int result; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 36346dc4cec0..4517a1394c6f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -94,7 +94,7 @@ static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *sb); static void release_journal_dev(struct super_block *super, struct reiserfs_journal *journal); -static int dirty_one_transaction(struct super_block *s, +static void dirty_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl); static void flush_async_commits(struct work_struct *work); static void queue_log_writer(struct super_block *s); @@ -1682,12 +1682,11 @@ next: } /* used by flush_commit_list */ -static int dirty_one_transaction(struct super_block *s, +static void dirty_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl) { struct reiserfs_journal_cnode *cn; struct reiserfs_journal_list *pjl; - int ret = 0; jl->j_state |= LIST_DIRTY; cn = jl->j_realblock; @@ -1716,7 +1715,6 @@ static int dirty_one_transaction(struct super_block *s, } cn = cn->next; } - return ret; } static int kupdate_transactions(struct super_block *s, diff --git a/fs/romfs/Kconfig b/fs/romfs/Kconfig index ce2d6bcc6266..ad4c45788896 100644 --- a/fs/romfs/Kconfig +++ b/fs/romfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config ROMFS_FS tristate "ROM file system support" depends on BLOCK || MTD diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h index 95217b830118..e90df22bdc82 100644 --- a/fs/romfs/internal.h +++ b/fs/romfs/internal.h @@ -1,12 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* RomFS internal definitions * * Copyright © 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/romfs_fs.h> diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index 1118a0dc6b45..2c4a23113fb5 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* NOMMU mmap support for RomFS on MTD devices * * Copyright © 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/mm.h> diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index f86f51f99ace..6b2b4362089e 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* RomFS storage access routines * * Copyright © 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/fs.h> diff --git a/fs/select.c b/fs/select.c index 6cbc9ff56ba0..53a0c149f528 100644 --- a/fs/select.c +++ b/fs/select.c @@ -294,12 +294,14 @@ enum poll_time_type { PT_OLD_TIMESPEC = 3, }; -static int poll_select_copy_remaining(struct timespec64 *end_time, - void __user *p, - enum poll_time_type pt_type, int ret) +static int poll_select_finish(struct timespec64 *end_time, + void __user *p, + enum poll_time_type pt_type, int ret) { struct timespec64 rts; + restore_saved_sigmask_unless(ret == -ERESTARTNOHAND); + if (!p) return ret; @@ -714,9 +716,7 @@ static int kern_select(int n, fd_set __user *inp, fd_set __user *outp, } ret = core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tvp, PT_TIMEVAL, ret); - - return ret; + return poll_select_finish(&end_time, tvp, PT_TIMEVAL, ret); } SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, @@ -730,7 +730,6 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, const sigset_t __user *sigmask, size_t sigsetsize, enum poll_time_type type) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -753,16 +752,12 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - - restore_user_sigmask(sigmask, &sigsaved); - - return ret; + return poll_select_finish(&end_time, tsp, type, ret); } /* @@ -927,7 +922,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait, if (!count) { count = wait->error; if (signal_pending(current)) - count = -EINTR; + count = -ERESTARTNOHAND; } if (count || timed_out) break; @@ -966,7 +961,7 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct timespec64 *end_time) { struct poll_wqueues table; - int err = -EFAULT, fdcount, len, size; + int err = -EFAULT, fdcount, len; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ @@ -994,8 +989,8 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, break; len = min(todo, POLLFD_PER_PAGE); - size = sizeof(struct poll_list) + sizeof(struct pollfd) * len; - walk = walk->next = kmalloc(size, GFP_KERNEL); + walk = walk->next = kmalloc(struct_size(walk, entries, len), + GFP_KERNEL); if (!walk) { err = -ENOMEM; goto out_fds; @@ -1042,7 +1037,7 @@ static long do_restart_poll(struct restart_block *restart_block) ret = do_sys_poll(ufds, nfds, to); - if (ret == -EINTR) { + if (ret == -ERESTARTNOHAND) { restart_block->fn = do_restart_poll; ret = -ERESTART_RESTARTBLOCK; } @@ -1063,7 +1058,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, ret = do_sys_poll(ufds, nfds, to); - if (ret == -EINTR) { + if (ret == -ERESTARTNOHAND) { struct restart_block *restart_block; restart_block = ¤t->restart_block; @@ -1087,7 +1082,6 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1100,21 +1094,12 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - - restore_user_sigmask(sigmask, &sigsaved); - - /* We can restart this syscall, usually */ - if (ret == -EINTR) - ret = -ERESTARTNOHAND; - - ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); - - return ret; + return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); } #if defined(CONFIG_COMPAT_32BIT_TIME) && !defined(CONFIG_64BIT) @@ -1123,7 +1108,6 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1136,21 +1120,12 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - - restore_user_sigmask(sigmask, &sigsaved); - - /* We can restart this syscall, usually */ - if (ret == -EINTR) - ret = -ERESTARTNOHAND; - - ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); - - return ret; + return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); } #endif @@ -1287,9 +1262,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp, } ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tvp, PT_OLD_TIMEVAL, ret); - - return ret; + return poll_select_finish(&end_time, tvp, PT_OLD_TIMEVAL, ret); } COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, @@ -1322,7 +1295,6 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, void __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize, enum poll_time_type type) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1345,16 +1317,12 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); - ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - - restore_user_sigmask(sigmask, &sigsaved); - - return ret; + return poll_select_finish(&end_time, tsp, type, ret); } COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp, @@ -1406,7 +1374,6 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1419,21 +1386,12 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - - restore_user_sigmask(sigmask, &sigsaved); - - /* We can restart this syscall, usually */ - if (ret == -EINTR) - ret = -ERESTARTNOHAND; - - ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); - - return ret; + return poll_select_finish(&end_time, tsp, PT_OLD_TIMESPEC, ret); } #endif @@ -1442,7 +1400,6 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1455,21 +1412,12 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_user_sigmask(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - - restore_user_sigmask(sigmask, &sigsaved); - - /* We can restart this syscall, usually */ - if (ret == -EINTR) - ret = -ERESTARTNOHAND; - - ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); - - return ret; + return poll_select_finish(&end_time, tsp, PT_TIMESPEC, ret); } #endif diff --git a/fs/seq_file.c b/fs/seq_file.c index abe27ec43176..04f09689cd6d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -384,6 +384,17 @@ void seq_escape(struct seq_file *m, const char *s, const char *esc) } EXPORT_SYMBOL(seq_escape); +void seq_escape_mem_ascii(struct seq_file *m, const char *src, size_t isz) +{ + char *buf; + size_t size = seq_get_buf(m, &buf); + int ret; + + ret = string_escape_mem_ascii(src, isz, buf, size); + seq_commit(m, ret < size ? ret : -1); +} +EXPORT_SYMBOL(seq_escape_mem_ascii); + void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; diff --git a/fs/splice.c b/fs/splice.c index 25212dcca2df..98412721f056 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * "splice": joining two ropes together by interweaving their strands. * @@ -1355,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1366,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } @@ -1381,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1392,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io error = compat_import_iovec(type, iov32, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 1adb3346b9d6..916e78fabcaa 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config SQUASHFS tristate "SquashFS 4.0 - Squashed file system support" depends on BLOCK diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index f098b9f1c396..4f9b9fb59362 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * block.c */ diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 0839efa720b3..5062326d0efb 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * cache.c */ diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 836639810ea0..d57bef91ab08 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * decompressor.c */ diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 0f5a8e4e58da..ec8617523e56 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef DECOMPRESSOR_H #define DECOMPRESSOR_H /* @@ -6,20 +7,6 @@ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * decompressor.h */ diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index d6008a636479..c181dee235bb 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Minchan Kim <minchan@kernel.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> #include <linux/mutex.h> diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 23a9c28ad8ea..2a2a2d106440 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index a6c75929a00e..550c3e592032 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/types.h> diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index a5845f94a2a1..a2ade63eccdf 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * dir.c */ diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 8073b6532cf0..ae2c87bb0fbe 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * export.c */ diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index f1c1430ae721..7b1128398976 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * file.c */ diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c index a9ba8d96776a..54c17b7c85fd 100644 --- a/fs/squashfs/file_cache.c +++ b/fs/squashfs/file_cache.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 80db1b86a27c..a4894cc59447 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/fs.h> diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 0681feab4a84..49602b9a42e1 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * fragment.c */ diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index d38ea3dab951..6be5afe7287d 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * id.c */ diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index e9793b1e49a5..24463145b351 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * inode.c */ diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index 95da65366548..c4e47e0588c7 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013, 2014 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/buffer_head.h> diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 934c17e96590..aa3c3dafc33d 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2010 LG Electronics * Chan Jeong <chan.jeong@lge.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * lzo_wrapper.c */ diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 40c10d9974c9..11e4539b9eae 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * namei.c */ diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 9b7b1b6a7892..520d323a99ce 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #include <linux/kernel.h> diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 98537eab27e2..2e3073ace009 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -1,11 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef PAGE_ACTOR_H #define PAGE_ACTOR_H /* * Copyright (c) 2013 * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. */ #ifndef CONFIG_SQUASHFS_FILE_DIRECT diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index f89f8a74c6ce..2797763ed046 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * squashfs.h */ diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 4e6853f084d0..7187bd1a30ea 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef SQUASHFS_FS #define SQUASHFS_FS /* @@ -6,20 +7,6 @@ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * squashfs_fs.h */ diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index d09fcd6fb85d..2c82d6f2a456 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef SQUASHFS_FS_I #define SQUASHFS_FS_I /* @@ -6,20 +7,6 @@ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * squashfs_fs_i.h */ diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index ef69c31947bf..34c21ffb6df3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef SQUASHFS_FS_SB #define SQUASHFS_FS_SB /* @@ -6,20 +7,6 @@ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * squashfs_fs_sb.h */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 767046d9f65d..effa638d6d85 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * super.c */ diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index befeba0fa70a..1430613183e6 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * symlink.c */ diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index 1548b3784548..e1e3f3dd5a06 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2010 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * xattr.c */ diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index afe70f815e3d..184129afd456 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -1,23 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2010 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * xattr.h */ diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index c89607d690c4..d99e08464554 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2010 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * xattr_id.c */ diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 6bfaef73d065..4b2f2051a6dc 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * xz_wrapper.c */ diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 2ec24d128bce..f2226afa1625 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -1,23 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 * Phillip Lougher <phillip@squashfs.org.uk> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * * zlib_wrapper.c */ diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index eeaabf881159..b448c2a1d0ed 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -1,19 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2016-present, Facebook, Inc. * All rights reserved. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2, - * or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * zstd_wrapper.c */ diff --git a/fs/stack.c b/fs/stack.c index 664ed35558bd..4ef2c056269d 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/fs.h> #include <linux/fs_stack.h> diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index b2756014508c..b0fe1cce33a0 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config SYSFS bool "sysfs file system support" if EXPERT default y diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 6eff6e1205a5..0906b9e522fe 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the sysfs virtual filesystem # diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 57038604d4a8..d41c21fef138 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -175,6 +175,26 @@ int sysfs_create_group(struct kobject *kobj, } EXPORT_SYMBOL_GPL(sysfs_create_group); +static int internal_create_groups(struct kobject *kobj, int update, + const struct attribute_group **groups) +{ + int error = 0; + int i; + + if (!groups) + return 0; + + for (i = 0; groups[i]; i++) { + error = internal_create_group(kobj, update, groups[i]); + if (error) { + while (--i >= 0) + sysfs_remove_group(kobj, groups[i]); + break; + } + } + return error; +} + /** * sysfs_create_groups - given a directory kobject, create a bunch of attribute groups * @kobj: The kobject to create the group on @@ -191,25 +211,29 @@ EXPORT_SYMBOL_GPL(sysfs_create_group); int sysfs_create_groups(struct kobject *kobj, const struct attribute_group **groups) { - int error = 0; - int i; - - if (!groups) - return 0; - - for (i = 0; groups[i]; i++) { - error = sysfs_create_group(kobj, groups[i]); - if (error) { - while (--i >= 0) - sysfs_remove_group(kobj, groups[i]); - break; - } - } - return error; + return internal_create_groups(kobj, 0, groups); } EXPORT_SYMBOL_GPL(sysfs_create_groups); /** + * sysfs_update_groups - given a directory kobject, create a bunch of attribute groups + * @kobj: The kobject to update the group on + * @groups: The attribute groups to update, NULL terminated + * + * This function update a bunch of attribute groups. If an error occurs when + * updating a group, all previously updated groups will be removed together + * with already existing (not updated) attributes. + * + * Returns 0 on success or error code from sysfs_update_group on failure. + */ +int sysfs_update_groups(struct kobject *kobj, + const struct attribute_group **groups) +{ + return internal_create_groups(kobj, 1, groups); +} +EXPORT_SYMBOL_GPL(sysfs_update_groups); + +/** * sysfs_update_group - given a directory kobject, update an attribute group * @kobj: The kobject to update the group on * @grp: The attribute group to update diff --git a/fs/sysv/Kconfig b/fs/sysv/Kconfig index 33aeb4b75db1..d4edf7d9ae10 100644 --- a/fs/sysv/Kconfig +++ b/fs/sysv/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config SYSV_FS tristate "System V/Xenix/V7/Coherent file system support" depends on BLOCK diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile index 7a75e70a4b61..17d12ba04b18 100644 --- a/fs/sysv/Makefile +++ b/fs/sysv/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux SystemV/Coherent filesystem routines. # diff --git a/fs/sysv/super.c b/fs/sysv/super.c index d3b2f54d6449..d788b1daa7eb 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/sysv/inode.c * diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile index 82fa35b656c4..7c35a282b484 100644 --- a/fs/tracefs/Makefile +++ b/fs/tracefs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only tracefs-objs := inode.o obj-$(CONFIG_TRACING) += tracefs.o diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 7098c49f3693..eeeae0475da9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * inode.c - part of tracefs, a pseudo file system for activating tracing * @@ -5,12 +6,7 @@ * * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * * tracefs is the file system that is used by the tracing infrastructure. - * */ #include <linux/module.h> @@ -509,9 +505,12 @@ static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) switch (dentry->d_inode->i_mode & S_IFMT) { case S_IFDIR: ret = simple_rmdir(parent->d_inode, dentry); + if (!ret) + fsnotify_rmdir(parent->d_inode, dentry); break; default: simple_unlink(parent->d_inode, dentry); + fsnotify_unlink(parent->d_inode, dentry); break; } if (!ret) diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index 9da2f135121b..69932bcfa920 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config UBIFS_FS tristate "UBIFS file system support" select CRC16 @@ -5,8 +6,10 @@ config UBIFS_FS select CRYPTO if UBIFS_FS_ADVANCED_COMPR select CRYPTO if UBIFS_FS_LZO select CRYPTO if UBIFS_FS_ZLIB + select CRYPTO if UBIFS_FS_ZSTD select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + select CRYPTO_ZSTD if UBIFS_FS_ZSTD select CRYPTO_HASH_INFO select UBIFS_FS_XATTR if FS_ENCRYPTION depends on MTD_UBI @@ -37,6 +40,14 @@ config UBIFS_FS_ZLIB help Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. +config UBIFS_FS_ZSTD + bool "ZSTD compression support" if UBIFS_FS_ADVANCED_COMPR + depends on UBIFS_FS + default y + help + ZSTD compresses is a big win in speed over Zlib and + in compression ratio over LZO. Say 'Y' if unsure. + config UBIFS_ATIME_SUPPORT bool "Access time support" default n @@ -76,8 +87,9 @@ config UBIFS_FS_SECURITY config UBIFS_FS_AUTHENTICATION bool "UBIFS authentication support" - depends on KEYS + select KEYS select CRYPTO_HMAC + select SYSTEM_DATA_VERIFICATION help Enable authentication support for UBIFS. This feature offers protection against offline changes for both data and metadata of the filesystem. diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 60f43b93d06e..d9af2de9084a 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -10,10 +10,12 @@ */ #include <linux/crypto.h> +#include <linux/verification.h> #include <crypto/hash.h> #include <crypto/sha.h> #include <crypto/algapi.h> #include <keys/user-type.h> +#include <keys/asymmetric-type.h> #include "ubifs.h" @@ -199,6 +201,77 @@ int __ubifs_node_check_hash(const struct ubifs_info *c, const void *node, } /** + * ubifs_sb_verify_signature - verify the signature of a superblock + * @c: UBIFS file-system description object + * @sup: The superblock node + * + * To support offline signed images the superblock can be signed with a + * PKCS#7 signature. The signature is placed directly behind the superblock + * node in an ubifs_sig_node. + * + * Returns 0 when the signature can be successfully verified or a negative + * error code if not. + */ +int ubifs_sb_verify_signature(struct ubifs_info *c, + const struct ubifs_sb_node *sup) +{ + int err; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; + const struct ubifs_sig_node *signode; + + sleb = ubifs_scan(c, UBIFS_SB_LNUM, UBIFS_SB_NODE_SZ, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + return err; + } + + if (sleb->nodes_cnt == 0) { + ubifs_err(c, "Unable to find signature node"); + err = -EINVAL; + goto out_destroy; + } + + snod = list_first_entry(&sleb->nodes, struct ubifs_scan_node, list); + + if (snod->type != UBIFS_SIG_NODE) { + ubifs_err(c, "Signature node is of wrong type"); + err = -EINVAL; + goto out_destroy; + } + + signode = snod->node; + + if (le32_to_cpu(signode->len) > snod->len + sizeof(struct ubifs_sig_node)) { + ubifs_err(c, "invalid signature len %d", le32_to_cpu(signode->len)); + err = -EINVAL; + goto out_destroy; + } + + if (le32_to_cpu(signode->type) != UBIFS_SIGNATURE_TYPE_PKCS7) { + ubifs_err(c, "Signature type %d is not supported\n", + le32_to_cpu(signode->type)); + err = -EINVAL; + goto out_destroy; + } + + err = verify_pkcs7_signature(sup, sizeof(struct ubifs_sb_node), + signode->sig, le32_to_cpu(signode->len), + NULL, VERIFYING_UNSPECIFIED_SIGNATURE, + NULL, NULL); + + if (err) + ubifs_err(c, "Failed to verify signature"); + else + ubifs_msg(c, "Successfully verified super block signature"); + +out_destroy: + ubifs_scan_destroy(sleb); + + return err; +} + +/** * ubifs_init_authentication - initialize UBIFS authentication support * @c: UBIFS file-system description object * @@ -478,3 +551,16 @@ int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac) return err; return 0; } + +/* + * ubifs_hmac_zero - test if a HMAC is zero + * @c: UBIFS file-system description object + * @hmac: the HMAC to test + * + * This function tests if a HMAC is zero and returns true if it is + * and false otherwise. + */ +bool ubifs_hmac_zero(struct ubifs_info *c, const u8 *hmac) +{ + return !memchr_inv(hmac, 0, c->hmac_desc_len); +} diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 7ef22baf9d15..80d7301ab76d 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 591f2c7a48f0..ad292c5a43a9 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 565cb56d7225..3a92e6af69b2 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * Copyright (C) 2006, 2007 University of Szeged, Hungary * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) * Zoltan Sogor @@ -71,6 +59,24 @@ static struct ubifs_compressor zlib_compr = { }; #endif +#ifdef CONFIG_UBIFS_FS_ZSTD +static DEFINE_MUTEX(zstd_enc_mutex); +static DEFINE_MUTEX(zstd_dec_mutex); + +static struct ubifs_compressor zstd_compr = { + .compr_type = UBIFS_COMPR_ZSTD, + .comp_mutex = &zstd_enc_mutex, + .decomp_mutex = &zstd_dec_mutex, + .name = "zstd", + .capi_name = "zstd", +}; +#else +static struct ubifs_compressor zstd_compr = { + .compr_type = UBIFS_COMPR_ZSTD, + .name = "zstd", +}; +#endif + /* All UBIFS compressors */ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; @@ -228,13 +234,19 @@ int __init ubifs_compressors_init(void) if (err) return err; - err = compr_init(&zlib_compr); + err = compr_init(&zstd_compr); if (err) goto out_lzo; + err = compr_init(&zlib_compr); + if (err) + goto out_zstd; + ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; return 0; +out_zstd: + compr_exit(&zstd_compr); out_lzo: compr_exit(&lzo_compr); return err; @@ -247,4 +259,5 @@ void ubifs_compressors_exit(void) { compr_exit(&lzo_compr); compr_exit(&zlib_compr); + compr_exit(&zstd_compr); } diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 4aaedf2d7f44..22be7aeb96c4 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -29,8 +29,8 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, { struct ubifs_info *c = inode->i_sb->s_fs_info; void *p = &dn->data; - struct page *ret; unsigned int pad_len = round_up(in_len, UBIFS_CIPHER_BLOCK_SIZE); + int err; ubifs_assert(c, pad_len <= *out_len); dn->compr_size = cpu_to_le16(in_len); @@ -39,11 +39,11 @@ int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn, if (pad_len != in_len) memset(p + in_len, 0, pad_len - in_len); - ret = fscrypt_encrypt_page(inode, virt_to_page(&dn->data), pad_len, - offset_in_page(&dn->data), block, GFP_NOFS); - if (IS_ERR(ret)) { - ubifs_err(c, "fscrypt_encrypt_page failed: %ld", PTR_ERR(ret)); - return PTR_ERR(ret); + err = fscrypt_encrypt_block_inplace(inode, virt_to_page(p), pad_len, + offset_in_page(p), block, GFP_NOFS); + if (err) { + ubifs_err(c, "fscrypt_encrypt_block_inplace() failed: %d", err); + return err; } *out_len = pad_len; @@ -64,10 +64,11 @@ int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn, } ubifs_assert(c, dlen <= UBIFS_BLOCK_SIZE); - err = fscrypt_decrypt_page(inode, virt_to_page(&dn->data), dlen, - offset_in_page(&dn->data), block); + err = fscrypt_decrypt_block_inplace(inode, virt_to_page(&dn->data), + dlen, offset_in_page(&dn->data), + block); if (err) { - ubifs_err(c, "fscrypt_decrypt_page failed: %i", err); + ubifs_err(c, "fscrypt_decrypt_block_inplace() failed: %d", err); return err; } *out_len = clen; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 3a2613038e88..a5f10d79e0dd 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -2812,115 +2800,69 @@ static const struct file_operations dfs_fops = { * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. * @c: UBIFS file-system description object * - * This function creates all debugfs files for this instance of UBIFS. Returns - * zero in case of success and a negative error code in case of failure. + * This function creates all debugfs files for this instance of UBIFS. * * Note, the only reason we have not merged this function with the * 'ubifs_debugging_init()' function is because it is better to initialize * debugfs interfaces at the very end of the mount process, and remove them at * the very beginning of the mount process. */ -int dbg_debugfs_init_fs(struct ubifs_info *c) +void dbg_debugfs_init_fs(struct ubifs_info *c) { - int err, n; + int n; const char *fname; - struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; - n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); if (n == UBIFS_DFS_DIR_LEN) { /* The array size is too small */ fname = UBIFS_DFS_DIR_NAME; - dent = ERR_PTR(-EINVAL); - goto out; + return; } fname = d->dfs_dir_name; - dent = debugfs_create_dir(fname, dfs_rootdir); - if (IS_ERR_OR_NULL(dent)) - goto out; - d->dfs_dir = dent; + d->dfs_dir = debugfs_create_dir(fname, dfs_rootdir); fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_lprops = dent; + d->dfs_dump_lprops = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_budg = dent; + d->dfs_dump_budg = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_dump_tnc = dent; + d->dfs_dump_tnc = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, + &dfs_fops); fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_gen = dent; + d->dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_index = dent; + d->dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_orph = dent; + d->dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_lprops = dent; + d->dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_chk_fs = dent; + d->dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_tst_rcvry = dent; + d->dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); fname = "ro_error"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, - &dfs_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - d->dfs_ro_error = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(d->dfs_dir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n", - fname, err); - return err; + d->dfs_ro_error = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + d->dfs_dir, c, &dfs_fops); } /** @@ -2929,8 +2871,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(c->dbg->dfs_dir); + debugfs_remove_recursive(c->dbg->dfs_dir); } struct ubifs_global_debug_info ubifs_dbg; @@ -3006,75 +2947,38 @@ static const struct file_operations dfs_global_fops = { * * UBIFS uses debugfs file-system to expose various debugging knobs to * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. + * file-system. */ -int dbg_debugfs_init(void) +void dbg_debugfs_init(void) { - int err; const char *fname; - struct dentry *dent; - - if (!IS_ENABLED(CONFIG_DEBUG_FS)) - return 0; fname = "ubifs"; - dent = debugfs_create_dir(fname, NULL); - if (IS_ERR_OR_NULL(dent)) - goto out; - dfs_rootdir = dent; + dfs_rootdir = debugfs_create_dir(fname, NULL); fname = "chk_general"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_gen = dent; + dfs_chk_gen = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, + NULL, &dfs_global_fops); fname = "chk_index"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_index = dent; + dfs_chk_index = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); fname = "chk_orphans"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_orph = dent; + dfs_chk_orph = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); fname = "chk_lprops"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_lprops = dent; + dfs_chk_lprops = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); fname = "chk_fs"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_chk_fs = dent; + dfs_chk_fs = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, + NULL, &dfs_global_fops); fname = "tst_recovery"; - dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, - &dfs_global_fops); - if (IS_ERR_OR_NULL(dent)) - goto out_remove; - dfs_tst_rcvry = dent; - - return 0; - -out_remove: - debugfs_remove_recursive(dfs_rootdir); -out: - err = dent ? PTR_ERR(dent) : -ENODEV; - pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n", - current->pid, fname, err); - return err; + dfs_tst_rcvry = debugfs_create_file(fname, S_IRUSR | S_IWUSR, + dfs_rootdir, NULL, &dfs_global_fops); } /** @@ -3082,8 +2986,7 @@ out: */ void dbg_debugfs_exit(void) { - if (IS_ENABLED(CONFIG_DEBUG_FS)) - debugfs_remove_recursive(dfs_rootdir); + debugfs_remove_recursive(dfs_rootdir); } void ubifs_assert_failed(struct ubifs_info *c, const char *expr, diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 64c6977c189b..7763639a426b 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -309,9 +297,9 @@ int dbg_leb_unmap(struct ubifs_info *c, int lnum); int dbg_leb_map(struct ubifs_info *c, int lnum); /* Debugfs-related stuff */ -int dbg_debugfs_init(void); +void dbg_debugfs_init(void); void dbg_debugfs_exit(void); -int dbg_debugfs_init_fs(struct ubifs_info *c); +void dbg_debugfs_init_fs(struct ubifs_info *c); void dbg_debugfs_exit_fs(struct ubifs_info *c); #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 1a379b596b0d..0b98e3c8b461 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * Copyright (C) 2006, 2007 University of Szeged, Hungary * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter * Zoltan Sogor diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 512e7d9c60cd..400970d740bb 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -1482,7 +1470,7 @@ static int ubifs_migrate_page(struct address_space *mapping, { int rc; - rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 5deaae7fcead..873e6e1c92b5 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index bf75fdc76fc3..62cb3db44e6e 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index d124117efd42..8ceb51478800 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * Copyright (C) 2006, 2007 University of Szeged, Hungary * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter * Zoltan Sogor diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6b05b3ec500e..034ad14710d1 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -1,22 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * Copyright (C) 2006, 2007 University of Szeged, Hungary * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Zoltan Sogor * Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter @@ -119,18 +107,11 @@ static int setflags(struct inode *inode, int flags) if (err) return err; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ mutex_lock(&ui->ui_mutex); oldflags = ubifs2ioctl(ui->flags); - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto out_unlock; - } - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto out_unlock; ui->flags = ioctl2ubifs(flags); ubifs_set_inode_flags(inode); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 74a7306978d0..4fd9683b8245 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 2feff6cbbb77..afa704ff5ca0 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 15fd854149bb..b6ac9c4281ef 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -450,10 +438,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) *ltail_lnum = c->lhead_lnum; c->lhead_offs += len; - if (c->lhead_offs == c->leb_size) { - c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); - c->lhead_offs = 0; - } + ubifs_assert(c, c->lhead_offs < c->leb_size); remove_buds(c); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index fa8d775c9753..29826c51883a 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index b0c5f06128b5..e21abf250951 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 1f88caffdf2a..ff5e0411cf2d 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 5ea51bbd14c7..52a85c01397e 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -60,6 +48,39 @@ int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2) return 0; } +/* mst_node_check_hash - Check hash of a master node + * @c: UBIFS file-system description object + * @mst: The master node + * @expected: The expected hash of the master node + * + * This checks the hash of a master node against a given expected hash. + * Note that we have two master nodes on a UBIFS image which have different + * sequence numbers and consequently different CRCs. To be able to match + * both master nodes we exclude the common node header containing the sequence + * number and CRC from the hash. + * + * Returns 0 if the hashes are equal, a negative error code otherwise. + */ +static int mst_node_check_hash(const struct ubifs_info *c, + const struct ubifs_mst_node *mst, + const u8 *expected) +{ + u8 calc[UBIFS_MAX_HASH_LEN]; + const void *node = mst; + + SHASH_DESC_ON_STACK(shash, c->hash_tfm); + + shash->tfm = c->hash_tfm; + + crypto_shash_digest(shash, node + sizeof(struct ubifs_ch), + UBIFS_MST_NODE_SZ - sizeof(struct ubifs_ch), calc); + + if (ubifs_check_hash(c, expected, calc)) + return -EPERM; + + return 0; +} + /** * scan_for_master - search the valid master node. * @c: UBIFS file-system description object @@ -114,14 +135,22 @@ static int scan_for_master(struct ubifs_info *c) if (!ubifs_authenticated(c)) return 0; - err = ubifs_node_verify_hmac(c, c->mst_node, - sizeof(struct ubifs_mst_node), - offsetof(struct ubifs_mst_node, hmac)); - if (err) { - ubifs_err(c, "Failed to verify master node HMAC"); - return -EPERM; + if (ubifs_hmac_zero(c, c->mst_node->hmac)) { + err = mst_node_check_hash(c, c->mst_node, + c->sup_node->hash_mst); + if (err) + ubifs_err(c, "Failed to verify master node hash"); + } else { + err = ubifs_node_verify_hmac(c, c->mst_node, + sizeof(struct ubifs_mst_node), + offsetof(struct ubifs_mst_node, hmac)); + if (err) + ubifs_err(c, "Failed to verify master node HMAC"); } + if (err) + return -EPERM; + return 0; out: diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 78a6e97f846e..c97a4d537d83 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 2f1618f300fb..b52624e28fa1 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Author: Adrian Hunter */ @@ -138,25 +126,11 @@ static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o) kfree(o); } -static void orphan_delete(struct ubifs_info *c, ino_t inum) +static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) { - struct ubifs_orphan *orph, *child_orph, *tmp_o; - - spin_lock(&c->orphan_lock); - - orph = lookup_orphan(c, inum); - if (!orph) { - spin_unlock(&c->orphan_lock); - ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum); - dump_stack(); - - return; - } - if (orph->del) { spin_unlock(&c->orphan_lock); - dbg_gen("deleted twice ino %lu", - (unsigned long)inum); + dbg_gen("deleted twice ino %lu", orph->inum); return; } @@ -165,19 +139,11 @@ static void orphan_delete(struct ubifs_info *c, ino_t inum) orph->dnext = c->orph_dnext; c->orph_dnext = orph; spin_unlock(&c->orphan_lock); - dbg_gen("delete later ino %lu", - (unsigned long)inum); + dbg_gen("delete later ino %lu", orph->inum); return; } - list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { - list_del(&child_orph->child_list); - __orphan_drop(c, child_orph); - } - __orphan_drop(c, orph); - - spin_unlock(&c->orphan_lock); } /** @@ -235,7 +201,27 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum) */ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) { - orphan_delete(c, inum); + struct ubifs_orphan *orph, *child_orph, *tmp_o; + + spin_lock(&c->orphan_lock); + + orph = lookup_orphan(c, inum); + if (!orph) { + spin_unlock(&c->orphan_lock); + ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum); + dump_stack(); + + return; + } + + list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { + list_del(&child_orph->child_list); + orphan_delete(c, child_orph); + } + + orphan_delete(c, orph); + + spin_unlock(&c->orphan_lock); } /** @@ -642,6 +628,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, { struct ubifs_scan_node *snod; struct ubifs_orph_node *orph; + struct ubifs_ino_node *ino = NULL; unsigned long long cmt_no; ino_t inum; int i, n, err, first = 1; @@ -688,23 +675,40 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (first) first = 0; + ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); + if (!ino) + return -ENOMEM; + n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; for (i = 0; i < n; i++) { union ubifs_key key1, key2; inum = le64_to_cpu(orph->inos[i]); - dbg_rcvry("deleting orphaned inode %lu", - (unsigned long)inum); - - lowest_ino_key(c, &key1, inum); - highest_ino_key(c, &key2, inum); - err = ubifs_tnc_remove_range(c, &key1, &key2); + ino_key_init(c, &key1, inum); + err = ubifs_tnc_lookup(c, &key1, ino); if (err) - return err; + goto out_free; + + /* + * Check whether an inode can really get deleted. + * linkat() with O_TMPFILE allows rebirth of an inode. + */ + if (ino->nlink == 0) { + dbg_rcvry("deleting orphaned inode %lu", + (unsigned long)inum); + + lowest_ino_key(c, &key1, inum); + highest_ino_key(c, &key2, inum); + + err = ubifs_tnc_remove_range(c, &key1, &key2); + if (err) + goto out_ro; + } + err = insert_dead_orphan(c, inum); if (err) - return err; + goto out_free; } *last_cmt_no = cmt_no; @@ -716,7 +720,15 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, *last_flagged = 0; } - return 0; + err = 0; +out_free: + kfree(ino); + return err; + +out_ro: + ubifs_ro_mode(c, err); + kfree(ino); + return err; } /** diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 8526b7ec4707..f116f7b3f9e5 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ @@ -830,7 +818,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, goto out_err; } if (cs_node->ch.node_type != UBIFS_CS_NODE) { - ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type); + ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type); goto out_err; } if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 5c8a81a019a4..b28ac4dfb407 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 3ca41965db6e..a551eb3e9b89 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -590,17 +578,26 @@ static int authenticate_sb_node(struct ubifs_info *c, return -EINVAL; } - err = ubifs_hmac_wkm(c, hmac_wkm); - if (err) - return err; - - if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) { - ubifs_err(c, "provided key does not fit"); - return -ENOKEY; + /* + * The super block node can either be authenticated by a HMAC or + * by a signature in a ubifs_sig_node directly following the + * super block node to support offline image creation. + */ + if (ubifs_hmac_zero(c, sup->hmac)) { + err = ubifs_sb_verify_signature(c, sup); + } else { + err = ubifs_hmac_wkm(c, hmac_wkm); + if (err) + return err; + if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) { + ubifs_err(c, "provided key does not fit"); + return -ENOKEY; + } + err = ubifs_node_verify_hmac(c, sup, sizeof(*sup), + offsetof(struct ubifs_sb_node, + hmac)); } - err = ubifs_node_verify_hmac(c, sup, sizeof(*sup), - offsetof(struct ubifs_sb_node, hmac)); if (err) ubifs_err(c, "Failed to authenticate superblock: %d", err); @@ -756,21 +753,16 @@ int ubifs_read_superblock(struct ubifs_info *c) } /* Automatically increase file system size to the maximum size */ - c->old_leb_cnt = c->leb_cnt; if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { + int old_leb_cnt = c->leb_cnt; + c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); - if (c->ro_mount) - dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", - c->old_leb_cnt, c->leb_cnt); - else { - dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs", - c->old_leb_cnt, c->leb_cnt); - sup->leb_cnt = cpu_to_le32(c->leb_cnt); - err = ubifs_write_sb_node(c, sup); - if (err) - goto out; - c->old_leb_cnt = c->leb_cnt; - } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + + c->superblock_need_write = 1; + + dbg_mnt("Auto resizing from %d LEBs to %d LEBs", + old_leb_cnt, c->leb_cnt); } c->log_bytes = (long long)c->log_lebs * c->leb_size; @@ -928,9 +920,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c) c->space_fixup = 0; sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); - err = ubifs_write_sb_node(c, sup); - if (err) - return err; + c->superblock_need_write = 1; ubifs_msg(c, "free space fixup complete"); return err; diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index ea88926163f4..c69cdb5e65bc 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 5eb5958723d4..d00a6f20ac7b 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 04b8ecfd3470..2c0803b0ac3a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -578,6 +566,8 @@ static int init_constants_early(struct ubifs_info *c) c->ranges[UBIFS_AUTH_NODE].min_len = UBIFS_AUTH_NODE_SZ; c->ranges[UBIFS_AUTH_NODE].max_len = UBIFS_AUTH_NODE_SZ + UBIFS_MAX_HMAC_LEN; + c->ranges[UBIFS_SIG_NODE].min_len = UBIFS_SIG_NODE_SZ; + c->ranges[UBIFS_SIG_NODE].max_len = c->leb_size - UBIFS_SB_NODE_SZ; c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ; c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ; @@ -1055,6 +1045,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, c->mount_opts.compr_type = UBIFS_COMPR_LZO; else if (!strcmp(name, "zlib")) c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; + else if (!strcmp(name, "zstd")) + c->mount_opts.compr_type = UBIFS_COMPR_ZSTD; else { ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready? kfree(name); @@ -1308,8 +1300,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_free; - sz = ALIGN(c->max_idx_node_sz, c->min_io_size); - sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); + sz = ALIGN(c->max_idx_node_sz, c->min_io_size) * 2; c->cbuf = kmalloc(sz, GFP_NOFS); if (!c->cbuf) { err = -ENOMEM; @@ -1372,6 +1363,26 @@ static int mount_ubifs(struct ubifs_info *c) goto out_lpt; } + /* + * Handle offline signed images: Now that the master node is + * written and its validation no longer depends on the hash + * in the superblock, we can update the offline signed + * superblock with a HMAC version, + */ + if (ubifs_authenticated(c) && ubifs_hmac_zero(c, c->sup_node->hmac)) { + err = ubifs_hmac_wkm(c, c->sup_node->hmac_wkm); + if (err) + goto out_lpt; + c->superblock_need_write = 1; + } + + if (!c->ro_mount && c->superblock_need_write) { + err = ubifs_write_sb_node(c, c->sup_node); + if (err) + goto out_lpt; + c->superblock_need_write = 0; + } + err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; @@ -1477,9 +1488,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - err = dbg_debugfs_init_fs(c); - if (err) - goto out_infos; + dbg_debugfs_init_fs(c); c->mounting = 0; @@ -1656,15 +1665,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (err) goto out; - if (c->old_leb_cnt != c->leb_cnt) { - struct ubifs_sb_node *sup = c->sup_node; - - sup->leb_cnt = cpu_to_le32(c->leb_cnt); - err = ubifs_write_sb_node(c, sup); - if (err) - goto out; - } - if (c->need_recovery) { ubifs_msg(c, "completing deferred recovery"); err = ubifs_write_rcvrd_mst_node(c); @@ -1696,6 +1696,16 @@ static int ubifs_remount_rw(struct ubifs_info *c) goto out; } + if (c->superblock_need_write) { + struct ubifs_sb_node *sup = c->sup_node; + + err = ubifs_write_sb_node(c, sup); + if (err) + goto out; + + c->superblock_need_write = 0; + } + c->ileb_buf = vmalloc(c->leb_size); if (!c->ileb_buf) { err = -ENOMEM; @@ -2364,9 +2374,7 @@ static int __init ubifs_init(void) if (err) goto out_shrinker; - err = dbg_debugfs_init(); - if (err) - goto out_compr; + dbg_debugfs_init(); err = register_filesystem(&ubifs_fs_type); if (err) { @@ -2378,7 +2386,6 @@ static int __init ubifs_init(void) out_dbg: dbg_debugfs_exit(); -out_compr: ubifs_compressors_exit(); out_shrinker: unregister_shrinker(&ubifs_shrinker_info); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index ebf8c26f5b22..e8e7b0e9532e 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ @@ -1170,8 +1158,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, * o exact match, i.e. the found zero-level znode contains key @key, then %1 * is returned and slot number of the matched branch is stored in @n; * o not exact match, which means that zero-level znode does not contain - * @key, then %0 is returned and slot number of the closest branch is stored - * in @n; + * @key, then %0 is returned and slot number of the closest branch or %-1 + * is stored in @n; In this case calling tnc_next() is mandatory. * o @key is so small that it is even less than the lowest key of the * leftmost zero-level node, then %0 is returned and %0 is stored in @n. * @@ -1894,13 +1882,19 @@ int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, static int search_dh_cookie(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_dent_node *dent, uint32_t cookie, - struct ubifs_znode **zn, int *n) + struct ubifs_znode **zn, int *n, int exact) { int err; struct ubifs_znode *znode = *zn; struct ubifs_zbranch *zbr; union ubifs_key *dkey; + if (!exact) { + err = tnc_next(c, &znode, n); + if (err) + return err; + } + for (;;) { zbr = &znode->zbranch[*n]; dkey = &zbr->key; @@ -1942,7 +1936,7 @@ static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key, if (unlikely(err < 0)) goto out_unlock; - err = search_dh_cookie(c, key, dent, cookie, &znode, &n); + err = search_dh_cookie(c, key, dent, cookie, &znode, &n, err); out_unlock: mutex_unlock(&c->tnc_mutex); @@ -2735,7 +2729,7 @@ int ubifs_tnc_remove_dh(struct ubifs_info *c, const union ubifs_key *key, if (unlikely(err < 0)) goto out_free; - err = search_dh_cookie(c, key, dent, cookie, &znode, &n); + err = search_dh_cookie(c, key, dent, cookie, &znode, &n, err); if (err) goto out_free; } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index dbcd2c350b65..a384a0f9ff32 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index d1815e959007..6f293f662d98 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Adrian Hunter * Artem Bityutskiy (Битюцкий Артём) */ diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 8b7c1844014f..3c9792cbb6ff 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -287,6 +275,8 @@ enum { #define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node) #define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) #define UBIFS_AUTH_NODE_SZ sizeof(struct ubifs_auth_node) +#define UBIFS_SIG_NODE_SZ sizeof(struct ubifs_sig_node) + /* Extended attribute entry nodes are identical to directory entry nodes */ #define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ /* Only this does not have to be multiple of 8 bytes */ @@ -313,6 +303,8 @@ enum { */ #define UBIFS_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* Type field in ubifs_sig_node */ +#define UBIFS_SIGNATURE_TYPE_PKCS7 1 /* * On-flash inode flags. @@ -348,12 +340,14 @@ enum { * UBIFS_COMPR_NONE: no compression * UBIFS_COMPR_LZO: LZO compression * UBIFS_COMPR_ZLIB: ZLIB compression + * UBIFS_COMPR_ZSTD: ZSTD compression * UBIFS_COMPR_TYPES_CNT: count of supported compression types */ enum { UBIFS_COMPR_NONE, UBIFS_COMPR_LZO, UBIFS_COMPR_ZLIB, + UBIFS_COMPR_ZSTD, UBIFS_COMPR_TYPES_CNT, }; @@ -373,6 +367,7 @@ enum { * UBIFS_CS_NODE: commit start node * UBIFS_ORPH_NODE: orphan node * UBIFS_AUTH_NODE: authentication node + * UBIFS_SIG_NODE: signature node * UBIFS_NODE_TYPES_CNT: count of supported node types * * Note, we index arrays by these numbers, so keep them low and contiguous. @@ -393,6 +388,7 @@ enum { UBIFS_CS_NODE, UBIFS_ORPH_NODE, UBIFS_AUTH_NODE, + UBIFS_SIG_NODE, UBIFS_NODE_TYPES_CNT, }; @@ -650,6 +646,8 @@ struct ubifs_pad_node { * @hmac_wkm: HMAC of a well known message (the string "UBIFS") as a convenience * to the user to check if the correct key is passed. * @hash_algo: The hash algo used for this filesystem (one of enum hash_algo) + * @hash_mst: hash of the master node, only valid for signed images in which the + * master node does not contain a hmac */ struct ubifs_sb_node { struct ubifs_ch ch; @@ -680,7 +678,8 @@ struct ubifs_sb_node { __u8 hmac[UBIFS_MAX_HMAC_LEN]; __u8 hmac_wkm[UBIFS_MAX_HMAC_LEN]; __le16 hash_algo; - __u8 padding2[3838]; + __u8 hash_mst[UBIFS_MAX_HASH_LEN]; + __u8 padding2[3774]; } __packed; /** @@ -783,6 +782,23 @@ struct ubifs_auth_node { } __packed; /** + * struct ubifs_sig_node - node for signing other nodes + * @ch: common header + * @type: type of the signature, currently only UBIFS_SIGNATURE_TYPE_PKCS7 + * supported + * @len: The length of the signature data + * @padding: reserved for future, zeroes + * @sig: The signature data + */ +struct ubifs_sig_node { + struct ubifs_ch ch; + __le32 type; + __le32 len; + __u8 padding[32]; + __u8 sig[]; +} __packed; + +/** * struct ubifs_branch - key/reference/length branch * @lnum: LEB number of the target node * @offs: offset within @lnum diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index fd7f39990157..c55f212dcb75 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ @@ -1116,7 +1104,6 @@ struct ubifs_debug_info; * used to store indexing nodes (@leb_size - @max_idx_node_sz) * @leb_cnt: count of logical eraseblocks * @max_leb_cnt: maximum count of logical eraseblocks - * @old_leb_cnt: count of logical eraseblocks before re-size * @ro_media: the underlying UBI volume is read-only * @ro_mount: the file-system was mounted as read-only * @ro_error: UBIFS switched to R/O mode because an error happened @@ -1307,6 +1294,7 @@ struct ubifs_info { unsigned int rw_incompat:1; unsigned int assert_action:2; unsigned int authenticated:1; + unsigned int superblock_need_write:1; struct mutex tnc_mutex; struct ubifs_zbranch zroot; @@ -1364,7 +1352,6 @@ struct ubifs_info { int idx_leb_size; int leb_cnt; int max_leb_cnt; - int old_leb_cnt; unsigned int ro_media:1; unsigned int ro_mount:1; unsigned int ro_error:1; @@ -1692,6 +1679,9 @@ static inline int ubifs_auth_node_sz(const struct ubifs_info *c) else return 0; } +int ubifs_sb_verify_signature(struct ubifs_info *c, + const struct ubifs_sb_node *sup); +bool ubifs_hmac_zero(struct ubifs_info *c, const u8 *hmac); int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index bcfed27e8997..9aefbb60074f 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * This file is part of UBIFS. * * Copyright (C) 2006-2008 Nokia Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 51 - * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * * Authors: Artem Bityutskiy (Битюцкий Артём) * Adrian Hunter */ diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig index aa415054ad0a..6848de581ce1 100644 --- a/fs/udf/Kconfig +++ b/fs/udf/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config UDF_FS tristate "UDF file system support" select CRC_ITU_T diff --git a/fs/udf/Makefile b/fs/udf/Makefile index eb880f66c23a..63981cd333e3 100644 --- a/fs/udf/Makefile +++ b/fs/udf/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the linux udf-filesystem routines. # diff --git a/fs/udf/inode.c b/fs/udf/inode.c index e7276932e433..9bb18311a22f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -470,13 +470,15 @@ static struct buffer_head *udf_getblk(struct inode *inode, udf_pblk_t block, return NULL; } -/* Extend the file by 'blocks' blocks, return the number of extents added */ +/* Extend the file with new blocks totaling 'new_block_bytes', + * return the number of extents added + */ static int udf_do_extend_file(struct inode *inode, struct extent_position *last_pos, struct kernel_long_ad *last_ext, - sector_t blocks) + loff_t new_block_bytes) { - sector_t add; + uint32_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; struct kernel_lb_addr prealloc_loc = {}; @@ -486,7 +488,7 @@ static int udf_do_extend_file(struct inode *inode, /* The previous extent is fake and we should not extend by anything * - there's nothing to do... */ - if (!blocks && fake) + if (!new_block_bytes && fake) return 0; iinfo = UDF_I(inode); @@ -517,13 +519,12 @@ static int udf_do_extend_file(struct inode *inode, /* Can we merge with the previous extent? */ if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { - add = ((1 << 30) - sb->s_blocksize - - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) >> - sb->s_blocksize_bits; - if (add > blocks) - add = blocks; - blocks -= add; - last_ext->extLength += add << sb->s_blocksize_bits; + add = (1 << 30) - sb->s_blocksize - + (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + if (add > new_block_bytes) + add = new_block_bytes; + new_block_bytes -= add; + last_ext->extLength += add; } if (fake) { @@ -544,28 +545,27 @@ static int udf_do_extend_file(struct inode *inode, } /* Managed to do everything necessary? */ - if (!blocks) + if (!new_block_bytes) goto out; /* All further extents will be NOT_RECORDED_NOT_ALLOCATED */ last_ext->extLocation.logicalBlockNum = 0; last_ext->extLocation.partitionReferenceNum = 0; - add = (1 << (30-sb->s_blocksize_bits)) - 1; - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (add << sb->s_blocksize_bits); + add = (1 << 30) - sb->s_blocksize; + last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | add; /* Create enough extents to cover the whole hole */ - while (blocks > add) { - blocks -= add; + while (new_block_bytes > add) { + new_block_bytes -= add; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) return err; count++; } - if (blocks) { + if (new_block_bytes) { last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (blocks << sb->s_blocksize_bits); + new_block_bytes; err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) @@ -596,6 +596,24 @@ out: return count; } +/* Extend the final block of the file to final_block_len bytes */ +static void udf_do_extend_final_block(struct inode *inode, + struct extent_position *last_pos, + struct kernel_long_ad *last_ext, + uint32_t final_block_len) +{ + struct super_block *sb = inode->i_sb; + uint32_t added_bytes; + + added_bytes = final_block_len - + (last_ext->extLength & (sb->s_blocksize - 1)); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + + udf_write_aext(inode, last_pos, &last_ext->extLocation, + last_ext->extLength, 1); +} + static int udf_extend_file(struct inode *inode, loff_t newsize) { @@ -605,10 +623,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) int8_t etype; struct super_block *sb = inode->i_sb; sector_t first_block = newsize >> sb->s_blocksize_bits, offset; + unsigned long partial_final_block; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); struct kernel_long_ad extent; - int err; + int err = 0; + int within_final_block; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -618,18 +638,8 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) BUG(); etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + within_final_block = (etype != -1); - /* File has extent covering the new size (could happen when extending - * inside a block)? */ - if (etype != -1) - return 0; - if (newsize & (sb->s_blocksize - 1)) - offset++; - /* Extended file just to the boundary of the last file block? */ - if (offset == 0) - return 0; - - /* Truncate is extending the file by 'offset' blocks */ if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { /* File has no extents at all or has empty last @@ -643,7 +653,22 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) &extent.extLength, 0); extent.extLength |= etype << 30; } - err = udf_do_extend_file(inode, &epos, &extent, offset); + + partial_final_block = newsize & (sb->s_blocksize - 1); + + /* File has extent covering the new size (could happen when extending + * inside a block)? + */ + if (within_final_block) { + /* Extending file within the last file block */ + udf_do_extend_final_block(inode, &epos, &extent, + partial_final_block); + } else { + loff_t add = ((loff_t)offset << sb->s_blocksize_bits) | + partial_final_block; + err = udf_do_extend_file(inode, &epos, &extent, add); + } + if (err < 0) goto out; err = 0; @@ -745,6 +770,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; + loff_t hole_len; isBeyondEOF = true; if (count) { if (c) @@ -760,7 +786,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, startnum = (offset > 0); } /* Create extents for the hole between EOF and offset */ - ret = udf_do_extend_file(inode, &prev_epos, laarr, offset); + hole_len = (loff_t)offset << inode->i_blkbits; + ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len); if (ret < 0) { *err = ret; newblock = 0; diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig index 0bf6e16f8d79..fcb41516ea59 100644 --- a/fs/ufs/Kconfig +++ b/fs/ufs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config UFS_FS tristate "UFS file system support (read only)" depends on BLOCK diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile index ec4a6b49fa13..042344c85be0 100644 --- a/fs/ufs/Makefile +++ b/fs/ufs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for the Linux ufs filesystem routines. # diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 84c0c5178cd2..4ed0dca52ec8 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/ufs/super.c * @@ -1406,11 +1407,9 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; unsigned flags = UFS_SB(sb)->s_flags; - struct ufs_super_block_third *usb3; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); mutex_lock(&UFS_SB(sb)->s_lock); - usb3 = ubh_get_usb_third(uspi); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) buf->f_type = UFS2_MAGIC; diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig index b560a879edf7..2c27b9a5cd6c 100644 --- a/fs/unicode/Kconfig +++ b/fs/unicode/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # UTF-8 normalization # diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 6afab4fdce90..71ca4d047d65 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -73,6 +73,34 @@ int utf8_strncasecmp(const struct unicode_map *um, } EXPORT_SYMBOL(utf8_strncasecmp); +/* String cf is expected to be a valid UTF-8 casefolded + * string. + */ +int utf8_strncasecmp_folded(const struct unicode_map *um, + const struct qstr *cf, + const struct qstr *s1) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur1; + int c1, c2; + int i = 0; + + if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) + return -EINVAL; + + do { + c1 = utf8byte(&cur1); + c2 = cf->name[i++]; + if (c1 < 0) + return -EINVAL; + if (c1 != c2) + return 1; + } while (c1); + + return 0; +} +EXPORT_SYMBOL(utf8_strncasecmp_folded); + int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/fs/unicode/utf8-norm.c b/fs/unicode/utf8-norm.c index 801ed6d2ea37..1d2d2e5b906a 100644 --- a/fs/unicode/utf8-norm.c +++ b/fs/unicode/utf8-norm.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014 SGI. * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include "utf8n.h" diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c index 80752013fce0..6c1a36bbf6ad 100644 --- a/fs/unicode/utf8-selftest.c +++ b/fs/unicode/utf8-selftest.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Kernel module for testing utf-8 support. * * Copyright 2017 Collabora Ltd. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/fs/unicode/utf8n.h b/fs/unicode/utf8n.h index a120638014c1..0acd530c2c79 100644 --- a/fs/unicode/utf8n.h +++ b/fs/unicode/utf8n.h @@ -1,16 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2014 SGI. * All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #ifndef UTF8NORM_H diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3b30301c90ec..ccbdbd62f0d8 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/userfaultfd.c * @@ -5,9 +6,6 @@ * Copyright (C) 2008-2009 Red Hat, Inc. * Copyright (C) 2015 Red Hat, Inc. * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * * Some part derived from fs/eventfd.c (anon inode setup) and * mm/ksm.c (mm hashing). */ @@ -42,6 +40,16 @@ enum userfaultfd_state { /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. */ struct userfaultfd_ctx { /* waitqueue head for the pending (i.e. not read) userfaults */ @@ -460,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) blocking_state = return_to_userland ? TASK_INTERRUPTIBLE : TASK_KILLABLE; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -472,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * __add_wait_queue. */ set_current_state(blocking_state); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags, @@ -554,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * kernel stack can be released after the list_del_init. */ if (!list_empty_careful(&uwq.wq.entry)) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* * No need of list_del_init(), the uwq on the stack * will be freed shortly anyway. */ list_del(&uwq.wq.entry); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } /* @@ -585,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, init_waitqueue_entry(&ewq->wq, current); release_new_ctx = NULL; - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland * through poll/read(). @@ -615,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); wake_up_poll(&ctx->fd_wqh, EPOLLIN); schedule(); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); } __set_current_state(TASK_RUNNING); - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); if (release_new_ctx) { struct vm_area_struct *vma; @@ -920,10 +928,10 @@ wakeup: * the last page faults that may have been already waiting on * the fault_*wqh. */ - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range); __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* Flush pending events that may still wait on event_wqh */ wake_up_all(&ctx->event_wqh); @@ -1136,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (!ret && msg->event == UFFD_EVENT_FORK) { ret = resolve_userfault_fork(ctx, fork_nctx, msg); - spin_lock(&ctx->event_wqh.lock); + spin_lock_irq(&ctx->event_wqh.lock); if (!list_empty(&fork_event)) { /* * The fork thread didn't abort, so we can @@ -1182,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait, if (ret) userfaultfd_ctx_put(fork_nctx); } - spin_unlock(&ctx->event_wqh.lock); + spin_unlock_irq(&ctx->event_wqh.lock); } return ret; @@ -1221,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, static void __wake_userfault(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range *range) { - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); /* wake all in the range and autoremove */ if (waitqueue_active(&ctx->fault_pending_wqh)) __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, range); if (waitqueue_active(&ctx->fault_wqh)) __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range); - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); } static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, @@ -1883,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) wait_queue_entry_t *wq; unsigned long pending = 0, total = 0; - spin_lock(&ctx->fault_pending_wqh.lock); + spin_lock_irq(&ctx->fault_pending_wqh.lock); list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) { pending++; total++; @@ -1891,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) list_for_each_entry(wq, &ctx->fault_wqh.head, entry) { total++; } - spin_unlock(&ctx->fault_pending_wqh.lock); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); /* * If more protocols will be added, there will be all shown diff --git a/fs/xattr.c b/fs/xattr.c index 0d6a6a4af861..90dd78f0eb27 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* File: fs/xattr.c diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 99af5e5bda9f..e685299eb3d2 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only config XFS_FS tristate "XFS filesystem support" depends on BLOCK diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 91831975363b..06b68b6115bc 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -49,6 +49,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_refcount_btree.o \ xfs_sb.o \ xfs_symlink_remote.o \ + xfs_trans_inode.o \ xfs_trans_resv.o \ xfs_types.o \ ) @@ -62,6 +63,7 @@ xfs-y += xfs_aops.o \ xfs_attr_inactive.o \ xfs_attr_list.o \ xfs_bmap_util.o \ + xfs_bio_io.o \ xfs_buf.o \ xfs_dir2_readdir.o \ xfs_discard.o \ @@ -80,9 +82,11 @@ xfs-y += xfs_aops.o \ xfs_iops.o \ xfs_inode.o \ xfs_itable.o \ + xfs_iwalk.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_pwork.o \ xfs_reflink.o \ xfs_stats.o \ xfs_super.o \ @@ -104,12 +108,7 @@ xfs-y += xfs_log.o \ xfs_rmap_item.o \ xfs_log_recover.o \ xfs_trans_ail.o \ - xfs_trans_bmap.o \ - xfs_trans_buf.o \ - xfs_trans_extfree.o \ - xfs_trans_inode.o \ - xfs_trans_refcount.o \ - xfs_trans_rmap.o \ + xfs_trans_buf.o # optional features xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index fdd9d6ede25c..16bb9a328678 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -3,12 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/mm.h> #include <linux/sched/mm.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/swap.h> -#include <linux/blkdev.h> #include <linux/backing-dev.h> #include "kmem.h" #include "xfs_message.h" diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 8e6b3ba81c03..267655acd426 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) return kmem_zone_alloc(zone, flags | KM_ZERO); } +static inline struct page * +kmem_to_page(void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + return virt_to_page(addr); +} + #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index b0c89f54d1bb..5de296b34ab1 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -10,6 +10,7 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" +#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" @@ -44,6 +45,12 @@ xfs_get_aghdr_buf( return bp; } +static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) +{ + return mp->m_sb.sb_logstart > 0 && + id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); +} + /* * Generic btree root block init function */ @@ -53,40 +60,85 @@ xfs_btroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0); + xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); } -/* - * Alloc btree root block init functions - */ +/* Finish initializing a free space btree. */ static void -xfs_bnoroot_init( +xfs_freesp_init_recs( struct xfs_mount *mp, struct xfs_buf *bp, struct aghdr_init_data *id) { struct xfs_alloc_rec *arec; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); + + if (is_log_ag(mp, id)) { + struct xfs_alloc_rec *nrec; + xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, + mp->m_sb.sb_logstart); + + ASSERT(start >= mp->m_ag_prealloc_blocks); + if (start != mp->m_ag_prealloc_blocks) { + /* + * Modify first record to pad stripe align of log + */ + arec->ar_blockcount = cpu_to_be32(start - + mp->m_ag_prealloc_blocks); + nrec = arec + 1; + + /* + * Insert second record at start of internal log + * which then gets trimmed. + */ + nrec->ar_startblock = cpu_to_be32( + be32_to_cpu(arec->ar_startblock) + + be32_to_cpu(arec->ar_blockcount)); + arec = nrec; + be16_add_cpu(&block->bb_numrecs, 1); + } + /* + * Change record start to after the internal log + */ + be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); + } + + /* + * Calculate the record block count and check for the case where + * the log might have consumed all available space in the AG. If + * so, reset the record count to 0 to avoid exposure of an invalid + * record start block. + */ arec->ar_blockcount = cpu_to_be32(id->agsize - be32_to_cpu(arec->ar_startblock)); + if (!arec->ar_blockcount) + block->bb_numrecs = 0; } +/* + * Alloc btree root block init functions + */ static void -xfs_cntroot_init( +xfs_bnoroot_init( struct xfs_mount *mp, struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_alloc_rec *arec; + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); + xfs_freesp_init_recs(mp, bp, id); +} - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); - arec->ar_blockcount = cpu_to_be32(id->agsize - - be32_to_cpu(arec->ar_startblock)); +static void +xfs_cntroot_init( + struct xfs_mount *mp, + struct xfs_buf *bp, + struct aghdr_init_data *id) +{ + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); + xfs_freesp_init_recs(mp, bp, id); } /* @@ -101,7 +153,7 @@ xfs_rmaproot_init( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_rmap_rec *rrec; - xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); /* * mark the AG header regions as static metadata The BNO @@ -149,6 +201,18 @@ xfs_rmaproot_init( rrec->rm_offset = 0; be16_add_cpu(&block->bb_numrecs, 1); } + + /* account for the log space */ + if (is_log_ag(mp, id)) { + rrec = XFS_RMAP_REC_ADDR(block, + be16_to_cpu(block->bb_numrecs) + 1); + rrec->rm_startblock = cpu_to_be32( + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); + rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + } } /* @@ -209,6 +273,14 @@ xfs_agfblock_init( agf->agf_refcount_level = cpu_to_be32(1); agf->agf_refcount_blocks = cpu_to_be32(1); } + + if (is_log_ag(mp, id)) { + int64_t logblocks = mp->m_sb.sb_logblocks; + + be32_add_cpu(&agf->agf_freeblks, -logblocks); + agf->agf_longest = cpu_to_be32(id->agsize - + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); + } } static void diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e2ba2a3b63b2..87a9747f1d36 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -9,20 +9,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_alloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" -#include "xfs_bit.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" -#include "xfs_ag_resv.h" -#include "xfs_trans_space.h" #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index a9ff3cf82cce..372ad55631fc 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -13,7 +13,6 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_alloc_btree.h" @@ -21,7 +20,6 @@ #include "xfs_extent_busy.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" @@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq; STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); /* * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in @@ -555,7 +551,7 @@ static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; @@ -596,7 +592,7 @@ static void xfs_agfl_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; /* @@ -621,7 +617,7 @@ static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -700,6 +696,107 @@ xfs_alloc_update_counters( */ /* + * Deal with the case where only small freespaces remain. Either return the + * contents of the last freespace record, or allocate space from the freelist if + * there is nothing in the tree. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_small( + struct xfs_alloc_arg *args, /* allocation argument structure */ + struct xfs_btree_cur *ccur, /* optional by-size cursor */ + xfs_agblock_t *fbnop, /* result block number */ + xfs_extlen_t *flenp, /* result length */ + int *stat) /* status: 0-freelist, 1-normal/none */ +{ + int error = 0; + xfs_agblock_t fbno = NULLAGBLOCK; + xfs_extlen_t flen = 0; + int i = 0; + + /* + * If a cntbt cursor is provided, try to allocate the largest record in + * the tree. Try the AGFL if the cntbt is empty, otherwise fail the + * allocation. Make sure to respect minleft even when pulling from the + * freelist. + */ + if (ccur) + error = xfs_btree_decrement(ccur, 0, &i); + if (error) + goto error; + if (i) { + error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error); + goto out; + } + + if (args->minlen != 1 || args->alignment != 1 || + args->resv == XFS_AG_RESV_AGFL || + (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= + args->minleft)) + goto out; + + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); + if (error) + goto error; + if (fbno == NULLAGBLOCK) + goto out; + + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + xfs_alloc_allow_busy_reuse(args->datatype)); + + if (xfs_alloc_is_userdata(args->datatype)) { + struct xfs_buf *bp; + + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); + if (!bp) { + error = -EFSCORRUPTED; + goto error; + } + xfs_trans_binval(args->tp, bp); + } + *fbnop = args->agbno = fbno; + *flenp = args->len = 1; + XFS_WANT_CORRUPTED_GOTO(args->mp, + fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), + error); + args->wasfromfl = 1; + trace_xfs_alloc_small_freelist(args); + + /* + * If we're feeding an AGFL block to something that doesn't live in the + * free space, we need to clear out the OWN_AG rmap. + */ + error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, + &XFS_RMAP_OINFO_AG); + if (error) + goto error; + + *stat = 0; + return 0; + +out: + /* + * Can't do the allocation, give up. + */ + if (flen < args->minlen) { + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_small_notenough(args); + flen = 0; + } + *fbnop = fbno; + *flenp = flen; + *stat = 1; + trace_xfs_alloc_small_done(args); + return 0; + +error: + trace_xfs_alloc_small_error(args); + return error; +} + +/* * Allocate a variable extent in the allocation group agno. * Type and bno are used to determine where in the allocation group the * extent will start. @@ -1583,112 +1680,6 @@ out_nominleft: } /* - * Deal with the case where only small freespaces remain. - * Either return the contents of the last freespace record, - * or allocate space from the freelist if there is nothing in the tree. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_small( - xfs_alloc_arg_t *args, /* allocation argument structure */ - xfs_btree_cur_t *ccur, /* by-size cursor */ - xfs_agblock_t *fbnop, /* result block number */ - xfs_extlen_t *flenp, /* result length */ - int *stat) /* status: 0-freelist, 1-normal/none */ -{ - int error; - xfs_agblock_t fbno; - xfs_extlen_t flen; - int i; - - if ((error = xfs_btree_decrement(ccur, 0, &i))) - goto error0; - if (i) { - if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - } - /* - * Nothing in the btree, try the freelist. Make sure - * to respect minleft even when pulling from the - * freelist. - */ - else if (args->minlen == 1 && args->alignment == 1 && - args->resv != XFS_AG_RESV_AGFL && - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) - > args->minleft)) { - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); - if (error) - goto error0; - if (fbno != NULLAGBLOCK) { - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, - xfs_alloc_allow_busy_reuse(args->datatype)); - - if (xfs_alloc_is_userdata(args->datatype)) { - xfs_buf_t *bp; - - bp = xfs_btree_get_bufs(args->mp, args->tp, - args->agno, fbno, 0); - if (!bp) { - error = -EFSCORRUPTED; - goto error0; - } - xfs_trans_binval(args->tp, bp); - } - args->len = 1; - args->agbno = fbno; - XFS_WANT_CORRUPTED_GOTO(args->mp, - args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error0); - args->wasfromfl = 1; - trace_xfs_alloc_small_freelist(args); - - /* - * If we're feeding an AGFL block to something that - * doesn't live in the free space, we need to clear - * out the OWN_AG rmap. - */ - error = xfs_rmap_free(args->tp, args->agbp, args->agno, - fbno, 1, &XFS_RMAP_OINFO_AG); - if (error) - goto error0; - - *stat = 0; - return 0; - } - /* - * Nothing in the freelist. - */ - else - flen = 0; - } - /* - * Can't allocate from the freelist for some reason. - */ - else { - fbno = NULLAGBLOCK; - flen = 0; - } - /* - * Can't do the allocation, give up. - */ - if (flen < args->minlen) { - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_small_notenough(args); - flen = 0; - } - *fbnop = fbno; - *flenp = flen; - *stat = 1; - trace_xfs_alloc_small_done(args); - return 0; - -error0: - trace_xfs_alloc_small_error(args); - return error; -} - -/* * Free the extent starting at agno/bno for length. */ STATIC int @@ -2095,7 +2086,7 @@ xfs_free_agfl_block( if (error) return error; - bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); if (!bp) return -EFSCORRUPTED; xfs_trans_binval(tp, bp); @@ -2586,7 +2577,7 @@ static xfs_failaddr_t xfs_agf_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { @@ -2644,7 +2635,7 @@ static void xfs_agf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -2661,7 +2652,7 @@ static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -3146,7 +3137,7 @@ xfs_alloc_has_record( /* * Walk all the blocks in the AGFL. The @walk_fn can return any negative - * error code or XFS_BTREE_QUERY_RANGE_ABORT. + * error code or XFS_ITER_*. */ int xfs_agfl_walk( diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 9fe949f6055e..2a94543857a1 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -17,7 +17,6 @@ #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" @@ -292,7 +291,7 @@ static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c441f41f14e8..d48fcf11cc35 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -9,23 +9,18 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr_sf.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_attr_remote.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 3b0dce06e454..ff28ebf3b635 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context { struct xfs_inode *dp; /* inode */ struct attrlist_cursor_kern *cursor; /* position in list */ char *alist; /* output buffer */ - int seen_enough; /* T/F: seen enough of list? */ + + /* + * Abort attribute list iteration if non-zero. Can be used to pass + * error values to the xfs_attr_list caller. + */ + int seen_enough; + ssize_t count; /* num used entries */ int dupcnt; /* count dup hashvals seen */ int bufsize; /* total buffer size */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 1f6e3965ff74..70eb941d02e4 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -10,14 +10,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_attr_sf.h" @@ -27,7 +25,6 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_dir2.h" #include "xfs_log.h" @@ -240,7 +237,7 @@ xfs_attr3_leaf_verify( struct xfs_buf *bp) { struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entries; uint32_t end; /* must be 32bit - see below */ @@ -313,7 +310,7 @@ static void xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; @@ -343,7 +340,7 @@ static void xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -865,7 +862,7 @@ xfs_attr_shortform_allfit( struct xfs_attr3_icleaf_hdr leafhdr; int bytes; int i; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); @@ -1525,7 +1522,7 @@ xfs_attr_leaf_order( { struct xfs_attr3_icleaf_hdr ichdr1; struct xfs_attr3_icleaf_hdr ichdr2; - struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; + struct xfs_mount *mp = leaf1_bp->b_mount; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); @@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash( { struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entries; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); entries = xfs_attr3_leaf_entryp(bp->b_addr); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 65ff600a8067..4eb30d357045 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -16,18 +16,10 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h" -#include "xfs_trans_space.h" #include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" #include "xfs_error.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ @@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify( bool check_crc, xfs_failaddr_t *failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; char *ptr; int len; xfs_daddr_t bno; @@ -175,7 +167,7 @@ static void xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; @@ -535,7 +527,7 @@ xfs_attr_rmtval_set( dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); if (!bp) return -ENOMEM; bp->b_ops = &xfs_attr3_rmt_buf_ops; diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c index 40ce5f3094d1..7071ff98fdbc 100644 --- a/fs/xfs/libxfs/xfs_bit.c +++ b/fs/xfs/libxfs/xfs_bit.c @@ -5,7 +5,6 @@ */ #include "xfs.h" #include "xfs_log_format.h" -#include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 356ebd1cbe82..baf0b72c0a37 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -13,14 +13,10 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -32,7 +28,6 @@ #include "xfs_trans_space.h" #include "xfs_buf_item.h" #include "xfs_trace.h" -#include "xfs_symlink.h" #include "xfs_attr_leaf.h" #include "xfs_filestream.h" #include "xfs_rmap.h" @@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents( XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, xfs_btree_check_lptr(cur, cbno, 1)); #endif - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, + error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; @@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree( cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); if (!abp) { error = -EFSCORRUPTED; goto out_unreserve_dquot; @@ -878,7 +873,7 @@ xfs_bmap_local_to_extents( ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); tp->t_firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno); /* * Initialize the block, copy the data and log the remote buffer. @@ -1203,7 +1198,7 @@ xfs_iread_extents( * pointer (leftmost) at each level. */ while (level-- > 0) { - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) goto out; @@ -1276,7 +1271,7 @@ xfs_iread_extents( */ if (bno == NULLFSBLOCK) break; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index aff82ed112c9..fbb18ba5d905 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -11,10 +11,8 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" @@ -22,7 +20,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_rmap.h" /* @@ -411,7 +408,7 @@ static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_failaddr_t fa; unsigned int level; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index bbdae2b4559f..f1048efa4268 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -11,16 +11,13 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_btree.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_alloc.h" #include "xfs_log.h" @@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_buf_log_item *bip = bp->b_log_item; - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) return; if (bip) block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); @@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) @@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_buf_log_item *bip = bp->b_log_item; - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) return; if (bip) block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); @@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) @@ -691,14 +688,13 @@ xfs_buf_t * /* buffer for fsbno */ xfs_btree_get_bufl( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock) /* lock flags for get_buf */ + xfs_fsblock_t fsbno) /* file system block number */ { xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); } /* @@ -710,15 +706,14 @@ xfs_btree_get_bufs( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock) /* lock flags for get_buf */ + xfs_agblock_t agbno) /* allocation group block number */ { xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); } /* @@ -845,7 +840,6 @@ xfs_btree_read_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ struct xfs_buf **bpp, /* buffer for fsbno */ int refval, /* ref count value for buffer */ const struct xfs_buf_ops *ops) @@ -858,7 +852,7 @@ xfs_btree_read_bufl( return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp, ops); + mp->m_bsize, 0, &bp, ops); if (error) return error; if (bp) @@ -1185,11 +1179,10 @@ xfs_btree_init_block( xfs_btnum_t btnum, __u16 level, __u16 numrecs, - __u64 owner, - unsigned int flags) + __u64 owner) { xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - btnum, level, numrecs, owner, flags); + btnum, level, numrecs, owner, 0); } STATIC void @@ -1288,7 +1281,6 @@ STATIC int xfs_btree_get_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, - int flags, struct xfs_btree_block **block, struct xfs_buf **bpp) { @@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block( xfs_daddr_t d; int error; - /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XBF_TRYLOCK)); - error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags); + mp->m_bsize, 0); if (!*bpp) return -ENOMEM; @@ -2706,7 +2695,7 @@ __xfs_btree_split( XFS_BTREE_STATS_INC(cur, alloc); /* Set up the new block as "right". */ - error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); + error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); if (error) goto error0; @@ -2961,7 +2950,7 @@ xfs_btree_new_iroot( XFS_BTREE_STATS_INC(cur, alloc); /* Copy the root into a real block. */ - error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); + error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); if (error) goto error0; @@ -3058,7 +3047,7 @@ xfs_btree_new_root( XFS_BTREE_STATS_INC(cur, alloc); /* Set up the new block. */ - error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); + error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); if (error) goto error0; @@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify( struct xfs_buf *bp, uint64_t owner) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify( struct xfs_buf *bp, unsigned int max_recs) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); /* numrecs verification */ @@ -4484,7 +4473,7 @@ xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; @@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_agblock_t agno; diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index e3b3e9dce5da..fa3cd8ab9aba 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -301,8 +301,7 @@ struct xfs_buf * /* buffer for fsbno */ xfs_btree_get_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock); /* lock flags for get_buf */ + xfs_fsblock_t fsbno); /* file system block number */ /* * Get a buffer for the block, return it with no data read. @@ -313,8 +312,7 @@ xfs_btree_get_bufs( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock); /* lock flags for get_buf */ + xfs_agblock_t agbno); /* allocation group block number */ /* * Check for the cursor referring to the last block at the given level. @@ -345,7 +343,6 @@ xfs_btree_read_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ struct xfs_buf **bpp, /* buffer for fsbno */ int refval, /* ref count value for buffer */ const struct xfs_buf_ops *ops); @@ -383,8 +380,7 @@ xfs_btree_init_block( xfs_btnum_t btnum, __u16 level, __u16 numrecs, - __u64 owner, - unsigned int flags); + __u64 owner); void xfs_btree_init_block_int( @@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); /* return codes */ -#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ -#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ +#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ +#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, union xfs_btree_rec *rec, void *priv); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index e2737e2ac2ae..d1c77fd0815d 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -12,20 +12,14 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" #include "xfs_bmap.h" -#include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_buf_item.h" #include "xfs_log.h" @@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify( struct xfs_buf *bp, struct xfs_da3_blkinfo *hdr3) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_da_blkinfo *hdr = &hdr3->hdr; if (!xfs_verify_magic16(bp, hdr->magic)) @@ -148,7 +142,7 @@ static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_da_intnode *hdr = bp->b_addr; struct xfs_da3_icnode_hdr ichdr; const struct xfs_dir_ops *ops; @@ -186,7 +180,7 @@ static void xfs_da3_node_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index b39053dcb643..b1ae572496b6 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -11,11 +11,8 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" -#include "xfs_dir2_priv.h" /* * Shortform directory ops diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 1c6bf2105939..eb2be2a6a25a 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -9,8 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 156ce95c9c45..67840723edbb 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -5,20 +5,16 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index b7d6d78f4ce2..a6fb0cc2085e 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -6,22 +6,19 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_log.h" /* @@ -50,7 +47,7 @@ static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) @@ -71,7 +68,7 @@ static void xfs_dir3_block_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -88,7 +85,7 @@ static void xfs_dir3_block_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index b7b9ce002cb9..2c79be4c3153 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -6,19 +6,16 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" -#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_log.h" static xfs_failaddr_t xfs_dir2_data_freefind_verify( @@ -50,14 +47,13 @@ __xfs_dir3_data_check( int i; /* leaf index */ int lastfree; /* last entry was unused */ xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ - xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_mount *mp = bp->b_mount; char *p; /* current data position */ int stale; /* count of stale leaves */ struct xfs_name name; const struct xfs_dir_ops *ops; struct xfs_da_geometry *geo; - mp = bp->b_target->bt_mount; geo = mp->m_dir_geo; /* @@ -249,7 +245,7 @@ static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) @@ -298,7 +294,7 @@ static void xfs_dir3_data_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -315,7 +311,7 @@ static void xfs_dir3_data_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 9c2a0a13ed61..a53e4585a2f3 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -6,12 +6,11 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_dir2.h" @@ -20,8 +19,6 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" -#include "xfs_log.h" /* * Local function declarations. @@ -144,7 +141,7 @@ static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_leaf *leaf = bp->b_addr; xfs_failaddr_t fa; @@ -159,7 +156,7 @@ static void xfs_dir3_leaf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -176,7 +173,7 @@ static void xfs_dir3_leaf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 16731d2d684b..afcc6642690a 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -6,12 +6,11 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_dir2.h" @@ -20,7 +19,6 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_log.h" /* @@ -84,7 +82,7 @@ static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (!xfs_verify_magic(bp, hdr->magic)) @@ -110,7 +108,7 @@ static void xfs_dir3_free_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -127,7 +125,7 @@ static void xfs_dir3_free_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 585dfdb7b6b6..033589257f54 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -5,16 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 88fa11071f9f..e8bd688a4073 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -16,8 +16,6 @@ #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_trace.h" int xfs_calc_dquots_per_chunk( @@ -224,7 +222,7 @@ static xfs_failaddr_t xfs_dquot_buf_verify_struct( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; return xfs_dquot_buf_verify(mp, bp, false); } @@ -233,7 +231,7 @@ static void xfs_dquot_buf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (!xfs_dquot_buf_verify_crc(mp, bp, false)) return; @@ -250,7 +248,7 @@ static void xfs_dquot_buf_readahead_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (!xfs_dquot_buf_verify_crc(mp, bp, true) || xfs_dquot_buf_verify(mp, bp, true) != NULL) { @@ -268,7 +266,7 @@ static void xfs_dquot_buf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_dquot_buf_verify(mp, bp, false); } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9bb3c48843ec..c968b60cee15 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog -#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log +#define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log) #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log #define XFS_INO_BITS(mp) \ XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index e7382c780ed7..52d03a3a02a4 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -97,7 +97,7 @@ struct getbmapx { * For use by backup and restore programs to set the XFS on-disk inode * fields di_dmevmask and di_dmstate. These must be set to exactly and * only values previously obtained via xfs_bulkstat! (Specifically the - * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) + * struct xfs_bstat fields bs_dmevmask and bs_dmstate.) */ #ifndef HAVE_FSDMIDATA struct fsdmidata { @@ -328,7 +328,7 @@ typedef struct xfs_bstime { __s32 tv_nsec; /* and nanoseconds */ } xfs_bstime_t; -typedef struct xfs_bstat { +struct xfs_bstat { __u64 bs_ino; /* inode number */ __u16 bs_mode; /* type and mode */ __u16 bs_nlink; /* number of links */ @@ -356,7 +356,53 @@ typedef struct xfs_bstat { __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ -} xfs_bstat_t; +}; + +/* New bulkstat structure that reports v5 features and fixes padding issues */ +struct xfs_bulkstat { + uint64_t bs_ino; /* inode number */ + uint64_t bs_size; /* file size */ + + uint64_t bs_blocks; /* number of blocks */ + uint64_t bs_xflags; /* extended flags */ + + uint64_t bs_atime; /* access time, seconds */ + uint64_t bs_mtime; /* modify time, seconds */ + + uint64_t bs_ctime; /* inode change time, seconds */ + uint64_t bs_btime; /* creation time, seconds */ + + uint32_t bs_gen; /* generation count */ + uint32_t bs_uid; /* user id */ + uint32_t bs_gid; /* group id */ + uint32_t bs_projectid; /* project id */ + + uint32_t bs_atime_nsec; /* access time, nanoseconds */ + uint32_t bs_mtime_nsec; /* modify time, nanoseconds */ + uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */ + uint32_t bs_btime_nsec; /* creation time, nanoseconds */ + + uint32_t bs_blksize; /* block size */ + uint32_t bs_rdev; /* device value */ + uint32_t bs_cowextsize_blks; /* cow extent size hint, blocks */ + uint32_t bs_extsize_blks; /* extent size hint, blocks */ + + uint32_t bs_nlink; /* number of links */ + uint32_t bs_extents; /* number of extents */ + uint32_t bs_aextents; /* attribute number of extents */ + uint16_t bs_version; /* structure version */ + uint16_t bs_forkoff; /* inode fork offset in bytes */ + + uint16_t bs_sick; /* sick inode metadata */ + uint16_t bs_checked; /* checked inode metadata */ + uint16_t bs_mode; /* type and mode */ + uint16_t bs_pad2; /* zeroed */ + + uint64_t bs_pad[7]; /* zeroed */ +}; + +#define XFS_BULKSTAT_VERSION_V1 (1) +#define XFS_BULKSTAT_VERSION_V5 (5) /* bs_sick flags */ #define XFS_BS_SICK_INODE (1 << 0) /* inode core */ @@ -374,7 +420,7 @@ typedef struct xfs_bstat { * to retain compatibility with "old" filesystems). */ static inline uint32_t -bstat_get_projid(struct xfs_bstat *bs) +bstat_get_projid(const struct xfs_bstat *bs) { return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; } @@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs) /* * The user-level BulkStat Request interface structure. */ -typedef struct xfs_fsop_bulkreq { +struct xfs_fsop_bulkreq { __u64 __user *lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ void __user *ubuffer;/* user buffer for inode desc. */ __s32 __user *ocount; /* output count pointer */ -} xfs_fsop_bulkreq_t; - +}; /* * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). */ -typedef struct xfs_inogrp { +struct xfs_inogrp { __u64 xi_startino; /* starting inode number */ __s32 xi_alloccount; /* # bits set in allocmask */ __u64 xi_allocmask; /* mask of allocated inodes */ -} xfs_inogrp_t; +}; +/* New inumbers structure that reports v5 features and fixes padding issues */ +struct xfs_inumbers { + uint64_t xi_startino; /* starting inode number */ + uint64_t xi_allocmask; /* mask of allocated inodes */ + uint8_t xi_alloccount; /* # bits set in allocmask */ + uint8_t xi_version; /* version */ + uint8_t xi_padding[6]; /* zero */ +}; + +#define XFS_INUMBERS_VERSION_V1 (1) +#define XFS_INUMBERS_VERSION_V5 (5) + +/* Header for bulk inode requests. */ +struct xfs_bulk_ireq { + uint64_t ino; /* I/O: start with this inode */ + uint32_t flags; /* I/O: operation flags */ + uint32_t icount; /* I: count of entries in buffer */ + uint32_t ocount; /* O: count of entries filled out */ + uint32_t agno; /* I: see comment for IREQ_AGNO */ + uint64_t reserved[5]; /* must be zero */ +}; + +/* + * Only return results from the specified @agno. If @ino is zero, start + * with the first inode of @agno. + */ +#define XFS_BULK_IREQ_AGNO (1 << 0) + +/* + * Return bulkstat information for a single inode, where @ino value is a + * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_* + * values below. Not compatible with XFS_BULK_IREQ_AGNO. + */ +#define XFS_BULK_IREQ_SPECIAL (1 << 1) + +#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ + XFS_BULK_IREQ_SPECIAL) + +/* Operate on the root directory inode. */ +#define XFS_BULK_IREQ_SPECIAL_ROOT (1) + +/* + * ioctl structures for v5 bulkstat and inumbers requests + */ +struct xfs_bulkstat_req { + struct xfs_bulk_ireq hdr; + struct xfs_bulkstat bulkstat[]; +}; +#define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \ + (nr) * sizeof(struct xfs_bulkstat)) + +struct xfs_inumbers_req { + struct xfs_bulk_ireq hdr; + struct xfs_inumbers inumbers[]; +}; +#define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \ + (nr) * sizeof(struct xfs_inumbers)) /* * Error injection. @@ -529,7 +631,7 @@ typedef struct xfs_swapext xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ - xfs_bstat_t sx_stat; /* stat of target b4 copy */ + struct xfs_bstat sx_stat; /* stat of target b4 copy */ } xfs_swapext_t; /* @@ -701,6 +803,8 @@ struct xfs_scrub_metadata { #define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) #define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) +#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) +#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 49ddfeac19f2..272005ac8c88 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip) void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); -void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs); +void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); #endif /* __XFS_HEALTH_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index fe9898875097..04377ab75863 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -12,17 +12,14 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" -#include "xfs_rtalloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_icreate_item.h" @@ -31,20 +28,6 @@ #include "xfs_log.h" #include "xfs_rmap.h" - -/* - * Allocation group level functions. - */ -int -xfs_ialloc_cluster_alignment( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) - return mp->m_sb.sb_inoalignmt; - return 1; -} - /* * Lookup a record by ino in the btree given by cur. */ @@ -299,7 +282,7 @@ xfs_ialloc_inode_init( * sizes, manipulate the inodes in buffers which are multiples of the * blocks size. */ - nbufs = length / mp->m_blocks_per_cluster; + nbufs = length / M_IGEO(mp)->blocks_per_cluster; /* * Figure out what version number to use in the inodes we create. If @@ -343,9 +326,10 @@ xfs_ialloc_inode_init( * Get the block. */ d = XFS_AGB_TO_DADDR(mp, agno, agbno + - (j * mp->m_blocks_per_cluster)); + (j * M_IGEO(mp)->blocks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize * mp->m_blocks_per_cluster, + mp->m_bsize * + M_IGEO(mp)->blocks_per_cluster, XBF_UNMAPPED); if (!fbuf) return -ENOMEM; @@ -353,7 +337,7 @@ xfs_ialloc_inode_init( /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); - for (i = 0; i < mp->m_inodes_per_cluster; i++) { + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = xfs_dinode_size(version); @@ -616,24 +600,26 @@ error: * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ -STATIC int /* error code or 0 */ +STATIC int xfs_ialloc_ag_alloc( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* alloc group buffer */ - int *alloc) + struct xfs_trans *tp, + struct xfs_buf *agbp, + int *alloc) { - xfs_agi_t *agi; /* allocation group header */ - xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_agnumber_t agno; - int error; - xfs_agino_t newino; /* new first inode's number */ - xfs_agino_t newlen; /* new number of inodes */ - int isaligned = 0; /* inode allocation at stripe unit */ - /* boundary */ - uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ + struct xfs_agi *agi; + struct xfs_alloc_arg args; + xfs_agnumber_t agno; + int error; + xfs_agino_t newino; /* new first inode's number */ + xfs_agino_t newlen; /* new number of inodes */ + int isaligned = 0; /* inode allocation at stripe */ + /* unit boundary */ + /* init. to full chunk */ + uint16_t allocmask = (uint16_t) -1; struct xfs_inobt_rec_incore rec; - struct xfs_perag *pag; - int do_sparse = 0; + struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp); + int do_sparse = 0; memset(&args, 0, sizeof(args)); args.tp = tp; @@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc( #ifdef DEBUG /* randomly do sparse inode allocations */ if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) + igeo->ialloc_min_blks < igeo->ialloc_blks) do_sparse = prandom_u32() & 1; #endif @@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc( * Locking will ensure that we don't have two callers in here * at one time. */ - newlen = args.mp->m_ialloc_inos; - if (args.mp->m_maxicount && + newlen = igeo->ialloc_inos; + if (igeo->maxicount && percpu_counter_read_positive(&args.mp->m_icount) + newlen > - args.mp->m_maxicount) + igeo->maxicount) return -ENOSPC; - args.minlen = args.maxlen = args.mp->m_ialloc_blks; + args.minlen = args.maxlen = igeo->ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill @@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc( newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + - args.mp->m_ialloc_blks; + igeo->ialloc_blks; if (do_sparse) goto sparse_alloc; if (likely(newino != NULLAGINO && @@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc( * but not to use them in the actual exact allocation. */ args.alignment = 1; - args.minalignslop = args.mp->m_cluster_align - 1; + args.minalignslop = igeo->cluster_align - 1; /* Allow space for the inode btree to split. */ - args.minleft = args.mp->m_in_maxlevels - 1; + args.minleft = igeo->inobt_maxlevels - 1; if ((error = xfs_alloc_vextent(&args))) return error; @@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc( * pieces, so don't need alignment anyway. */ isaligned = 0; - if (args.mp->m_sinoalign) { + if (igeo->ialloc_align) { ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); args.alignment = args.mp->m_dalign; isaligned = 1; } else - args.alignment = args.mp->m_cluster_align; + args.alignment = igeo->cluster_align; /* * Need to figure out where to allocate the inode blocks. * Ideally they should be spaced out through the a.g. @@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc( /* * Allow space for the inode btree to split. */ - args.minleft = args.mp->m_in_maxlevels - 1; + args.minleft = igeo->inobt_maxlevels - 1; if ((error = xfs_alloc_vextent(&args))) return error; } @@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc( args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - args.alignment = args.mp->m_cluster_align; + args.alignment = igeo->cluster_align; if ((error = xfs_alloc_vextent(&args))) return error; } @@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc( * the sparse allocation length is smaller than a full chunk. */ if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && + igeo->ialloc_min_blks < igeo->ialloc_blks && args.fsbno == NULLFSBLOCK) { sparse_alloc: args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -773,7 +759,7 @@ sparse_alloc: args.alignment = args.mp->m_sb.sb_spino_align; args.prod = 1; - args.minlen = args.mp->m_ialloc_min_blks; + args.minlen = igeo->ialloc_min_blks; args.maxlen = args.minlen; /* @@ -789,7 +775,7 @@ sparse_alloc: args.min_agbno = args.mp->m_sb.sb_inoalignmt; args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, args.mp->m_sb.sb_inoalignmt) - - args.mp->m_ialloc_blks; + igeo->ialloc_blks; error = xfs_alloc_vextent(&args); if (error) @@ -1006,7 +992,7 @@ xfs_ialloc_ag_select( * space needed for alignment of inode chunks when checking the * longest contiguous free space in the AG - this prevents us * from getting ENOSPC because we have free space larger than - * m_ialloc_blks but alignment constraints prevent us from using + * ialloc_blks but alignment constraints prevent us from using * it. * * If we can't find an AG with space for full alignment slack to @@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select( * if we fail allocation due to alignment issues then it is most * likely a real ENOSPC condition. */ - ineed = mp->m_ialloc_min_blks; + ineed = M_IGEO(mp)->ialloc_min_blks; if (flags && ineed > 1) - ineed += mp->m_cluster_align; + ineed += M_IGEO(mp)->cluster_align; longest = pag->pagf_longest; if (!longest) longest = pag->pagf_flcount > 0; @@ -1703,6 +1689,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); int okalloc = 1; if (*IO_agbp) { @@ -1733,9 +1720,9 @@ xfs_dialloc( * Read rough value of mp->m_icount by percpu_counter_read_positive, * which will sacrifice the preciseness but improve the performance. */ - if (mp->m_maxicount && - percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos - > mp->m_maxicount) { + if (igeo->maxicount && + percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos + > igeo->maxicount) { noroom = 1; okalloc = 0; } @@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES); + M_IGEO(mp)->ialloc_blks, + &XFS_RMAP_OINFO_INODES); return; } @@ -2261,7 +2249,7 @@ xfs_imap_lookup( /* check that the returned record contains the required inode */ if (rec.ir_startino > agino || - rec.ir_startino + mp->m_ialloc_inos <= agino) + rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino) return -EINVAL; /* for untrusted inodes check it is allocated first */ @@ -2352,7 +2340,7 @@ xfs_imap( * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. */ - if (mp->m_blocks_per_cluster == 1) { + if (M_IGEO(mp)->blocks_per_cluster == 1) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); @@ -2368,8 +2356,8 @@ xfs_imap( * find the location. Otherwise we have to do a btree * lookup to find the location. */ - if (mp->m_inoalign_mask) { - offset_agbno = agbno & mp->m_inoalign_mask; + if (M_IGEO(mp)->inoalign_mask) { + offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { error = xfs_imap_lookup(mp, tp, agno, agino, agbno, @@ -2381,13 +2369,13 @@ xfs_imap( out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + - ((offset_agbno / mp->m_blocks_per_cluster) * - mp->m_blocks_per_cluster); + ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) * + M_IGEO(mp)->blocks_per_cluster); offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); - imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); + imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); /* @@ -2409,20 +2397,6 @@ out_map: } /* - * Compute and fill in value of m_in_maxlevels. - */ -void -xfs_ialloc_compute_maxlevels( - xfs_mount_t *mp) /* file system mount structure */ -{ - uint inodes; - - inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; - mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr, - inodes); -} - -/* * Log specified fields for the ag hdr (inode section). The growth of the agi * structure over time requires that we interpret the buffer as two logical * regions delineated by the end of the unlinked list. This is due to the size @@ -2493,7 +2467,7 @@ static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); int i; @@ -2545,7 +2519,7 @@ static void xfs_agi_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -2562,7 +2536,7 @@ static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes( *freecount = ci.freecount; return 0; } + +/* + * Initialize inode-related geometry information. + * + * Compute the inode btree min and max levels and set maxicount. + * + * Set the inode cluster size. This may still be overridden by the file + * system block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to keep a + * constant ratio of inode per cluster buffer, but only if mkfs has set the + * inode alignment value appropriately for larger cluster sizes. + * + * Then compute the inode cluster alignment information. + */ +void +xfs_ialloc_setup_geometry( + struct xfs_mount *mp) +{ + struct xfs_sb *sbp = &mp->m_sb; + struct xfs_ino_geometry *igeo = M_IGEO(mp); + uint64_t icount; + uint inodes; + + /* Compute inode btree geometry. */ + igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; + igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; + + igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, + sbp->sb_inopblock); + igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog; + + if (sbp->sb_spino_align) + igeo->ialloc_min_blks = sbp->sb_spino_align; + else + igeo->ialloc_min_blks = igeo->ialloc_blks; + + /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */ + inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; + igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, + inodes); + + /* Set the maximum inode count for this filesystem. */ + if (sbp->sb_imax_pct) { + /* + * Make sure the maximum inode count is a multiple + * of the units we allocate inodes in. + */ + icount = sbp->sb_dblocks * sbp->sb_imax_pct; + do_div(icount, 100); + do_div(icount, igeo->ialloc_blks); + igeo->maxicount = XFS_FSB_TO_INO(mp, + icount * igeo->ialloc_blks); + } else { + igeo->maxicount = 0; + } + + /* + * Compute the desired size of an inode cluster buffer size, which + * starts at 8K and (on v5 filesystems) scales up with larger inode + * sizes. + * + * Preserve the desired inode cluster size because the sparse inodes + * feature uses that desired size (not the actual size) to compute the + * sparse inode alignment. The mount code validates this value, so we + * cannot change the behavior. + */ + igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = igeo->inode_cluster_size_raw; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + igeo->inode_cluster_size_raw = new_size; + } + + /* Calculate inode cluster ratios. */ + if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize) + igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp, + igeo->inode_cluster_size_raw); + else + igeo->blocks_per_cluster = 1; + igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster); + igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster); + + /* Calculate inode cluster alignment. */ + if (xfs_sb_version_hasalign(&mp->m_sb) && + mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster) + igeo->cluster_align = mp->m_sb.sb_inoalignmt; + else + igeo->cluster_align = 1; + igeo->inoalign_mask = igeo->cluster_align - 1; + igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align); + + /* + * If we are using stripe alignment, check whether + * the stripe unit is a multiple of the inode alignment + */ + if (mp->m_dalign && igeo->inoalign_mask && + !(mp->m_dalign & igeo->inoalign_mask)) + igeo->ialloc_align = mp->m_dalign; + else + igeo->ialloc_align = 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index e936b7cc9389..323592d563d5 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -23,16 +23,6 @@ struct xfs_icluster { * sparse chunks */ }; -/* Calculate and return the number of filesystem blocks per inode cluster */ -static inline int -xfs_icluster_size_fsb( - struct xfs_mount *mp) -{ - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) - return 1; - return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; -} - /* * Make an inode pointer out of the buffer/offset. */ @@ -96,13 +86,6 @@ xfs_imap( uint flags); /* flags for inode btree lookup */ /* - * Compute and fill in value of m_in_maxlevels. - */ -void -xfs_ialloc_compute_maxlevels( - struct xfs_mount *mp); /* file system mount structure */ - -/* * Log specified fields for the ag hdr (inode section) */ void @@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask, int *stat); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); +void xfs_ialloc_setup_geometry(struct xfs_mount *mp); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 1080381ff243..b82992f795aa 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -11,14 +11,12 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_rmap.h" @@ -28,7 +26,7 @@ xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, int level) { - return cur->bc_mp->m_inobt_mnr[level != 0]; + return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0]; } STATIC struct xfs_btree_cur * @@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs( struct xfs_btree_cur *cur, int level) { - return cur->bc_mp->m_inobt_mxr[level != 0]; + return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0]; } STATIC void @@ -255,7 +253,7 @@ static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_failaddr_t fa; unsigned int level; @@ -281,10 +279,11 @@ xfs_inobt_verify( /* level verification */ level = be16_to_cpu(block->bb_level); - if (level >= mp->m_in_maxlevels) + if (level >= M_IGEO(mp)->inobt_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); + return xfs_btree_sblock_verify(bp, + M_IGEO(mp)->inobt_mxr[level != 0]); } static void @@ -546,14 +545,53 @@ xfs_inobt_max_size( xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); /* Bail out if we're uninitialized, which can happen in mkfs. */ - if (mp->m_inobt_mxr[0] == 0) + if (M_IGEO(mp)->inobt_mxr[0] == 0) return 0; - return xfs_btree_calc_size(mp->m_inobt_mnr, + /* + * The log is permanently allocated, so the space it occupies will + * never be available for the kinds of things that would require btree + * expansion. We therefore can pretend the space isn't there. + */ + if (mp->m_sb.sb_logstart && + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + agblocks -= mp->m_sb.sb_logblocks; + + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, (uint64_t)agblocks * mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK); } +/* Read AGI and create inobt cursor. */ +int +xfs_inobt_cur( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_btnum_t which, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp) +{ + struct xfs_btree_cur *cur; + int error; + + ASSERT(*agi_bpp == NULL); + ASSERT(*curpp == NULL); + + error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); + if (!cur) { + xfs_trans_brelse(tp, *agi_bpp); + *agi_bpp = NULL; + return -ENOMEM; + } + *curpp = cur; + return 0; +} + static int xfs_inobt_count_blocks( struct xfs_mount *mp, @@ -562,15 +600,14 @@ xfs_inobt_count_blocks( xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) { - struct xfs_buf *agbp; - struct xfs_btree_cur *cur; + struct xfs_buf *agbp = NULL; + struct xfs_btree_cur *cur = NULL; int error; - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); if (error) return error; - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); error = xfs_btree_count_blocks(cur, tree_blocks); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); @@ -610,5 +647,5 @@ xfs_iallocbt_calc_size( struct xfs_mount *mp, unsigned long long len) { - return xfs_btree_calc_size(mp->m_inobt_mnr, len); + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index ebdd0c6b8766..951305ecaae1 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); +int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, xfs_btnum_t btnum, + struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index bc690f2409fa..27aa3f2bc4bc 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -3,18 +3,14 @@ * Copyright (c) 2017 Christoph Hellwig. */ -#include <linux/cache.h> -#include <linux/kernel.h> -#include <linux/slab.h> #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap.h" #include "xfs_trace.h" /* diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index e021d5133ccb..28ab3c5255e1 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -10,11 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_cksum.h" #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" @@ -33,12 +31,9 @@ xfs_inobp_check( xfs_buf_t *bp) { int i; - int j; xfs_dinode_t *dip; - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - - for (i = 0; i < j; i++) { + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { xfs_alert(mp, @@ -80,7 +75,7 @@ xfs_inode_buf_verify( struct xfs_buf *bp, bool readahead) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_agnumber_t agno; int i; int ni; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index f9acf1d436f6..bf3e04018246 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -3,10 +3,10 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/log2.h> #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -19,12 +19,10 @@ #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_attr_sf.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" #include "xfs_attr_leaf.h" -#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 1b542ec11d5d..7f55eb3f3653 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -12,9 +12,7 @@ #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_trans_space.h" -#include "xfs_inode.h" #include "xfs_da_btree.h" -#include "xfs_attr_leaf.h" #include "xfs_bmap_btree.h" /* diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 542aa1475b5f..51bb9bdb0e84 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -9,7 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_btree.h" @@ -19,7 +18,6 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_refcount.h" diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 6f47ab876d90..38529dbacd55 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -12,12 +12,10 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" -#include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_rmap.h" @@ -203,7 +201,7 @@ STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; @@ -427,6 +425,15 @@ xfs_refcountbt_calc_reserves( tree_len = be32_to_cpu(agf->agf_refcount_blocks); xfs_trans_brelse(tp, agbp); + /* + * The log is permanently allocated, so the space it occupies will + * never be available for the kinds of things that would require btree + * expansion. We therefore can pretend the space isn't there. + */ + if (mp->m_sb.sb_logstart && + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + agblocks -= mp->m_sb.sb_logblocks; + *ask += xfs_refcountbt_max_size(mp, agblocks); *used += tree_len; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 8ed885507dd8..e6aeb390b2fb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -10,24 +10,17 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_btree.h" #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_extent_busy.h" -#include "xfs_bmap.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" /* * Lookup the first record less than or equal to [bno, len, owner, offset] diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 5738e11055e6..fc78efa52c94 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -9,18 +9,14 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_ag_resv.h" @@ -292,7 +288,7 @@ static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; @@ -578,6 +574,15 @@ xfs_rmapbt_calc_reserves( tree_len = be32_to_cpu(agf->agf_rmap_blocks); xfs_trans_brelse(tp, agbp); + /* + * The log is permanently allocated, so the space it occupies will + * never be available for the kinds of things that would require btree + * expansion. We therefore can pretend the space isn't there. + */ + if (mp->m_sb.sb_logstart && + XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + agblocks -= mp->m_sb.sb_logblocks; + /* Reserve 1% of the AG or enough for 1 block per record. */ *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); *used += tree_len; diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index eaaff67e9626..8ea1efc97b41 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -13,15 +13,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" -#include "xfs_icache.h" #include "xfs_rtalloc.h" diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e76a3e5d28d7..a08dd8f40346 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -10,26 +10,19 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" -#include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_health.h" /* @@ -686,7 +679,7 @@ xfs_sb_read_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); int error; @@ -752,7 +745,7 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; int error; @@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { * * Mount initialization code establishing various mount * fields from the superblock associated with the given - * mount structure + * mount structure. + * + * Inode geometry are calculated in xfs_ialloc_setup_geometry. */ void xfs_sb_mount_common( - struct xfs_mount *mp, - struct xfs_sb *sbp) + struct xfs_mount *mp, + struct xfs_sb *sbp) { mp->m_agfrotor = mp->m_agirotor = 0; mp->m_maxagi = mp->m_sb.sb_agcount; @@ -813,7 +808,6 @@ xfs_sb_mount_common( mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; @@ -823,11 +817,6 @@ xfs_sb_mount_common( mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; @@ -844,14 +833,6 @@ xfs_sb_mount_common( mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); - mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, - sbp->sb_inopblock); - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; - - if (sbp->sb_spino_align) - mp->m_ialloc_min_blks = sbp->sb_spino_align; - else - mp->m_ialloc_min_blks = mp->m_ialloc_blks; mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } @@ -939,7 +920,7 @@ xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); + struct xfs_buf *bp = xfs_trans_getsb(tp, mp); mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); @@ -1005,7 +986,7 @@ xfs_update_secondary_sbs( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), - XFS_FSS_TO_BB(mp, 1), 0); + XFS_FSS_TO_BB(mp, 1)); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based @@ -1069,7 +1050,7 @@ xfs_sync_sb_buf( if (error) return error; - bp = xfs_trans_getsb(tp, mp, 0); + bp = xfs_trans_getsb(tp, mp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); xfs_trans_set_sync(tp); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 4e909791aeac..e0641b7337b3 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -65,7 +65,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ -#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */ /* * LOWMODE is used by the allocator to activate the lowspace algorithm - when * free space is running low the extent allocator may choose to allocate an @@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); +/* Computed inode geometry for the filesystem. */ +struct xfs_ino_geometry { + /* Maximum inode count in this filesystem. */ + uint64_t maxicount; + + /* Actual inode cluster buffer size, in bytes. */ + unsigned int inode_cluster_size; + + /* + * Desired inode cluster buffer size, in bytes. This value is not + * rounded up to at least one filesystem block, which is necessary for + * the sole purpose of validating sb_spino_align. Runtime code must + * only ever use inode_cluster_size. + */ + unsigned int inode_cluster_size_raw; + + /* Inode cluster sizes, adjusted to be at least 1 fsb. */ + unsigned int inodes_per_cluster; + unsigned int blocks_per_cluster; + + /* Inode cluster alignment. */ + unsigned int cluster_align; + unsigned int cluster_align_inodes; + unsigned int inoalign_mask; /* mask sb_inoalignmt if used */ + + unsigned int inobt_mxr[2]; /* max inobt btree records */ + unsigned int inobt_mnr[2]; /* min inobt btree records */ + unsigned int inobt_maxlevels; /* max inobt btree levels. */ + + /* Size of inode allocations under normal operation. */ + unsigned int ialloc_inos; + unsigned int ialloc_blks; + + /* Minimum inode blocks for a sparse allocation. */ + unsigned int ialloc_min_blks; + + /* stripe unit inode alignment */ + unsigned int ialloc_align; + + unsigned int agino_log; /* #bits for agino in inum */ +}; + +/* Keep iterating the data structure. */ +#define XFS_ITER_CONTINUE (0) + +/* Stop iterating the data structure. */ +#define XFS_ITER_ABORT (1) + #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index a0ccc253c43d..3b8260ca7d1b 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -11,12 +11,8 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_symlink.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" @@ -90,7 +86,7 @@ static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -116,7 +112,7 @@ static void xfs_symlink_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; /* no verification of non-crc buffers */ @@ -136,7 +132,7 @@ static void xfs_symlink_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 542927321a61..a9ad90926b87 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -8,13 +8,10 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" -#include "xfs_trace.h" #include <linux/iversion.h> @@ -69,6 +66,10 @@ xfs_trans_ichgtime( inode->i_mtime = tv; if (flags & XFS_ICHGTIME_CHG) inode->i_ctime = tv; + if (flags & XFS_ICHGTIME_CREATE) { + ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; + ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec; + } } /* diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 83f4ee2afc49..d12bbd526e7c 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -15,12 +15,10 @@ #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap_btree.h" -#include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" -#include "xfs_trace.h" #define _ALLOC true #define _FREE false @@ -136,9 +134,10 @@ STATIC uint xfs_calc_inobt_res( struct xfs_mount *mp) { - return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); + return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -167,7 +166,7 @@ xfs_calc_finobt_res( * includes: * * the allocation btrees: 2 trees * (max depth - 1) * block size - * the inode chunk: m_ialloc_blks * N + * the inode chunk: m_ino_geo.ialloc_blks * N * * The size N of the inode chunk reservation depends on whether it is for * allocation or free and which type of create transaction is in use. An inode @@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res( size = XFS_FSB_TO_B(mp, 1); } - res += xfs_calc_buf_res(mp->m_ialloc_blks, size); + res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size); return res; } @@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * M_IGEO(mp)->inode_cluster_size; } /* @@ -345,7 +344,7 @@ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + M_IGEO(mp)->inode_cluster_size; } /* diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index a62fb950bef1..88221c7a04cc 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -56,9 +56,9 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - ((mp)->m_ialloc_blks + \ + (M_IGEO(mp)->ialloc_blks + \ (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ - ((mp)->m_in_maxlevels - 1))) + (M_IGEO(mp)->inobt_maxlevels - 1))) /* * Space reservation values for various transactions. @@ -94,7 +94,8 @@ #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) #define XFS_IFREE_SPACE_RES(mp) \ - (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) + (xfs_sb_version_hasfinobt(&mp->m_sb) ? \ + M_IGEO(mp)->inobt_maxlevels : 0) #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index d51acc95bc00..4f595546a639 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -7,19 +7,10 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_rmap.h" -#include "xfs_alloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" /* Find the size of the AG, in blocks. */ xfs_agblock_t @@ -87,14 +78,14 @@ xfs_agino_range( * Calculate the first inode, which will be in the first * cluster-aligned block after the AGFL. */ - bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align); + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); *first = XFS_AGB_TO_AGINO(mp, bno); /* * Calculate the last inode, which will be at the end of the * last (aligned) cluster that can be allocated in the AG. */ - bno = round_down(eoag, mp->m_cluster_align); + bno = round_down(eoag, M_IGEO(mp)->cluster_align); *last = XFS_AGB_TO_AGINO(mp, bno) - 1; } diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index adaeabdefdd3..16b09b941441 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -9,20 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Superblock */ @@ -646,7 +639,7 @@ xchk_agfl_block( xchk_agfl_block_xref(sc, agbno); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return XFS_BTREE_QUERY_RANGE_ABORT; + return XFS_ITER_ABORT; return 0; } @@ -737,7 +730,7 @@ xchk_agfl( /* Check the blocks in the AGFL. */ error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), sc->sa.agfl_bp, xchk_agfl_block, &sai); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + if (error == XFS_ITER_ABORT) { error = 0; goto out_free; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 64e31f87d490..7a1a38b636a9 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -9,22 +9,17 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 44883e9112ad..a43d1813c4ff 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -9,19 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub free space btrees. diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index dce74ec57038..1afc58bf71dd 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -9,26 +9,62 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_dir2.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/dabtree.h" -#include "scrub/trace.h" +#include "scrub/attr.h" -#include <linux/posix_acl_xattr.h> -#include <linux/xattr.h> +/* + * Allocate enough memory to hold an attr value and attr block bitmaps, + * reallocating the buffer if necessary. Buffer contents are not preserved + * across a reallocation. + */ +int +xchk_setup_xattr_buf( + struct xfs_scrub *sc, + size_t value_size, + xfs_km_flags_t flags) +{ + size_t sz; + struct xchk_xattr_buf *ab = sc->buf; + + /* + * We need enough space to read an xattr value from the file or enough + * space to hold three copies of the xattr free space bitmap. We don't + * need the buffer space for both purposes at the same time. + */ + sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); + sz = max_t(size_t, sz, value_size); + + /* + * If there's already a buffer, figure out if we need to reallocate it + * to accommodate a larger size. + */ + if (ab) { + if (sz <= ab->sz) + return 0; + kmem_free(ab); + sc->buf = NULL; + } + + /* + * Don't zero the buffer upon allocation to avoid runtime overhead. + * All users must be careful never to read uninitialized contents. + */ + ab = kmem_alloc_large(sizeof(*ab) + sz, flags); + if (!ab) + return -ENOMEM; + + ab->sz = sz; + sc->buf = ab; + return 0; +} /* Set us up to scrub an inode's extended attributes. */ int @@ -36,19 +72,18 @@ xchk_setup_xattr( struct xfs_scrub *sc, struct xfs_inode *ip) { - size_t sz; + int error; /* - * Allocate the buffer without the inode lock held. We need enough - * space to read every xattr value in the file or enough space to - * hold three copies of the xattr free space bitmap. (Not both at - * the same time.) + * We failed to get memory while checking attrs, so this time try to + * get all the memory we're ever going to need. Allocate the buffer + * without the inode lock held, which means we can sleep. */ - sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * - BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); - sc->buf = kmem_zalloc_large(sz, KM_SLEEP); - if (!sc->buf) - return -ENOMEM; + if (sc->flags & XCHK_TRY_HARDER) { + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP); + if (error) + return error; + } return xchk_setup_inode_contents(sc, ip, 0); } @@ -83,7 +118,7 @@ xchk_xattr_listent( sx = container_of(context, struct xchk_xattr, context); if (xchk_should_terminate(sx->sc, &error)) { - context->seen_enough = 1; + context->seen_enough = error; return; } @@ -99,6 +134,19 @@ xchk_xattr_listent( return; } + /* + * Try to allocate enough memory to extrat the attr value. If that + * doesn't work, we overload the seen_enough variable to convey + * the error message back to the main scrub function. + */ + error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) { + context->seen_enough = error; + return; + } + args.flags = ATTR_KERNOTIME; if (flags & XFS_ATTR_ROOT) args.flags |= ATTR_ROOT; @@ -111,8 +159,8 @@ xchk_xattr_listent( args.namelen = namelen; args.hashval = xfs_da_hashname(args.name, args.namelen); args.trans = context->tp; - args.value = sx->sc->buf; - args.valuelen = XATTR_SIZE_MAX; + args.value = xchk_xattr_valuebuf(sx->sc); + args.valuelen = valuelen; error = xfs_attr_get_ilocked(context->dp, &args); if (error == -EEXIST) @@ -125,7 +173,7 @@ xchk_xattr_listent( args.blkno); fail_xref: if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - context->seen_enough = 1; + context->seen_enough = XFS_ITER_ABORT; return; } @@ -170,13 +218,12 @@ xchk_xattr_check_freemap( unsigned long *map, struct xfs_attr3_icleaf_hdr *leafhdr) { - unsigned long *freemap; - unsigned long *dstmap; + unsigned long *freemap = xchk_xattr_freemap(sc); + unsigned long *dstmap = xchk_xattr_dstmap(sc); unsigned int mapsize = sc->mp->m_attr_geo->blksize; int i; /* Construct bitmap of freemap contents. */ - freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); bitmap_zero(freemap, mapsize); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { if (!xchk_xattr_set_map(sc, freemap, @@ -186,7 +233,6 @@ xchk_xattr_check_freemap( } /* Look for bits that are set in freemap and are marked in use. */ - dstmap = freemap + BITS_TO_LONGS(mapsize); return bitmap_and(dstmap, freemap, map, mapsize) == 0; } @@ -201,13 +247,13 @@ xchk_xattr_entry( char *buf_end, struct xfs_attr_leafblock *leaf, struct xfs_attr3_icleaf_hdr *leafhdr, - unsigned long *usedmap, struct xfs_attr_leaf_entry *ent, int idx, unsigned int *usedbytes, __u32 *last_hashval) { struct xfs_mount *mp = ds->state->mp; + unsigned long *usedmap = xchk_xattr_usedmap(ds->sc); char *name_end; struct xfs_attr_leaf_name_local *lentry; struct xfs_attr_leaf_name_remote *rentry; @@ -267,16 +313,26 @@ xchk_xattr_block( struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *ent; struct xfs_attr_leaf_entry *entries; - unsigned long *usedmap = ds->sc->buf; + unsigned long *usedmap; char *buf_end; size_t off; __u32 last_hashval = 0; unsigned int usedbytes = 0; unsigned int hdrsize; int i; + int error; if (*last_checked == blk->blkno) return 0; + + /* Allocate memory for block usage checking. */ + error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + usedmap = xchk_xattr_usedmap(ds->sc); + *last_checked = blk->blkno; bitmap_zero(usedmap, mp->m_attr_geo->blksize); @@ -324,7 +380,7 @@ xchk_xattr_block( /* Check the entry and nameval. */ xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, - usedmap, ent, i, &usedbytes, &last_hashval); + ent, i, &usedbytes, &last_hashval); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -464,6 +520,10 @@ xchk_xattr( error = xfs_attr_list_int_ilocked(&sx.context); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) goto out; + + /* Did our listent function try to return any errors? */ + if (sx.context.seen_enough < 0) + error = sx.context.seen_enough; out: return error; } diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h new file mode 100644 index 000000000000..13a1d2e8424d --- /dev/null +++ b/fs/xfs/scrub/attr.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_SCRUB_ATTR_H__ +#define __XFS_SCRUB_ATTR_H__ + +/* + * Temporary storage for online scrub and repair of extended attributes. + */ +struct xchk_xattr_buf { + /* Size of @buf, in bytes. */ + size_t sz; + + /* + * Memory buffer -- either used for extracting attr values while + * walking the attributes; or for computing attr block bitmaps when + * checking the attribute tree. + * + * Each bitmap contains enough bits to track every byte in an attr + * block (rounded up to the size of an unsigned long). The attr block + * used space bitmap starts at the beginning of the buffer; the free + * space bitmap follows immediately after; and we have a third buffer + * for storing intermediate bitmap results. + */ + uint8_t buf[0]; +}; + +/* A place to store attribute values. */ +static inline uint8_t * +xchk_xattr_valuebuf( + struct xfs_scrub *sc) +{ + struct xchk_xattr_buf *ab = sc->buf; + + return ab->buf; +} + +/* A bitmap of space usage computed by walking an attr leaf block. */ +static inline unsigned long * +xchk_xattr_usedmap( + struct xfs_scrub *sc) +{ + struct xchk_xattr_buf *ab = sc->buf; + + return (unsigned long *)ab->buf; +} + +/* A bitmap of free space computed by walking attr leaf block free info. */ +static inline unsigned long * +xchk_xattr_freemap( + struct xfs_scrub *sc) +{ + return xchk_xattr_usedmap(sc) + + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); +} + +/* A bitmap used to hold temporary results. */ +static inline unsigned long * +xchk_xattr_dstmap( + struct xfs_scrub *sc) +{ + return xchk_xattr_freemap(sc) + + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); +} + +int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size, + xfs_km_flags_t flags); + +#endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index fdadc9e1dc49..3d47d111be5a 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -10,11 +10,6 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_btree.h" -#include "scrub/xfs_scrub.h" -#include "scrub/scrub.h" -#include "scrub/common.h" -#include "scrub/trace.h" -#include "scrub/repair.h" #include "scrub/bitmap.h" /* diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index a703cd58a90e..1bd29fdc2ab5 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -9,27 +9,19 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_alloc.h" -#include "xfs_rtalloc.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* Set us up with an inode's bmap. */ int diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 117910db51b8..f52a7b8256f9 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -9,14 +9,7 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 973aa59975e3..18876056e5e0 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -9,22 +9,16 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" -#include "xfs_itable.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" @@ -32,11 +26,9 @@ #include "xfs_trans_priv.h" #include "xfs_attr.h" #include "xfs_reflink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/btree.h" #include "scrub/repair.h" #include "scrub/health.h" diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 90527b094878..94c4f1de1922 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -9,20 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_attr_leaf.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index a38a22785a1a..1e2e11721eb9 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -9,24 +9,14 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" -#include "xfs_itable.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" #include "scrub/dabtree.h" /* Set us up to scrub directories. */ diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 07c11e3e6437..fc3f510c9034 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -9,22 +9,10 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" -#include "xfs_rmap.h" -#include "xfs_error.h" -#include "xfs_errortag.h" -#include "xfs_icache.h" #include "xfs_health.h" -#include "xfs_bmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 23cf8e2f25db..b2f602811e9d 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -7,18 +7,10 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_health.h" #include "scrub/scrub.h" -#include "scrub/health.h" /* * Scrub and In-Core Filesystem Health Assessments diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 693eb51f5efb..681758704fda 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -9,21 +9,14 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_icache.h" #include "xfs_rmap.h" -#include "xfs_log.h" -#include "xfs_trans_priv.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" @@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster( int error = 0; nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, - mp->m_inodes_per_cluster); + M_IGEO(mp)->inodes_per_cluster); /* Map this inode cluster */ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); @@ -251,8 +244,9 @@ xchk_iallocbt_check_cluster( */ ir_holemask = (irec->ir_holemask & cluster_mask); imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); - imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); - imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino); + imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); + imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << + mp->m_sb.sb_inodelog; if (imap.im_boffset != 0 && cluster_base != 0) { ASSERT(imap.im_boffset == 0 || cluster_base == 0); @@ -275,12 +269,12 @@ xchk_iallocbt_check_cluster( /* If any part of this is a hole, skip it. */ if (ir_holemask) { xchk_xref_is_not_owned_by(bs->sc, agbno, - mp->m_blocks_per_cluster, + M_IGEO(mp)->blocks_per_cluster, &XFS_RMAP_OINFO_INODES); return 0; } - xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, + xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster, &XFS_RMAP_OINFO_INODES); /* Grab the inode cluster buffer. */ @@ -332,7 +326,7 @@ xchk_iallocbt_check_clusters( */ for (cluster_base = 0; cluster_base < XFS_INODES_PER_CHUNK; - cluster_base += bs->sc->mp->m_inodes_per_cluster) { + cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) { error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); if (error) break; @@ -354,6 +348,7 @@ xchk_iallocbt_rec_alignment( { struct xfs_mount *mp = bs->sc->mp; struct xchk_iallocbt *iabt = bs->private; + struct xfs_ino_geometry *igeo = M_IGEO(mp); /* * finobt records have different positioning requirements than inobt @@ -371,7 +366,7 @@ xchk_iallocbt_rec_alignment( unsigned int imask; imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, - mp->m_cluster_align_inodes) - 1; + igeo->cluster_align_inodes) - 1; if (irec->ir_startino & imask) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; @@ -399,17 +394,17 @@ xchk_iallocbt_rec_alignment( } /* inobt records must be aligned to cluster and inoalignmnt size. */ - if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) { + if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; } - if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) { + if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; } - if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK) + if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK) return; /* @@ -418,7 +413,7 @@ xchk_iallocbt_rec_alignment( * after this one. */ iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; - iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster; + iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster; } /* Scrub an inobt/finobt record. */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index e213efc194a1..6d483ab29e63 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -9,27 +9,17 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" -#include "xfs_inode_buf.h" -#include "xfs_inode_fork.h" #include "xfs_ialloc.h" #include "xfs_da_format.h" #include "xfs_reflink.h" #include "xfs_rmap.h" -#include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Grab total control of the inode metadata. It doesn't matter here if diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index d5d197f1b80f..c962bd534690 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -9,21 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up to scrub parents. */ int diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 5dfe2b5924db..0a33b4421c32 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -9,24 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_qm.h" -#include "xfs_dquot.h" -#include "xfs_dquot_item.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Convert a scrub type code to a DQ flag, or return 0 if error. */ static inline uint @@ -144,7 +133,7 @@ xchk_quota_item( if (bsoft > bhard) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - if (ihard > mp->m_maxicount) + if (ihard > M_IGEO(mp)->maxicount) xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (isoft > ihard) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 708b4158eb90..93b3793bc5b3 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -7,22 +7,12 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub reference count btrees. diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index eb358f0f5e0a..4cfeec57fb05 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -9,29 +9,21 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_extent_busy.h" #include "xfs_ag_resv.h" -#include "xfs_trans_space.h" #include "xfs_quota.h" -#include "xfs_attr.h" -#include "xfs_reflink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -357,7 +349,7 @@ xrep_init_btblock( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0); + xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(tp, bp, 0, bp->b_length); bp->b_ops = ops; @@ -672,7 +664,7 @@ xrep_findroot_agfl_walk( { xfs_agblock_t *agbno = priv; - return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; + return (*agbno == bno) ? XFS_ITER_ABORT : 0; } /* Does this block match the btree information passed in? */ @@ -702,7 +694,7 @@ xrep_findroot_block( if (owner == XFS_RMAP_OWN_AG) { error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, xrep_findroot_agfl_walk, &agbno); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error == XFS_ITER_ABORT) return 0; if (error) return error; diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 92a140c5b55e..8d4cefd761c1 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -9,21 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" #include "xfs_rmap.h" #include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub reverse mapping btrees. diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index dbe115b075f7..c642bc206c41 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -9,19 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_inode.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up with the realtime metadata locked. */ int diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index f630389ee176..15c8c5f3f688 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -9,36 +9,16 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" -#include "xfs_itable.h" -#include "xfs_alloc.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" -#include "xfs_refcount.h" -#include "xfs_refcount_btree.h" -#include "xfs_rmap.h" -#include "xfs_rmap_btree.h" #include "xfs_quota.h" #include "xfs_qm.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_log.h" -#include "xfs_trans_priv.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/btree.h" #include "scrub/repair.h" #include "scrub/health.h" diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index f7ebaa946999..99c0b1234c3c 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -9,19 +9,11 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_symlink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up to scrub a symbolic link. */ int diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 96feaf8dcdec..9eaab2eb5ed3 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -10,15 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_trans.h" -#include "xfs_bit.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" -#include "scrub/common.h" /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8039e35147dd..cbda40d40326 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -4,16 +4,14 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_trace.h" -#include <linux/slab.h> -#include <linux/xattr.h> #include <linux/posix_acl_xattr.h> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a6f0f4761a37..f16d5f196c6b 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -12,16 +12,11 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" #include "xfs_reflink.h" -#include <linux/writeback.h> /* * structure owned by writepages passed to individual writepage calls @@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc( struct xfs_trans *tp; int error; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, - XFS_TRANS_NOFS, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); if (error) return error; @@ -240,9 +234,17 @@ xfs_end_ioend( struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; + unsigned int nofs_flag; int error; /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + + /* * Just clean up the in-memory strutures if the fs has been shut down. */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { @@ -282,6 +284,8 @@ done: list_del_init(&ioend->io_list); xfs_destroy_ioend(ioend, error); } + + memalloc_nofs_restore(nofs_flag); } /* @@ -290,13 +294,9 @@ done: static bool xfs_ioend_can_merge( struct xfs_ioend *ioend, - int ioend_error, struct xfs_ioend *next) { - int next_error; - - next_error = blk_status_to_errno(next->io_bio->bi_status); - if (ioend_error != next_error) + if (ioend->io_bio->bi_status != next->io_bio->bi_status) return false; if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) return false; @@ -305,11 +305,28 @@ xfs_ioend_can_merge( return false; if (ioend->io_offset + ioend->io_size != next->io_offset) return false; - if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next)) - return false; return true; } +/* + * If the to be merged ioend has a preallocated transaction for file + * size updates we need to ensure the ioend it is merged into also + * has one. If it already has one we can simply cancel the transaction + * as it is guaranteed to be clean. + */ +static void +xfs_ioend_merge_append_transactions( + struct xfs_ioend *ioend, + struct xfs_ioend *next) +{ + if (!ioend->io_append_trans) { + ioend->io_append_trans = next->io_append_trans; + next->io_append_trans = NULL; + } else { + xfs_setfilesize_ioend(next, -ECANCELED); + } +} + /* Try to merge adjacent completions. */ STATIC void xfs_ioend_try_merge( @@ -317,25 +334,16 @@ xfs_ioend_try_merge( struct list_head *more_ioends) { struct xfs_ioend *next_ioend; - int ioend_error; - int error; - - if (list_empty(more_ioends)) - return; - - ioend_error = blk_status_to_errno(ioend->io_bio->bi_status); while (!list_empty(more_ioends)) { next_ioend = list_first_entry(more_ioends, struct xfs_ioend, io_list); - if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend)) + if (!xfs_ioend_can_merge(ioend, next_ioend)) break; list_move_tail(&next_ioend->io_list, &ioend->io_list); ioend->io_size += next_ioend->io_size; - if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(next_ioend, 1); - ASSERT(error == 1); - } + if (next_ioend->io_append_trans) + xfs_ioend_merge_append_transactions(ioend, next_ioend); } } @@ -626,7 +634,7 @@ allocate_blocks: * reference to the ioend to ensure that the ioend completion is only done once * all bios have been submitted and the ioend is really done. * - * If @fail is non-zero, it means that we have a situation where some part of + * If @status is non-zero, it means that we have a situation where some part of * the submission process has failed after we have marked paged for writeback * and unlocked them. In this situation, we need to fail the bio and ioend * rather than submit it to IO. This typically only happens on a filesystem @@ -638,21 +646,19 @@ xfs_submit_ioend( struct xfs_ioend *ioend, int status) { + unsigned int nofs_flag; + + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + /* Convert CoW extents to regular */ if (!status && ioend->io_fork == XFS_COW_FORK) { - /* - * Yuk. This can do memory allocation, but is not a - * transactional operation so everything is done in GFP_KERNEL - * context. That can deadlock, because we hold pages in - * writeback state and GFP_KERNEL allocations can block on them. - * Hence we must operate in nofs conditions here. - */ - unsigned nofs_flag; - - nofs_flag = memalloc_nofs_save(); status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); - memalloc_nofs_restore(nofs_flag); } /* Reserve log space if we might write beyond the on-disk inode size. */ @@ -663,9 +669,10 @@ xfs_submit_ioend( !ioend->io_append_trans) status = xfs_setfilesize_trans_alloc(ioend); + memalloc_nofs_restore(nofs_flag); + ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); /* * If we are failing the IO now, just mark the ioend with an @@ -679,7 +686,6 @@ xfs_submit_ioend( return status; } - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); return 0; } @@ -691,7 +697,8 @@ xfs_alloc_ioend( xfs_exntst_t state, xfs_off_t offset, struct block_device *bdev, - sector_t sector) + sector_t sector, + struct writeback_control *wbc) { struct xfs_ioend *ioend; struct bio *bio; @@ -699,6 +706,9 @@ xfs_alloc_ioend( bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = sector; + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); + bio->bi_write_hint = inode->i_write_hint; + wbc_init_bio(wbc, bio); ioend = container_of(bio, struct xfs_ioend, io_inline_bio); INIT_LIST_HEAD(&ioend->io_list); @@ -719,24 +729,22 @@ xfs_alloc_ioend( * so that the bi_private linkage is set up in the right direction for the * traversal in xfs_destroy_ioend(). */ -static void +static struct bio * xfs_chain_bio( - struct xfs_ioend *ioend, - struct writeback_control *wbc, - struct block_device *bdev, - sector_t sector) + struct bio *prev) { struct bio *new; new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); - bio_set_dev(new, bdev); - new->bi_iter.bi_sector = sector; - bio_chain(ioend->io_bio, new); - bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; - submit_bio(ioend->io_bio); - ioend->io_bio = new; + bio_copy_dev(new, prev);/* also copies over blkcg information */ + new->bi_iter.bi_sector = bio_end_sector(prev); + new->bi_opf = prev->bi_opf; + new->bi_write_hint = prev->bi_write_hint; + + bio_chain(prev, new); + bio_get(prev); /* for xfs_destroy_ioend */ + submit_bio(prev); + return new; } /* @@ -758,6 +766,7 @@ xfs_add_to_ioend( struct block_device *bdev = xfs_find_bdev_for_inode(inode); unsigned len = i_blocksize(inode); unsigned poff = offset & (PAGE_SIZE - 1); + bool merged, same_page = false; sector_t sector; sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) + @@ -771,18 +780,23 @@ xfs_add_to_ioend( if (wpc->ioend) list_add(&wpc->ioend->io_list, iolist); wpc->ioend = xfs_alloc_ioend(inode, wpc->fork, - wpc->imap.br_state, offset, bdev, sector); + wpc->imap.br_state, offset, bdev, sector, wbc); } - if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) { - if (iop) - atomic_inc(&iop->write_count); - if (bio_full(wpc->ioend->io_bio)) - xfs_chain_bio(wpc->ioend, wbc, bdev, sector); + merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, + &same_page); + + if (iop && !same_page) + atomic_inc(&iop->write_count); + + if (!merged) { + if (bio_full(wpc->ioend->io_bio, len)) + wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio); bio_add_page(wpc->ioend->io_bio, page, len, poff); } wpc->ioend->io_size += len; + wbc_account_cgroup_owner(wbc, page, len); } STATIC void diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f62b03186c62..45a1ea240cbb 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops; int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); -extern void xfs_count_page_state(struct page *, int *, int *); extern struct block_device *xfs_find_bdev_for_inode(struct inode *); extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 228821b2ebe0..dc93c51c17de 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -15,18 +15,13 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_attr_remote.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_trace.h" #include "xfs_dir2.h" -#include "xfs_defer.h" /* * Look at all the extents for this logical region, @@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive( int size; int tmp; int i; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 3d213a7394c5..58fc820a70c6 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -6,25 +6,20 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_dir2.h" STATIC int diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c new file mode 100644 index 000000000000..e2148f2d5d6b --- /dev/null +++ b/fs/xfs/xfs_bio_io.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Christoph Hellwig. + */ +#include "xfs.h" + +static inline unsigned int bio_max_vecs(unsigned int count) +{ + return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); +} + +int +xfs_rw_bdev( + struct block_device *bdev, + sector_t sector, + unsigned int count, + char *data, + unsigned int op) + +{ + unsigned int is_vmalloc = is_vmalloc_addr(data); + unsigned int left = count; + int error; + struct bio *bio; + + if (is_vmalloc && op == REQ_OP_WRITE) + flush_kernel_vmap_range(data, count); + + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = sector; + bio->bi_opf = op | REQ_META | REQ_SYNC; + + do { + struct page *page = kmem_to_page(data); + unsigned int off = offset_in_page(data); + unsigned int len = min_t(unsigned, left, PAGE_SIZE - off); + + while (bio_add_page(bio, page, len, off) != len) { + struct bio *prev = bio; + + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); + bio_copy_dev(bio, prev); + bio->bi_iter.bi_sector = bio_end_sector(prev); + bio->bi_opf = prev->bi_opf; + bio_chain(prev, bio); + + submit_bio(prev); + } + + data += len; + left -= len; + } while (left > 0); + + error = submit_bio_wait(bio); + bio_put(bio); + + if (is_vmalloc && op == REQ_OP_READ) + invalidate_kernel_vmap_range(data, count); + return error; +} diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index ce45f066995e..9fa4a7ee8cfc 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -9,17 +9,16 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_bmap_item.h" #include "xfs_log.h" #include "xfs_bmap.h" #include "xfs_icache.h" -#include "xfs_trace.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" @@ -96,15 +95,6 @@ xfs_bui_item_format( } /* - * Pinning has no meaning for an bui item, so just return. - */ -STATIC void -xfs_bui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an BUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the BUI transaction has been successfully committed to make it @@ -123,71 +113,22 @@ xfs_bui_item_unpin( } /* - * BUI items have no locking or pushing. However, since BUIs are pulled from - * the AIL when their corresponding BUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the BUI out of - * the AIL. - */ -STATIC uint -xfs_bui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The BUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an BUD isn't going to be * constructed and thus we free the BUI here directly. */ STATIC void -xfs_bui_item_unlock( +xfs_bui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_bui_release(BUI_ITEM(lip)); -} - -/* - * The BUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_bui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; + xfs_bui_release(BUI_ITEM(lip)); } -/* - * The BUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_bui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all bui log items. - */ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_size = xfs_bui_item_size, .iop_format = xfs_bui_item_format, - .iop_pin = xfs_bui_item_pin, .iop_unpin = xfs_bui_item_unpin, - .iop_unlock = xfs_bui_item_unlock, - .iop_committed = xfs_bui_item_committed, - .iop_push = xfs_bui_item_push, - .iop_committing = xfs_bui_item_committing, + .iop_release = xfs_bui_item_release, }; /* @@ -249,126 +190,241 @@ xfs_bud_item_format( } /* - * Pinning has no meaning for an bud item, so just return. + * The BUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the BUI and free the + * BUD. */ STATIC void -xfs_bud_item_pin( +xfs_bud_item_release( struct xfs_log_item *lip) { + struct xfs_bud_log_item *budp = BUD_ITEM(lip); + + xfs_bui_release(budp->bud_buip); + kmem_zone_free(xfs_bud_zone, budp); } -/* - * Since pinning has no meaning for an bud item, unpinning does - * not either. - */ -STATIC void -xfs_bud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_bud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_bud_item_size, + .iop_format = xfs_bud_item_format, + .iop_release = xfs_bud_item_release, +}; + +static struct xfs_bud_log_item * +xfs_trans_get_bud( + struct xfs_trans *tp, + struct xfs_bui_log_item *buip) { + struct xfs_bud_log_item *budp; + + budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, + &xfs_bud_item_ops); + budp->bud_buip = buip; + budp->bud_format.bud_bui_id = buip->bui_format.bui_id; + + xfs_trans_add_item(tp, &budp->bud_item); + return budp; } /* - * There isn't much you can do to push on an bud item. It is simply stuck - * waiting for the log to be flushed to disk. + * Finish an bmap update and log it to the BUD. Note that the + * transaction is marked dirty regardless of whether the bmap update + * succeeds or fails to support the BUI/BUD lifecycle rules. */ -STATIC uint -xfs_bud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_log_finish_bmap_update( + struct xfs_trans *tp, + struct xfs_bud_log_item *budp, + enum xfs_bmap_intent_type type, + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t *blockcount, + xfs_exntst_t state) { - return XFS_ITEM_PINNED; + int error; + + error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, + startblock, blockcount, state); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the BUI and frees the BUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); + + return error; } -/* - * The BUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the BUI and free the - * BUD. - */ -STATIC void -xfs_bud_item_unlock( - struct xfs_log_item *lip) +/* Sort bmap intents by inode. */ +static int +xfs_bmap_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_bud_log_item *budp = BUD_ITEM(lip); + struct xfs_bmap_intent *ba; + struct xfs_bmap_intent *bb; - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_bui_release(budp->bud_buip); - kmem_zone_free(xfs_bud_zone, budp); - } + ba = container_of(a, struct xfs_bmap_intent, bi_list); + bb = container_of(b, struct xfs_bmap_intent, bi_list); + return ba->bi_owner->i_ino - bb->bi_owner->i_ino; } -/* - * When the bud item is committed to disk, all we need to do is delete our - * reference to our partner bui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_bud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Get an BUI. */ +STATIC void * +xfs_bmap_update_create_intent( + struct xfs_trans *tp, + unsigned int count) { - struct xfs_bud_log_item *budp = BUD_ITEM(lip); + struct xfs_bui_log_item *buip; + + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); + ASSERT(tp != NULL); + + buip = xfs_bui_init(tp->t_mountp); + ASSERT(buip != NULL); /* - * Drop the BUI reference regardless of whether the BUD has been - * aborted. Once the BUD transaction is constructed, it is the sole - * responsibility of the BUD to release the BUI (even if the BUI is - * aborted due to log I/O error). + * Get a log_item_desc to point at the new item. */ - xfs_bui_release(budp->bud_buip); - kmem_zone_free(xfs_bud_zone, budp); + xfs_trans_add_item(tp, &buip->bui_item); + return buip; +} - return (xfs_lsn_t)-1; +/* Set the map extent flags for this mapping. */ +static void +xfs_trans_set_bmap_flags( + struct xfs_map_extent *bmap, + enum xfs_bmap_intent_type type, + int whichfork, + xfs_exntst_t state) +{ + bmap->me_flags = 0; + switch (type) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + bmap->me_flags = type; + break; + default: + ASSERT(0); + } + if (state == XFS_EXT_UNWRITTEN) + bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; + if (whichfork == XFS_ATTR_FORK) + bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; } -/* - * The BUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Log bmap updates in the intent item. */ STATIC void -xfs_bud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_bmap_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { + struct xfs_bui_log_item *buip = intent; + struct xfs_bmap_intent *bmap; + uint next_extent; + struct xfs_map_extent *map; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; + ASSERT(next_extent < buip->bui_format.bui_nextents); + map = &buip->bui_format.bui_extents[next_extent]; + map->me_owner = bmap->bi_owner->i_ino; + map->me_startblock = bmap->bi_bmap.br_startblock; + map->me_startoff = bmap->bi_bmap.br_startoff; + map->me_len = bmap->bi_bmap.br_blockcount; + xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, + bmap->bi_bmap.br_state); } -/* - * This is the ops vector shared by all bud log items. - */ -static const struct xfs_item_ops xfs_bud_item_ops = { - .iop_size = xfs_bud_item_size, - .iop_format = xfs_bud_item_format, - .iop_pin = xfs_bud_item_pin, - .iop_unpin = xfs_bud_item_unpin, - .iop_unlock = xfs_bud_item_unlock, - .iop_committed = xfs_bud_item_committed, - .iop_push = xfs_bud_item_push, - .iop_committing = xfs_bud_item_committing, -}; +/* Get an BUD so we can process all the deferred rmap updates. */ +STATIC void * +xfs_bmap_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_bud(tp, intent); +} -/* - * Allocate and initialize an bud item with the given number of extents. - */ -struct xfs_bud_log_item * -xfs_bud_init( - struct xfs_mount *mp, - struct xfs_bui_log_item *buip) +/* Process a deferred rmap update. */ +STATIC int +xfs_bmap_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_bmap_intent *bmap; + xfs_filblks_t count; + int error; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + count = bmap->bi_bmap.br_blockcount; + error = xfs_trans_log_finish_bmap_update(tp, done_item, + bmap->bi_type, + bmap->bi_owner, bmap->bi_whichfork, + bmap->bi_bmap.br_startoff, + bmap->bi_bmap.br_startblock, + &count, + bmap->bi_bmap.br_state); + if (!error && count > 0) { + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); + bmap->bi_bmap.br_blockcount = count; + return -EAGAIN; + } + kmem_free(bmap); + return error; +} +/* Abort all pending BUIs. */ +STATIC void +xfs_bmap_update_abort_intent( + void *intent) { - struct xfs_bud_log_item *budp; + xfs_bui_release(intent); +} - budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); - xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); - budp->bud_buip = buip; - budp->bud_format.bud_bui_id = buip->bui_format.bui_id; +/* Cancel a deferred rmap update. */ +STATIC void +xfs_bmap_update_cancel_item( + struct list_head *item) +{ + struct xfs_bmap_intent *bmap; - return budp; + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + kmem_free(bmap); } +const struct xfs_defer_op_type xfs_bmap_update_defer_type = { + .max_items = XFS_BUI_MAX_FAST_EXTENTS, + .diff_items = xfs_bmap_update_diff_items, + .create_intent = xfs_bmap_update_create_intent, + .abort_intent = xfs_bmap_update_abort_intent, + .log_item = xfs_bmap_update_log_item, + .create_done = xfs_bmap_update_create_done, + .finish_item = xfs_bmap_update_finish_item, + .cancel_item = xfs_bmap_update_cancel_item, +}; + /* * Process a bmap update intent item that was recovered from the log. * We need to update some inode's bmbt. diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index 89e043a88bb8..ad479cc73de8 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -75,8 +75,6 @@ extern struct kmem_zone *xfs_bui_zone; extern struct kmem_zone *xfs_bud_zone; struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); -struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, - struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 06d07f1e310b..98c6a7a71427 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -12,12 +12,10 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" -#include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -28,11 +26,8 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_log.h" -#include "xfs_rmap_btree.h" #include "xfs_iomap.h" #include "xfs_reflink.h" -#include "xfs_refcount.h" /* Kernel only BMAP related definitions and functions */ @@ -276,7 +271,7 @@ xfs_bmap_count_tree( struct xfs_btree_block *block, *nextblock; int numrecs; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; @@ -287,7 +282,7 @@ xfs_bmap_count_tree( /* Not at node above leaves, count this level of nodes */ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { - error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, + error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -321,7 +316,7 @@ xfs_bmap_count_tree( if (nextbno == NULLFSBLOCK) break; bno = nextbno; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 548344e25128..ca0849043f54 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -4,24 +4,9 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/stddef.h> -#include <linux/errno.h> -#include <linux/gfp.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/vmalloc.h> -#include <linux/bio.h> -#include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/workqueue.h> -#include <linux/percpu.h> -#include <linux/blkdev.h> -#include <linux/hash.h> -#include <linux/kthread.h> -#include <linux/migrate.h> #include <linux/backing-dev.h> -#include <linux/freezer.h> +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -213,7 +198,7 @@ xfs_buf_free_maps( } } -struct xfs_buf * +static struct xfs_buf * _xfs_buf_alloc( struct xfs_buftarg *target, struct xfs_buf_map *map, @@ -243,6 +228,7 @@ _xfs_buf_alloc( sema_init(&bp->b_sema, 0); /* held, no waiters */ spin_lock_init(&bp->b_lock); bp->b_target = target; + bp->b_mount = target->bt_mount; bp->b_flags = flags; /* @@ -263,12 +249,11 @@ _xfs_buf_alloc( bp->b_maps[i].bm_len = map[i].bm_len; bp->b_length += map[i].bm_len; } - bp->b_io_length = bp->b_length; atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); - XFS_STATS_INC(target->bt_mount, xb_create); + XFS_STATS_INC(bp->b_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); return bp; @@ -425,12 +410,12 @@ retry: current->comm, current->pid, __func__, gfp_mask); - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); + XFS_STATS_INC(bp->b_mount, xb_page_retries); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); + XFS_STATS_INC(bp->b_mount, xb_page_found); nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; @@ -909,83 +894,6 @@ xfs_buf_read_uncached( return 0; } -/* - * Return a buffer allocated as an empty buffer and associated to external - * memory via xfs_buf_associate_memory() back to it's empty state. - */ -void -xfs_buf_set_empty( - struct xfs_buf *bp, - size_t numblks) -{ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_page_count = 0; - bp->b_addr = NULL; - bp->b_length = numblks; - bp->b_io_length = numblks; - - ASSERT(bp->b_map_count == 1); - bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; - bp->b_maps[0].bm_len = bp->b_length; -} - -static inline struct page * -mem_to_page( - void *addr) -{ - if ((!is_vmalloc_addr(addr))) { - return virt_to_page(addr); - } else { - return vmalloc_to_page(addr); - } -} - -int -xfs_buf_associate_memory( - xfs_buf_t *bp, - void *mem, - size_t len) -{ - int rval; - int i = 0; - unsigned long pageaddr; - unsigned long offset; - size_t buflen; - int page_count; - - pageaddr = (unsigned long)mem & PAGE_MASK; - offset = (unsigned long)mem - pageaddr; - buflen = PAGE_ALIGN(len + offset); - page_count = buflen >> PAGE_SHIFT; - - /* Free any previous set of page pointers */ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_addr = mem; - - rval = _xfs_buf_get_pages(bp, page_count); - if (rval) - return rval; - - bp->b_offset = offset; - - for (i = 0; i < bp->b_page_count; i++) { - bp->b_pages[i] = mem_to_page((void *)pageaddr); - pageaddr += PAGE_SIZE; - } - - bp->b_io_length = BTOBB(len); - bp->b_length = BTOBB(buflen); - - return 0; -} - xfs_buf_t * xfs_buf_get_uncached( struct xfs_buftarg *target, @@ -1180,7 +1088,7 @@ xfs_buf_lock( trace_xfs_buf_lock(bp, _RET_IP_); if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); + xfs_log_force(bp->b_mount, 0); down(&bp->b_sema); trace_xfs_buf_lock_done(bp, _RET_IP_); @@ -1269,7 +1177,7 @@ xfs_buf_ioend_async( struct xfs_buf *bp) { INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_ioend_wq, &bp->b_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); } void @@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert( struct xfs_buf *bp, const char *func) { - xfs_alert(bp->b_target->bt_mount, + xfs_alert(bp->b_mount, "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, -bp->b_error); @@ -1307,10 +1215,8 @@ xfs_bwrite( XBF_WRITE_FAIL | XBF_DONE); error = xfs_buf_submit(bp); - if (error) { - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); - } + if (error) + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); return error; } @@ -1436,21 +1342,8 @@ _xfs_buf_ioapply( */ bp->b_error = 0; - /* - * Initialize the I/O completion workqueue if we haven't yet or the - * submitter has not opted to specify a custom one. - */ - if (!bp->b_ioend_wq) - bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; - if (bp->b_flags & XBF_WRITE) { op = REQ_OP_WRITE; - if (bp->b_flags & XBF_SYNCIO) - op_flags = REQ_SYNC; - if (bp->b_flags & XBF_FUA) - op_flags |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - op_flags |= REQ_PREFLUSH; /* * Run the write verifier callback function if it exists. If @@ -1460,12 +1353,12 @@ _xfs_buf_ioapply( if (bp->b_ops) { bp->b_ops->verify_write(bp); if (bp->b_error) { - xfs_force_shutdown(bp->b_target->bt_mount, + xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); return; } } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; /* * non-crc filesystems don't attach verifiers during @@ -1497,7 +1390,7 @@ _xfs_buf_ioapply( * subsequent call. */ offset = bp->b_offset; - size = BBTOB(bp->b_io_length); + size = BBTOB(bp->b_length); blk_start_plug(&plug); for (i = 0; i < bp->b_map_count; i++) { xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); @@ -1543,7 +1436,7 @@ __xfs_buf_submit( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); /* on shutdown we stale and complete the buffer immediately */ - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); @@ -1613,16 +1506,11 @@ xfs_buf_offset( return page_address(page) + (offset & (PAGE_SIZE-1)); } -/* - * Move data into or out of a buffer. - */ void -xfs_buf_iomove( - xfs_buf_t *bp, /* buffer to process */ - size_t boff, /* starting buffer offset */ - size_t bsize, /* length to copy */ - void *data, /* data address */ - xfs_buf_rw_t mode) /* read/write/zero flag */ +xfs_buf_zero( + struct xfs_buf *bp, + size_t boff, + size_t bsize) { size_t bend; @@ -1635,23 +1523,13 @@ xfs_buf_iomove( page_offset = (boff + bp->b_offset) & ~PAGE_MASK; page = bp->b_pages[page_index]; csize = min_t(size_t, PAGE_SIZE - page_offset, - BBTOB(bp->b_io_length) - boff); + BBTOB(bp->b_length) - boff); ASSERT((csize + page_offset) <= PAGE_SIZE); - switch (mode) { - case XBRW_ZERO: - memset(page_address(page) + page_offset, 0, csize); - break; - case XBRW_READ: - memcpy(data, page_address(page) + page_offset, csize); - break; - case XBRW_WRITE: - memcpy(page_address(page) + page_offset, data, csize); - } + memset(page_address(page) + page_offset, 0, csize); boff += csize; - data += csize; } } @@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) * This allows userspace to disrupt buffer caching for debug/testing * purposes. */ - if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, - XFS_ERRTAG_BUF_LRU_REF)) + if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) lru_ref = 0; atomic_set(&bp->b_lru_ref, lru_ref); @@ -2215,7 +2092,7 @@ xfs_verify_magic( struct xfs_buf *bp, __be32 dmagic) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; int idx; idx = xfs_sb_version_hascrc(&mp->m_sb); @@ -2233,7 +2110,7 @@ xfs_verify_magic16( struct xfs_buf *bp, __be16 dmagic) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; int idx; idx = xfs_sb_version_hascrc(&mp->m_sb); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d0b96e071cec..c6e57a3f409e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -21,12 +21,6 @@ #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) -typedef enum { - XBRW_READ = 1, /* transfer into target memory */ - XBRW_WRITE = 2, /* transfer from target memory */ - XBRW_ZERO = 3, /* Zero target memory */ -} xfs_buf_rw_t; - #define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ @@ -34,12 +28,7 @@ typedef enum { #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ -#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ /* flags used only as arguments to access routines */ #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ @@ -49,7 +38,6 @@ typedef enum { #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ typedef unsigned int xfs_buf_flags_t; @@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t; { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_DELWRI_Q, "DELWRI_Q" } /* @@ -161,13 +145,13 @@ typedef struct xfs_buf { wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ + struct xfs_mount *b_mount; xfs_buftarg_t *b_target; /* buffer target (device) */ void *b_addr; /* virtual address of buffer */ struct work_struct b_ioend_work; - struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ - void *b_log_item; + struct xfs_buf_log_item *b_log_item; struct list_head b_li_list; /* Log items list head */ struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ @@ -175,7 +159,6 @@ typedef struct xfs_buf { struct xfs_buf_map *b_maps; /* compound buffer map */ struct xfs_buf_map __b_map; /* inline compound buffer map */ int b_map_count; - int b_io_length; /* IO size in BBs */ atomic_t b_pin_count; /* pin count */ atomic_t b_io_remaining; /* #outstanding I/O requests */ unsigned int b_page_count; /* size of page array */ @@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags); -struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target, - struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags); - -static inline struct xfs_buf * -xfs_buf_alloc( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) -{ - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return _xfs_buf_alloc(target, &map, 1, flags); -} - struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags); @@ -239,11 +207,10 @@ static inline struct xfs_buf * xfs_buf_get( struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) + size_t numblks) { DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_buf_get_map(target, &map, 1, flags); + return xfs_buf_get_map(target, &map, 1, 0); } static inline struct xfs_buf * @@ -269,9 +236,6 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); -int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); - struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags); int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, @@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp) return __xfs_buf_submit(bp, wait); } -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, - xfs_buf_rw_t); -#define xfs_buf_zero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) +void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); /* Buffer Utility Routines */ extern void *xfs_buf_offset(struct xfs_buf *, size_t); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 65b32acfa0f6..7dcaec54a20b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -5,19 +5,17 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_log.h" -#include "xfs_inode.h" kmem_zone_t *xfs_buf_item_zone; @@ -520,7 +518,7 @@ xfs_buf_item_push( /* has a previous flush failed due to IO errors? */ if ((bp->b_flags & XBF_WRITE_FAIL) && ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { - xfs_warn(bp->b_target->bt_mount, + xfs_warn(bp->b_mount, "Failing async write on buffer block 0x%llx. Retrying async write.", (long long)bp->b_bn); } @@ -594,7 +592,7 @@ xfs_buf_item_put( * free the item. */ STATIC void -xfs_buf_item_unlock( +xfs_buf_item_release( struct xfs_log_item *lip) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); @@ -609,7 +607,7 @@ xfs_buf_item_unlock( &lip->li_flags); #endif - trace_xfs_buf_item_unlock(bip); + trace_xfs_buf_item_release(bip); /* * The bli dirty state should match whether the blf has logged segments @@ -639,6 +637,14 @@ xfs_buf_item_unlock( xfs_buf_relse(bp); } +STATIC void +xfs_buf_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) +{ + return xfs_buf_item_release(lip); +} + /* * This is called to find out where the oldest active copy of the * buf log item in the on disk log resides now that the last log @@ -671,25 +677,15 @@ xfs_buf_item_committed( return lsn; } -STATIC void -xfs_buf_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_size = xfs_buf_item_size, .iop_format = xfs_buf_item_format, .iop_pin = xfs_buf_item_pin, .iop_unpin = xfs_buf_item_unpin, - .iop_unlock = xfs_buf_item_unlock, + .iop_release = xfs_buf_item_release, + .iop_committing = xfs_buf_item_committing, .iop_committed = xfs_buf_item_committed, .iop_push = xfs_buf_item_push, - .iop_committing = xfs_buf_item_committing }; STATIC int @@ -743,7 +739,7 @@ xfs_buf_item_init( * this buffer. If we do already have one, there is * nothing to do here so return. */ - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); if (bip) { ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!bp->b_transp); @@ -980,9 +976,9 @@ xfs_buf_item_relse( */ void xfs_buf_attach_iodone( - xfs_buf_t *bp, - void (*cb)(xfs_buf_t *, xfs_log_item_t *), - xfs_log_item_t *lip) + struct xfs_buf *bp, + void (*cb)(struct xfs_buf *, struct xfs_log_item *), + struct xfs_log_item *lip) { ASSERT(xfs_buf_islocked(bp)); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 90f65f891fab..4a054b11011a 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -39,7 +39,7 @@ struct xfs_buf_log_item; * locked, and which 128 byte chunks of the buffer are dirty. */ struct xfs_buf_log_item { - xfs_log_item_t bli_item; /* common item structure */ + struct xfs_log_item bli_item; /* common item structure */ struct xfs_buf *bli_buf; /* real buffer pointer */ unsigned int bli_flags; /* misc flags */ unsigned int bli_recur; /* lock recursion count */ @@ -55,8 +55,8 @@ bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, - void(*)(struct xfs_buf *, xfs_log_item_t *), - xfs_log_item_t *); + void(*)(struct xfs_buf *, struct xfs_log_item *), + struct xfs_log_item *); void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 5142e64e2345..283df898dd9f 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -6,17 +6,14 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_trans.h" diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index d0df0ed50f4b..8ec7aab89044 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -4,19 +4,17 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_extent_busy.h" -#include "xfs_discard.h" #include "xfs_trace.h" #include "xfs_log.h" diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index a1af984e4913..fb1ad4483081 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -14,16 +14,12 @@ #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_alloc.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_bmap_btree.h" @@ -1243,7 +1239,7 @@ xfs_qm_exit(void) /* * Iterate every dquot of a particular type. The caller must ensure that the * particular quota type is active. iter_fn can return negative error codes, - * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating. + * or XFS_ITER_ABORT to indicate that it wants to stop iterating. */ int xfs_qm_dqiterate( diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 64bd8640f6e8..4fe85709d55d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -34,7 +34,6 @@ typedef struct xfs_dquot { uint dq_flags; /* various flags (XFS_DQ_*) */ struct list_head q_lru; /* global free list of dquots */ struct xfs_mount*q_mount; /* filesystem this relates to */ - struct xfs_trans*q_transp; /* trans this belongs to currently */ uint q_nrefs; /* # active refs from inodes */ xfs_daddr_t q_blkno; /* blkno of dquot buffer */ int q_bufoffset; /* off of dq in buffer (# dquots) */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 7dedd17c4813..282ec5af293e 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin( wake_up(&dqp->q_pinwait); } -STATIC xfs_lsn_t -xfs_qm_dquot_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - /* - * We always re-log the entire dquot when it becomes dirty, - * so, the latest copy _is_ the only one that matters. - */ - return lsn; -} - /* * This is called to wait for the given dquot to be unpinned. * Most of these pin/unpin routines are plagiarized from inode code. @@ -209,14 +197,8 @@ out_unlock: return rval; } -/* - * Unlock the dquot associated with the log item. - * Clear the fields of the dquot and dquot log item that - * are specific to the current transaction. If the - * hold flags is set, do not unlock the dquot. - */ STATIC void -xfs_qm_dquot_logitem_unlock( +xfs_qm_dquot_logitem_release( struct xfs_log_item *lip) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; @@ -224,11 +206,6 @@ xfs_qm_dquot_logitem_unlock( ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* - * Clear the transaction pointer in the dquot - */ - dqp->q_transp = NULL; - - /* * dquots are never 'held' from getting unlocked at the end of * a transaction. Their locking and unlocking is hidden inside the * transaction layer, within trans_commit. Hence, no LI_HOLD flag @@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock( xfs_dqunlock(dqp); } -/* - * this needs to stamp an lsn into the dquot, I think. - * rpc's that look at user dquot's would then have to - * push on the dependency recorded in the dquot - */ STATIC void xfs_qm_dquot_logitem_committing( struct xfs_log_item *lip, - xfs_lsn_t lsn) + xfs_lsn_t commit_lsn) { + return xfs_qm_dquot_logitem_release(lip); } -/* - * This is the ops vector for dquots - */ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = xfs_qm_dquot_logitem_size, .iop_format = xfs_qm_dquot_logitem_format, .iop_pin = xfs_qm_dquot_logitem_pin, .iop_unpin = xfs_qm_dquot_logitem_unpin, - .iop_unlock = xfs_qm_dquot_logitem_unlock, - .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_release = xfs_qm_dquot_logitem_release, + .iop_committing = xfs_qm_dquot_logitem_committing, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing, .iop_error = xfs_dquot_item_error }; @@ -320,26 +289,6 @@ xfs_qm_qoff_logitem_format( } /* - * Pinning has no meaning for an quotaoff item, so just return. - */ -STATIC void -xfs_qm_qoff_logitem_pin( - struct xfs_log_item *lip) -{ -} - -/* - * Since pinning has no meaning for an quotaoff item, unpinning does - * not either. - */ -STATIC void -xfs_qm_qoff_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -/* * There isn't much you can do to push a quotaoff item. It is simply * stuck waiting for the log to be flushed to disk. */ @@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push( return XFS_ITEM_LOCKED; } -/* - * Quotaoff items have no locking or pushing, so return failure - * so that the caller doesn't bother with us. - */ -STATIC void -xfs_qm_qoff_logitem_unlock( - struct xfs_log_item *lip) -{ -} - -/* - * The quotaoff-start-item is logged only once and cannot be moved in the log, - * so simply return the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - STATIC xfs_lsn_t xfs_qm_qoffend_logitem_committed( struct xfs_log_item *lip, @@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed( return (xfs_lsn_t)-1; } -/* - * XXX rcc - don't know quite what to do with this. I think we can - * just ignore it. The only time that isn't the case is if we allow - * the client to somehow see that quotas have been turned off in which - * we can't allow that to get back until the quotaoff hits the disk. - * So how would that happen? Also, do we need different routines for - * quotaoff start and quotaoff end? I suspect the answer is yes but - * to be sure, I need to look at the recovery code and see how quota off - * recovery is handled (do we roll forward or back or do something else). - * If we roll forwards or backwards, then we need two separate routines, - * one that does nothing and one that stamps in the lsn that matters - * (truly makes the quotaoff irrevocable). If we do something else, - * then maybe we don't need two. - */ -STATIC void -xfs_qm_qoff_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_unlock = xfs_qm_qoff_logitem_unlock, .iop_committed = xfs_qm_qoffend_logitem_committed, .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing }; -/* - * This is the ops vector shared by all quotaoff-start log items. - */ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoff_logitem_committed, .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing }; /* diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index db9df710a308..1aed34ccdabc 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -12,13 +12,13 @@ struct xfs_mount; struct xfs_qoff_logitem; typedef struct xfs_dq_logitem { - xfs_log_item_t qli_item; /* common portion */ + struct xfs_log_item qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ } xfs_dq_logitem_t; typedef struct xfs_qoff_logitem { - xfs_log_item_t qql_item; /* common portion */ + struct xfs_log_item qql_item; /* common portion */ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ unsigned int qql_flags; } xfs_qoff_logitem_t; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a1e177f66404..544c9482a0ef 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_fs.h" #include "xfs_log_format.h" @@ -353,7 +354,7 @@ xfs_buf_verifier_error( size_t bufsz, xfs_failaddr_t failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; int sz; diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index f2284ceb129f..f1372f9046e3 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -4,18 +4,16 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_export.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" #include "xfs_pnfs.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 74ddf66f4cfe..86f6512d6864 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -9,14 +9,18 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_extfree_item.h" #include "xfs_log.h" #include "xfs_btree.h" #include "xfs_rmap.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_trace.h" kmem_zone_t *xfs_efi_zone; @@ -107,15 +111,6 @@ xfs_efi_item_format( /* - * Pinning has no meaning for an efi item, so just return. - */ -STATIC void -xfs_efi_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an EFI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the EFI transaction has been successfully committed to make it @@ -133,71 +128,22 @@ xfs_efi_item_unpin( } /* - * Efi items have no locking or pushing. However, since EFIs are pulled from - * the AIL when their corresponding EFDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the EFI out of - * the AIL. - */ -STATIC uint -xfs_efi_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The EFI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an EFD isn't going to be * constructed and thus we free the EFI here directly. */ STATIC void -xfs_efi_item_unlock( +xfs_efi_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_efi_release(EFI_ITEM(lip)); -} - -/* - * The EFI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_efi_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The EFI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_efi_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ + xfs_efi_release(EFI_ITEM(lip)); } -/* - * This is the ops vector shared by all efi log items. - */ static const struct xfs_item_ops xfs_efi_item_ops = { .iop_size = xfs_efi_item_size, .iop_format = xfs_efi_item_format, - .iop_pin = xfs_efi_item_pin, .iop_unpin = xfs_efi_item_unpin, - .iop_unlock = xfs_efi_item_unlock, - .iop_committed = xfs_efi_item_committed, - .iop_push = xfs_efi_item_push, - .iop_committing = xfs_efi_item_committing + .iop_release = xfs_efi_item_release, }; @@ -349,136 +295,298 @@ xfs_efd_item_format( } /* - * Pinning has no meaning for an efd item, so just return. + * The EFD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the EFI and free the EFD. */ STATIC void -xfs_efd_item_pin( +xfs_efd_item_release( struct xfs_log_item *lip) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + + xfs_efi_release(efdp->efd_efip); + xfs_efd_item_free(efdp); } +static const struct xfs_item_ops xfs_efd_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_efd_item_size, + .iop_format = xfs_efd_item_format, + .iop_release = xfs_efd_item_release, +}; + /* - * Since pinning has no meaning for an efd item, unpinning does - * not either. + * Allocate an "extent free done" log item that will hold nextents worth of + * extents. The caller must use all nextents extents, because we are not + * flexible about this at all. */ -STATIC void -xfs_efd_item_unpin( - struct xfs_log_item *lip, - int remove) +static struct xfs_efd_log_item * +xfs_trans_get_efd( + struct xfs_trans *tp, + struct xfs_efi_log_item *efip, + unsigned int nextents) { + struct xfs_efd_log_item *efdp; + + ASSERT(nextents > 0); + + if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { + efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + + (nextents - 1) * sizeof(struct xfs_extent), + KM_SLEEP); + } else { + efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); + } + + xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, + &xfs_efd_item_ops); + efdp->efd_efip = efip; + efdp->efd_format.efd_nextents = nextents; + efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; + + xfs_trans_add_item(tp, &efdp->efd_item); + return efdp; } /* - * There isn't much you can do to push on an efd item. It is simply stuck - * waiting for the log to be flushed to disk. + * Free an extent and log it to the EFD. Note that the transaction is marked + * dirty regardless of whether the extent free succeeds or fails to support the + * EFI/EFD lifecycle rules. */ -STATIC uint -xfs_efd_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_free_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, + xfs_fsblock_t start_block, + xfs_extlen_t ext_len, + const struct xfs_owner_info *oinfo, + bool skip_discard) { - return XFS_ITEM_PINNED; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent *extp; + uint next_extent; + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, + start_block); + int error; + + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + + error = __xfs_free_extent(tp, start_block, ext_len, + oinfo, XFS_AG_RESV_NONE, skip_discard); + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the EFI and frees the EFD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); + + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); + extp->ext_start = start_block; + extp->ext_len = ext_len; + efdp->efd_next_extent++; + + return error; } -/* - * The EFD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the EFI and free the EFD. - */ -STATIC void -xfs_efd_item_unlock( - struct xfs_log_item *lip) +/* Sort bmap items by AG. */ +static int +xfs_extent_free_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_extent_free_item *ra; + struct xfs_extent_free_item *rb; + + ra = container_of(a, struct xfs_extent_free_item, xefi_list); + rb = container_of(b, struct xfs_extent_free_item, xefi_list); + return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - + XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); +} - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_efi_release(efdp->efd_efip); - xfs_efd_item_free(efdp); - } +/* Get an EFI. */ +STATIC void * +xfs_extent_free_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_efi_log_item *efip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + efip = xfs_efi_init(tp->t_mountp, count); + ASSERT(efip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &efip->efi_item); + return efip; } -/* - * When the efd item is committed to disk, all we need to do is delete our - * reference to our partner efi item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from further - * referencing this item. - */ -STATIC xfs_lsn_t -xfs_efd_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Log a free extent to the intent item. */ +STATIC void +xfs_extent_free_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + struct xfs_efi_log_item *efip = intent; + struct xfs_extent_free_item *free; + uint next_extent; + struct xfs_extent *extp; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); /* - * Drop the EFI reference regardless of whether the EFD has been - * aborted. Once the EFD transaction is constructed, it is the sole - * responsibility of the EFD to release the EFI (even if the EFI is - * aborted due to log I/O error). + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. */ - xfs_efi_release(efdp->efd_efip); - xfs_efd_item_free(efdp); + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; + ASSERT(next_extent < efip->efi_format.efi_nextents); + extp = &efip->efi_format.efi_extents[next_extent]; + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; +} - return (xfs_lsn_t)-1; +/* Get an EFD so we can process all the free extents. */ +STATIC void * +xfs_extent_free_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_efd(tp, intent, count); } -/* - * The EFD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Process a free extent. */ +STATIC int +xfs_extent_free_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_extent_free_item *free; + int error; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + error = xfs_trans_free_extent(tp, done_item, + free->xefi_startblock, + free->xefi_blockcount, + &free->xefi_oinfo, free->xefi_skip_discard); + kmem_free(free); + return error; +} + +/* Abort all pending EFIs. */ STATIC void -xfs_efd_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_extent_free_abort_intent( + void *intent) { + xfs_efi_release(intent); } -/* - * This is the ops vector shared by all efd log items. - */ -static const struct xfs_item_ops xfs_efd_item_ops = { - .iop_size = xfs_efd_item_size, - .iop_format = xfs_efd_item_format, - .iop_pin = xfs_efd_item_pin, - .iop_unpin = xfs_efd_item_unpin, - .iop_unlock = xfs_efd_item_unlock, - .iop_committed = xfs_efd_item_committed, - .iop_push = xfs_efd_item_push, - .iop_committing = xfs_efd_item_committing +/* Cancel a free extent. */ +STATIC void +xfs_extent_free_cancel_item( + struct list_head *item) +{ + struct xfs_extent_free_item *free; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + kmem_free(free); +} + +const struct xfs_defer_op_type xfs_extent_free_defer_type = { + .max_items = XFS_EFI_MAX_FAST_EXTENTS, + .diff_items = xfs_extent_free_diff_items, + .create_intent = xfs_extent_free_create_intent, + .abort_intent = xfs_extent_free_abort_intent, + .log_item = xfs_extent_free_log_item, + .create_done = xfs_extent_free_create_done, + .finish_item = xfs_extent_free_finish_item, + .cancel_item = xfs_extent_free_cancel_item, }; /* - * Allocate and initialize an efd item with the given number of extents. + * AGFL blocks are accounted differently in the reserve pools and are not + * inserted into the busy extent list. */ -struct xfs_efd_log_item * -xfs_efd_init( - struct xfs_mount *mp, - struct xfs_efi_log_item *efip, - uint nextents) - +STATIC int +xfs_agfl_free_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) { - struct xfs_efd_log_item *efdp; - uint size; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_efd_log_item *efdp = done_item; + struct xfs_extent_free_item *free; + struct xfs_extent *extp; + struct xfs_buf *agbp; + int error; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + uint next_extent; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + ASSERT(free->xefi_blockcount == 1); + agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (!error) + error = xfs_free_agfl_block(tp, agno, agbno, agbp, + &free->xefi_oinfo); - ASSERT(nextents > 0); - if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(xfs_efd_log_item_t) + - ((nextents - 1) * sizeof(xfs_extent_t))); - efdp = kmem_zalloc(size, KM_SLEEP); - } else { - efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); - } + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the EFI and frees the EFD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); - efdp->efd_efip = efip; - efdp->efd_format.efd_nextents = nextents; - efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; + efdp->efd_next_extent++; - return efdp; + kmem_free(free); + return error; } +/* sub-type with special handling for AGFL deferred frees */ +const struct xfs_defer_op_type xfs_agfl_free_defer_type = { + .max_items = XFS_EFI_MAX_FAST_EXTENTS, + .diff_items = xfs_extent_free_diff_items, + .create_intent = xfs_extent_free_create_intent, + .abort_intent = xfs_extent_free_abort_intent, + .log_item = xfs_extent_free_log_item, + .create_done = xfs_extent_free_create_done, + .finish_item = xfs_agfl_free_finish_item, + .cancel_item = xfs_extent_free_cancel_item, +}; + /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 2a6a895ca73e..16aaab06d4ec 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -51,7 +51,7 @@ struct kmem_zone; * AIL, so at this point both the EFI and EFD are freed. */ typedef struct xfs_efi_log_item { - xfs_log_item_t efi_item; + struct xfs_log_item efi_item; atomic_t efi_refcount; atomic_t efi_next_extent; unsigned long efi_flags; /* misc flags */ @@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item { * have been freed. */ typedef struct xfs_efd_log_item { - xfs_log_item_t efd_item; + struct xfs_log_item efd_item; xfs_efi_log_item_t *efd_efip; uint efd_next_extent; xfs_efd_log_format_t efd_format; @@ -79,8 +79,6 @@ extern struct kmem_zone *xfs_efi_zone; extern struct kmem_zone *xfs_efd_zone; xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); -xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, - uint); int xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt); void xfs_efi_item_free(xfs_efi_log_item_t *); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 76748255f843..28101bbc0b78 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -10,14 +10,11 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" @@ -28,9 +25,7 @@ #include "xfs_iomap.h" #include "xfs_reflink.h" -#include <linux/dcache.h> #include <linux/falloc.h> -#include <linux/pagevec.h> #include <linux/backing-dev.h> #include <linux/mman.h> @@ -367,20 +362,7 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. */ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) { - error = file_update_time(file); - if (error) - return error; - } - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - if (!IS_NOSEC(inode)) - return file_remove_privs(file); - return 0; + return file_modified(file); } static int @@ -392,6 +374,7 @@ xfs_dio_write_end_io( struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); loff_t offset = iocb->ki_pos; + unsigned int nofs_flag; int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); @@ -408,10 +391,17 @@ xfs_dio_write_end_io( */ XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + if (flags & IOMAP_DIO_COW) { error = xfs_reflink_end_cow(ip, offset, size); if (error) - return error; + goto out; } /* @@ -420,8 +410,10 @@ xfs_dio_write_end_io( * earlier allows a racing dio read to find unwritten extents before * they are converted. */ - if (flags & IOMAP_DIO_UNWRITTEN) - return xfs_iomap_write_unwritten(ip, offset, size, true); + if (flags & IOMAP_DIO_UNWRITTEN) { + error = xfs_iomap_write_unwritten(ip, offset, size, true); + goto out; + } /* * We need to update the in-core inode size here so that we don't end up @@ -443,6 +435,8 @@ xfs_dio_write_end_io( spin_unlock(&ip->i_flags_lock); } +out: + memalloc_nofs_restore(nofs_flag); return error; } @@ -1203,11 +1197,14 @@ xfs_file_mmap( struct file *filp, struct vm_area_struct *vma) { + struct dax_device *dax_dev; + + dax_dev = xfs_find_daxdev_for_inode(file_inode(filp)); /* - * We don't support synchronous mappings for non-DAX files. At least - * until someone comes with a sensible use case. + * We don't support synchronous mappings for non-DAX files and + * for DAX files if underneath dax_device is not synchronous. */ - if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC)) + if (!daxdev_mapping_supported(vma, dax_dev)) return -EOPNOTSUPP; file_accessed(filp); diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 182501373af2..574a7a8b4736 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -5,22 +5,19 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_alloc.h" #include "xfs_mru_cache.h" -#include "xfs_filestream.h" #include "xfs_trace.h" #include "xfs_ag_resv.h" #include "xfs_trans.h" -#include "xfs_shared.h" struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 3d76a9e35870..5a8f9641562a 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -9,16 +9,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_error.h" #include "xfs_btree.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" -#include "xfs_log.h" #include "xfs_rmap.h" #include "xfs_alloc.h" #include "xfs_bit.h" diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3d0e0570e3aa..3e61d0cc23f8 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -11,15 +11,11 @@ #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_error.h" -#include "xfs_btree.h" #include "xfs_alloc.h" #include "xfs_fsops.h" #include "xfs_trans_space.h" -#include "xfs_rtalloc.h" -#include "xfs_trace.h" #include "xfs_log.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" @@ -251,9 +247,9 @@ xfs_growfs_data( if (mp->m_sb.sb_imax_pct) { uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; do_div(icount, 100); - mp->m_maxicount = XFS_FSB_TO_INO(mp, icount); + M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); } else - mp->m_maxicount = 0; + M_IGEO(mp)->maxicount = 0; /* Update secondary superblocks now the physical grow has completed */ error = xfs_update_secondary_sbs(mp); diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index d0d377384120..fa55ab8b8d80 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ #include "xfs.h" -#include "xfs_sysctl.h" /* * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, @@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = { #else .bug_on_assert = false, /* assert failures WARN() */ #endif +#ifdef DEBUG + .pwork_threads = -1, /* automatic thread detection */ +#endif }; diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 4c4929f9e7bf..8e0cb05a7142 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -9,12 +9,8 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trace.h" #include "xfs_health.h" @@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = { void xfs_bulkstat_health( struct xfs_inode *ip, - struct xfs_bstat *bs) + struct xfs_bulkstat *bs) { const struct ioctl_sick_map *m; unsigned int sick; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a76b27565a18..0b0fd10a36d4 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" @@ -23,8 +23,6 @@ #include "xfs_dquot.h" #include "xfs_reflink.h" -#include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/iversion.h> /* diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 8381d34cb102..d99a0a3e5f40 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -6,14 +6,9 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" -#include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_icreate_item.h" #include "xfs_log.h" @@ -56,80 +51,18 @@ xfs_icreate_item_format( sizeof(struct xfs_icreate_log)); } - -/* Pinning has no meaning for the create item, so just return. */ STATIC void -xfs_icreate_item_pin( +xfs_icreate_item_release( struct xfs_log_item *lip) { + kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip)); } - -/* pinning has no meaning for the create item, so just return. */ -STATIC void -xfs_icreate_item_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -STATIC void -xfs_icreate_item_unlock( - struct xfs_log_item *lip) -{ - struct xfs_icreate_item *icp = ICR_ITEM(lip); - - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - kmem_zone_free(xfs_icreate_zone, icp); - return; -} - -/* - * Because we have ordered buffers being tracked in the AIL for the inode - * creation, we don't need the create item after this. Hence we can free - * the log item and return -1 to tell the caller we're done with the item. - */ -STATIC xfs_lsn_t -xfs_icreate_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_icreate_item *icp = ICR_ITEM(lip); - - kmem_zone_free(xfs_icreate_zone, icp); - return (xfs_lsn_t)-1; -} - -/* item can never get into the AIL */ -STATIC uint -xfs_icreate_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - ASSERT(0); - return XFS_ITEM_SUCCESS; -} - -/* Ordered buffers do the dependency tracking here, so this does nothing. */ -STATIC void -xfs_icreate_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_icreate_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .iop_size = xfs_icreate_item_size, .iop_format = xfs_icreate_item_format, - .iop_pin = xfs_icreate_item_pin, - .iop_unpin = xfs_icreate_item_unpin, - .iop_push = xfs_icreate_item_push, - .iop_unlock = xfs_icreate_item_unlock, - .iop_committed = xfs_icreate_item_committed, - .iop_committing = xfs_icreate_item_committing, + .iop_release = xfs_icreate_item_release, }; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 71d216cf6f87..6467d5e1df2d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3,7 +3,6 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/log2.h> #include <linux/iversion.h> #include "xfs.h" @@ -16,10 +15,7 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" -#include "xfs_attr_sf.h" #include "xfs_attr.h" #include "xfs_trans_space.h" #include "xfs_trans.h" @@ -32,7 +28,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_filestream.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" @@ -40,7 +35,6 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" -#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass) */ static void xfs_lock_inodes( - xfs_inode_t **ips, - int inodes, - uint lock_mode) + struct xfs_inode **ips, + int inodes, + uint lock_mode) { - int attempts = 0, i, j, try_lock; - xfs_log_item_t *lp; + int attempts = 0, i, j, try_lock; + struct xfs_log_item *lp; /* * Currently supports between 2 and 5 inodes with exclusive locking. We @@ -485,7 +479,7 @@ again: */ if (!try_lock) { for (j = (i - 1); j >= 0 && !try_lock; j--) { - lp = (xfs_log_item_t *)ips[j]->i_itemp; + lp = &ips[j]->i_itemp->ili_item; if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) try_lock++; } @@ -551,7 +545,7 @@ xfs_lock_two_inodes( struct xfs_inode *temp; uint mode_temp; int attempts = 0; - xfs_log_item_t *lp; + struct xfs_log_item *lp; ASSERT(hweight32(ip0_mode) == 1); ASSERT(hweight32(ip1_mode) == 1); @@ -585,7 +579,7 @@ xfs_lock_two_inodes( * the second lock. If we can't get it, we must release the first one * and try again. */ - lp = (xfs_log_item_t *)ip0->i_itemp; + lp = &ip0->i_itemp->ili_item; if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { xfs_iunlock(ip0, ip0_mode); @@ -2537,13 +2531,14 @@ xfs_ifree_cluster( xfs_inode_log_item_t *iip; struct xfs_log_item *lip; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_ino_t inum; inum = xic->first_ino; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); - nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster; + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; - for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) { + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { /* * The allocation bitmap tells us which inodes of the chunk were * physically allocated. Skip the cluster if an inode falls into @@ -2551,7 +2546,7 @@ xfs_ifree_cluster( */ ioffset = inum - xic->first_ino; if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { - ASSERT(ioffset % mp->m_inodes_per_cluster == 0); + ASSERT(ioffset % igeo->inodes_per_cluster == 0); continue; } @@ -2567,7 +2562,7 @@ xfs_ifree_cluster( * to mark all the active inodes on the buffer stale. */ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, - mp->m_bsize * mp->m_blocks_per_cluster, + mp->m_bsize * igeo->blocks_per_cluster, XBF_UNMAPPED); if (!bp) @@ -2614,7 +2609,7 @@ xfs_ifree_cluster( * transaction stale above, which means there is no point in * even trying to lock them. */ - for (i = 0; i < mp->m_inodes_per_cluster; i++) { + for (i = 0; i < igeo->inodes_per_cluster; i++) { retry: rcu_read_lock(); ip = radix_tree_lookup(&pag->pag_ici_root, @@ -3472,28 +3467,27 @@ xfs_iflush_cluster( struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; unsigned long first_index, mask; - unsigned long inodes_per_cluster; int cilist_size; struct xfs_inode **cilist; struct xfs_inode *cip; + struct xfs_ino_geometry *igeo = M_IGEO(mp); int nr_found; int clcount = 0; int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); + cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *); cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); if (!cilist) goto out_put; - mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); + mask = ~(igeo->inodes_per_cluster - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; rcu_read_lock(); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, - first_index, inodes_per_cluster); + first_index, igeo->inodes_per_cluster); if (nr_found == 0) goto out_free; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fa1c4fe2ffbf..c9a502eed204 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -5,6 +5,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -12,7 +13,6 @@ #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_trans_priv.h" #include "xfs_buf_item.h" @@ -565,7 +565,7 @@ out_unlock: * Unlock the inode associated with the inode log item. */ STATIC void -xfs_inode_item_unlock( +xfs_inode_item_release( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); @@ -621,23 +621,21 @@ xfs_inode_item_committed( STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, - xfs_lsn_t lsn) + xfs_lsn_t commit_lsn) { - INODE_ITEM(lip)->ili_last_lsn = lsn; + INODE_ITEM(lip)->ili_last_lsn = commit_lsn; + return xfs_inode_item_release(lip); } -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_inode_item_ops = { .iop_size = xfs_inode_item_size, .iop_format = xfs_inode_item_format, .iop_pin = xfs_inode_item_pin, .iop_unpin = xfs_inode_item_unpin, - .iop_unlock = xfs_inode_item_unlock, + .iop_release = xfs_inode_item_release, .iop_committed = xfs_inode_item_committed, .iop_push = xfs_inode_item_push, - .iop_committing = xfs_inode_item_committing, + .iop_committing = xfs_inode_item_committing, .iop_error = xfs_inode_item_error }; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 27081eba220c..07a60e74c39c 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -14,7 +14,7 @@ struct xfs_inode; struct xfs_mount; typedef struct xfs_inode_log_item { - xfs_log_item_t ili_item; /* common portion */ + struct xfs_log_item ili_item; /* common portion */ struct xfs_inode *ili_inode; /* inode ptr */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d7dfc13f30f5..6f7848cd5527 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -11,9 +11,8 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_ioctl.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_attr.h" @@ -25,7 +24,6 @@ #include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_acl.h" #include "xfs_btree.h" @@ -36,14 +34,8 @@ #include "xfs_ag.h" #include "xfs_health.h" -#include <linux/capability.h> -#include <linux/cred.h> -#include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/exportfs.h> /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -721,16 +713,45 @@ out_unlock: return error; } +/* Return 0 on success or positive error */ +int +xfs_fsbulkstat_one_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + struct xfs_bstat bs1; + + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); + if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); +} + +int +xfs_fsinumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) +{ + struct xfs_inogrp ig1; + + xfs_inumbers_to_inogrp(&ig1, igrp); + if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp)); +} + STATIC int -xfs_ioc_bulkstat( +xfs_ioc_fsbulkstat( xfs_mount_t *mp, unsigned int cmd, void __user *arg) { - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + .ocount = 0, + }; + xfs_ino_t lastino; int error; /* done = 1 if there are more stats to get and if bulkstat */ @@ -742,41 +763,243 @@ xfs_ioc_bulkstat( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) return -EFAULT; - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, - sizeof(xfs_bstat_t), NULL, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request back end functions + * take the inode to start with, so we have to compute the startino + * parameter from lastino to maintain correct function. lastino == 0 + * is a special case because it has traditionally meant "first inode + * in filesystem". + */ + if (cmd == XFS_IOC_FSINUMBERS) { + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_inumbers(&breq, xfs_fsinumbers_fmt); + lastino = breq.startino - 1; + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { + breq.startino = lastino; + breq.icount = 1; + error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt); + } else { /* XFS_IOC_FSBULKSTAT */ + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt); + lastino = breq.startino - 1; + } if (error) return error; - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -EFAULT; + if (bulkreq.lastip != NULL && + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) + return -EFAULT; - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -EFAULT; + if (bulkreq.ocount != NULL && + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) + return -EFAULT; + + return 0; +} + +/* Return 0 on success or positive error */ +static int +xfs_bulkstat_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat)); +} + +/* + * Check the incoming bulk request @hdr from userspace and initialize the + * internal @breq bulk request appropriately. Returns 0 if the bulk request + * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual + * negative error code. + */ +static int +xfs_bulk_ireq_setup( + struct xfs_mount *mp, + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq, + void __user *ubuffer) +{ + if (hdr->icount == 0 || + (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) || + memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) + return -EINVAL; + + breq->startino = hdr->ino; + breq->ubuffer = ubuffer; + breq->icount = hdr->icount; + breq->ocount = 0; + breq->flags = 0; + + /* + * The @ino parameter is a special value, so we must look it up here. + * We're not allowed to have IREQ_AGNO, and we only return one inode + * worth of data. + */ + if (hdr->flags & XFS_BULK_IREQ_SPECIAL) { + if (hdr->flags & XFS_BULK_IREQ_AGNO) + return -EINVAL; + + switch (hdr->ino) { + case XFS_BULK_IREQ_SPECIAL_ROOT: + hdr->ino = mp->m_sb.sb_rootino; + break; + default: + return -EINVAL; + } + breq->icount = 1; } + /* + * The IREQ_AGNO flag means that we only want results from a given AG. + * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is + * beyond the specified AG then we return no results. + */ + if (hdr->flags & XFS_BULK_IREQ_AGNO) { + if (hdr->agno >= mp->m_sb.sb_agcount) + return -EINVAL; + + if (breq->startino == 0) + breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0); + else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) + return -EINVAL; + + breq->flags |= XFS_IBULK_SAME_AG; + + /* Asking for an inode past the end of the AG? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) + return XFS_ITER_ABORT; + } else if (hdr->agno) + return -EINVAL; + + /* Asking for an inode past the end of the FS? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) + return XFS_ITER_ABORT; + + return 0; +} + +/* + * Update the userspace bulk request @hdr to reflect the end state of the + * internal bulk request @breq. + */ +static void +xfs_bulk_ireq_teardown( + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq) +{ + hdr->ino = breq->startino; + hdr->ocount = breq->ocount; +} + +/* Handle the v5 bulkstat ioctl. */ +STATIC int +xfs_ioc_bulkstat( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_bulkstat_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); + if (error == XFS_ITER_ABORT) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_bulkstat(&breq, xfs_bulkstat_fmt); + if (error) + return error; + +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return 0; +} + +STATIC int +xfs_inumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) +{ + if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers)); +} + +/* Handle the v5 inumbers ioctl. */ +STATIC int +xfs_ioc_inumbers( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_inumbers_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); + if (error == XFS_ITER_ABORT) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_inumbers(&breq, xfs_inumbers_fmt); + if (error) + return error; + +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) + return -EFAULT; + return 0; } @@ -879,37 +1102,44 @@ xfs_di2lxflags( return flags; } -STATIC int -xfs_ioc_fsgetxattr( - xfs_inode_t *ip, - int attr, - void __user *arg) +static void +xfs_fill_fsxattr( + struct xfs_inode *ip, + bool attr, + struct fsxattr *fa) { - struct fsxattr fa; - - memset(&fa, 0, sizeof(struct fsxattr)); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - fa.fsx_xflags = xfs_ip2xflags(ip); - fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_cowextsize = ip->i_d.di_cowextsize << + simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); + fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa->fsx_cowextsize = ip->i_d.di_cowextsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = xfs_get_projid(ip); + fa->fsx_projid = xfs_get_projid(ip); if (attr) { if (ip->i_afp) { if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(ip->i_afp); + fa->fsx_nextents = xfs_iext_count(ip->i_afp); else - fa.fsx_nextents = ip->i_d.di_anextents; + fa->fsx_nextents = ip->i_d.di_anextents; } else - fa.fsx_nextents = 0; + fa->fsx_nextents = 0; } else { if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(&ip->i_df); + fa->fsx_nextents = xfs_iext_count(&ip->i_df); else - fa.fsx_nextents = ip->i_d.di_nextents; + fa->fsx_nextents = ip->i_d.di_nextents; } +} + +STATIC int +xfs_ioc_fsgetxattr( + xfs_inode_t *ip, + int attr, + void __user *arg) +{ + struct fsxattr fa; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_fill_fsxattr(ip, attr, &fa); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (copy_to_user(arg, &fa, sizeof(fa))) @@ -1035,15 +1265,6 @@ xfs_ioctl_setattr_xflags( if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) return -EINVAL; - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || - (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - /* diflags2 only valid for v3 inodes. */ di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); if (di_flags2 && ip->i_d.di_version < 3) @@ -1202,39 +1423,31 @@ xfs_ioctl_setattr_check_extsize( struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; - - if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode)) - return -EINVAL; - - if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && - !S_ISDIR(VFS_I(ip)->i_mode)) - return -EINVAL; + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) return -EINVAL; - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) - return -EINVAL; + if (fa->fsx_extsize == 0) + return 0; - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - } + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + return -EINVAL; - if (fa->fsx_extsize % size) + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & FS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) return -EINVAL; - } else - fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + } + + if (fa->fsx_extsize % size) + return -EINVAL; return 0; } @@ -1260,6 +1473,8 @@ xfs_ioctl_setattr_check_cowextsize( struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t size; + xfs_fsblock_t cowextsize_fsb; if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) return 0; @@ -1268,25 +1483,19 @@ xfs_ioctl_setattr_check_cowextsize( ip->i_d.di_version != 3) return -EINVAL; - if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode)) - return -EINVAL; - - if (fa->fsx_cowextsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t cowextsize_fsb; + if (fa->fsx_cowextsize == 0) + return 0; - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); - if (cowextsize_fsb > MAXEXTLEN) - return -EINVAL; + cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + if (cowextsize_fsb > MAXEXTLEN) + return -EINVAL; - size = mp->m_sb.sb_blocksize; - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; + size = mp->m_sb.sb_blocksize; + if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; - if (fa->fsx_cowextsize % size) - return -EINVAL; - } else - fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + if (fa->fsx_cowextsize % size) + return -EINVAL; return 0; } @@ -1300,21 +1509,6 @@ xfs_ioctl_setattr_check_projid( if (fa->fsx_projid > (uint16_t)-1 && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return -EINVAL; - - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (xfs_get_projid(ip) != fa->fsx_projid) - return -EINVAL; - if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) != - (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) - return -EINVAL; - return 0; } @@ -1323,6 +1517,7 @@ xfs_ioctl_setattr( xfs_inode_t *ip, struct fsxattr *fa) { + struct fsxattr old_fa; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_dquot *udqp = NULL; @@ -1370,7 +1565,6 @@ xfs_ioctl_setattr( goto error_free_dquots; } - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && xfs_get_projid(ip) != fa->fsx_projid) { code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, @@ -1379,6 +1573,11 @@ xfs_ioctl_setattr( goto error_trans_cancel; } + xfs_fill_fsxattr(ip, false, &old_fa); + code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa); + if (code) + goto error_trans_cancel; + code = xfs_ioctl_setattr_check_extsize(ip, fa); if (code) goto error_trans_cancel; @@ -1489,6 +1688,7 @@ xfs_ioc_setxflags( { struct xfs_trans *tp; struct fsxattr fa; + struct fsxattr old_fa; unsigned int flags; int join_flags = 0; int error; @@ -1524,6 +1724,13 @@ xfs_ioc_setxflags( goto out_drop_write; } + xfs_fill_fsxattr(ip, false, &old_fa); + error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa); + if (error) { + xfs_trans_cancel(tp); + goto out_drop_write; + } + error = xfs_ioctl_setattr_xflags(tp, ip, &fa); if (error) { xfs_trans_cancel(tp); @@ -1942,7 +2149,12 @@ xfs_file_ioctl( case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: + return xfs_ioc_fsbulkstat(mp, cmd, arg); + + case XFS_IOC_BULKSTAT: return xfs_ioc_bulkstat(mp, cmd, arg); + case XFS_IOC_INUMBERS: + return xfs_ioc_inumbers(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry(mp, arg, 3); diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 4b17f67c888a..654c0bb1bcf8 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -77,4 +77,12 @@ xfs_set_dmattrs( uint evmask, uint16_t state); +struct xfs_ibulk; +struct xfs_bstat; +struct xfs_inogrp; + +int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat); +int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); + #endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 614fc6886d24..7fcf7569743f 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -3,23 +3,19 @@ * Copyright (c) 2004-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/compat.h> -#include <linux/ioctl.h> #include <linux/mount.h> -#include <linux/slab.h> -#include <linux/uaccess.h> #include <linux/fsmap.h> #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" -#include "xfs_error.h" #include "xfs_fsops.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_attr.h" #include "xfs_ioctl.h" @@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin( } STATIC int -xfs_inumbers_fmt_compat( - void __user *ubuffer, - const struct xfs_inogrp *buffer, - long count, - long *written) +xfs_fsinumbers_fmt_compat( + struct xfs_ibulk *breq, + const struct xfs_inumbers *ig) { - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; + struct compat_xfs_inogrp __user *p32 = breq->ubuffer; + struct xfs_inogrp ig1; + struct xfs_inogrp *igrp = &ig1; - for (i = 0; i < count; i++) { - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -EFAULT; - } - *written = count * sizeof(*p32); - return 0; + xfs_inumbers_to_inogrp(&ig1, ig); + + if (put_user(igrp->xi_startino, &p32->xi_startino) || + put_user(igrp->xi_alloccount, &p32->xi_alloccount) || + put_user(igrp->xi_allocmask, &p32->xi_allocmask)) + return -EFAULT; + + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp)); } #else -#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt #endif /* BROKEN_X86_ALIGNMENT */ STATIC int @@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin( return 0; } -/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +/* + * struct xfs_bstat has differing alignment on intel, & bstime_t sizes + * everywhere + */ STATIC int xfs_ioctl32_bstat_copyin( - xfs_bstat_t *bstat, - compat_xfs_bstat_t __user *bstat32) + struct xfs_bstat *bstat, + struct compat_xfs_bstat __user *bstat32) { if (get_user(bstat->bs_ino, &bstat32->bs_ino) || get_user(bstat->bs_mode, &bstat32->bs_mode) || @@ -171,16 +169,15 @@ xfs_bstime_store_compat( /* Return 0 on success or positive error (to xfs_bulkstat()) */ STATIC int -xfs_bulkstat_one_fmt_compat( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) +xfs_fsbulkstat_one_fmt_compat( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) { - compat_xfs_bstat_t __user *p32 = ubuffer; + struct compat_xfs_bstat __user *p32 = breq->ubuffer; + struct xfs_bstat bs1; + struct xfs_bstat *buffer = &bs1; - if (ubsize < sizeof(*p32)) - return -ENOMEM; + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) return -EFAULT; - if (ubused) - *ubused = sizeof(*p32); - return 0; -} -STATIC int -xfs_bulkstat_one_compat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ -{ - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, - ubused, stat); + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat)); } /* copied from xfs_ioctl.c */ STATIC int -xfs_compat_ioc_bulkstat( +xfs_compat_ioc_fsbulkstat( xfs_mount_t *mp, unsigned int cmd, - compat_xfs_fsop_bulkreq_t __user *p32) + struct compat_xfs_fsop_bulkreq __user *p32) { u32 addr; - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + .ocount = 0, + }; + xfs_ino_t lastino; int error; /* @@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat( * to userpace memory via bulkreq.ubuffer. Normally the compat * functions and structure size are the correct ones to use ... */ - inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; - bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; - size_t bs_one_size = sizeof(struct compat_xfs_bstat); + inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; + bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; #ifdef CONFIG_X86_X32 if (in_x32_syscall()) { @@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat( * the data written out in compat layout will not match what * x32 userspace expects. */ - inumbers_func = xfs_inumbers_fmt; - bs_one_func = xfs_bulkstat_one; - bs_one_size = sizeof(struct xfs_bstat); + inumbers_func = xfs_fsinumbers_fmt; + bs_one_func = xfs_fsbulkstat_one_fmt; } #endif @@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat( return -EFAULT; bulkreq.ocount = compat_ptr(addr); - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request back end functions + * take the inode to start with, so we have to compute the startino + * parameter from lastino to maintain correct function. lastino == 0 + * is a special case because it has traditionally meant "first inode + * in filesystem". + */ if (cmd == XFS_IOC_FSINUMBERS_32) { - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, inumbers_func); + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_inumbers(&breq, inumbers_func); + lastino = breq.startino - 1; } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { - int res; - - error = bs_one_func(mp, inlast, bulkreq.ubuffer, - bs_one_size, NULL, &res); + breq.startino = lastino; + breq.icount = 1; + error = xfs_bulkstat_one(&breq, bs_one_func); + lastino = breq.startino; } else if (cmd == XFS_IOC_FSBULKSTAT_32) { - error = xfs_bulkstat(mp, &inlast, &count, - bs_one_func, bs_one_size, - bulkreq.ubuffer, &done); - } else + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_bulkstat(&breq, bs_one_func); + lastino = breq.startino - 1; + } else { error = -EINVAL; + } if (error) return error; - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -EFAULT; + if (bulkreq.lastip != NULL && + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) + return -EFAULT; - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -EFAULT; - } + if (bulkreq.ocount != NULL && + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) + return -EFAULT; return 0; } @@ -577,6 +574,8 @@ xfs_file_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: case FS_IOC_GETFSMAP: case XFS_IOC_SCRUB_METADATA: + case XFS_IOC_BULKSTAT: + case XFS_IOC_INUMBERS: return xfs_file_ioctl(filp, cmd, p); #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) /* @@ -674,7 +673,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - return xfs_compat_ioc_bulkstat(mp, cmd, arg); + return xfs_compat_ioc_fsbulkstat(mp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: case XFS_IOC_PATH_TO_FSHANDLE_32: { diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index d28fa824284a..7985344d3aa6 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime { __s32 tv_nsec; /* and nanoseconds */ } compat_xfs_bstime_t; -typedef struct compat_xfs_bstat { +struct compat_xfs_bstat { __u64 bs_ino; /* inode number */ __u16 bs_mode; /* type and mode */ __u16 bs_nlink; /* number of links */ @@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat { __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ -} __compat_packed compat_xfs_bstat_t; +} __compat_packed; -typedef struct compat_xfs_fsop_bulkreq { +struct compat_xfs_fsop_bulkreq { compat_uptr_t lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ compat_uptr_t ubuffer; /* user buffer for inode desc. */ compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; +}; #define XFS_IOC_FSBULKSTAT_32 \ _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) @@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext { xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ + struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */ } __compat_packed compat_xfs_swapext_t; #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) @@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 { #define XFS_IOC_FSGEOMETRY_V1_32 \ _IOR('X', 100, struct compat_xfs_fsop_geom_v1) -typedef struct compat_xfs_inogrp { +struct compat_xfs_inogrp { __u64 xi_startino; /* starting inode number */ __s32 xi_alloccount; /* # bits set in allocmask */ __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; +} __attribute__((packed)); /* These growfs input structures have padding on the end, so must translate */ typedef struct compat_xfs_growfs_data { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 63d323916bba..3a4310d7cb59 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -4,7 +4,6 @@ * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ -#include <linux/iomap.h> #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" @@ -12,7 +11,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" @@ -25,7 +23,6 @@ #include "xfs_inode_item.h" #include "xfs_iomap.h" #include "xfs_trace.h" -#include "xfs_icache.h" #include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" @@ -779,7 +776,7 @@ xfs_iomap_write_unwritten( * complete here and might deadlock on the iolock. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 74047bd0c1ae..ff3c1fae5357 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -10,30 +10,20 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_acl.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_attr.h" #include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" -#include "xfs_trans_space.h" #include "xfs_iomap.h" -#include "xfs_defer.h" -#include <linux/capability.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/security.h> -#include <linux/iomap.h> -#include <linux/slab.h> #include <linux/iversion.h> /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 1e1a0af1dd34..a8a06bb78ea8 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -14,46 +14,66 @@ #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_health.h" /* - * Return stat information for one inode. - * Return 0 if ok, else errno. + * Bulk Stat + * ========= + * + * Use the inode walking functions to fill out struct xfs_bulkstat for every + * allocated inode, then pass the stat information to some externally provided + * iteration function. */ -int + +struct xfs_bstat_chunk { + bulkstat_one_fmt_pf formatter; + struct xfs_ibulk *breq; + struct xfs_bulkstat *buf; +}; + +/* + * Fill out the bulkstat info for a single inode and report it somewhere. + * + * bc->breq->lastino is effectively the inode cursor as we walk through the + * filesystem. Therefore, we update it any time we need to move the cursor + * forward, regardless of whether or not we're sending any bstat information + * back to userspace. If the inode is internal metadata or, has been freed + * out from under us, we just simply keep going. + * + * However, if any other type of error happens we want to stop right where we + * are so that userspace will call back with exact number of the bad inode and + * we can send back an error code. + * + * Note that if the formatter tells us there's no space left in the buffer we + * move the cursor forward and abort the walk. + */ +STATIC int xfs_bulkstat_one_int( - struct xfs_mount *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + struct xfs_bstat_chunk *bc) { struct xfs_icdinode *dic; /* dinode core info pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct inode *inode; - struct xfs_bstat *buf; /* return buffer */ - int error = 0; /* error value */ + struct xfs_bulkstat *buf = bc->buf; + int error = -EINVAL; - *stat = BULKSTAT_RV_NOTHING; + if (xfs_internal_inum(mp, ino)) + goto out_advance; - if (!buffer || xfs_internal_inum(mp, ino)) - return -EINVAL; - - buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); - if (!buf) - return -ENOMEM; - - error = xfs_iget(mp, NULL, ino, + error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); + if (error == -ENOENT || error == -EINVAL) + goto out_advance; if (error) - goto out_free; + goto out; ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); @@ -64,37 +84,35 @@ xfs_bulkstat_one_int( /* xfs_iget returns the following without needing * further change. */ - buf->bs_projid_lo = dic->di_projid_lo; - buf->bs_projid_hi = dic->di_projid_hi; + buf->bs_projectid = xfs_get_projid(ip); buf->bs_ino = ino; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; buf->bs_nlink = inode->i_nlink; - buf->bs_atime.tv_sec = inode->i_atime.tv_sec; - buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; - buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; - buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; - buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; - buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; + buf->bs_atime = inode->i_atime.tv_sec; + buf->bs_atime_nsec = inode->i_atime.tv_nsec; + buf->bs_mtime = inode->i_mtime.tv_sec; + buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; + buf->bs_ctime = inode->i_ctime.tv_sec; + buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; + buf->bs_btime = dic->di_crtime.t_sec; + buf->bs_btime_nsec = dic->di_crtime.t_nsec; buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode; buf->bs_xflags = xfs_ip2xflags(ip); - buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; + buf->bs_extsize_blks = dic->di_extsize; buf->bs_extents = dic->di_nextents; - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); xfs_bulkstat_health(ip, buf); - buf->bs_dmevmask = dic->di_dmevmask; - buf->bs_dmstate = dic->di_dmstate; buf->bs_aextents = dic->di_anextents; buf->bs_forkoff = XFS_IFORK_BOFF(ip); + buf->bs_version = XFS_BULKSTAT_VERSION_V5; if (dic->di_version == 3) { if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) - buf->bs_cowextsize = dic->di_cowextsize << - mp->m_sb.sb_blocklog; + buf->bs_cowextsize_blks = dic->di_cowextsize; } switch (dic->di_format) { @@ -118,385 +136,121 @@ xfs_bulkstat_one_int( xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); - error = formatter(buffer, ubsize, ubused, buf); - if (!error) - *stat = BULKSTAT_RV_DIDONE; + error = bc->formatter(bc->breq, buf); + if (error == XFS_IBULK_ABORT) + goto out_advance; + if (error) + goto out; - out_free: - kmem_free(buf); +out_advance: + /* + * Advance the cursor to the inode that comes after the one we just + * looked at. We want the caller to move along if the bulkstat + * information was copied successfully; if we tried to grab the inode + * but it's no longer allocated; or if it's internal metadata. + */ + bc->breq->startino = ino + 1; +out: return error; } -/* Return 0 on success or positive error */ -STATIC int -xfs_bulkstat_one_fmt( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) -{ - if (ubsize < sizeof(*buffer)) - return -ENOMEM; - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return -EFAULT; - if (ubused) - *ubused = sizeof(*buffer); - return 0; -} - +/* Bulkstat a single inode. */ int xfs_bulkstat_one( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) { - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt, ubused, stat); -} + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; -/* - * Loop over all clusters in a chunk for a given incore inode allocation btree - * record. Do a readahead if there are any allocated inodes in that cluster. - */ -STATIC void -xfs_bulkstat_ichunk_ra( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irec) -{ - xfs_agblock_t agbno; - struct blk_plug plug; - int i; /* inode chunk index */ - - agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); - - blk_start_plug(&plug); - for (i = 0; i < XFS_INODES_PER_CHUNK; - i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) { - if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) & - ~irec->ir_free) { - xfs_btree_reada_bufs(mp, agno, agbno, - mp->m_blocks_per_cluster, - &xfs_inode_buf_ops); - } - } - blk_finish_plug(&plug); -} + ASSERT(breq->icount == 1); -/* - * Lookup the inode chunk that the given inode lives in and then get the record - * if we found the chunk. If the inode was not the last in the chunk and there - * are some left allocated, update the data for the pointed-to record as well as - * return the count of grabbed inodes. - */ -STATIC int -xfs_bulkstat_grab_ichunk( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t agino, /* starting inode of chunk */ - int *icount,/* return # of inodes grabbed */ - struct xfs_inobt_rec_incore *irec) /* btree record */ -{ - int idx; /* index into inode chunk */ - int stat; - int error = 0; - - /* Lookup the inode chunk that this inode lives in */ - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); - if (error) - return error; - if (!stat) { - *icount = 0; - return error; - } + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), + KM_SLEEP | KM_MAYFAIL); + if (!bc.buf) + return -ENOMEM; - /* Get the record, should always work */ - error = xfs_inobt_get_rec(cur, irec, &stat); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); - /* Check if the record contains the inode in request */ - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { - *icount = 0; - return 0; - } + kmem_free(bc.buf); - idx = agino - irec->ir_startino + 1; - if (idx < XFS_INODES_PER_CHUNK && - (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { - int i; - - /* We got a right chunk with some left inodes allocated at it. - * Grab the chunk record. Mark all the uninteresting inodes - * free -- because they're before our start point. - */ - for (i = 0; i < idx; i++) { - if (XFS_INOBT_MASK(i) & ~irec->ir_free) - irec->ir_freecount++; - } - - irec->ir_free |= xfs_inobt_maskn(0, idx); - *icount = irec->ir_count - irec->ir_freecount; - } + /* + * If we reported one inode to userspace then we abort because we hit + * the end of the buffer. Don't leak that back to userspace. + */ + if (error == XFS_IWALK_ABORT) + error = 0; - return 0; + return error; } -#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) - -struct xfs_bulkstat_agichunk { - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -/* - * Process inodes in chunk with a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - */ static int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp, - xfs_agino_t *last_agino) +xfs_bulkstat_iwalk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { - char __user **ubufp = acp->ac_ubuffer; - int chunkidx; - int error = 0; - xfs_agino_t agino = irbp->ir_startino; - - for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; - chunkidx++, agino++) { - int fmterror; - int ubused; - - /* inode won't fit in buffer, we are done */ - if (acp->ac_ubleft < statstruct_size) - break; - - /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) - continue; - - /* Get the inode and fill in a single buffer */ - ubused = statstruct_size; - error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), - *ubufp, acp->ac_ubleft, &ubused, &fmterror); - - if (fmterror == BULKSTAT_RV_GIVEUP || - (error && error != -ENOENT && error != -EINVAL)) { - acp->ac_ubleft = 0; - ASSERT(error); - break; - } - - /* be careful not to leak error if at end of chunk */ - if (fmterror == BULKSTAT_RV_NOTHING || error) { - error = 0; - continue; - } - - *ubufp += ubused; - acp->ac_ubleft -= ubused; - acp->ac_ubelem++; - } - - /* - * Post-update *last_agino. At this point, agino will always point one - * inode past the last inode we processed successfully. Hence we - * substract that inode when setting the *last_agino cursor so that we - * return the correct cookie to userspace. On the next bulkstat call, - * the inode under the lastino cookie will be skipped as we have already - * processed it here. - */ - *last_agino = agino - 1; + int error; + error = xfs_bulkstat_one_int(mp, tp, ino, data); + /* bulkstat just skips over missing inodes */ + if (error == -ENOENT || error == -EINVAL) + return 0; return error; } /* - * Return stat information in bulk (by-inode) for the filesystem. + * Check the incoming lastino parameter. + * + * We allow any inode value that could map to physical space inside the + * filesystem because if there are no inodes there, bulkstat moves on to the + * next chunk. In other words, the magic agino value of zero takes us to the + * first chunk in the AG, and an agino value past the end of the AG takes us to + * the first chunk in the next AG. + * + * Therefore we can end early if the requested inode is beyond the end of the + * filesystem or doesn't map properly. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastinop, /* last inode returned */ - int *ubcountp, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size, /* sizeof struct filling */ - char __user *ubuffer, /* buffer with inode stats */ - int *done) /* 1 if there are more stats to get */ +static inline bool +xfs_bulkstat_already_done( + struct xfs_mount *mp, + xfs_ino_t startino) { - xfs_buf_t *agbp; /* agi header buffer */ - xfs_agino_t agino; /* inode # in allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - int nirbuf; /* size of irbuf */ - int ubcount; /* size of user's buffer */ - struct xfs_bulkstat_agichunk ac; - int error = 0; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); - /* - * Get the last inode value, see if there's nothing to do. - */ - agno = XFS_INO_TO_AGNO(mp, *lastinop); - agino = XFS_INO_TO_AGINO(mp, *lastinop); - if (agno >= mp->m_sb.sb_agcount || - *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { - *done = 1; - *ubcountp = 0; - return 0; - } + return agno >= mp->m_sb.sb_agcount || + startino != XFS_AGINO_TO_INO(mp, agno, agino); +} - ubcount = *ubcountp; /* statstruct's */ - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; - ac.ac_ubelem = 0; +/* Return stat information in bulk (by-inode) for the filesystem. */ +int +xfs_bulkstat( + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) +{ + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; - *ubcountp = 0; - *done = 0; + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; - irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); - if (!irbuf) + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), + KM_SLEEP | KM_MAYFAIL); + if (!bc.buf) return -ENOMEM; - nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); - /* - * Loop over the allocation groups, starting from the last - * inode returned; 0 means start of the allocation group. - */ - while (agno < mp->m_sb.sb_agcount) { - struct xfs_inobt_rec_incore *irbp = irbuf; - struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; - bool end_of_ag = false; - int icount = 0; - int stat; - - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) - break; - /* - * Allocate and initialize a btree cursor for ialloc btree. - */ - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO); - if (agino > 0) { - /* - * In the middle of an allocation group, we need to get - * the remainder of the chunk we're in. - */ - struct xfs_inobt_rec_incore r; - - error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); - if (error) - goto del_cursor; - if (icount) { - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - } - /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &stat); - } else { - /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); - } - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - - /* - * Loop through inode btree records in this ag, - * until we run out of inodes or space in the buffer. - */ - while (irbp < irbufend && icount < ubcount) { - struct xfs_inobt_rec_incore r; - - error = xfs_inobt_get_rec(cur, &r, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - - /* - * If this chunk has any allocated inodes, save it. - * Also start read-ahead now for this chunk. - */ - if (r.ir_freecount < r.ir_count) { - xfs_bulkstat_ichunk_ra(mp, agno, &r); - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - icount += r.ir_count - r.ir_freecount; - } - error = xfs_btree_increment(cur, 0, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - cond_resched(); - } - - /* - * Drop the btree buffers and the agi buffer as we can't hold any - * of the locks these represent when calling iget. If there is a - * pending error, then we are done. - */ -del_cursor: - xfs_btree_del_cursor(cur, error); - xfs_buf_relse(agbp); - if (error) - break; - /* - * Now format all the good inodes into the user's buffer. The - * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer - * for the next loop iteration. - */ - irbufend = irbp; - for (irbp = irbuf; - irbp < irbufend && ac.ac_ubleft >= statstruct_size; - irbp++) { - error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac, - &agino); - if (error) - break; - - cond_resched(); - } - - /* - * If we've run out of space or had a formatting error, we - * are now done - */ - if (ac.ac_ubleft < statstruct_size || error) - break; - - if (end_of_ag) { - agno++; - agino = 0; - } - } - /* - * Done, we're either out of filesystem or space to put the data. - */ - kmem_free(irbuf); - *ubcountp = ac.ac_ubelem; + error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags, + xfs_bulkstat_iwalk, breq->icount, &bc); + + kmem_free(bc.buf); /* * We found some inodes, so clear the error status and return them. @@ -505,135 +259,136 @@ del_cursor: * triggered again and propagated to userspace as there will be no * formatted inodes in the buffer. */ - if (ac.ac_ubelem) + if (breq->ocount > 0) error = 0; - /* - * If we ran out of filesystem, lastino will point off the end of - * the filesystem so the next call will return immediately. - */ - *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); - if (agno >= mp->m_sb.sb_agcount) - *done = 1; - return error; } -int -xfs_inumbers_fmt( - void __user *ubuffer, /* buffer to write to */ - const struct xfs_inogrp *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written) /* # of bytes written */ +/* Convert bulkstat (v5) to bstat (v1). */ +void +xfs_bulkstat_to_bstat( + struct xfs_mount *mp, + struct xfs_bstat *bs1, + const struct xfs_bulkstat *bstat) { - if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) - return -EFAULT; - *written = count * sizeof(*buffer); - return 0; + memset(bs1, 0, sizeof(struct xfs_bstat)); + bs1->bs_ino = bstat->bs_ino; + bs1->bs_mode = bstat->bs_mode; + bs1->bs_nlink = bstat->bs_nlink; + bs1->bs_uid = bstat->bs_uid; + bs1->bs_gid = bstat->bs_gid; + bs1->bs_rdev = bstat->bs_rdev; + bs1->bs_blksize = bstat->bs_blksize; + bs1->bs_size = bstat->bs_size; + bs1->bs_atime.tv_sec = bstat->bs_atime; + bs1->bs_mtime.tv_sec = bstat->bs_mtime; + bs1->bs_ctime.tv_sec = bstat->bs_ctime; + bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; + bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; + bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; + bs1->bs_blocks = bstat->bs_blocks; + bs1->bs_xflags = bstat->bs_xflags; + bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); + bs1->bs_extents = bstat->bs_extents; + bs1->bs_gen = bstat->bs_gen; + bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; + bs1->bs_forkoff = bstat->bs_forkoff; + bs1->bs_projid_hi = bstat->bs_projectid >> 16; + bs1->bs_sick = bstat->bs_sick; + bs1->bs_checked = bstat->bs_checked; + bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); + bs1->bs_dmevmask = 0; + bs1->bs_dmstate = 0; + bs1->bs_aextents = bstat->bs_aextents; +} + +struct xfs_inumbers_chunk { + inumbers_fmt_pf formatter; + struct xfs_ibulk *breq; +}; + +/* + * INUMBERS + * ======== + * This is how we export inode btree records to userspace, so that XFS tools + * can figure out where inodes are allocated. + */ + +/* + * Format the inode group structure and report it somewhere. + * + * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk + * through the filesystem so we move it forward unless there was a runtime + * error. If the formatter tells us the buffer is now full we also move the + * cursor forward and abort the walk. + */ +STATIC int +xfs_inumbers_walk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + const struct xfs_inobt_rec_incore *irec, + void *data) +{ + struct xfs_inumbers inogrp = { + .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), + .xi_alloccount = irec->ir_count - irec->ir_freecount, + .xi_allocmask = ~irec->ir_free, + .xi_version = XFS_INUMBERS_VERSION_V5, + }; + struct xfs_inumbers_chunk *ic = data; + int error; + + error = ic->formatter(ic->breq, &inogrp); + if (error && error != XFS_IBULK_ABORT) + return error; + + ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + + XFS_INODES_PER_CHUNK; + return error; } /* * Return inode number table for the filesystem. */ -int /* error status */ +int xfs_inumbers( - struct xfs_mount *mp,/* mount point for filesystem */ - xfs_ino_t *lastino,/* last inode returned */ - int *count,/* size of buffer/count returned */ - void __user *ubuffer,/* buffer with inode descriptions */ + struct xfs_ibulk *breq, inumbers_fmt_pf formatter) { - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); - struct xfs_btree_cur *cur = NULL; - struct xfs_buf *agbp = NULL; - struct xfs_inogrp *buffer; - int bcount; - int left = *count; - int bufidx = 0; + struct xfs_inumbers_chunk ic = { + .formatter = formatter, + .breq = breq, + }; int error = 0; - *count = 0; - if (agno >= mp->m_sb.sb_agcount || - *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) - return error; + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; - bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); - do { - struct xfs_inobt_rec_incore r; - int stat; - - if (!agbp) { - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) - break; - - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, - &stat); - if (error) - break; - if (!stat) - goto next_ag; - } - - error = xfs_inobt_get_rec(cur, &r, &stat); - if (error) - break; - if (!stat) - goto next_ag; - - agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; - buffer[bufidx].xi_startino = - XFS_AGINO_TO_INO(mp, agno, r.ir_startino); - buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount; - buffer[bufidx].xi_allocmask = ~r.ir_free; - if (++bufidx == bcount) { - long written; - - error = formatter(ubuffer, buffer, bufidx, &written); - if (error) - break; - ubuffer += written; - *count += bufidx; - bufidx = 0; - } - if (!--left) - break; - - error = xfs_btree_increment(cur, 0, &stat); - if (error) - break; - if (stat) - continue; - -next_ag: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - agino = 0; - agno++; - } while (agno < mp->m_sb.sb_agcount); - - if (!error) { - if (bufidx) { - long written; - - error = formatter(ubuffer, buffer, bufidx, &written); - if (!error) - *count += bufidx; - } - *lastino = XFS_AGINO_TO_INO(mp, agno, agino); - } + error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags, + xfs_inumbers_walk, breq->icount, &ic); - kmem_free(buffer); - if (cur) - xfs_btree_del_cursor(cur, error); - if (agbp) - xfs_buf_relse(agbp); + /* + * We found some inode groups, so clear the error status and return + * them. The lastino pointer will point directly at the inode that + * triggered any error that occurred, so on the next call the error + * will be triggered again and propagated to userspace as there will be + * no formatted inode groups in the buffer. + */ + if (breq->ocount > 0) + error = 0; return error; } + +/* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ +void +xfs_inumbers_to_inogrp( + struct xfs_inogrp *ig1, + const struct xfs_inumbers *ig) +{ + ig1->xi_startino = ig->xi_startino; + ig1->xi_alloccount = ig->xi_alloccount; + ig1->xi_allocmask = ig->xi_allocmask; +} diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 8a822285b671..e90c1fc5b981 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -5,83 +5,55 @@ #ifndef __XFS_ITABLE_H__ #define __XFS_ITABLE_H__ -/* - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat - * structures (by the dmi library). This is a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c - */ -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +/* In-memory representation of a userspace request for batch inode data. */ +struct xfs_ibulk { + struct xfs_mount *mp; + void __user *ubuffer; /* user output buffer */ + xfs_ino_t startino; /* start with this inode */ + unsigned int icount; /* number of elements in ubuffer */ + unsigned int ocount; /* number of records returned */ + unsigned int flags; /* see XFS_IBULK_FLAG_* */ +}; + +/* Only iterate within the same AG as startino */ +#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) + +/* Return value that means we want to abort the walk. */ +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) /* - * Values for stat return value. + * Advance the user buffer pointer by one record of the given size. If the + * buffer is now full, return the appropriate error code. */ -#define BULKSTAT_RV_NOTHING 0 -#define BULKSTAT_RV_DIDONE 1 -#define BULKSTAT_RV_GIVEUP 2 +static inline int +xfs_ibulk_advance( + struct xfs_ibulk *breq, + size_t bytes) +{ + char __user *b = breq->ubuffer; + + breq->ubuffer = b + bytes; + breq->ocount++; + return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0; +} /* * Return stat information in bulk (by-inode) for the filesystem. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastino, /* last inode returned */ - int *count, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size,/* sizeof struct that we're filling */ - char __user *ubuffer,/* buffer with inode stats */ - int *done); /* 1 if there are more stats to get */ - -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ - void __user *ubuffer, /* buffer to write to */ - int ubsize, /* remaining user buffer sz */ - int *ubused, /* bytes used by formatter */ - const xfs_bstat_t *buffer); /* buffer to read from */ - -int -xfs_bulkstat_one_int( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - bulkstat_one_fmt_pf formatter, - int *ubused, - int *stat); -int -xfs_bulkstat_one( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat); -typedef int (*inumbers_fmt_pf)( - void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written); /* # of bytes written */ +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); +void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1, + const struct xfs_bulkstat *bstat); -int -xfs_inumbers_fmt( - void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written); /* # of bytes written */ +typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp); -int /* error status */ -xfs_inumbers( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *last, /* last inode returned */ - int *count, /* size of buffer/count returned */ - void __user *buffer, /* buffer with inode info */ - inumbers_fmt_pf formatter); +int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter); +void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1, + const struct xfs_inumbers *ig); #endif /* __XFS_ITABLE_H__ */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c new file mode 100644 index 000000000000..8c7d727149ea --- /dev/null +++ b/fs/xfs/xfs_iwalk.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_iwalk.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_icache.h" +#include "xfs_health.h" +#include "xfs_trans.h" +#include "xfs_pwork.h" + +/* + * Walking Inodes in the Filesystem + * ================================ + * + * This iterator function walks a subset of filesystem inodes in increasing + * order from @startino until there are no more inodes. For each allocated + * inode it finds, it calls a walk function with the relevant inode number and + * a pointer to caller-provided data. The walk function can return the usual + * negative error code to stop the iteration; 0 to continue the iteration; or + * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the + * caller. + * + * Internally, we allow the walk function to do anything, which means that we + * cannot maintain the inobt cursor or our lock on the AGI buffer. We + * therefore cache the inobt records in kernel memory and only call the walk + * function when our memory buffer is full. @nr_recs is the number of records + * that we've cached, and @sz_recs is the size of our cache. + * + * It is the responsibility of the walk function to ensure it accesses + * allocated inodes, as the inobt records may be stale by the time they are + * acted upon. + */ + +struct xfs_iwalk_ag { + /* parallel work control data; will be null if single threaded */ + struct xfs_pwork pwork; + + struct xfs_mount *mp; + struct xfs_trans *tp; + + /* Where do we start the traversal? */ + xfs_ino_t startino; + + /* Array of inobt records we cache. */ + struct xfs_inobt_rec_incore *recs; + + /* Number of entries allocated for the @recs array. */ + unsigned int sz_recs; + + /* Number of entries in the @recs array that are in use. */ + unsigned int nr_recs; + + /* Inode walk function and data pointer. */ + xfs_iwalk_fn iwalk_fn; + xfs_inobt_walk_fn inobt_walk_fn; + void *data; + + /* + * Make it look like the inodes up to startino are free so that + * bulkstat can start its inode iteration at the correct place without + * needing to special case everywhere. + */ + unsigned int trim_start:1; + + /* Skip empty inobt records? */ + unsigned int skip_empty:1; +}; + +/* + * Loop over all clusters in a chunk for a given incore inode allocation btree + * record. Do a readahead if there are any allocated inodes in that cluster. + */ +STATIC void +xfs_iwalk_ichunk_ra( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec) +{ + struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_agblock_t agbno; + struct blk_plug plug; + int i; /* inode chunk index */ + + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); + + blk_start_plug(&plug); + for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) { + xfs_inofree_t imask; + + imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster); + if (imask & ~irec->ir_free) { + xfs_btree_reada_bufs(mp, agno, agbno, + igeo->blocks_per_cluster, + &xfs_inode_buf_ops); + } + agbno += igeo->blocks_per_cluster; + } + blk_finish_plug(&plug); +} + +/* + * Set the bits in @irec's free mask that correspond to the inodes before + * @agino so that we skip them. This is how we restart an inode walk that was + * interrupted in the middle of an inode record. + */ +STATIC void +xfs_iwalk_adjust_start( + xfs_agino_t agino, /* starting inode of chunk */ + struct xfs_inobt_rec_incore *irec) /* btree record */ +{ + int idx; /* index into inode chunk */ + int i; + + idx = agino - irec->ir_startino; + + /* + * We got a right chunk with some left inodes allocated at it. Grab + * the chunk record. Mark all the uninteresting inodes free because + * they're before our start point. + */ + for (i = 0; i < idx; i++) { + if (XFS_INOBT_MASK(i) & ~irec->ir_free) + irec->ir_freecount++; + } + + irec->ir_free |= xfs_inobt_maskn(0, idx); +} + +/* Allocate memory for a walk. */ +STATIC int +xfs_iwalk_alloc( + struct xfs_iwalk_ag *iwag) +{ + size_t size; + + ASSERT(iwag->recs == NULL); + iwag->nr_recs = 0; + + /* Allocate a prefetch buffer for inobt records. */ + size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore); + iwag->recs = kmem_alloc(size, KM_MAYFAIL); + if (iwag->recs == NULL) + return -ENOMEM; + + return 0; +} + +/* Free memory we allocated for a walk. */ +STATIC void +xfs_iwalk_free( + struct xfs_iwalk_ag *iwag) +{ + kmem_free(iwag->recs); + iwag->recs = NULL; +} + +/* For each inuse inode in each cached inobt record, call our function. */ +STATIC int +xfs_iwalk_ag_recs( + struct xfs_iwalk_ag *iwag) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + xfs_ino_t ino; + unsigned int i, j; + xfs_agnumber_t agno; + int error; + + agno = XFS_INO_TO_AGNO(mp, iwag->startino); + for (i = 0; i < iwag->nr_recs; i++) { + struct xfs_inobt_rec_incore *irec = &iwag->recs[i]; + + trace_xfs_iwalk_ag_rec(mp, agno, irec); + + if (xfs_pwork_want_abort(&iwag->pwork)) + return 0; + + if (iwag->inobt_walk_fn) { + error = iwag->inobt_walk_fn(mp, tp, agno, irec, + iwag->data); + if (error) + return error; + } + + if (!iwag->iwalk_fn) + continue; + + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + if (xfs_pwork_want_abort(&iwag->pwork)) + return 0; + + /* Skip if this inode is free */ + if (XFS_INOBT_MASK(j) & irec->ir_free) + continue; + + /* Otherwise call our function. */ + ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j); + error = iwag->iwalk_fn(mp, tp, ino, iwag->data); + if (error) + return error; + } + } + + return 0; +} + +/* Delete cursor and let go of AGI. */ +static inline void +xfs_iwalk_del_inobt( + struct xfs_trans *tp, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int error) +{ + if (*curpp) { + xfs_btree_del_cursor(*curpp, error); + *curpp = NULL; + } + if (*agi_bpp) { + xfs_trans_brelse(tp, *agi_bpp); + *agi_bpp = NULL; + } +} + +/* + * Set ourselves up for walking inobt records starting from a given point in + * the filesystem. + * + * If caller passed in a nonzero start inode number, load the record from the + * inobt and make the record look like all the inodes before agino are free so + * that we skip them, and then move the cursor to the next inobt record. This + * is how we support starting an iwalk in the middle of an inode chunk. + * + * If the caller passed in a start number of zero, move the cursor to the first + * inobt record. + * + * The caller is responsible for cleaning up the cursor and buffer pointer + * regardless of the error status. + */ +STATIC int +xfs_iwalk_ag_start( + struct xfs_iwalk_ag *iwag, + xfs_agnumber_t agno, + xfs_agino_t agino, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int *has_more) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_inobt_rec_incore *irec; + int error; + + /* Set up a fresh cursor and empty the inobt cache. */ + iwag->nr_recs = 0; + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); + if (error) + return error; + + /* Starting at the beginning of the AG? That's easy! */ + if (agino == 0) + return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more); + + /* + * Otherwise, we have to grab the inobt record where we left off, stuff + * the record into our cache, and then see if there are more records. + * We require a lookup cache of at least two elements so that the + * caller doesn't have to deal with tearing down the cursor to walk the + * records. + */ + error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more); + if (error) + return error; + + /* + * If the LE lookup at @agino yields no records, jump ahead to the + * inobt cursor increment to see if there are more records to process. + */ + if (!*has_more) + goto out_advance; + + /* Get the record, should always work */ + irec = &iwag->recs[iwag->nr_recs]; + error = xfs_inobt_get_rec(*curpp, irec, has_more); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1); + + /* + * If the LE lookup yielded an inobt record before the cursor position, + * skip it and see if there's another one after it. + */ + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) + goto out_advance; + + /* + * If agino fell in the middle of the inode record, make it look like + * the inodes up to agino are free so that we don't return them again. + */ + if (iwag->trim_start) + xfs_iwalk_adjust_start(agino, irec); + + /* + * The prefetch calculation is supposed to give us a large enough inobt + * record cache that grab_ichunk can stage a partial first record and + * the loop body can cache a record without having to check for cache + * space until after it reads an inobt record. + */ + iwag->nr_recs++; + ASSERT(iwag->nr_recs < iwag->sz_recs); + +out_advance: + return xfs_btree_increment(*curpp, 0, has_more); +} + +/* + * The inobt record cache is full, so preserve the inobt cursor state and + * run callbacks on the cached inobt records. When we're done, restore the + * cursor state to wherever the cursor would have been had the cache not been + * full (and therefore we could've just incremented the cursor) if *@has_more + * is true. On exit, *@has_more will indicate whether or not the caller should + * try for more inode records. + */ +STATIC int +xfs_iwalk_run_callbacks( + struct xfs_iwalk_ag *iwag, + xfs_agnumber_t agno, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int *has_more) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_inobt_rec_incore *irec; + xfs_agino_t restart; + int error; + + ASSERT(iwag->nr_recs > 0); + + /* Delete cursor but remember the last record we cached... */ + xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); + irec = &iwag->recs[iwag->nr_recs - 1]; + restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; + + error = xfs_iwalk_ag_recs(iwag); + if (error) + return error; + + /* ...empty the cache... */ + iwag->nr_recs = 0; + + if (!has_more) + return 0; + + /* ...and recreate the cursor just past where we left off. */ + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); + if (error) + return error; + + return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); +} + +/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ +STATIC int +xfs_iwalk_ag( + struct xfs_iwalk_ag *iwag) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_buf *agi_bp = NULL; + struct xfs_btree_cur *cur = NULL; + xfs_agnumber_t agno; + xfs_agino_t agino; + int has_more; + int error = 0; + + /* Set up our cursor at the right place in the inode btree. */ + agno = XFS_INO_TO_AGNO(mp, iwag->startino); + agino = XFS_INO_TO_AGINO(mp, iwag->startino); + error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more); + + while (!error && has_more) { + struct xfs_inobt_rec_incore *irec; + + cond_resched(); + if (xfs_pwork_want_abort(&iwag->pwork)) + goto out; + + /* Fetch the inobt record. */ + irec = &iwag->recs[iwag->nr_recs]; + error = xfs_inobt_get_rec(cur, irec, &has_more); + if (error || !has_more) + break; + + /* No allocated inodes in this chunk; skip it. */ + if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { + error = xfs_btree_increment(cur, 0, &has_more); + if (error) + break; + continue; + } + + /* + * Start readahead for this inode chunk in anticipation of + * walking the inodes. + */ + if (iwag->iwalk_fn) + xfs_iwalk_ichunk_ra(mp, agno, irec); + + /* + * If there's space in the buffer for more records, increment + * the btree cursor and grab more. + */ + if (++iwag->nr_recs < iwag->sz_recs) { + error = xfs_btree_increment(cur, 0, &has_more); + if (error || !has_more) + break; + continue; + } + + /* + * Otherwise, we need to save cursor state and run the callback + * function on the cached records. The run_callbacks function + * is supposed to return a cursor pointing to the record where + * we would be if we had been able to increment like above. + */ + ASSERT(has_more); + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, + &has_more); + } + + if (iwag->nr_recs == 0 || error) + goto out; + + /* Walk the unprocessed records in the cache. */ + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more); + +out: + xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error); + return error; +} + +/* + * We experimentally determined that the reduction in ioctl call overhead + * diminishes when userspace asks for more than 2048 inodes, so we'll cap + * prefetch at this point. + */ +#define IWALK_MAX_INODE_PREFETCH (2048U) + +/* + * Given the number of inodes to prefetch, set the number of inobt records that + * we cache in memory, which controls the number of inodes we try to read + * ahead. Set the maximum if @inodes == 0. + */ +static inline unsigned int +xfs_iwalk_prefetch( + unsigned int inodes) +{ + unsigned int inobt_records; + + /* + * If the caller didn't tell us the number of inodes they wanted, + * assume the maximum prefetch possible for best performance. + * Otherwise, cap prefetch at that maximum so that we don't start an + * absurd amount of prefetch. + */ + if (inodes == 0) + inodes = IWALK_MAX_INODE_PREFETCH; + inodes = min(inodes, IWALK_MAX_INODE_PREFETCH); + + /* Round the inode count up to a full chunk. */ + inodes = round_up(inodes, XFS_INODES_PER_CHUNK); + + /* + * In order to convert the number of inodes to prefetch into an + * estimate of the number of inobt records to cache, we require a + * conversion factor that reflects our expectations of the average + * loading factor of an inode chunk. Based on data gathered, most + * (but not all) filesystems manage to keep the inode chunks totally + * full, so we'll underestimate slightly so that our readahead will + * still deliver the performance we want on aging filesystems: + * + * inobt = inodes / (INODES_PER_CHUNK * (4 / 5)); + * + * The funny math is to avoid integer division. + */ + inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK); + + /* + * Allocate enough space to prefetch at least two inobt records so that + * we can cache both the record where the iwalk started and the next + * record. This simplifies the AG inode walk loop setup code. + */ + return max(inobt_records, 2U); +} + +/* + * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn + * will be called for each allocated inode, being passed the inode's number and + * @data. @max_prefetch controls how many inobt records' worth of inodes we + * try to readahead. + */ +int +xfs_iwalk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t startino, + unsigned int flags, + xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, + void *data) +{ + struct xfs_iwalk_ag iwag = { + .mp = mp, + .tp = tp, + .iwalk_fn = iwalk_fn, + .data = data, + .startino = startino, + .sz_recs = xfs_iwalk_prefetch(inode_records), + .trim_start = 1, + .skip_empty = 1, + .pwork = XFS_PWORK_SINGLE_THREADED, + }; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); + + error = xfs_iwalk_alloc(&iwag); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_iwalk_ag(&iwag); + if (error) + break; + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + xfs_iwalk_free(&iwag); + return error; +} + +/* Run per-thread iwalk work. */ +static int +xfs_iwalk_ag_work( + struct xfs_mount *mp, + struct xfs_pwork *pwork) +{ + struct xfs_iwalk_ag *iwag; + int error = 0; + + iwag = container_of(pwork, struct xfs_iwalk_ag, pwork); + if (xfs_pwork_want_abort(pwork)) + goto out; + + error = xfs_iwalk_alloc(iwag); + if (error) + goto out; + + error = xfs_iwalk_ag(iwag); + xfs_iwalk_free(iwag); +out: + kmem_free(iwag); + return error; +} + +/* + * Walk all the inodes in the filesystem using multiple threads to process each + * AG. + */ +int +xfs_iwalk_threaded( + struct xfs_mount *mp, + xfs_ino_t startino, + unsigned int flags, + xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, + bool polled, + void *data) +{ + struct xfs_pwork_ctl pctl; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + unsigned int nr_threads; + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); + + nr_threads = xfs_pwork_guess_datadev_parallelism(mp); + error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk", + nr_threads); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + struct xfs_iwalk_ag *iwag; + + if (xfs_pwork_ctl_want_abort(&pctl)) + break; + + iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP); + iwag->mp = mp; + iwag->iwalk_fn = iwalk_fn; + iwag->data = data; + iwag->startino = startino; + iwag->sz_recs = xfs_iwalk_prefetch(inode_records); + xfs_pwork_queue(&pctl, &iwag->pwork); + startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + if (polled) + xfs_pwork_poll(&pctl); + return xfs_pwork_destroy(&pctl); +} + +/* + * Allow callers to cache up to a page's worth of inobt records. This reflects + * the existing inumbers prefetching behavior. Since the inobt walk does not + * itself do anything with the inobt records, we can set a fairly high limit + * here. + */ +#define MAX_INOBT_WALK_PREFETCH \ + (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore)) + +/* + * Given the number of records that the user wanted, set the number of inobt + * records that we buffer in memory. Set the maximum if @inobt_records == 0. + */ +static inline unsigned int +xfs_inobt_walk_prefetch( + unsigned int inobt_records) +{ + /* + * If the caller didn't tell us the number of inobt records they + * wanted, assume the maximum prefetch possible for best performance. + */ + if (inobt_records == 0) + inobt_records = MAX_INOBT_WALK_PREFETCH; + + /* + * Allocate enough space to prefetch at least two inobt records so that + * we can cache both the record where the iwalk started and the next + * record. This simplifies the AG inode walk loop setup code. + */ + inobt_records = max(inobt_records, 2U); + + /* + * Cap prefetch at that maximum so that we don't use an absurd amount + * of memory. + */ + return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH); +} + +/* + * Walk all inode btree records in the filesystem starting from @startino. The + * @inobt_walk_fn will be called for each btree record, being passed the incore + * record and @data. @max_prefetch controls how many inobt records we try to + * cache ahead of time. + */ +int +xfs_inobt_walk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t startino, + unsigned int flags, + xfs_inobt_walk_fn inobt_walk_fn, + unsigned int inobt_records, + void *data) +{ + struct xfs_iwalk_ag iwag = { + .mp = mp, + .tp = tp, + .inobt_walk_fn = inobt_walk_fn, + .data = data, + .startino = startino, + .sz_recs = xfs_inobt_walk_prefetch(inobt_records), + .pwork = XFS_PWORK_SINGLE_THREADED, + }; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL)); + + error = xfs_iwalk_alloc(&iwag); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_iwalk_ag(&iwag); + if (error) + break; + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + xfs_iwalk_free(&iwag); + return error; +} diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h new file mode 100644 index 000000000000..6c960e10ed4d --- /dev/null +++ b/fs/xfs/xfs_iwalk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_IWALK_H__ +#define __XFS_IWALK_H__ + +/* Walk all inodes in the filesystem starting from @startino. */ +typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_ino_t ino, void *data); +/* Return values for xfs_iwalk_fn. */ +#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE) +#define XFS_IWALK_ABORT (XFS_ITER_ABORT) + +int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, + unsigned int flags, xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, void *data); +int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino, + unsigned int flags, xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, bool poll, void *data); + +/* Only iterate inodes within the same AG as @startino. */ +#define XFS_IWALK_SAME_AG (0x1) + +#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG) + +/* Walk all inode btree records in the filesystem starting from @startino. */ +typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, + const struct xfs_inobt_rec_incore *irec, + void *data); +/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */ +#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT) + +int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_ino_t startino, unsigned int flags, + xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records, + void *data); + +/* Only iterate inobt records within the same AG as @startino. */ +#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG) + +#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG) + +#endif /* __XFS_IWALK_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index edbd5a210df2..ca15105681ca 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -110,8 +110,6 @@ typedef __u32 xfs_nlink_t; #define current_restore_flags_nested(sp, f) \ (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) -#define spinlock_destroy(lock) - #define NBBY 8 /* number of bits per byte */ /* @@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) return x; } +int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, + char *data, unsigned int op); + #define ASSERT_ALWAYS(expr) \ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 457ced3ee3e1..00e9f5c388d3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -16,11 +16,7 @@ #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_log_recover.h" -#include "xfs_inode.h" #include "xfs_trace.h" -#include "xfs_fsops.h" -#include "xfs_cksum.h" #include "xfs_sysfs.h" #include "xfs_sb.h" #include "xfs_health.h" @@ -45,21 +41,14 @@ STATIC int xlog_space_left( struct xlog *log, atomic64_t *head); -STATIC int -xlog_sync( - struct xlog *log, - struct xlog_in_core *iclog); STATIC void xlog_dealloc_log( struct xlog *log); /* local state machine functions */ -STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); -STATIC void -xlog_state_do_callback( - struct xlog *log, - int aborted, - struct xlog_in_core *iclog); +STATIC void xlog_state_done_syncing( + struct xlog_in_core *iclog, + bool aborted); STATIC int xlog_state_get_iclog_space( struct xlog *log, @@ -107,8 +96,7 @@ STATIC void xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, - int count, - bool syncing); + int count); STATIC void xlog_verify_tail_lsn( struct xlog *log, @@ -117,7 +105,7 @@ xlog_verify_tail_lsn( #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) -#define xlog_verify_iclog(a,b,c,d) +#define xlog_verify_iclog(a,b,c) #define xlog_verify_tail_lsn(a,b,c) #endif @@ -541,32 +529,6 @@ xfs_log_done( return lsn; } -/* - * Attaches a new iclog I/O completion callback routine during - * transaction commit. If the log is in error state, a non-zero - * return code is handed back and the caller is responsible for - * executing the callback at an appropriate time. - */ -int -xfs_log_notify( - struct xlog_in_core *iclog, - xfs_log_callback_t *cb) -{ - int abortflg; - - spin_lock(&iclog->ic_callback_lock); - abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); - if (!abortflg) { - ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || - (iclog->ic_state == XLOG_STATE_WANT_SYNC)); - cb->cb_next = NULL; - *(iclog->ic_callback_tail) = cb; - iclog->ic_callback_tail = &(cb->cb_next); - } - spin_unlock(&iclog->ic_callback_lock); - return abortflg; -} - int xfs_log_release_iclog( struct xfs_mount *mp, @@ -807,16 +769,12 @@ xfs_log_mount_finish( * The mount has failed. Cancel the recovery if it hasn't completed and destroy * the log. */ -int +void xfs_log_mount_cancel( struct xfs_mount *mp) { - int error; - - error = xlog_recover_cancel(mp->m_log); + xlog_recover_cancel(mp->m_log); xfs_log_unmount(mp); - - return error; } /* @@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) * Or, if we are doing a forced umount (typically because of IO errors). */ if (mp->m_flags & XFS_MOUNT_NORECOVERY || - xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { + xfs_readonly_buftarg(log->l_targ)) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); return 0; } @@ -1244,53 +1202,49 @@ xlog_space_left( } -/* - * Log function which is called when an io completes. - * - * The log manager needs its own routine, in order to control what - * happens with the buffer after the write completes. - */ static void -xlog_iodone(xfs_buf_t *bp) +xlog_ioend_work( + struct work_struct *work) { - struct xlog_in_core *iclog = bp->b_log_item; - struct xlog *l = iclog->ic_log; - int aborted = 0; + struct xlog_in_core *iclog = + container_of(work, struct xlog_in_core, ic_end_io_work); + struct xlog *log = iclog->ic_log; + bool aborted = false; + int error; + + error = blk_status_to_errno(iclog->ic_bio.bi_status); +#ifdef DEBUG + /* treat writes with injected CRC errors as failed */ + if (iclog->ic_fail_crc) + error = -EIO; +#endif /* - * Race to shutdown the filesystem if we see an error or the iclog is in - * IOABORT state. The IOABORT state is only set in DEBUG mode to inject - * CRC errors into log recovery. + * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || - iclog->ic_state & XLOG_STATE_IOABORT) { - if (iclog->ic_state & XLOG_STATE_IOABORT) - iclog->ic_state &= ~XLOG_STATE_IOABORT; - - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_stale(bp); - xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); + if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { + xfs_alert(log->l_mp, "log I/O error %d", error); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); /* * This flag will be propagated to the trans-committed * callback routines to let them know that the log-commit * didn't succeed. */ - aborted = XFS_LI_ABORTED; + aborted = true; } else if (iclog->ic_state & XLOG_STATE_IOERROR) { - aborted = XFS_LI_ABORTED; + aborted = true; } - /* log I/O is always issued ASYNC */ - ASSERT(bp->b_flags & XBF_ASYNC); xlog_state_done_syncing(iclog, aborted); + bio_uninit(&iclog->ic_bio); /* - * drop the buffer lock now that we are done. Nothing references - * the buffer after this, so an unmount waiting on this lock can now - * tear it down safely. As such, it is unsafe to reference the buffer - * (bp) after the unlock as we could race with it being freed. + * Drop the lock to signal that we are done. Nothing references the + * iclog after this, so an unmount waiting on this lock can now tear it + * down safely. As such, it is unsafe to reference the iclog after the + * unlock as we could race with it being freed. */ - xfs_buf_unlock(bp); + up(&iclog->ic_sema); } /* @@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp) * If the filesystem blocksize is too large, we may need to choose a * larger size since the directory code currently logs entire blocks. */ - STATIC void xlog_get_iclog_buffer_size( struct xfs_mount *mp, struct xlog *log) { - int size; - int xhdrs; - if (mp->m_logbufs <= 0) - log->l_iclog_bufs = XLOG_MAX_ICLOGS; - else - log->l_iclog_bufs = mp->m_logbufs; + mp->m_logbufs = XLOG_MAX_ICLOGS; + if (mp->m_logbsize <= 0) + mp->m_logbsize = XLOG_BIG_RECORD_BSIZE; + + log->l_iclog_bufs = mp->m_logbufs; + log->l_iclog_size = mp->m_logbsize; /* - * Buffer size passed in from mount system call. + * # headers = size / 32k - one header holds cycles from 32k of data. */ - if (mp->m_logbsize > 0) { - size = log->l_iclog_size = mp->m_logbsize; - log->l_iclog_size_log = 0; - while (size != 1) { - log->l_iclog_size_log++; - size >>= 1; - } - - if (xfs_sb_version_haslogv2(&mp->m_sb)) { - /* # headers = size / 32k - * one header holds cycles from 32k of data - */ - - xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; - if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE) - xhdrs++; - log->l_iclog_hsize = xhdrs << BBSHIFT; - log->l_iclog_heads = xhdrs; - } else { - ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE); - log->l_iclog_hsize = BBSIZE; - log->l_iclog_heads = 1; - } - goto done; - } - - /* All machines use 32kB buffers by default. */ - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; - - /* the default log size is 16k or 32k which is one header sector */ - log->l_iclog_hsize = BBSIZE; - log->l_iclog_heads = 1; - -done: - /* are we being asked to make the sizes selected above visible? */ - if (mp->m_logbufs == 0) - mp->m_logbufs = log->l_iclog_bufs; - if (mp->m_logbsize == 0) - mp->m_logbsize = log->l_iclog_size; -} /* xlog_get_iclog_buffer_size */ - + log->l_iclog_heads = + DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE); + log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT; +} void xfs_log_work_queue( @@ -1422,7 +1337,6 @@ xlog_alloc_log( xlog_rec_header_t *head; xlog_in_core_t **iclogp; xlog_in_core_t *iclog, *prev_iclog=NULL; - xfs_buf_t *bp; int i; int error = -ENOMEM; uint log2_size = 0; @@ -1480,30 +1394,6 @@ xlog_alloc_log( xlog_get_iclog_buffer_size(mp, log); - /* - * Use a NULL block for the extra log buffer used during splits so that - * it will trigger errors if we ever try to do IO on it without first - * having set it up properly. - */ - error = -ENOMEM; - bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, - BTOBB(log->l_iclog_size), XBF_NO_IOACCT); - if (!bp) - goto out_free_log; - - /* - * The iclogbuf buffer locks are held over IO but we are not going to do - * IO yet. Hence unlock the buffer so that the log IO path can grab it - * when appropriately. - */ - ASSERT(xfs_buf_islocked(bp)); - xfs_buf_unlock(bp); - - /* use high priority wq for log I/O completion */ - bp->b_ioend_wq = mp->m_log_workqueue; - bp->b_iodone = xlog_iodone; - log->l_xbuf = bp; - spin_lock_init(&log->l_icloglock); init_waitqueue_head(&log->l_flush_wait); @@ -1516,29 +1406,22 @@ xlog_alloc_log( * xlog_in_core_t in xfs_log_priv.h for details. */ ASSERT(log->l_iclog_size >= 4096); - for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); - if (!*iclogp) + for (i = 0; i < log->l_iclog_bufs; i++) { + size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * + sizeof(struct bio_vec); + + iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL); + if (!iclog) goto out_free_iclog; - iclog = *iclogp; + *iclogp = iclog; iclog->ic_prev = prev_iclog; prev_iclog = iclog; - bp = xfs_buf_get_uncached(mp->m_logdev_targp, - BTOBB(log->l_iclog_size), - XBF_NO_IOACCT); - if (!bp) + iclog->ic_data = kmem_alloc_large(log->l_iclog_size, + KM_MAYFAIL); + if (!iclog->ic_data) goto out_free_iclog; - - ASSERT(xfs_buf_islocked(bp)); - xfs_buf_unlock(bp); - - /* use high priority wq for log I/O completion */ - bp->b_ioend_wq = mp->m_log_workqueue; - bp->b_iodone = xlog_iodone; - iclog->ic_bp = bp; - iclog->ic_data = bp->b_addr; #ifdef DEBUG log->l_iclog_bak[i] = &iclog->ic_header; #endif @@ -1552,36 +1435,43 @@ xlog_alloc_log( head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); - iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; + iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_log = log; atomic_set(&iclog->ic_refcnt, 0); spin_lock_init(&iclog->ic_callback_lock); - iclog->ic_callback_tail = &(iclog->ic_callback); + INIT_LIST_HEAD(&iclog->ic_callbacks); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); + INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work); + sema_init(&iclog->ic_sema, 1); iclogp = &iclog->ic_next; } *iclogp = log->l_iclog; /* complete ring */ log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ + log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, + mp->m_fsname); + if (!log->l_ioend_workqueue) + goto out_free_iclog; + error = xlog_cil_init(log); if (error) - goto out_free_iclog; + goto out_destroy_workqueue; return log; +out_destroy_workqueue: + destroy_workqueue(log->l_ioend_workqueue); out_free_iclog: for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { prev_iclog = iclog->ic_next; - if (iclog->ic_bp) - xfs_buf_free(iclog->ic_bp); + kmem_free(iclog->ic_data); kmem_free(iclog); } - spinlock_destroy(&log->l_icloglock); - xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); out: @@ -1766,42 +1656,155 @@ xlog_cksum( return xfs_end_cksum(crc); } -/* - * The bdstrat callback function for log bufs. This gives us a central - * place to trap bufs in case we get hit by a log I/O error and need to - * shutdown. Actually, in practice, even when we didn't get a log error, - * we transition the iclogs to IOERROR state *after* flushing all existing - * iclogs to disk. This is because we don't want anymore new transactions to be - * started or completed afterwards. - * - * We lock the iclogbufs here so that we can serialise against IO completion - * during unmount. We might be processing a shutdown triggered during unmount, - * and that can occur asynchronously to the unmount thread, and hence we need to - * ensure that completes before tearing down the iclogbufs. Hence we need to - * hold the buffer lock across the log IO to acheive that. - */ -STATIC int -xlog_bdstrat( - struct xfs_buf *bp) +static void +xlog_bio_end_io( + struct bio *bio) { - struct xlog_in_core *iclog = bp->b_log_item; + struct xlog_in_core *iclog = bio->bi_private; - xfs_buf_lock(bp); - if (iclog->ic_state & XLOG_STATE_IOERROR) { - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - xfs_buf_ioend(bp); + queue_work(iclog->ic_log->l_ioend_workqueue, + &iclog->ic_end_io_work); +} + +static void +xlog_map_iclog_data( + struct bio *bio, + void *data, + size_t count) +{ + do { + struct page *page = kmem_to_page(data); + unsigned int off = offset_in_page(data); + size_t len = min_t(size_t, count, PAGE_SIZE - off); + + WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len); + + data += len; + count -= len; + } while (count); +} + +STATIC void +xlog_write_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + uint64_t bno, + unsigned int count, + bool need_flush) +{ + ASSERT(bno < log->l_logBBsize); + + /* + * We lock the iclogbufs here so that we can serialise against I/O + * completion during unmount. We might be processing a shutdown + * triggered during unmount, and that can occur asynchronously to the + * unmount thread, and hence we need to ensure that completes before + * tearing down the iclogbufs. Hence we need to hold the buffer lock + * across the log IO to archieve that. + */ + down(&iclog->ic_sema); + if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) { /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of - * doing it here. Similarly, IO completion will unlock the - * buffer, so we don't do it here. + * doing it here. We kick of the state machine and unlock + * the buffer manually, the code needs to be kept in sync + * with the I/O completion path. */ - return 0; + xlog_state_done_syncing(iclog, XFS_LI_ABORTED); + up(&iclog->ic_sema); + return; } - xfs_buf_submit(bp); - return 0; + iclog->ic_io_size = count; + + bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); + bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; + iclog->ic_bio.bi_end_io = xlog_bio_end_io; + iclog->ic_bio.bi_private = iclog; + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA; + if (need_flush) + iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + + xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size); + if (is_vmalloc_addr(iclog->ic_data)) + flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size); + + /* + * If this log buffer would straddle the end of the log we will have + * to split it up into two bios, so that we can continue at the start. + */ + if (bno + BTOBB(count) > log->l_logBBsize) { + struct bio *split; + + split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, + GFP_NOIO, &fs_bio_set); + bio_chain(split, &iclog->ic_bio); + submit_bio(split); + + /* restart at logical offset zero for the remainder */ + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; + } + + submit_bio(&iclog->ic_bio); +} + +/* + * We need to bump cycle number for the part of the iclog that is + * written to the start of the log. Watch out for the header magic + * number case, though. + */ +static void +xlog_split_iclog( + struct xlog *log, + void *data, + uint64_t bno, + unsigned int count) +{ + unsigned int split_offset = BBTOB(log->l_logBBsize - bno); + unsigned int i; + + for (i = split_offset; i < count; i += BBSIZE) { + uint32_t cycle = get_unaligned_be32(data + i); + + if (++cycle == XLOG_HEADER_MAGIC_NUM) + cycle++; + put_unaligned_be32(cycle, data + i); + } +} + +static int +xlog_calc_iclog_size( + struct xlog *log, + struct xlog_in_core *iclog, + uint32_t *roundoff) +{ + uint32_t count_init, count; + bool use_lsunit; + + use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) && + log->l_mp->m_sb.sb_logsunit > 1; + + /* Add for LR header */ + count_init = log->l_iclog_hsize + iclog->ic_offset; + + /* Round out the log write size */ + if (use_lsunit) { + /* we have a v2 stripe unit to use */ + count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); + } else { + count = BBTOB(BTOBB(count_init)); + } + + ASSERT(count >= count_init); + *roundoff = count - count_init; + + if (use_lsunit) + ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit); + else + ASSERT(*roundoff < BBTOB(1)); + return count; } /* @@ -1824,46 +1827,23 @@ xlog_bdstrat( * log will require grabbing the lock though. * * The entire log manager uses a logical block numbering scheme. Only - * log_sync (and then only bwrite()) know about the fact that the log may - * not start with block zero on a given device. The log block start offset - * is added immediately before calling bwrite(). + * xlog_write_iclog knows about the fact that the log may not start with + * block zero on a given device. */ - -STATIC int +STATIC void xlog_sync( struct xlog *log, struct xlog_in_core *iclog) { - xfs_buf_t *bp; - int i; - uint count; /* byte count of bwrite */ - uint count_init; /* initial count before roundup */ - int roundoff; /* roundoff to BB or stripe */ - int split = 0; /* split write into two regions */ - int error; - int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); - int size; + unsigned int count; /* byte count of bwrite */ + unsigned int roundoff; /* roundoff to BB or stripe */ + uint64_t bno; + unsigned int size; + bool need_flush = true, split = false; - XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); - /* Add for LR header */ - count_init = log->l_iclog_hsize + iclog->ic_offset; - - /* Round out the log write size */ - if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { - /* we have a v2 stripe unit to use */ - count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); - } else { - count = BBTOB(BTOBB(count_init)); - } - roundoff = count - count_init; - ASSERT(roundoff >= 0); - ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && - roundoff < log->l_mp->m_sb.sb_logsunit) - || - (log->l_mp->m_sb.sb_logsunit <= 1 && - roundoff < BBTOB(1))); + count = xlog_calc_iclog_size(log, iclog, &roundoff); /* move grant heads by roundoff in sync */ xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); @@ -1874,41 +1854,19 @@ xlog_sync( /* real byte length */ size = iclog->ic_offset; - if (v2) + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) size += roundoff; iclog->ic_header.h_len = cpu_to_be32(size); - bp = iclog->ic_bp; - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - + XFS_STATS_INC(log->l_mp, xs_log_writes); XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); - /* Do we need to split this write into 2 parts? */ - if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { - char *dptr; - - split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); - count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); - iclog->ic_bwritecnt = 2; + bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)); - /* - * Bump the cycle numbers at the start of each block in the - * part of the iclog that ends up in the buffer that gets - * written to the start of the log. - * - * Watch out for the header magic number case, though. - */ - dptr = (char *)&iclog->ic_header + count; - for (i = 0; i < split; i += BBSIZE) { - uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); - if (++cycle == XLOG_HEADER_MAGIC_NUM) - cycle++; - *(__be32 *)dptr = cpu_to_be32(cycle); - - dptr += BBSIZE; - } - } else { - iclog->ic_bwritecnt = 1; + /* Do we need to split this write into 2 parts? */ + if (bno + BTOBB(count) > log->l_logBBsize) { + xlog_split_iclog(log, &iclog->ic_header, bno, count); + split = true; } /* calculcate the checksum */ @@ -1921,18 +1879,15 @@ xlog_sync( * write on I/O completion and shutdown the fs. The subsequent mount * detects the bad CRC and attempts to recover. */ +#ifdef DEBUG if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); - iclog->ic_state |= XLOG_STATE_IOABORT; + iclog->ic_fail_crc = true; xfs_warn(log->l_mp, "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", be64_to_cpu(iclog->ic_header.h_lsn)); } - - bp->b_io_length = BTOBB(count); - bp->b_log_item = iclog; - bp->b_flags &= ~XBF_FLUSH; - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); +#endif /* * Flush the data device before flushing the log to make sure all meta @@ -1942,50 +1897,14 @@ xlog_sync( * synchronously here; for an internal log we can simply use the block * layer state machine for preflushes. */ - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) + if (log->l_targ != log->l_mp->m_ddev_targp || split) { xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - else - bp->b_flags |= XBF_FLUSH; - - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - - xlog_verify_iclog(log, iclog, count, true); - - /* account for log which doesn't start at block #0 */ - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); - - /* - * Don't call xfs_bwrite here. We do log-syncs even when the filesystem - * is shutting down. - */ - error = xlog_bdstrat(bp); - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_sync"); - return error; + need_flush = false; } - if (split) { - bp = iclog->ic_log->l_xbuf; - XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ - xfs_buf_associate_memory(bp, - (char *)&iclog->ic_header + count, split); - bp->b_log_item = iclog; - bp->b_flags &= ~XBF_FLUSH; - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); - - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - - /* account for internal log which doesn't start at block #0 */ - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); - error = xlog_bdstrat(bp); - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); - return error; - } - } - return 0; -} /* xlog_sync */ + + xlog_verify_iclog(log, iclog, count); + xlog_write_iclog(log, iclog, bno, count, need_flush); +} /* * Deallocate a log structure @@ -2005,31 +1924,21 @@ xlog_dealloc_log( */ iclog = log->l_iclog; for (i = 0; i < log->l_iclog_bufs; i++) { - xfs_buf_lock(iclog->ic_bp); - xfs_buf_unlock(iclog->ic_bp); + down(&iclog->ic_sema); + up(&iclog->ic_sema); iclog = iclog->ic_next; } - /* - * Always need to ensure that the extra buffer does not point to memory - * owned by another log buffer before we free it. Also, cycle the lock - * first to ensure we've completed IO on it. - */ - xfs_buf_lock(log->l_xbuf); - xfs_buf_unlock(log->l_xbuf); - xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); - xfs_buf_free(log->l_xbuf); - iclog = log->l_iclog; for (i = 0; i < log->l_iclog_bufs; i++) { - xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; + kmem_free(iclog->ic_data); kmem_free(iclog); iclog = next_iclog; } - spinlock_destroy(&log->l_icloglock); log->l_mp->m_log = NULL; + destroy_workqueue(log->l_ioend_workqueue); kmem_free(log); } /* xlog_dealloc_log */ @@ -2069,7 +1978,7 @@ xlog_print_tic_res( /* match with XLOG_REG_TYPE_* in xfs_log.h */ #define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str - static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = { + static char *res_type_str[] = { REG_TYPE_STR(BFORMAT, "bformat"), REG_TYPE_STR(BCHUNK, "bchunk"), REG_TYPE_STR(EFI_FORMAT, "efi_format"), @@ -2089,8 +1998,15 @@ xlog_print_tic_res( REG_TYPE_STR(UNMOUNT, "unmount"), REG_TYPE_STR(COMMIT, "commit"), REG_TYPE_STR(TRANSHDR, "trans header"), - REG_TYPE_STR(ICREATE, "inode create") + REG_TYPE_STR(ICREATE, "inode create"), + REG_TYPE_STR(RUI_FORMAT, "rui_format"), + REG_TYPE_STR(RUD_FORMAT, "rud_format"), + REG_TYPE_STR(CUI_FORMAT, "cui_format"), + REG_TYPE_STR(CUD_FORMAT, "cud_format"), + REG_TYPE_STR(BUI_FORMAT, "bui_format"), + REG_TYPE_STR(BUD_FORMAT, "bud_format"), }; + BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1); #undef REG_TYPE_STR xfs_warn(mp, "ticket reservation summary:"); @@ -2603,7 +2519,7 @@ xlog_state_clean_log( if (iclog->ic_state == XLOG_STATE_DIRTY) { iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_offset = 0; - ASSERT(iclog->ic_callback == NULL); + ASSERT(list_empty_careful(&iclog->ic_callbacks)); /* * If the number of ops in this iclog indicate it just * contains the dummy transaction, we can @@ -2673,37 +2589,32 @@ xlog_state_clean_log( STATIC xfs_lsn_t xlog_get_lowest_lsn( - struct xlog *log) + struct xlog *log) { - xlog_in_core_t *lsn_log; - xfs_lsn_t lowest_lsn, lsn; + struct xlog_in_core *iclog = log->l_iclog; + xfs_lsn_t lowest_lsn = 0, lsn; - lsn_log = log->l_iclog; - lowest_lsn = 0; do { - if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { - lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); - if ((lsn && !lowest_lsn) || - (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { + if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) + continue; + + lsn = be64_to_cpu(iclog->ic_header.h_lsn); + if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0) lowest_lsn = lsn; - } - } - lsn_log = lsn_log->ic_next; - } while (lsn_log != log->l_iclog); + } while ((iclog = iclog->ic_next) != log->l_iclog); + return lowest_lsn; } - STATIC void xlog_state_do_callback( struct xlog *log, - int aborted, + bool aborted, struct xlog_in_core *ciclog) { xlog_in_core_t *iclog; xlog_in_core_t *first_iclog; /* used to know when we've * processed all iclogs once */ - xfs_log_callback_t *cb, *cb_next; int flushcnt = 0; xfs_lsn_t lowest_lsn; int ioerrors; /* counter: iclogs with errors */ @@ -2814,7 +2725,7 @@ xlog_state_do_callback( */ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); - if (iclog->ic_callback) + if (!list_empty_careful(&iclog->ic_callbacks)) atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(iclog->ic_header.h_lsn)); @@ -2831,26 +2742,20 @@ xlog_state_do_callback( * callbacks being added. */ spin_lock(&iclog->ic_callback_lock); - cb = iclog->ic_callback; - while (cb) { - iclog->ic_callback_tail = &(iclog->ic_callback); - iclog->ic_callback = NULL; - spin_unlock(&iclog->ic_callback_lock); + while (!list_empty(&iclog->ic_callbacks)) { + LIST_HEAD(tmp); - /* perform callbacks in the order given */ - for (; cb; cb = cb_next) { - cb_next = cb->cb_next; - cb->cb_func(cb->cb_arg, aborted); - } + list_splice_init(&iclog->ic_callbacks, &tmp); + + spin_unlock(&iclog->ic_callback_lock); + xlog_cil_process_committed(&tmp, aborted); spin_lock(&iclog->ic_callback_lock); - cb = iclog->ic_callback; } loopdidcallbacks++; funcdidcallbacks++; spin_lock(&log->l_icloglock); - ASSERT(iclog->ic_callback == NULL); spin_unlock(&iclog->ic_callback_lock); if (!(iclog->ic_state & XLOG_STATE_IOERROR)) iclog->ic_state = XLOG_STATE_DIRTY; @@ -2936,18 +2841,16 @@ xlog_state_do_callback( */ STATIC void xlog_state_done_syncing( - xlog_in_core_t *iclog, - int aborted) + struct xlog_in_core *iclog, + bool aborted) { - struct xlog *log = iclog->ic_log; + struct xlog *log = iclog->ic_log; spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); - ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); - /* * If we got an error, either on the first buffer, or in the case of @@ -2955,13 +2858,8 @@ xlog_state_done_syncing( * and none should ever be attempted to be written to disk * again. */ - if (iclog->ic_state != XLOG_STATE_IOERROR) { - if (--iclog->ic_bwritecnt == 1) { - spin_unlock(&log->l_icloglock); - return; - } + if (iclog->ic_state != XLOG_STATE_IOERROR) iclog->ic_state = XLOG_STATE_DONE_SYNC; - } /* * Someone could be sleeping prior to writing out the next @@ -3230,7 +3128,7 @@ xlog_state_release_iclog( * flags after this point. */ if (sync) - return xlog_sync(log, iclog); + xlog_sync(log, iclog); return 0; } /* xlog_state_release_iclog */ @@ -3821,8 +3719,7 @@ STATIC void xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, - int count, - bool syncing) + int count) { xlog_op_header_t *ophead; xlog_in_core_t *icptr; @@ -3866,7 +3763,7 @@ xlog_verify_iclog( /* clientid is only 1 byte */ p = &ophead->oh_clientid; field_offset = p - base_ptr; - if (!syncing || (field_offset & 0x1ff)) { + if (field_offset & 0x1ff) { clientid = ophead->oh_clientid; } else { idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); @@ -3889,7 +3786,7 @@ xlog_verify_iclog( /* check length */ p = &ophead->oh_len; field_offset = p - base_ptr; - if (!syncing || (field_offset & 0x1ff)) { + if (field_offset & 0x1ff) { op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((uintptr_t)&ophead->oh_len - @@ -4026,7 +3923,7 @@ xfs_log_force_umount( * avoid races. */ wake_up_all(&log->l_cilp->xc_commit_wait); - xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); + xlog_state_do_callback(log, true, NULL); #ifdef XFSERRORDEBUG { diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 73a64bf32f6f..84e06805160f 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -6,6 +6,8 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +struct xfs_cil_ctx; + struct xfs_log_vec { struct xfs_log_vec *lv_next; /* next lv in build list */ int lv_niovecs; /* number of iovecs in lv */ @@ -72,16 +74,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, } /* - * Structure used to pass callback function and the function's argument - * to the log manager. - */ -typedef struct xfs_log_callback { - struct xfs_log_callback *cb_next; - void (*cb_func)(void *, int); - void *cb_arg; -} xfs_log_callback_t; - -/* * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ @@ -125,12 +117,10 @@ int xfs_log_mount(struct xfs_mount *mp, xfs_daddr_t start_block, int num_bblocks); int xfs_log_mount_finish(struct xfs_mount *mp); -int xfs_log_mount_cancel(struct xfs_mount *); +void xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); -int xfs_log_notify(struct xlog_in_core *iclog, - struct xfs_log_callback *callback_entry); int xfs_log_release_iclog(struct xfs_mount *mp, struct xlog_in_core *iclog); int xfs_log_reserve(struct xfs_mount *mp, @@ -148,6 +138,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, bool regrant); +void xlog_cil_process_committed(struct list_head *list, bool aborted); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5e595948bc5a..fa5602d0fd7f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -10,10 +10,7 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_alloc.h" #include "xfs_extent_busy.h" -#include "xfs_discard.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" @@ -246,7 +243,8 @@ xfs_cil_prepare_item( * shadow buffer, so update the the pointer to it appropriately. */ if (!old_lv) { - lv->lv_item->li_ops->iop_pin(lv->lv_item); + if (lv->lv_item->li_ops->iop_pin) + lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_lv_shadow = NULL; } else if (old_lv != lv) { ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); @@ -576,10 +574,9 @@ xlog_discard_busy_extents( */ static void xlog_cil_committed( - void *args, - int abort) + struct xfs_cil_ctx *ctx, + bool abort) { - struct xfs_cil_ctx *ctx = args; struct xfs_mount *mp = ctx->cil->xc_log->l_mp; /* @@ -614,6 +611,20 @@ xlog_cil_committed( kmem_free(ctx); } +void +xlog_cil_process_committed( + struct list_head *list, + bool aborted) +{ + struct xfs_cil_ctx *ctx; + + while ((ctx = list_first_entry_or_null(list, + struct xfs_cil_ctx, iclog_entry))) { + list_del(&ctx->iclog_entry); + xlog_cil_committed(ctx, aborted); + } +} + /* * Push the Committed Item List to the log. If @push_seq flag is zero, then it * is a background flush and so we can chose to ignore it. Otherwise, if the @@ -835,12 +846,15 @@ restart: if (commit_lsn == -1) goto out_abort; - /* attach all the transactions w/ busy extents to iclog */ - ctx->log_cb.cb_func = xlog_cil_committed; - ctx->log_cb.cb_arg = ctx; - error = xfs_log_notify(commit_iclog, &ctx->log_cb); - if (error) + spin_lock(&commit_iclog->ic_callback_lock); + if (commit_iclog->ic_state & XLOG_STATE_IOERROR) { + spin_unlock(&commit_iclog->ic_callback_lock); goto out_abort; + } + ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || + commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); + list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); + spin_unlock(&commit_iclog->ic_callback_lock); /* * now the checkpoint commit is complete and we've attached the @@ -864,7 +878,7 @@ out_skip: out_abort_free_ticket: xfs_log_ticket_put(tic); out_abort: - xlog_cil_committed(ctx, XFS_LI_ABORTED); + xlog_cil_committed(ctx, true); return -EIO; } @@ -984,6 +998,7 @@ xfs_log_commit_cil( { struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; + struct xfs_log_item *lip, *next; xfs_lsn_t xc_commit_lsn; /* @@ -1008,7 +1023,7 @@ xfs_log_commit_cil( /* * Once all the items of the transaction have been copied to the CIL, - * the items can be unlocked and freed. + * the items can be unlocked and possibly freed. * * This needs to be done before we drop the CIL context lock because we * have to update state in the log items and unlock them before they go @@ -1017,8 +1032,12 @@ xfs_log_commit_cil( * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, xc_commit_lsn, false); - + trace_xfs_trans_commit_items(tp, _RET_IP_); + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { + xfs_trans_del_item(lip); + if (lip->li_ops->iop_committing) + lip->li_ops->iop_committing(lip, xc_commit_lsn); + } xlog_cil_push_background(log); up_read(&cil->xc_ctx_lock); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b5f82cb36202..b880c23cb6e4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -10,7 +10,6 @@ struct xfs_buf; struct xlog; struct xlog_ticket; struct xfs_mount; -struct xfs_log_callback; /* * Flags for log structure @@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ #define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ #define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ -#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */ #define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ #define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ @@ -179,11 +177,10 @@ typedef struct xlog_ticket { * the iclog. * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. * - ic_next is the pointer to the next iclog in the ring. - * - ic_bp is a pointer to the buffer used to write this incore log to disk. * - ic_log is a pointer back to the global log structure. - * - ic_callback is a linked list of callback function/argument pairs to be - * called after an iclog finishes writing. - * - ic_size is the full size of the header plus data. + * - ic_size is the full size of the log buffer, minus the cycle headers. + * - ic_io_size is the size of the currently pending log buffer write, which + * might be smaller than ic_size * - ic_offset is the current number of bytes written to in this iclog. * - ic_refcnt is bumped when someone is writing to the log. * - ic_state is the state of the iclog. @@ -193,7 +190,7 @@ typedef struct xlog_ticket { * structure cacheline aligned. The following fields can be contended on * by independent processes: * - * - ic_callback_* + * - ic_callbacks * - ic_refcnt * - fields protected by the global l_icloglock * @@ -206,23 +203,28 @@ typedef struct xlog_in_core { wait_queue_head_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; - struct xfs_buf *ic_bp; struct xlog *ic_log; - int ic_size; - int ic_offset; - int ic_bwritecnt; + u32 ic_size; + u32 ic_io_size; + u32 ic_offset; unsigned short ic_state; char *ic_datap; /* pointer to iclog data */ /* Callback structures need their own cacheline */ spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; - struct xfs_log_callback *ic_callback; - struct xfs_log_callback **ic_callback_tail; + struct list_head ic_callbacks; /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; xlog_in_core_2_t *ic_data; #define ic_header ic_data->hic_header +#ifdef DEBUG + bool ic_fail_crc : 1; +#endif + struct semaphore ic_sema; + struct work_struct ic_end_io_work; + struct bio ic_bio; + struct bio_vec ic_bvec[]; } xlog_in_core_t; /* @@ -243,7 +245,7 @@ struct xfs_cil_ctx { int space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ struct xfs_log_vec *lv_chain; /* logvecs being pushed */ - struct xfs_log_callback log_cb; /* completion callback hook. */ + struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct discard_endio_work; }; @@ -350,9 +352,8 @@ struct xlog { struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ struct xfs_cil *l_cilp; /* CIL log is working with */ - struct xfs_buf *l_xbuf; /* extra buffer for log - * wrapping */ struct xfs_buftarg *l_targ; /* buftarg of log */ + struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ struct delayed_work l_work; /* background flush work */ uint l_flags; uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ @@ -361,7 +362,6 @@ struct xlog { int l_iclog_heads; /* # of iclog header sectors */ uint l_sectBBsize; /* sector size in BBs (2^n) */ int l_iclog_size; /* size of log in bytes */ - int l_iclog_size_log; /* log power size of log */ int l_iclog_bufs; /* number of iclog buffers */ xfs_daddr_t l_logBBstart; /* start block of log */ int l_logsize; /* size of log in bytes */ @@ -418,7 +418,7 @@ xlog_recover( extern int xlog_recover_finish( struct xlog *log); -extern int +extern void xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9329f5adbfbe..13d1d3e95b88 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -13,8 +13,6 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_log.h" @@ -26,7 +24,6 @@ #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_quota.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_btree.h" @@ -79,7 +76,7 @@ struct xfs_buf_cancel { * are valid, false otherwise. */ static inline bool -xlog_verify_bp( +xlog_verify_bno( struct xlog *log, xfs_daddr_t blk_no, int bbcount) @@ -92,22 +89,19 @@ xlog_verify_bp( } /* - * Allocate a buffer to hold log data. The buffer needs to be able - * to map to a range of nbblks basic blocks at any valid (basic - * block) offset within the log. + * Allocate a buffer to hold log data. The buffer needs to be able to map to + * a range of nbblks basic blocks at any valid offset within the log. */ -STATIC xfs_buf_t * -xlog_get_bp( +static char * +xlog_alloc_buffer( struct xlog *log, int nbblks) { - struct xfs_buf *bp; - /* * Pass log block 0 since we don't have an addr yet, buffer will be * verified on read. */ - if (!xlog_verify_bp(log, 0, nbblks)) { + if (!xlog_verify_bno(log, 0, nbblks)) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); @@ -115,69 +109,48 @@ xlog_get_bp( } /* - * We do log I/O in units of log sectors (a power-of-2 - * multiple of the basic block size), so we round up the - * requested size to accommodate the basic blocks required - * for complete log sectors. + * We do log I/O in units of log sectors (a power-of-2 multiple of the + * basic block size), so we round up the requested size to accommodate + * the basic blocks required for complete log sectors. * - * In addition, the buffer may be used for a non-sector- - * aligned block offset, in which case an I/O of the - * requested size could extend beyond the end of the - * buffer. If the requested size is only 1 basic block it - * will never straddle a sector boundary, so this won't be - * an issue. Nor will this be a problem if the log I/O is - * done in basic blocks (sector size 1). But otherwise we - * extend the buffer by one extra log sector to ensure - * there's space to accommodate this possibility. + * In addition, the buffer may be used for a non-sector-aligned block + * offset, in which case an I/O of the requested size could extend + * beyond the end of the buffer. If the requested size is only 1 basic + * block it will never straddle a sector boundary, so this won't be an + * issue. Nor will this be a problem if the log I/O is done in basic + * blocks (sector size 1). But otherwise we extend the buffer by one + * extra log sector to ensure there's space to accommodate this + * possibility. */ if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); - if (bp) - xfs_buf_unlock(bp); - return bp; -} - -STATIC void -xlog_put_bp( - xfs_buf_t *bp) -{ - xfs_buf_free(bp); + return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL); } /* * Return the address of the start of the given block number's data * in a log buffer. The buffer covers a log sector-aligned region. */ -STATIC char * +static inline unsigned int xlog_align( struct xlog *log, - xfs_daddr_t blk_no, - int nbblks, - struct xfs_buf *bp) + xfs_daddr_t blk_no) { - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); - - ASSERT(offset + nbblks <= bp->b_length); - return bp->b_addr + BBTOB(offset); + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); } - -/* - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. - */ -STATIC int -xlog_bread_noalign( - struct xlog *log, - xfs_daddr_t blk_no, - int nbblks, - struct xfs_buf *bp) +static int +xlog_do_io( + struct xlog *log, + xfs_daddr_t blk_no, + unsigned int nbblks, + char *data, + unsigned int op) { - int error; + int error; - if (!xlog_verify_bp(log, blk_no, nbblks)) { + if (!xlog_verify_bno(log, blk_no, nbblks)) { xfs_warn(log->l_mp, "Invalid log block/length (0x%llx, 0x%x) for buffer", blk_no, nbblks); @@ -187,107 +160,53 @@ xlog_bread_noalign( blk_no = round_down(blk_no, log->l_sectBBsize); nbblks = round_up(nbblks, log->l_sectBBsize); - ASSERT(nbblks > 0); - ASSERT(nbblks <= bp->b_length); - - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - bp->b_flags |= XBF_READ; - bp->b_io_length = nbblks; - bp->b_error = 0; - error = xfs_buf_submit(bp); - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) - xfs_buf_ioerror_alert(bp, __func__); + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, + BBTOB(nbblks), data, op); + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_alert(log->l_mp, + "log recovery %s I/O error at daddr 0x%llx len %d error %d", + op == REQ_OP_WRITE ? "write" : "read", + blk_no, nbblks, error); + } return error; } STATIC int -xlog_bread( +xlog_bread_noalign( struct xlog *log, xfs_daddr_t blk_no, int nbblks, - struct xfs_buf *bp, - char **offset) + char *data) { - int error; - - error = xlog_bread_noalign(log, blk_no, nbblks, bp); - if (error) - return error; - - *offset = xlog_align(log, blk_no, nbblks, bp); - return 0; + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); } -/* - * Read at an offset into the buffer. Returns with the buffer in it's original - * state regardless of the result of the read. - */ STATIC int -xlog_bread_offset( +xlog_bread( struct xlog *log, - xfs_daddr_t blk_no, /* block to read from */ - int nbblks, /* blocks to read */ - struct xfs_buf *bp, - char *offset) + xfs_daddr_t blk_no, + int nbblks, + char *data, + char **offset) { - char *orig_offset = bp->b_addr; - int orig_len = BBTOB(bp->b_length); - int error, error2; - - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); - if (error) - return error; - - error = xlog_bread_noalign(log, blk_no, nbblks, bp); + int error; - /* must reset buffer pointer even on error */ - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); - if (error) - return error; - return error2; + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); + if (!error) + *offset = data + xlog_align(log, blk_no); + return error; } -/* - * Write out the buffer at the given block for the given number of blocks. - * The buffer is kept locked across the write and is returned locked. - * This can only be used for synchronous log writes. - */ STATIC int xlog_bwrite( struct xlog *log, xfs_daddr_t blk_no, int nbblks, - struct xfs_buf *bp) + char *data) { - int error; - - if (!xlog_verify_bp(log, blk_no, nbblks)) { - xfs_warn(log->l_mp, - "Invalid log block/length (0x%llx, 0x%x) for buffer", - blk_no, nbblks); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return -EFSCORRUPTED; - } - - blk_no = round_down(blk_no, log->l_sectBBsize); - nbblks = round_up(nbblks, log->l_sectBBsize); - - ASSERT(nbblks > 0); - ASSERT(nbblks <= bp->b_length); - - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - xfs_buf_hold(bp); - xfs_buf_lock(bp); - bp->b_io_length = nbblks; - bp->b_error = 0; - - error = xfs_bwrite(bp); - if (error) - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_relse(bp); - return error; + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); } #ifdef DEBUG @@ -377,10 +296,9 @@ xlog_recover_iodone( * We're not going to bother about retrying * this during recovery. One strike! */ - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buf_ioerror_alert(bp, __func__); - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } } @@ -405,7 +323,7 @@ xlog_recover_iodone( STATIC int xlog_find_cycle_start( struct xlog *log, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle) @@ -419,7 +337,7 @@ xlog_find_cycle_start( end_blk = *last_blk; mid_blk = BLK_AVG(first_blk, end_blk); while (mid_blk != first_blk && mid_blk != end_blk) { - error = xlog_bread(log, mid_blk, 1, bp, &offset); + error = xlog_bread(log, mid_blk, 1, buffer, &offset); if (error) return error; mid_cycle = xlog_get_cycle(offset); @@ -455,7 +373,7 @@ xlog_find_verify_cycle( { xfs_daddr_t i, j; uint cycle; - xfs_buf_t *bp; + char *buffer; xfs_daddr_t bufblks; char *buf = NULL; int error = 0; @@ -469,7 +387,7 @@ xlog_find_verify_cycle( bufblks = 1 << ffs(nbblks); while (bufblks > log->l_logBBsize) bufblks >>= 1; - while (!(bp = xlog_get_bp(log, bufblks))) { + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < log->l_sectBBsize) return -ENOMEM; @@ -480,7 +398,7 @@ xlog_find_verify_cycle( bcount = min(bufblks, (start_blk + nbblks - i)); - error = xlog_bread(log, i, bcount, bp, &buf); + error = xlog_bread(log, i, bcount, buffer, &buf); if (error) goto out; @@ -498,7 +416,7 @@ xlog_find_verify_cycle( *new_blk = -1; out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -522,7 +440,7 @@ xlog_find_verify_log_record( int extra_bblks) { xfs_daddr_t i; - xfs_buf_t *bp; + char *buffer; char *offset = NULL; xlog_rec_header_t *head = NULL; int error = 0; @@ -532,12 +450,14 @@ xlog_find_verify_log_record( ASSERT(start_blk != 0 || *last_blk != start_blk); - if (!(bp = xlog_get_bp(log, num_blks))) { - if (!(bp = xlog_get_bp(log, 1))) + buffer = xlog_alloc_buffer(log, num_blks); + if (!buffer) { + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; smallmem = 1; } else { - error = xlog_bread(log, start_blk, num_blks, bp, &offset); + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); if (error) goto out; offset += ((num_blks - 1) << BBSHIFT); @@ -554,7 +474,7 @@ xlog_find_verify_log_record( } if (smallmem) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out; } @@ -607,7 +527,7 @@ xlog_find_verify_log_record( *last_blk = i; out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -629,7 +549,7 @@ xlog_find_head( struct xlog *log, xfs_daddr_t *return_head_blk) { - xfs_buf_t *bp; + char *buffer; char *offset; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; int num_scan_bblks; @@ -659,20 +579,20 @@ xlog_find_head( } first_blk = 0; /* get cycle # of 1st block */ - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ - error = xlog_bread(log, last_blk, 1, bp, &offset); + error = xlog_bread(log, last_blk, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); @@ -740,9 +660,10 @@ xlog_find_head( * ^ we want to locate this spot */ stop_on_cycle = last_half_cycle; - if ((error = xlog_find_cycle_start(log, bp, first_blk, - &head_blk, last_half_cycle))) - goto bp_err; + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, + last_half_cycle); + if (error) + goto out_free_buffer; } /* @@ -762,7 +683,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks, stop_on_cycle, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } else { /* need to read 2 parts of log */ @@ -799,7 +720,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks - (int)head_blk, (stop_on_cycle - 1), &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) { head_blk = new_blk; goto validate_head; @@ -815,7 +736,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, (int)head_blk, stop_on_cycle, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } @@ -834,13 +755,13 @@ validate_head: if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; } else { start_blk = 0; ASSERT(head_blk <= INT_MAX); error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); if (error < 0) - goto bp_err; + goto out_free_buffer; if (error == 1) { /* We hit the beginning of the log during our search */ start_blk = log_bbnum - (num_scan_bblks - head_blk); @@ -853,14 +774,14 @@ validate_head: if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; if (new_blk != log_bbnum) head_blk = new_blk; } else if (error) - goto bp_err; + goto out_free_buffer; } - xlog_put_bp(bp); + kmem_free(buffer); if (head_blk == log_bbnum) *return_head_blk = 0; else @@ -873,9 +794,8 @@ validate_head: */ return 0; - bp_err: - xlog_put_bp(bp); - +out_free_buffer: + kmem_free(buffer); if (error) xfs_warn(log->l_mp, "failed to find log head"); return error; @@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr( xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) @@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr( */ end_blk = head_blk > tail_blk ? tail_blk : 0; for (i = (int) head_blk - 1; i >= end_blk; i--) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr( */ if (tail_blk >= head_blk && found != count) { for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -969,7 +889,7 @@ xlog_seek_logrec_hdr( xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) @@ -988,7 +908,7 @@ xlog_seek_logrec_hdr( */ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; for (i = (int) tail_blk; i <= end_blk; i++) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr( */ if (tail_blk > head_blk && found != count) { for (i = 0; i < (int) head_blk; i++) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -1069,22 +989,22 @@ xlog_verify_tail( int hsize) { struct xlog_rec_header *thead; - struct xfs_buf *bp; + char *buffer; xfs_daddr_t first_bad; int error = 0; bool wrapped; xfs_daddr_t tmp_tail; xfs_daddr_t orig_tail = *tail_blk; - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; /* * Make sure the tail points to a record (returns positive count on * success). */ - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; @@ -1113,8 +1033,8 @@ xlog_verify_tail( break; /* skip to the next record; returns positive count on success */ - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, - &tmp_tail, &thead, &wrapped); + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, + buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; @@ -1129,7 +1049,7 @@ xlog_verify_tail( "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", orig_tail, *tail_blk); out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -1151,13 +1071,13 @@ xlog_verify_head( struct xlog *log, xfs_daddr_t *head_blk, /* in/out: unverified head */ xfs_daddr_t *tail_blk, /* out: tail block */ - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rhead_blk, /* start blk of last record */ struct xlog_rec_header **rhead, /* ptr to last record */ bool *wrapped) /* last rec. wraps phys. log */ { struct xlog_rec_header *tmp_rhead; - struct xfs_buf *tmp_bp; + char *tmp_buffer; xfs_daddr_t first_bad; xfs_daddr_t tmp_rhead_blk; int found; @@ -1168,15 +1088,15 @@ xlog_verify_head( * Check the head of the log for torn writes. Search backwards from the * head until we hit the tail or the maximum number of log record I/Os * that could have been in flight at one time. Use a temporary buffer so - * we don't trash the rhead/bp pointers from the caller. + * we don't trash the rhead/buffer pointers from the caller. */ - tmp_bp = xlog_get_bp(log, 1); - if (!tmp_bp) + tmp_buffer = xlog_alloc_buffer(log, 1); + if (!tmp_buffer) return -ENOMEM; error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, - &tmp_rhead, &tmp_wrapped); - xlog_put_bp(tmp_bp); + XLOG_MAX_ICLOGS, tmp_buffer, + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); + kmem_free(tmp_buffer); if (error < 0) return error; @@ -1205,8 +1125,8 @@ xlog_verify_head( * (i.e., the records with invalid CRC) if the cycle number * matches the the current cycle. */ - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, - rhead_blk, rhead, wrapped); + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, + buffer, rhead_blk, rhead, wrapped); if (found < 0) return found; if (found == 0) /* XXX: right thing to do here? */ @@ -1266,7 +1186,7 @@ xlog_check_unmount_rec( xfs_daddr_t *tail_blk, struct xlog_rec_header *rhead, xfs_daddr_t rhead_blk, - struct xfs_buf *bp, + char *buffer, bool *clean) { struct xlog_op_header *op_head; @@ -1309,7 +1229,7 @@ xlog_check_unmount_rec( if (*head_blk == after_umount_blk && be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); if (error) return error; @@ -1388,7 +1308,7 @@ xlog_find_tail( { xlog_rec_header_t *rhead; char *offset = NULL; - xfs_buf_t *bp; + char *buffer; int error; xfs_daddr_t rhead_blk; xfs_lsn_t tail_lsn; @@ -1402,11 +1322,11 @@ xlog_find_tail( return error; ASSERT(*head_blk < INT_MAX); - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; if (*head_blk == 0) { /* special case */ - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) goto done; @@ -1422,7 +1342,7 @@ xlog_find_tail( * block. This wraps all the way back around to the head so something is * seriously wrong if we can't find it. */ - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, &rhead_blk, &rhead, &wrapped); if (error < 0) return error; @@ -1443,7 +1363,7 @@ xlog_find_tail( * state to determine whether recovery is necessary. */ error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, - rhead_blk, bp, &clean); + rhead_blk, buffer, &clean); if (error) goto done; @@ -1460,7 +1380,7 @@ xlog_find_tail( if (!clean) { xfs_daddr_t orig_head = *head_blk; - error = xlog_verify_head(log, head_blk, tail_blk, bp, + error = xlog_verify_head(log, head_blk, tail_blk, buffer, &rhead_blk, &rhead, &wrapped); if (error) goto done; @@ -1471,7 +1391,7 @@ xlog_find_tail( wrapped); tail_lsn = atomic64_read(&log->l_tail_lsn); error = xlog_check_unmount_rec(log, head_blk, tail_blk, - rhead, rhead_blk, bp, + rhead, rhead_blk, buffer, &clean); if (error) goto done; @@ -1505,11 +1425,11 @@ xlog_find_tail( * But... if the -device- itself is readonly, just skip this. * We can't recover this device anyway, so it won't matter. */ - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) + if (!xfs_readonly_buftarg(log->l_targ)) error = xlog_clear_stale_blocks(log, tail_lsn); done: - xlog_put_bp(bp); + kmem_free(buffer); if (error) xfs_warn(log->l_mp, "failed to locate log tail"); @@ -1537,7 +1457,7 @@ xlog_find_zeroed( struct xlog *log, xfs_daddr_t *blk_no) { - xfs_buf_t *bp; + char *buffer; char *offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; @@ -1547,35 +1467,36 @@ xlog_find_zeroed( *blk_no = 0; /* check totally zeroed log */ - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; - xlog_put_bp(bp); + kmem_free(buffer); return 1; } /* check partially zeroed log */ - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ - xlog_put_bp(bp); + kmem_free(buffer); return 0; } /* we have a partially zeroed log */ last_blk = log_bbnum-1; - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) - goto bp_err; + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); + if (error) + goto out_free_buffer; /* * Validate the answer. Because there is no way to guarantee that @@ -1598,7 +1519,7 @@ xlog_find_zeroed( */ if ((error = xlog_find_verify_cycle(log, start_blk, (int)num_scan_bblks, 0, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) last_blk = new_blk; @@ -1610,11 +1531,11 @@ xlog_find_zeroed( if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; *blk_no = last_blk; -bp_err: - xlog_put_bp(bp); +out_free_buffer: + kmem_free(buffer); if (error) return error; return 1; @@ -1657,7 +1578,7 @@ xlog_write_log_records( int tail_block) { char *offset; - xfs_buf_t *bp; + char *buffer; int balign, ealign; int sectbb = log->l_sectBBsize; int end_block = start_block + blocks; @@ -1674,7 +1595,7 @@ xlog_write_log_records( bufblks = 1 << ffs(blocks); while (bufblks > log->l_logBBsize) bufblks >>= 1; - while (!(bp = xlog_get_bp(log, bufblks))) { + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < sectbb) return -ENOMEM; @@ -1686,9 +1607,9 @@ xlog_write_log_records( */ balign = round_down(start_block, sectbb); if (balign != start_block) { - error = xlog_bread_noalign(log, start_block, 1, bp); + error = xlog_bread_noalign(log, start_block, 1, buffer); if (error) - goto out_put_bp; + goto out_free_buffer; j = start_block - balign; } @@ -1705,29 +1626,28 @@ xlog_write_log_records( */ ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { - offset = bp->b_addr + BBTOB(ealign - start_block); - error = xlog_bread_offset(log, ealign, sectbb, - bp, offset); + error = xlog_bread_noalign(log, ealign, sectbb, + buffer + BBTOB(ealign - start_block)); if (error) break; } - offset = xlog_align(log, start_block, endcount, bp); + offset = buffer + xlog_align(log, start_block); for (; j < endcount; j++) { xlog_add_record(log, offset, cycle, i+j, tail_cycle, tail_block); offset += BBSIZE; } - error = xlog_bwrite(log, start_block, endcount, bp); + error = xlog_bwrite(log, start_block, endcount, buffer); if (error) break; start_block += endcount; j = 0; } - out_put_bp: - xlog_put_bp(bp); +out_free_buffer: + kmem_free(buffer); return error; } @@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer( if (xfs_sb_version_hascrc(&mp->m_sb)) bp->b_ops = &xfs_inode_buf_ops; - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + offsetof(xfs_dinode_t, di_next_unlinked); @@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer( ASSERT(item->ri_buf[item_index].i_addr != NULL); ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); - ASSERT((reg_buf_offset + reg_buf_bytes) <= - BBTOB(bp->b_io_length)); + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); /* * The current logged region contains a copy of the @@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer( ASSERT(nbits > 0); ASSERT(item->ri_buf[i].i_addr != NULL); ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); - ASSERT(BBTOB(bp->b_io_length) >= + ASSERT(BBTOB(bp->b_length) >= ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); /* @@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2( * * Also make sure that only inode buffers with good sizes stay in * the buffer cache. The kernel moves inodes in buffers of 1 block - * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode + * or inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel * running with a different inode cluster size. Regardless, if the - * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) - * for *our* value of mp->m_inode_cluster_size, then we need to keep + * the inode buffer size isn't max(blocksize, inode_cluster_size) + * for *our* value of inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't * overlap with future reads of those inodes. */ if (XFS_DINODE_MAGIC == be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, - (uint32_t)log->l_mp->m_inode_cluster_size))) { + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) { xfs_buf_stale(bp); error = xfs_bwrite(bp); } else { - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); } @@ -3260,7 +3178,7 @@ out_owner_change: /* re-generate the checksum. */ xfs_dinode_calc_crc(log->l_mp, dip); - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); @@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2( } ASSERT(dq_f->qlf_size == 2); - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); @@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2( { xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; - xfs_log_item_t *lip; + struct xfs_log_item *lip; uint64_t efi_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2( { struct xfs_mount *mp = log->l_mp; struct xfs_icreate_log *icl; + struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_agnumber_t agno; xfs_agblock_t agbno; unsigned int count; @@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2( /* * The inode chunk is either full or sparse and we only support - * m_ialloc_min_blks sized sparse allocations at this time. + * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. */ - if (length != mp->m_ialloc_blks && - length != mp->m_ialloc_min_blks) { + if (length != igeo->ialloc_blks && + length != igeo->ialloc_min_blks) { xfs_warn(log->l_mp, "%s: unsupported chunk length", __FUNCTION__); return -EINVAL; @@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2( * buffers for cancellation so we don't overwrite anything written after * a cancellation. */ - bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); - nbufs = length / mp->m_blocks_per_cluster; + bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); + nbufs = length / igeo->blocks_per_cluster; for (i = 0, cancel_count = 0; i < nbufs; i++) { xfs_daddr_t daddr; daddr = XFS_AGB_TO_DADDR(mp, agno, - agbno + i * mp->m_blocks_per_cluster); + agbno + i * igeo->blocks_per_cluster); if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) cancel_count++; } @@ -4956,12 +4875,11 @@ out: * A cancel occurs when the mount has failed and we're bailing out. * Release all pending log intent items so they don't pin the AIL. */ -STATIC int +STATIC void xlog_recover_cancel_intents( struct xlog *log) { struct xfs_log_item *lip; - int error = 0; struct xfs_ail_cursor cur; struct xfs_ail *ailp; @@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents( xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); - return error; } /* @@ -5307,7 +5224,7 @@ xlog_do_recovery_pass( xfs_daddr_t blk_no, rblk_no; xfs_daddr_t rhead_blk; char *offset; - xfs_buf_t *hbp, *dbp; + char *hbp, *dbp; int error = 0, h_size, h_len; int error2 = 0; int bblks, split_bblks; @@ -5332,7 +5249,7 @@ xlog_do_recovery_pass( * iclog header and extract the header size from it. Get a * new hbp that is the correct size. */ - hbp = xlog_get_bp(log, 1); + hbp = xlog_alloc_buffer(log, 1); if (!hbp) return -ENOMEM; @@ -5374,23 +5291,23 @@ xlog_do_recovery_pass( hblks = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) hblks++; - xlog_put_bp(hbp); - hbp = xlog_get_bp(log, hblks); + kmem_free(hbp); + hbp = xlog_alloc_buffer(log, hblks); } else { hblks = 1; } } else { ASSERT(log->l_sectBBsize == 1); hblks = 1; - hbp = xlog_get_bp(log, 1); + hbp = xlog_alloc_buffer(log, 1); h_size = XLOG_BIG_RECORD_BSIZE; } if (!hbp) return -ENOMEM; - dbp = xlog_get_bp(log, BTOBB(h_size)); + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); if (!dbp) { - xlog_put_bp(hbp); + kmem_free(hbp); return -ENOMEM; } @@ -5405,7 +5322,7 @@ xlog_do_recovery_pass( /* * Check for header wrapping around physical end-of-log */ - offset = hbp->b_addr; + offset = hbp; split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { @@ -5441,8 +5358,8 @@ xlog_do_recovery_pass( * - order is important. */ wrapped_hblks = hblks - split_hblks; - error = xlog_bread_offset(log, 0, - wrapped_hblks, hbp, + error = xlog_bread_noalign(log, 0, + wrapped_hblks, offset + BBTOB(split_hblks)); if (error) goto bread_err2; @@ -5473,7 +5390,7 @@ xlog_do_recovery_pass( } else { /* This log record is split across the * physical end of log */ - offset = dbp->b_addr; + offset = dbp; split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical @@ -5502,8 +5419,8 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. */ - error = xlog_bread_offset(log, 0, - bblks - split_bblks, dbp, + error = xlog_bread_noalign(log, 0, + bblks - split_bblks, offset + BBTOB(split_bblks)); if (error) goto bread_err2; @@ -5551,9 +5468,9 @@ xlog_do_recovery_pass( } bread_err2: - xlog_put_bp(dbp); + kmem_free(dbp); bread_err1: - xlog_put_bp(hbp); + kmem_free(hbp); /* * Submit buffers that have been added from the last record processed, @@ -5687,7 +5604,7 @@ xlog_do_recover( * Now that we've finished replaying all buffer and inode * updates, re-read in the superblock and reverify it. */ - bp = xfs_getsb(mp, 0); + bp = xfs_getsb(mp); bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); ASSERT(!(bp->b_flags & XBF_WRITE)); bp->b_flags |= XBF_READ; @@ -5860,16 +5777,12 @@ xlog_recover_finish( return 0; } -int +void xlog_recover_cancel( struct xlog *log) { - int error = 0; - if (log->l_flags & XLOG_RECOVERY_NEEDED) - error = xlog_recover_cancel_intents(log); - - return error; + xlog_recover_cancel_intents(log); } #if defined(DEBUG) diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 6b736ea58d35..9804efe525a9 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -6,8 +6,8 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_error.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6b2bfe81dc51..322da6909290 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -12,9 +12,6 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_ialloc.h" @@ -27,7 +24,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_fsops.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_sysfs.h" #include "xfs_rmap_btree.h" @@ -430,30 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp) } /* - * Set the maximum inode count for this filesystem - */ -STATIC void -xfs_set_maxicount(xfs_mount_t *mp) -{ - xfs_sb_t *sbp = &(mp->m_sb); - uint64_t icount; - - if (sbp->sb_imax_pct) { - /* - * Make sure the maximum inode count is a multiple - * of the units we allocate inodes in. - */ - icount = sbp->sb_dblocks * sbp->sb_imax_pct; - do_div(icount, 100); - do_div(icount, mp->m_ialloc_blks); - mp->m_maxicount = (icount * mp->m_ialloc_blks) << - sbp->sb_inopblog; - } else { - mp->m_maxicount = 0; - } -} - -/* * Set the default minimum read and write sizes unless * already specified in a mount option. * We use smaller I/O sizes when the file system @@ -509,29 +481,6 @@ xfs_set_low_space_thresholds( } } - -/* - * Set whether we're using inode alignment. - */ -STATIC void -xfs_set_inoalignment(xfs_mount_t *mp) -{ - if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) - mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; - else - mp->m_inoalign_mask = 0; - /* - * If we are using stripe alignment, check whether - * the stripe unit is a multiple of the inode alignment - */ - if (mp->m_dalign && mp->m_inoalign_mask && - !(mp->m_dalign & mp->m_inoalign_mask)) - mp->m_sinoalign = mp->m_dalign; - else - mp->m_sinoalign = 0; -} - /* * Check that the data (and log if separate) is an ok size. */ @@ -683,6 +632,7 @@ xfs_mountfs( { struct xfs_sb *sbp = &(mp->m_sb); struct xfs_inode *rip; + struct xfs_ino_geometry *igeo = M_IGEO(mp); uint64_t resblks; uint quotamount = 0; uint quotaflags = 0; @@ -749,12 +699,10 @@ xfs_mountfs( xfs_alloc_compute_maxlevels(mp); xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); - xfs_ialloc_compute_maxlevels(mp); + xfs_ialloc_setup_geometry(mp); xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); - xfs_set_maxicount(mp); - /* enable fail_at_unmount as default */ mp->m_fail_unmount = true; @@ -788,50 +736,22 @@ xfs_mountfs( xfs_set_low_space_thresholds(mp); /* - * Set the inode cluster size. - * This may still be overridden by the file system - * block size if it is larger than the chosen cluster size. - * - * For v5 filesystems, scale the cluster size with the inode size to - * keep a constant ratio of inode per cluster buffer, but only if mkfs - * has set the inode alignment value appropriately for larger cluster - * sizes. - */ - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_hascrc(&mp->m_sb)) { - int new_size = mp->m_inode_cluster_size; - - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) - mp->m_inode_cluster_size = new_size; - } - mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp); - mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster); - mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp); - mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align); - - /* * If enabled, sparse inode chunk alignment is expected to match the * cluster size. Full inode chunk alignment must match the chunk size, * but that is checked on sb read verification... */ if (xfs_sb_version_hassparseinodes(&mp->m_sb) && mp->m_sb.sb_spino_align != - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { xfs_warn(mp, "Sparse inode block alignment (%u) must match cluster size (%llu).", mp->m_sb.sb_spino_align, - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)); error = -EINVAL; goto out_remove_uuid; } /* - * Set inode alignment fields - */ - xfs_set_inoalignment(mp); - - /* * Check that the data (and log if separate) is an ok size. */ error = xfs_check_sizes(mp); @@ -1385,24 +1305,14 @@ xfs_mod_frextents( * xfs_getsb() is called to obtain the buffer for the superblock. * The buffer is returned locked and read in from disk. * The buffer should be released with a call to xfs_brelse(). - * - * If the flags parameter is BUF_TRYLOCK, then we'll only return - * the superblock buffer if it can be locked without sleeping. - * If it can't then we'll return NULL. */ struct xfs_buf * xfs_getsb( - struct xfs_mount *mp, - int flags) + struct xfs_mount *mp) { struct xfs_buf *bp = mp->m_sb_bp; - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) - return NULL; - xfs_buf_lock(bp); - } - + xfs_buf_lock(bp); xfs_buf_hold(bp); ASSERT(bp->b_flags & XBF_DONE); return bp; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c81a5cd7c228..4adb6837439a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -105,6 +105,7 @@ typedef struct xfs_mount { struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ + struct xfs_ino_geometry m_ino_geo; /* inode geometry */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ @@ -126,12 +127,6 @@ typedef struct xfs_mount { uint8_t m_blkbit_log; /* blocklog + NBBY */ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ - uint8_t m_agino_log; /* #bits for agino in inum */ - uint m_inode_cluster_size;/* min inode buf size */ - unsigned int m_inodes_per_cluster; - unsigned int m_blocks_per_cluster; - unsigned int m_cluster_align; - unsigned int m_cluster_align_inodes; uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -139,15 +134,12 @@ typedef struct xfs_mount { uint m_alloc_mnr[2]; /* min alloc btree records */ uint m_bmap_dmxr[2]; /* max bmap btree records */ uint m_bmap_dmnr[2]; /* min bmap btree records */ - uint m_inobt_mxr[2]; /* max inobt btree records */ - uint m_inobt_mnr[2]; /* min inobt btree records */ uint m_rmap_mxr[2]; /* max rmap btree records */ uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ - uint m_in_maxlevels; /* max inobt btree levels. */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ @@ -159,20 +151,13 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint64_t m_flags; /* global mount flags */ bool m_finobt_nores; /* no per-AG finobt resv. */ - int m_ialloc_inos; /* inodes in inode allocation */ - int m_ialloc_blks; /* blocks in inode allocation */ - int m_ialloc_min_blks;/* min blocks in sparse inode - * allocation */ - int m_inoalign_mask;/* mask sb_inoalignmt if used */ uint m_qflags; /* quota status flags */ struct xfs_trans_resv m_resv; /* precomputed res values */ - uint64_t m_maxicount; /* maximum inode count */ uint64_t m_resblks; /* total reserved blocks */ uint64_t m_resblks_avail;/* available reserved blocks */ uint64_t m_resblks_save; /* reserved blks @ remount,ro */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ - int m_sinoalign; /* stripe unit inode alignment */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ @@ -198,7 +183,6 @@ typedef struct xfs_mount { struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_cil_workqueue; struct workqueue_struct *m_reclaim_workqueue; - struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; struct workqueue_struct *m_sync_workqueue; @@ -226,6 +210,8 @@ typedef struct xfs_mount { #endif } xfs_mount_t; +#define M_IGEO(mp) (&(mp)->m_ino_geo) + /* * Flags for m_flags. */ @@ -465,7 +451,7 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); +extern struct xfs_buf *xfs_getsb(xfs_mount_t *); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index c8ba98fae30a..b6701b4f59a9 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); + + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); } #endif /* __XFS_ONDISK_H */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index bde2c9f56a46..0c954cad7449 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -2,23 +2,16 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ -#include <linux/iomap.h> #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_log.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_iomap.h" -#include "xfs_shared.h" -#include "xfs_bit.h" -#include "xfs_pnfs.h" /* * Ensure that we do not have any outstanding pNFS layouts that can be used by diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c new file mode 100644 index 000000000000..4bcc3e61056c --- /dev/null +++ b/fs/xfs/xfs_pwork.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_trace.h" +#include "xfs_sysctl.h" +#include "xfs_pwork.h" +#include <linux/nmi.h> + +/* + * Parallel Work Queue + * =================== + * + * Abstract away the details of running a large and "obviously" parallelizable + * task across multiple CPUs. Callers initialize the pwork control object with + * a desired level of parallelization and a work function. Next, they embed + * struct xfs_pwork in whatever structure they use to pass work context to a + * worker thread and queue that pwork. The work function will be passed the + * pwork item when it is run (from process context) and any returned error will + * be recorded in xfs_pwork_ctl.error. Work functions should check for errors + * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not + * stop workqueue item processing. + * + * This is the rough equivalent of the xfsprogs workqueue code, though we can't + * reuse that name here. + */ + +/* Invoke our caller's function. */ +static void +xfs_pwork_work( + struct work_struct *work) +{ + struct xfs_pwork *pwork; + struct xfs_pwork_ctl *pctl; + int error; + + pwork = container_of(work, struct xfs_pwork, work); + pctl = pwork->pctl; + error = pctl->work_fn(pctl->mp, pwork); + if (error && !pctl->error) + pctl->error = error; + if (atomic_dec_and_test(&pctl->nr_work)) + wake_up(&pctl->poll_wait); +} + +/* + * Set up control data for parallel work. @work_fn is the function that will + * be called. @tag will be written into the kernel threads. @nr_threads is + * the level of parallelism desired, or 0 for no limit. + */ +int +xfs_pwork_init( + struct xfs_mount *mp, + struct xfs_pwork_ctl *pctl, + xfs_pwork_work_fn work_fn, + const char *tag, + unsigned int nr_threads) +{ +#ifdef DEBUG + if (xfs_globals.pwork_threads >= 0) + nr_threads = xfs_globals.pwork_threads; +#endif + trace_xfs_pwork_init(mp, nr_threads, current->pid); + + pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag, + current->pid); + if (!pctl->wq) + return -ENOMEM; + pctl->work_fn = work_fn; + pctl->error = 0; + pctl->mp = mp; + atomic_set(&pctl->nr_work, 0); + init_waitqueue_head(&pctl->poll_wait); + + return 0; +} + +/* Queue some parallel work. */ +void +xfs_pwork_queue( + struct xfs_pwork_ctl *pctl, + struct xfs_pwork *pwork) +{ + INIT_WORK(&pwork->work, xfs_pwork_work); + pwork->pctl = pctl; + atomic_inc(&pctl->nr_work); + queue_work(pctl->wq, &pwork->work); +} + +/* Wait for the work to finish and tear down the control structure. */ +int +xfs_pwork_destroy( + struct xfs_pwork_ctl *pctl) +{ + destroy_workqueue(pctl->wq); + pctl->wq = NULL; + return pctl->error; +} + +/* + * Wait for the work to finish by polling completion status and touch the soft + * lockup watchdog. This is for callers such as mount which hold locks. + */ +void +xfs_pwork_poll( + struct xfs_pwork_ctl *pctl) +{ + while (wait_event_timeout(pctl->poll_wait, + atomic_read(&pctl->nr_work) == 0, HZ) == 0) + touch_softlockup_watchdog(); +} + +/* + * Return the amount of parallelism that the data device can handle, or 0 for + * no limit. + */ +unsigned int +xfs_pwork_guess_datadev_parallelism( + struct xfs_mount *mp) +{ + struct xfs_buftarg *btp = mp->m_ddev_targp; + + /* + * For now we'll go with the most conservative setting possible, + * which is two threads for an SSD and 1 thread everywhere else. + */ + return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1; +} diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h new file mode 100644 index 000000000000..8133124cf3bb --- /dev/null +++ b/fs/xfs/xfs_pwork.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_PWORK_H__ +#define __XFS_PWORK_H__ + +struct xfs_pwork; +struct xfs_mount; + +typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork); + +/* + * Parallel work coordination structure. + */ +struct xfs_pwork_ctl { + struct workqueue_struct *wq; + struct xfs_mount *mp; + xfs_pwork_work_fn work_fn; + struct wait_queue_head poll_wait; + atomic_t nr_work; + int error; +}; + +/* + * Embed this parallel work control item inside your own work structure, + * then queue work with it. + */ +struct xfs_pwork { + struct work_struct work; + struct xfs_pwork_ctl *pctl; +}; + +#define XFS_PWORK_SINGLE_THREADED { .pctl = NULL } + +/* Have we been told to abort? */ +static inline bool +xfs_pwork_ctl_want_abort( + struct xfs_pwork_ctl *pctl) +{ + return pctl && pctl->error; +} + +/* Have we been told to abort? */ +static inline bool +xfs_pwork_want_abort( + struct xfs_pwork *pwork) +{ + return xfs_pwork_ctl_want_abort(pwork->pctl); +} + +int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, + xfs_pwork_work_fn work_fn, const char *tag, + unsigned int nr_threads); +void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork); +int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl); +void xfs_pwork_poll(struct xfs_pwork_ctl *pctl); +unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp); + +#endif /* __XFS_PWORK_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index aa6b6db3db0e..5e7a37f0cf84 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -13,19 +13,15 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" +#include "xfs_iwalk.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_bmap_btree.h" #include "xfs_bmap_util.h" #include "xfs_trans.h" #include "xfs_trans_space.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_cksum.h" /* * The global quota manager. There is only one of these for the entire @@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust( /* ARGSUSED */ STATIC int xfs_qm_dqusage_adjust( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* not used */ - int ubsize, /* not used */ - int *ubused, /* not used */ - int *res) /* result code value */ + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { - xfs_inode_t *ip; - xfs_qcnt_t nblks; - xfs_filblks_t rtblks = 0; /* total rt blks */ - int error; + struct xfs_inode *ip; + xfs_qcnt_t nblks; + xfs_filblks_t rtblks = 0; /* total rt blks */ + int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust( * rootino must have its resources accounted for, not so with the quota * inodes. */ - if (xfs_is_quota_inode(&mp->m_sb, ino)) { - *res = BULKSTAT_RV_NOTHING; - return -EINVAL; - } + if (xfs_is_quota_inode(&mp->m_sb, ino)) + return 0; /* * We don't _need_ to take the ilock EXCL here because quotacheck runs * at mount time and therefore nobody will be racing chown/chproj. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip); - if (error) { - *res = BULKSTAT_RV_NOTHING; + error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip); + if (error == -EINVAL || error == -ENOENT) + return 0; + if (error) return error; - } ASSERT(ip->i_delayed_blks == 0); @@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); if (error) goto error0; } @@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust( goto error0; } - xfs_irele(ip); - *res = BULKSTAT_RV_DIDONE; - return 0; - error0: xfs_irele(ip); - *res = BULKSTAT_RV_GIVEUP; return error; } @@ -1270,18 +1257,13 @@ STATIC int xfs_qm_quotacheck( xfs_mount_t *mp) { - int done, count, error, error2; - xfs_ino_t lastino; - size_t structsz; + int error, error2; uint flags; LIST_HEAD (buffer_list); struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; - count = INT_MAX; - structsz = 1; - lastino = 0; flags = 0; ASSERT(uip || gip || pip); @@ -1318,18 +1300,10 @@ xfs_qm_quotacheck( flags |= XFS_PQUOTA_CHKD; } - do { - /* - * Iterate thru all the inodes in the file system, - * adjusting the corresponding dquot counters in core. - */ - error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, - structsz, NULL, &done); - if (error) - break; - - } while (!done); + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, + NULL); + if (error) + goto error_return; /* * We've made all the changes that we need to make incore. Flush them diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 3091e4bc04ef..5d72e88598b4 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_qm.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index b3190890f096..da7ad0383037 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ -#include <linux/capability.h> #include "xfs.h" #include "xfs_fs.h" @@ -12,17 +11,13 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_qm.h" -#include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_defer.h" STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index a7c0c657dfaf..cd6c7210a373 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -11,10 +12,8 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_qm.h" -#include <linux/quota.h> static void diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fce38b56b962..d8288aa0670a 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -14,7 +14,6 @@ #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_refcount_item.h" #include "xfs_log.h" #include "xfs_refcount.h" @@ -95,15 +94,6 @@ xfs_cui_item_format( } /* - * Pinning has no meaning for an cui item, so just return. - */ -STATIC void -xfs_cui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an CUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the CUI transaction has been successfully committed to make it @@ -122,71 +112,22 @@ xfs_cui_item_unpin( } /* - * CUI items have no locking or pushing. However, since CUIs are pulled from - * the AIL when their corresponding CUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the CUI out of - * the AIL. - */ -STATIC uint -xfs_cui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The CUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an CUD isn't going to be * constructed and thus we free the CUI here directly. */ STATIC void -xfs_cui_item_unlock( +xfs_cui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_cui_release(CUI_ITEM(lip)); + xfs_cui_release(CUI_ITEM(lip)); } -/* - * The CUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_cui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The CUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_cui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all cui log items. - */ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_size = xfs_cui_item_size, .iop_format = xfs_cui_item_format, - .iop_pin = xfs_cui_item_pin, .iop_unpin = xfs_cui_item_unpin, - .iop_unlock = xfs_cui_item_unlock, - .iop_committed = xfs_cui_item_committed, - .iop_push = xfs_cui_item_push, - .iop_committing = xfs_cui_item_committing, + .iop_release = xfs_cui_item_release, }; /* @@ -254,126 +195,250 @@ xfs_cud_item_format( } /* - * Pinning has no meaning for an cud item, so just return. + * The CUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the CUI and free the + * CUD. */ STATIC void -xfs_cud_item_pin( +xfs_cud_item_release( struct xfs_log_item *lip) { + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + + xfs_cui_release(cudp->cud_cuip); + kmem_zone_free(xfs_cud_zone, cudp); } -/* - * Since pinning has no meaning for an cud item, unpinning does - * not either. - */ -STATIC void -xfs_cud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_cud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_cud_item_size, + .iop_format = xfs_cud_item_format, + .iop_release = xfs_cud_item_release, +}; + +static struct xfs_cud_log_item * +xfs_trans_get_cud( + struct xfs_trans *tp, + struct xfs_cui_log_item *cuip) { + struct xfs_cud_log_item *cudp; + + cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, + &xfs_cud_item_ops); + cudp->cud_cuip = cuip; + cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; + + xfs_trans_add_item(tp, &cudp->cud_item); + return cudp; } /* - * There isn't much you can do to push on an cud item. It is simply stuck - * waiting for the log to be flushed to disk. + * Finish an refcount update and log it to the CUD. Note that the + * transaction is marked dirty regardless of whether the refcount + * update succeeds or fails to support the CUI/CUD lifecycle rules. */ -STATIC uint -xfs_cud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_log_finish_refcount_update( + struct xfs_trans *tp, + struct xfs_cud_log_item *cudp, + enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, + xfs_extlen_t blockcount, + xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, + struct xfs_btree_cur **pcur) { - return XFS_ITEM_PINNED; + int error; + + error = xfs_refcount_finish_one(tp, type, startblock, + blockcount, new_fsb, new_len, pcur); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the CUI and frees the CUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); + + return error; } -/* - * The CUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the CUI and free the - * CUD. - */ -STATIC void -xfs_cud_item_unlock( - struct xfs_log_item *lip) +/* Sort refcount intents by AG. */ +static int +xfs_refcount_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_refcount_intent *ra; + struct xfs_refcount_intent *rb; + + ra = container_of(a, struct xfs_refcount_intent, ri_list); + rb = container_of(b, struct xfs_refcount_intent, ri_list); + return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - + XFS_FSB_TO_AGNO(mp, rb->ri_startblock); +} - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_cui_release(cudp->cud_cuip); - kmem_zone_free(xfs_cud_zone, cudp); +/* Get an CUI. */ +STATIC void * +xfs_refcount_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_cui_log_item *cuip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + cuip = xfs_cui_init(tp->t_mountp, count); + ASSERT(cuip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &cuip->cui_item); + return cuip; +} + +/* Set the phys extent flags for this reverse mapping. */ +static void +xfs_trans_set_refcount_flags( + struct xfs_phys_extent *refc, + enum xfs_refcount_intent_type type) +{ + refc->pe_flags = 0; + switch (type) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + refc->pe_flags |= type; + break; + default: + ASSERT(0); } } -/* - * When the cud item is committed to disk, all we need to do is delete our - * reference to our partner cui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_cud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Log refcount updates in the intent item. */ +STATIC void +xfs_refcount_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + struct xfs_cui_log_item *cuip = intent; + struct xfs_refcount_intent *refc; + uint next_extent; + struct xfs_phys_extent *ext; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); /* - * Drop the CUI reference regardless of whether the CUD has been - * aborted. Once the CUD transaction is constructed, it is the sole - * responsibility of the CUD to release the CUI (even if the CUI is - * aborted due to log I/O error). + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. */ - xfs_cui_release(cudp->cud_cuip); - kmem_zone_free(xfs_cud_zone, cudp); + next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; + ASSERT(next_extent < cuip->cui_format.cui_nextents); + ext = &cuip->cui_format.cui_extents[next_extent]; + ext->pe_startblock = refc->ri_startblock; + ext->pe_len = refc->ri_blockcount; + xfs_trans_set_refcount_flags(ext, refc->ri_type); +} - return (xfs_lsn_t)-1; +/* Get an CUD so we can process all the deferred refcount updates. */ +STATIC void * +xfs_refcount_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_cud(tp, intent); } -/* - * The CUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_cud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Process a deferred refcount update. */ +STATIC int +xfs_refcount_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) { + struct xfs_refcount_intent *refc; + xfs_fsblock_t new_fsb; + xfs_extlen_t new_aglen; + int error; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + error = xfs_trans_log_finish_refcount_update(tp, done_item, + refc->ri_type, + refc->ri_startblock, + refc->ri_blockcount, + &new_fsb, &new_aglen, + (struct xfs_btree_cur **)state); + /* Did we run out of reservation? Requeue what we didn't finish. */ + if (!error && new_aglen > 0) { + ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || + refc->ri_type == XFS_REFCOUNT_DECREASE); + refc->ri_startblock = new_fsb; + refc->ri_blockcount = new_aglen; + return -EAGAIN; + } + kmem_free(refc); + return error; } -/* - * This is the ops vector shared by all cud log items. - */ -static const struct xfs_item_ops xfs_cud_item_ops = { - .iop_size = xfs_cud_item_size, - .iop_format = xfs_cud_item_format, - .iop_pin = xfs_cud_item_pin, - .iop_unpin = xfs_cud_item_unpin, - .iop_unlock = xfs_cud_item_unlock, - .iop_committed = xfs_cud_item_committed, - .iop_push = xfs_cud_item_push, - .iop_committing = xfs_cud_item_committing, -}; +/* Clean up after processing deferred refcounts. */ +STATIC void +xfs_refcount_update_finish_cleanup( + struct xfs_trans *tp, + void *state, + int error) +{ + struct xfs_btree_cur *rcur = state; -/* - * Allocate and initialize an cud item with the given number of extents. - */ -struct xfs_cud_log_item * -xfs_cud_init( - struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + xfs_refcount_finish_one_cleanup(tp, rcur, error); +} +/* Abort all pending CUIs. */ +STATIC void +xfs_refcount_update_abort_intent( + void *intent) { - struct xfs_cud_log_item *cudp; + xfs_cui_release(intent); +} - cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); - xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); - cudp->cud_cuip = cuip; - cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; +/* Cancel a deferred refcount update. */ +STATIC void +xfs_refcount_update_cancel_item( + struct list_head *item) +{ + struct xfs_refcount_intent *refc; - return cudp; + refc = container_of(item, struct xfs_refcount_intent, ri_list); + kmem_free(refc); } +const struct xfs_defer_op_type xfs_refcount_update_defer_type = { + .max_items = XFS_CUI_MAX_FAST_EXTENTS, + .diff_items = xfs_refcount_update_diff_items, + .create_intent = xfs_refcount_update_create_intent, + .abort_intent = xfs_refcount_update_abort_intent, + .log_item = xfs_refcount_update_log_item, + .create_done = xfs_refcount_update_create_done, + .finish_item = xfs_refcount_update_finish_item, + .finish_cleanup = xfs_refcount_update_finish_cleanup, + .cancel_item = xfs_refcount_update_cancel_item, +}; + /* * Process a refcount update intent item that was recovered from the log. * We need to update the refcountbt. diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 3896dcc2368f..e47530f30489 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_cui_zone; extern struct kmem_zone *xfs_cud_zone; struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); -struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, - struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 680ae7662a78..c4ec7afd1170 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -11,21 +11,12 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_error.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_ioctl.h" #include "xfs_trace.h" -#include "xfs_log.h" #include "xfs_icache.h" -#include "xfs_pnfs.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" #include "xfs_refcount.h" @@ -33,11 +24,9 @@ #include "xfs_trans_space.h" #include "xfs_bit.h" #include "xfs_alloc.h" -#include "xfs_quota_defs.h" #include "xfs_quota.h" #include "xfs_reflink.h" #include "xfs_iomap.h" -#include "xfs_rmap_btree.h" #include "xfs_sb.h" #include "xfs_ag_resv.h" @@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range( /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - 0, 0, XFS_TRANS_NOFS, &tp); + 0, 0, 0, &tp); if (error) goto out; @@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent( resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 127dc9c32a54..77ed557b6127 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -14,7 +14,6 @@ #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_rmap_item.h" #include "xfs_log.h" #include "xfs_rmap.h" @@ -94,15 +93,6 @@ xfs_rui_item_format( } /* - * Pinning has no meaning for an rui item, so just return. - */ -STATIC void -xfs_rui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an RUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the RUI transaction has been successfully committed to make it @@ -121,71 +111,22 @@ xfs_rui_item_unpin( } /* - * RUI items have no locking or pushing. However, since RUIs are pulled from - * the AIL when their corresponding RUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the RUI out of - * the AIL. - */ -STATIC uint -xfs_rui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The RUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an RUD isn't going to be * constructed and thus we free the RUI here directly. */ STATIC void -xfs_rui_item_unlock( +xfs_rui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_rui_release(RUI_ITEM(lip)); + xfs_rui_release(RUI_ITEM(lip)); } -/* - * The RUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_rui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The RUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_rui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all rui log items. - */ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_size = xfs_rui_item_size, .iop_format = xfs_rui_item_format, - .iop_pin = xfs_rui_item_pin, .iop_unpin = xfs_rui_item_unpin, - .iop_unlock = xfs_rui_item_unlock, - .iop_committed = xfs_rui_item_committed, - .iop_push = xfs_rui_item_push, - .iop_committing = xfs_rui_item_committing, + .iop_release = xfs_rui_item_release, }; /* @@ -275,126 +216,271 @@ xfs_rud_item_format( } /* - * Pinning has no meaning for an rud item, so just return. + * The RUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the RUI and free the + * RUD. */ STATIC void -xfs_rud_item_pin( +xfs_rud_item_release( struct xfs_log_item *lip) { + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + + xfs_rui_release(rudp->rud_ruip); + kmem_zone_free(xfs_rud_zone, rudp); } -/* - * Since pinning has no meaning for an rud item, unpinning does - * not either. - */ -STATIC void -xfs_rud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_rud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_rud_item_size, + .iop_format = xfs_rud_item_format, + .iop_release = xfs_rud_item_release, +}; + +static struct xfs_rud_log_item * +xfs_trans_get_rud( + struct xfs_trans *tp, + struct xfs_rui_log_item *ruip) { + struct xfs_rud_log_item *rudp; + + rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, + &xfs_rud_item_ops); + rudp->rud_ruip = ruip; + rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; + + xfs_trans_add_item(tp, &rudp->rud_item); + return rudp; } -/* - * There isn't much you can do to push on an rud item. It is simply stuck - * waiting for the log to be flushed to disk. - */ -STATIC uint -xfs_rud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +/* Set the map extent flags for this reverse mapping. */ +static void +xfs_trans_set_rmap_flags( + struct xfs_map_extent *rmap, + enum xfs_rmap_intent_type type, + int whichfork, + xfs_exntst_t state) { - return XFS_ITEM_PINNED; + rmap->me_flags = 0; + if (state == XFS_EXT_UNWRITTEN) + rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; + if (whichfork == XFS_ATTR_FORK) + rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; + switch (type) { + case XFS_RMAP_MAP: + rmap->me_flags |= XFS_RMAP_EXTENT_MAP; + break; + case XFS_RMAP_MAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; + break; + case XFS_RMAP_UNMAP: + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; + break; + case XFS_RMAP_UNMAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; + break; + case XFS_RMAP_CONVERT: + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; + break; + case XFS_RMAP_CONVERT_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; + break; + case XFS_RMAP_ALLOC: + rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; + break; + case XFS_RMAP_FREE: + rmap->me_flags |= XFS_RMAP_EXTENT_FREE; + break; + default: + ASSERT(0); + } } /* - * The RUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the RUI and free the - * RUD. + * Finish an rmap update and log it to the RUD. Note that the transaction is + * marked dirty regardless of whether the rmap update succeeds or fails to + * support the RUI/RUD lifecycle rules. */ -STATIC void -xfs_rud_item_unlock( - struct xfs_log_item *lip) +static int +xfs_trans_log_finish_rmap_update( + struct xfs_trans *tp, + struct xfs_rud_log_item *rudp, + enum xfs_rmap_intent_type type, + uint64_t owner, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state, + struct xfs_btree_cur **pcur) { - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + int error; - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_rui_release(rudp->rud_ruip); - kmem_zone_free(xfs_rud_zone, rudp); - } + error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, + startblock, blockcount, state, pcur); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the RUI and frees the RUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); + + return error; } -/* - * When the rud item is committed to disk, all we need to do is delete our - * reference to our partner rui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_rud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Sort rmap intents by AG. */ +static int +xfs_rmap_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_rmap_intent *ra; + struct xfs_rmap_intent *rb; + + ra = container_of(a, struct xfs_rmap_intent, ri_list); + rb = container_of(b, struct xfs_rmap_intent, ri_list); + return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - + XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); +} + +/* Get an RUI. */ +STATIC void * +xfs_rmap_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_rui_log_item *ruip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + ruip = xfs_rui_init(tp->t_mountp, count); + ASSERT(ruip != NULL); /* - * Drop the RUI reference regardless of whether the RUD has been - * aborted. Once the RUD transaction is constructed, it is the sole - * responsibility of the RUD to release the RUI (even if the RUI is - * aborted due to log I/O error). + * Get a log_item_desc to point at the new item. */ - xfs_rui_release(rudp->rud_ruip); - kmem_zone_free(xfs_rud_zone, rudp); - - return (xfs_lsn_t)-1; + xfs_trans_add_item(tp, &ruip->rui_item); + return ruip; } -/* - * The RUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Log rmap updates in the intent item. */ STATIC void -xfs_rud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_rmap_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { + struct xfs_rui_log_item *ruip = intent; + struct xfs_rmap_intent *rmap; + uint next_extent; + struct xfs_map_extent *map; + + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; + ASSERT(next_extent < ruip->rui_format.rui_nextents); + map = &ruip->rui_format.rui_extents[next_extent]; + map->me_owner = rmap->ri_owner; + map->me_startblock = rmap->ri_bmap.br_startblock; + map->me_startoff = rmap->ri_bmap.br_startoff; + map->me_len = rmap->ri_bmap.br_blockcount; + xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, + rmap->ri_bmap.br_state); } -/* - * This is the ops vector shared by all rud log items. - */ -static const struct xfs_item_ops xfs_rud_item_ops = { - .iop_size = xfs_rud_item_size, - .iop_format = xfs_rud_item_format, - .iop_pin = xfs_rud_item_pin, - .iop_unpin = xfs_rud_item_unpin, - .iop_unlock = xfs_rud_item_unlock, - .iop_committed = xfs_rud_item_committed, - .iop_push = xfs_rud_item_push, - .iop_committing = xfs_rud_item_committing, -}; +/* Get an RUD so we can process all the deferred rmap updates. */ +STATIC void * +xfs_rmap_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_rud(tp, intent); +} -/* - * Allocate and initialize an rud item with the given number of extents. - */ -struct xfs_rud_log_item * -xfs_rud_init( - struct xfs_mount *mp, - struct xfs_rui_log_item *ruip) +/* Process a deferred rmap update. */ +STATIC int +xfs_rmap_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_rmap_intent *rmap; + int error; + + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + error = xfs_trans_log_finish_rmap_update(tp, done_item, + rmap->ri_type, + rmap->ri_owner, rmap->ri_whichfork, + rmap->ri_bmap.br_startoff, + rmap->ri_bmap.br_startblock, + rmap->ri_bmap.br_blockcount, + rmap->ri_bmap.br_state, + (struct xfs_btree_cur **)state); + kmem_free(rmap); + return error; +} + +/* Clean up after processing deferred rmaps. */ +STATIC void +xfs_rmap_update_finish_cleanup( + struct xfs_trans *tp, + void *state, + int error) +{ + struct xfs_btree_cur *rcur = state; + + xfs_rmap_finish_one_cleanup(tp, rcur, error); +} +/* Abort all pending RUIs. */ +STATIC void +xfs_rmap_update_abort_intent( + void *intent) { - struct xfs_rud_log_item *rudp; + xfs_rui_release(intent); +} - rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); - xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); - rudp->rud_ruip = ruip; - rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; +/* Cancel a deferred rmap update. */ +STATIC void +xfs_rmap_update_cancel_item( + struct list_head *item) +{ + struct xfs_rmap_intent *rmap; - return rudp; + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + kmem_free(rmap); } +const struct xfs_defer_op_type xfs_rmap_update_defer_type = { + .max_items = XFS_RUI_MAX_FAST_EXTENTS, + .diff_items = xfs_rmap_update_diff_items, + .create_intent = xfs_rmap_update_create_intent, + .abort_intent = xfs_rmap_update_abort_intent, + .log_item = xfs_rmap_update_log_item, + .create_done = xfs_rmap_update_create_done, + .finish_item = xfs_rmap_update_finish_item, + .finish_cleanup = xfs_rmap_update_finish_cleanup, + .cancel_item = xfs_rmap_update_cancel_item, +}; + /* * Process an rmap update intent item that was recovered from the log. * We need to update the rmapbt. diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 7e482baa27f5..8708e4a5aa5c 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_rui_zone; extern struct kmem_zone *xfs_rud_zone; struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); -struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *, - struct xfs_rui_log_item *); int xfs_rui_copy_format(struct xfs_log_iovec *buf, struct xfs_rui_log_format *dst_rui_fmt); void xfs_rui_item_free(struct xfs_rui_log_item *); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ac0fcdad0c4e..5fa4db3c3e32 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -11,17 +11,11 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" #include "xfs_icache.h" #include "xfs_rtalloc.h" diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index cc509743facd..113883c4f202 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/proc_fs.h> struct xstats xfsstats; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index a14d11d78bd8..f9450235533c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -11,18 +11,15 @@ #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_fsops.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" @@ -38,18 +35,8 @@ #include "xfs_refcount_item.h" #include "xfs_bmap_item.h" #include "xfs_reflink.h" -#include "xfs_defer.h" -#include <linux/namei.h> -#include <linux/dax.h> -#include <linux/init.h> -#include <linux/slab.h> #include <linux/magic.h> -#include <linux/mount.h> -#include <linux/mempool.h> -#include <linux/writeback.h> -#include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/parser.h> static const struct super_operations xfs_super_operations; @@ -582,7 +569,7 @@ xfs_set_inode_alloc( * Calculate how much should be reserved for inodes to meet * the max inode percentage. Used only for inode32. */ - if (mp->m_maxicount) { + if (M_IGEO(mp)->maxicount) { uint64_t icount; icount = sbp->sb_dblocks * sbp->sb_imax_pct; @@ -840,16 +827,10 @@ xfs_init_mount_workqueues( if (!mp->m_reclaim_workqueue) goto out_destroy_cil; - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, - mp->m_fsname); - if (!mp->m_log_workqueue) - goto out_destroy_reclaim; - mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) - goto out_destroy_log; + goto out_destroy_reclaim; mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, mp->m_fsname); @@ -860,8 +841,6 @@ xfs_init_mount_workqueues( out_destroy_eofb: destroy_workqueue(mp->m_eofblocks_workqueue); -out_destroy_log: - destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); out_destroy_cil: @@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues( { destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); - destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); destroy_workqueue(mp->m_cil_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); @@ -1131,10 +1109,10 @@ xfs_fs_statfs( fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) + if (M_IGEO(mp)->maxicount) statp->f_files = min_t(typeof(statp->f_files), statp->f_files, - mp->m_maxicount); + M_IGEO(mp)->maxicount); /* If sb_icount overshot maxicount, report actual allocation */ statp->f_files = max_t(typeof(statp->f_files), @@ -1685,6 +1663,8 @@ xfs_fs_fill_super( sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; + sb->s_iflags |= SB_I_CGROUPWB; + set_posix_acl_flag(sb); /* version 5 superblocks support inode version counters. */ diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 21cb49a43d7c..763e43d22dee 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -38,6 +38,18 @@ extern void xfs_qm_exit(void); # define XFS_SCRUB_STRING #endif +#ifdef CONFIG_XFS_ONLINE_REPAIR +# define XFS_REPAIR_STRING "repair, " +#else +# define XFS_REPAIR_STRING +#endif + +#ifdef CONFIG_XFS_WARN +# define XFS_WARN_STRING "verbose warnings, " +#else +# define XFS_WARN_STRING +#endif + #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -49,6 +61,8 @@ extern void xfs_qm_exit(void); XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_SCRUB_STRING \ + XFS_REPAIR_STRING \ + XFS_WARN_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index b2c1177c717f..ed66fd2de327 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -12,23 +12,14 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_defer.h" #include "xfs_dir2.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_btree.h" -#include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" -#include "xfs_symlink.h" #include "xfs_trans.h" -#include "xfs_log.h" /* ----- Kernel only functions below ----- */ int diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 0cc034dfb786..31b3bdbd2eba 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -4,10 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/sysctl.h> -#include <linux/proc_fs.h> #include "xfs_error.h" -#include "xfs_stats.h" static struct ctl_table_header *xfs_table_header; diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index ad7f9be13087..8abf4640f1d5 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -82,6 +82,9 @@ enum { extern xfs_param_t xfs_params; struct xfs_globals { +#ifdef DEBUG + int pwork_threads; /* parallel workqueue threads */ +#endif int log_recovery_delay; /* log recovery delay (secs) */ int mount_delay; /* mount setup delay (secs) */ bool bug_on_assert; /* BUG() the kernel on assert failure */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index cabda13f3c64..ddd0bf7a4740 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -10,9 +10,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sysfs.h" -#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_stats.h" #include "xfs_mount.h" struct xfs_sysfs_attr { @@ -206,11 +204,51 @@ always_cow_show( } XFS_SYSFS_ATTR_RW(always_cow); +#ifdef DEBUG +/* + * Override how many threads the parallel work queue is allowed to create. + * This has to be a debug-only global (instead of an errortag) because one of + * the main users of parallel workqueues is mount time quotacheck. + */ +STATIC ssize_t +pwork_threads_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < -1 || val > num_possible_cpus()) + return -EINVAL; + + xfs_globals.pwork_threads = val; + + return count; +} + +STATIC ssize_t +pwork_threads_show( + struct kobject *kobject, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); +} +XFS_SYSFS_ATTR_RW(pwork_threads); +#endif /* DEBUG */ + static struct attribute *xfs_dbg_attrs[] = { ATTR_LIST(bug_on_assert), ATTR_LIST(log_recovery_delay), ATTR_LIST(mount_delay), ATTR_LIST(always_cow), +#ifdef DEBUG + ATTR_LIST(pwork_threads), +#endif NULL, }; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index cb6489c22cad..bc85b89f88ca 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -15,24 +15,16 @@ #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_da_btree.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" #include "xfs_trans.h" -#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" #include "xfs_quota.h" -#include "xfs_iomap.h" -#include "xfs_aops.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_log_recover.h" -#include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" #include "xfs_filestream.h" #include "xfs_fsmap.h" diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2464ea351f83..8094b1920eef 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); @@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup); DEFINE_TRANS_EVENT(xfs_trans_free); DEFINE_TRANS_EVENT(xfs_trans_roll); DEFINE_TRANS_EVENT(xfs_trans_add_item); +DEFINE_TRANS_EVENT(xfs_trans_commit_items); DEFINE_TRANS_EVENT(xfs_trans_free_items); TRACE_EVENT(xfs_iunlink_update_bucket, @@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name, \ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); +TRACE_EVENT(xfs_iwalk_ag, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agino_t startino), + TP_ARGS(mp, agno, startino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, startino) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startino = startino; + ), + TP_printk("dev %d:%d agno %d startino %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __entry->startino) +) + +TRACE_EVENT(xfs_iwalk_ag_rec, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec), + TP_ARGS(mp, agno, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, startino) + __field(uint64_t, freemask) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startino = irec->ir_startino; + __entry->freemask = irec->ir_free; + ), + TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __entry->startino, __entry->freemask) +) + +TRACE_EVENT(xfs_pwork_init, + TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid), + TP_ARGS(mp, nr_threads, pid), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, nr_threads) + __field(pid_t, pid) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->nr_threads = nr_threads; + __entry->pid = pid; + ), + TP_printk("dev %d:%d nr_threads %u pid %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_threads, __entry->pid) +) + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 912b42f5fe4a..d42a68d8313b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -11,7 +11,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_extent_busy.h" #include "xfs_quota.h" #include "xfs_trans.h" @@ -264,9 +263,7 @@ xfs_trans_alloc( * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ - tp = kmem_zone_zalloc(xfs_trans_zone, - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); - + tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); @@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas( xfs_buf_t *bp; int whole = 0; - bp = xfs_trans_getsb(tp, tp->t_mountp, 0); + bp = xfs_trans_getsb(tp, tp->t_mountp); sbp = XFS_BUF_TO_SBP(bp); /* @@ -767,10 +764,9 @@ xfs_trans_del_item( } /* Detach and unlock all of the items in a transaction */ -void +static void xfs_trans_free_items( struct xfs_trans *tp, - xfs_lsn_t commit_lsn, bool abort) { struct xfs_log_item *lip, *next; @@ -779,11 +775,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (commit_lsn != NULLCOMMITLSN) - lip->li_ops->iop_committing(lip, commit_lsn); if (abort) set_bit(XFS_LI_ABORTED, &lip->li_flags); - lip->li_ops->iop_unlock(lip); + if (lip->li_ops->iop_release) + lip->li_ops->iop_release(lip); } } @@ -804,7 +799,8 @@ xfs_log_item_batch_insert( for (i = 0; i < nr_items; i++) { struct xfs_log_item *lip = log_items[i]; - lip->li_ops->iop_unpin(lip, 0); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 0); } } @@ -815,7 +811,7 @@ xfs_log_item_batch_insert( * * If we are called with the aborted flag set, it is because a log write during * a CIL checkpoint commit has failed. In this case, all the items in the - * checkpoint have already gone through iop_commited and iop_unlock, which + * checkpoint have already gone through iop_committed and iop_committing, which * means that checkpoint commit abort handling is treated exactly the same * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is iop_committed processing, followed by an @@ -833,7 +829,7 @@ xfs_trans_committed_bulk( struct xfs_ail *ailp, struct xfs_log_vec *log_vector, xfs_lsn_t commit_lsn, - int aborted) + bool aborted) { #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; @@ -852,7 +848,16 @@ xfs_trans_committed_bulk( if (aborted) set_bit(XFS_LI_ABORTED, &lip->li_flags); - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); + + if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { + lip->li_ops->iop_release(lip); + continue; + } + + if (lip->li_ops->iop_committed) + item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); + else + item_lsn = commit_lsn; /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) @@ -864,7 +869,8 @@ xfs_trans_committed_bulk( */ if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); - lip->li_ops->iop_unpin(lip, 1); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 1); continue; } @@ -882,7 +888,8 @@ xfs_trans_committed_bulk( xfs_trans_ail_update(ailp, lip, item_lsn); else spin_unlock(&ailp->ail_lock); - lip->li_ops->iop_unpin(lip, 0); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 0); continue; } @@ -998,7 +1005,7 @@ out_unreserve: tp->t_ticket = NULL; } current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); + xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); XFS_STATS_INC(mp, xs_trans_empty); @@ -1060,7 +1067,7 @@ xfs_trans_cancel( /* mark this thread as no longer being in a transaction */ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); + xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c6e1c5704a8c..64d7f171ebd3 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -27,7 +27,7 @@ struct xfs_cud_log_item; struct xfs_bui_log_item; struct xfs_bud_log_item; -typedef struct xfs_log_item { +struct xfs_log_item { struct list_head li_ail; /* AIL pointers */ struct list_head li_trans; /* transaction list */ xfs_lsn_t li_lsn; /* last on-disk lsn */ @@ -48,7 +48,7 @@ typedef struct xfs_log_item { struct xfs_log_vec *li_lv; /* active log vector */ struct xfs_log_vec *li_lv_shadow; /* standby vector */ xfs_lsn_t li_seq; /* CIL commit seq */ -} xfs_log_item_t; +}; /* * li_flags use the (set/test/clear)_bit atomic interfaces because updates can @@ -67,17 +67,24 @@ typedef struct xfs_log_item { { (1 << XFS_LI_DIRTY), "DIRTY" } struct xfs_item_ops { - void (*iop_size)(xfs_log_item_t *, int *, int *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); - void (*iop_pin)(xfs_log_item_t *); - void (*iop_unpin)(xfs_log_item_t *, int remove); + unsigned flags; + void (*iop_size)(struct xfs_log_item *, int *, int *); + void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); + void (*iop_pin)(struct xfs_log_item *); + void (*iop_unpin)(struct xfs_log_item *, int remove); uint (*iop_push)(struct xfs_log_item *, struct list_head *); - void (*iop_unlock)(xfs_log_item_t *); - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); + void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); + void (*iop_release)(struct xfs_log_item *); + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); + void (*iop_error)(struct xfs_log_item *, xfs_buf_t *); }; +/* + * Release the log item as soon as committed. This is for items just logging + * intents that never need to be written back in place. + */ +#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) + void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, int type, const struct xfs_item_ops *ops); @@ -203,7 +210,7 @@ xfs_trans_read_buf( flags, bpp, ops); } -struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); +struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); @@ -223,14 +230,6 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *); bool xfs_trans_buf_is_dirty(struct xfs_buf *bp); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); -struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, - struct xfs_efi_log_item *, - uint); -int xfs_trans_free_extent(struct xfs_trans *, - struct xfs_efd_log_item *, xfs_fsblock_t, - xfs_extlen_t, - const struct xfs_owner_info *, - bool); int xfs_trans_commit(struct xfs_trans *); int xfs_trans_roll(struct xfs_trans **); int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *); @@ -245,37 +244,4 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, extern kmem_zone_t *xfs_trans_zone; -/* rmap updates */ -enum xfs_rmap_intent_type; - -struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, - struct xfs_rui_log_item *ruip); -int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, - struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, - uint64_t owner, int whichfork, xfs_fileoff_t startoff, - xfs_fsblock_t startblock, xfs_filblks_t blockcount, - xfs_exntst_t state, struct xfs_btree_cur **pcur); - -/* refcount updates */ -enum xfs_refcount_intent_type; - -struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, - struct xfs_cui_log_item *cuip); -int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, - struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); - -/* mapping updates */ -enum xfs_bmap_intent_type; - -struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, - struct xfs_bui_log_item *buip); -int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, - struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type, - struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, - xfs_fsblock_t startblock, xfs_filblks_t *blockcount, - xfs_exntst_t state); - #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d3a4e89bf4a0..6ccfd75d3c24 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -6,6 +6,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -74,29 +75,29 @@ xfs_ail_check( * Return a pointer to the last item in the AIL. If the AIL is empty, then * return NULL. */ -static xfs_log_item_t * +static struct xfs_log_item * xfs_ail_max( struct xfs_ail *ailp) { if (list_empty(&ailp->ail_head)) return NULL; - return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); + return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail); } /* * Return a pointer to the item which follows the given item in the AIL. If * the given item is the last item in the list, then return NULL. */ -static xfs_log_item_t * +static struct xfs_log_item * xfs_ail_next( - struct xfs_ail *ailp, - xfs_log_item_t *lip) + struct xfs_ail *ailp, + struct xfs_log_item *lip) { if (lip->li_ail.next == &ailp->ail_head) return NULL; - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); + return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail); } /* @@ -109,10 +110,10 @@ xfs_ail_next( */ xfs_lsn_t xfs_ail_min_lsn( - struct xfs_ail *ailp) + struct xfs_ail *ailp) { - xfs_lsn_t lsn = 0; - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + struct xfs_log_item *lip; spin_lock(&ailp->ail_lock); lip = xfs_ail_min(ailp); @@ -128,10 +129,10 @@ xfs_ail_min_lsn( */ static xfs_lsn_t xfs_ail_max_lsn( - struct xfs_ail *ailp) + struct xfs_ail *ailp) { - xfs_lsn_t lsn = 0; - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + struct xfs_log_item *lip; spin_lock(&ailp->ail_lock); lip = xfs_ail_max(ailp); @@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear( * ascending traversal. Pass a @lsn of zero to initialise the cursor to the * first item in the AIL. Returns NULL if the list is empty. */ -xfs_log_item_t * +struct xfs_log_item * xfs_trans_ail_cursor_first( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; xfs_trans_ail_cursor_init(ailp, cur); @@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last( struct xfs_ail *ailp, xfs_lsn_t lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) @@ -327,8 +328,8 @@ xfs_ail_splice( */ static void xfs_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) + struct xfs_ail *ailp, + struct xfs_log_item *lip) { xfs_ail_check(ailp, lip); list_del(&lip->li_ail); @@ -347,6 +348,14 @@ xfsaild_push_item( if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) return XFS_ITEM_PINNED; + /* + * Consider the item pinned if a push callback is not defined so the + * caller will force the log. This should only happen for intent items + * as they are unpinned once the associated done item is committed to + * the on-disk log. + */ + if (!lip->li_ops->iop_push) + return XFS_ITEM_PINNED; return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); } @@ -356,7 +365,7 @@ xfsaild_push( { xfs_mount_t *mp = ailp->ail_mount; struct xfs_ail_cursor cur; - xfs_log_item_t *lip; + struct xfs_log_item *lip; xfs_lsn_t lsn; xfs_lsn_t target; long tout; @@ -611,10 +620,10 @@ xfsaild( */ void xfs_ail_push( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; lip = xfs_ail_min(ailp); if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || @@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk( int nr_items, xfs_lsn_t lsn) __releases(ailp->ail_lock) { - xfs_log_item_t *mlip; + struct xfs_log_item *mlip; int mlip_changed = 0; int i; LIST_HEAD(tmp); diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c deleted file mode 100644 index e1c7d55b32c3..000000000000 --- a/fs/xfs/xfs_trans_bmap.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_bmap_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_inode.h" - -/* - * This routine is called to allocate a "bmap update done" - * log item. - */ -struct xfs_bud_log_item * -xfs_trans_get_bud( - struct xfs_trans *tp, - struct xfs_bui_log_item *buip) -{ - struct xfs_bud_log_item *budp; - - budp = xfs_bud_init(tp->t_mountp, buip); - xfs_trans_add_item(tp, &budp->bud_item); - return budp; -} - -/* - * Finish an bmap update and log it to the BUD. Note that the - * transaction is marked dirty regardless of whether the bmap update - * succeeds or fails to support the BUI/BUD lifecycle rules. - */ -int -xfs_trans_log_finish_bmap_update( - struct xfs_trans *tp, - struct xfs_bud_log_item *budp, - enum xfs_bmap_intent_type type, - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) -{ - int error; - - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, - startblock, blockcount, state); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the BUI and frees the BUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); - - return error; -} - -/* Sort bmap intents by inode. */ -static int -xfs_bmap_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_bmap_intent *ba; - struct xfs_bmap_intent *bb; - - ba = container_of(a, struct xfs_bmap_intent, bi_list); - bb = container_of(b, struct xfs_bmap_intent, bi_list); - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; -} - -/* Get an BUI. */ -STATIC void * -xfs_bmap_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_bui_log_item *buip; - - ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); - ASSERT(tp != NULL); - - buip = xfs_bui_init(tp->t_mountp); - ASSERT(buip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &buip->bui_item); - return buip; -} - -/* Set the map extent flags for this mapping. */ -static void -xfs_trans_set_bmap_flags( - struct xfs_map_extent *bmap, - enum xfs_bmap_intent_type type, - int whichfork, - xfs_exntst_t state) -{ - bmap->me_flags = 0; - switch (type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - bmap->me_flags = type; - break; - default: - ASSERT(0); - } - if (state == XFS_EXT_UNWRITTEN) - bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; - if (whichfork == XFS_ATTR_FORK) - bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; -} - -/* Log bmap updates in the intent item. */ -STATIC void -xfs_bmap_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_bui_log_item *buip = intent; - struct xfs_bmap_intent *bmap; - uint next_extent; - struct xfs_map_extent *map; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; - ASSERT(next_extent < buip->bui_format.bui_nextents); - map = &buip->bui_format.bui_extents[next_extent]; - map->me_owner = bmap->bi_owner->i_ino; - map->me_startblock = bmap->bi_bmap.br_startblock; - map->me_startoff = bmap->bi_bmap.br_startoff; - map->me_len = bmap->bi_bmap.br_blockcount; - xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, - bmap->bi_bmap.br_state); -} - -/* Get an BUD so we can process all the deferred rmap updates. */ -STATIC void * -xfs_bmap_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_bud(tp, intent); -} - -/* Process a deferred rmap update. */ -STATIC int -xfs_bmap_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_bmap_intent *bmap; - xfs_filblks_t count; - int error; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - count = bmap->bi_bmap.br_blockcount; - error = xfs_trans_log_finish_bmap_update(tp, done_item, - bmap->bi_type, - bmap->bi_owner, bmap->bi_whichfork, - bmap->bi_bmap.br_startoff, - bmap->bi_bmap.br_startblock, - &count, - bmap->bi_bmap.br_state); - if (!error && count > 0) { - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); - bmap->bi_bmap.br_blockcount = count; - return -EAGAIN; - } - kmem_free(bmap); - return error; -} - -/* Abort all pending BUIs. */ -STATIC void -xfs_bmap_update_abort_intent( - void *intent) -{ - xfs_bui_release(intent); -} - -/* Cancel a deferred rmap update. */ -STATIC void -xfs_bmap_update_cancel_item( - struct list_head *item) -{ - struct xfs_bmap_intent *bmap; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); -} - -const struct xfs_defer_op_type xfs_bmap_update_defer_type = { - .max_items = XFS_BUI_MAX_FAST_EXTENTS, - .diff_items = xfs_bmap_update_diff_items, - .create_intent = xfs_bmap_update_create_intent, - .abort_intent = xfs_bmap_update_abort_intent, - .log_item = xfs_bmap_update_log_item, - .create_done = xfs_bmap_update_create_done, - .finish_item = xfs_bmap_update_finish_item, - .cancel_item = xfs_bmap_update_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 7d65ebf1e847..b5b3a78ef31c 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -10,11 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" /* @@ -174,8 +172,7 @@ xfs_trans_get_buf_map( xfs_buf_t * xfs_trans_getsb( xfs_trans_t *tp, - struct xfs_mount *mp, - int flags) + struct xfs_mount *mp) { xfs_buf_t *bp; struct xfs_buf_log_item *bip; @@ -185,7 +182,7 @@ xfs_trans_getsb( * if tp is NULL. */ if (tp == NULL) - return xfs_getsb(mp, flags); + return xfs_getsb(mp); /* * If the superblock buffer already has this transaction @@ -203,7 +200,7 @@ xfs_trans_getsb( return bp; } - bp = xfs_getsb(mp, flags); + bp = xfs_getsb(mp); if (bp == NULL) return NULL; @@ -428,7 +425,7 @@ xfs_trans_brelse( /* * Mark the buffer as not needing to be unlocked when the buf item's - * iop_unlock() routine is called. The buffer must already be locked + * iop_committing() routine is called. The buffer must already be locked * and associated with the given transaction. */ /* ARGSUSED */ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd664a03613f..1027c9ca6eb8 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -11,7 +11,6 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_quota.h" @@ -29,7 +28,6 @@ xfs_trans_dqjoin( xfs_trans_t *tp, xfs_dquot_t *dqp) { - ASSERT(dqp->q_transp != tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(dqp->q_logitem.qli_dquot == dqp); @@ -37,15 +35,8 @@ xfs_trans_dqjoin( * Get a log_item_desc to point at the new item. */ xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); - - /* - * Initialize d_transp so we can later determine if this dquot is - * associated with this transaction. - */ - dqp->q_transp = tp; } - /* * This is called to mark the dquot as needing * to be logged when the transaction is committed. The dquot must @@ -61,7 +52,6 @@ xfs_trans_log_dquot( xfs_trans_t *tp, xfs_dquot_t *dqp) { - ASSERT(dqp->q_transp == tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); tp->t_flags |= XFS_TRANS_DIRTY; @@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas( break; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_transp == tp); /* * adjust the actual number of blocks used diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c deleted file mode 100644 index 8ee7a3f8bb20..000000000000 --- a/fs/xfs/xfs_trans_extfree.c +++ /dev/null @@ -1,286 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_extfree_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_trace.h" - -/* - * This routine is called to allocate an "extent free done" - * log item that will hold nextents worth of extents. The - * caller must use all nextents extents, because we are not - * flexible about this at all. - */ -struct xfs_efd_log_item * -xfs_trans_get_efd(struct xfs_trans *tp, - struct xfs_efi_log_item *efip, - uint nextents) -{ - struct xfs_efd_log_item *efdp; - - ASSERT(tp != NULL); - ASSERT(nextents > 0); - - efdp = xfs_efd_init(tp->t_mountp, efip, nextents); - ASSERT(efdp != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &efdp->efd_item); - return efdp; -} - -/* - * Free an extent and log it to the EFD. Note that the transaction is marked - * dirty regardless of whether the extent free succeeds or fails to support the - * EFI/EFD lifecycle rules. - */ -int -xfs_trans_free_extent( - struct xfs_trans *tp, - struct xfs_efd_log_item *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent *extp; - uint next_extent; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - start_block); - int error; - - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); - - error = __xfs_free_extent(tp, start_block, ext_len, - oinfo, XFS_AG_RESV_NONE, skip_discard); - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the EFI and frees the EFD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - - next_extent = efdp->efd_next_extent; - ASSERT(next_extent < efdp->efd_format.efd_nextents); - extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = start_block; - extp->ext_len = ext_len; - efdp->efd_next_extent++; - - return error; -} - -/* Sort bmap items by AG. */ -static int -xfs_extent_free_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_extent_free_item *ra; - struct xfs_extent_free_item *rb; - - ra = container_of(a, struct xfs_extent_free_item, xefi_list); - rb = container_of(b, struct xfs_extent_free_item, xefi_list); - return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - - XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); -} - -/* Get an EFI. */ -STATIC void * -xfs_extent_free_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_efi_log_item *efip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - efip = xfs_efi_init(tp->t_mountp, count); - ASSERT(efip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &efip->efi_item); - return efip; -} - -/* Log a free extent to the intent item. */ -STATIC void -xfs_extent_free_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_efi_log_item *efip = intent; - struct xfs_extent_free_item *free; - uint next_extent; - struct xfs_extent *extp; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; - ASSERT(next_extent < efip->efi_format.efi_nextents); - extp = &efip->efi_format.efi_extents[next_extent]; - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; -} - -/* Get an EFD so we can process all the free extents. */ -STATIC void * -xfs_extent_free_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_efd(tp, intent, count); -} - -/* Process a free extent. */ -STATIC int -xfs_extent_free_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_extent_free_item *free; - int error; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - error = xfs_trans_free_extent(tp, done_item, - free->xefi_startblock, - free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); - return error; -} - -/* Abort all pending EFIs. */ -STATIC void -xfs_extent_free_abort_intent( - void *intent) -{ - xfs_efi_release(intent); -} - -/* Cancel a free extent. */ -STATIC void -xfs_extent_free_cancel_item( - struct list_head *item) -{ - struct xfs_extent_free_item *free; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); -} - -const struct xfs_defer_op_type xfs_extent_free_defer_type = { - .max_items = XFS_EFI_MAX_FAST_EXTENTS, - .diff_items = xfs_extent_free_diff_items, - .create_intent = xfs_extent_free_create_intent, - .abort_intent = xfs_extent_free_abort_intent, - .log_item = xfs_extent_free_log_item, - .create_done = xfs_extent_free_create_done, - .finish_item = xfs_extent_free_finish_item, - .cancel_item = xfs_extent_free_cancel_item, -}; - -/* - * AGFL blocks are accounted differently in the reserve pools and are not - * inserted into the busy extent list. - */ -STATIC int -xfs_agfl_free_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_efd_log_item *efdp = done_item; - struct xfs_extent_free_item *free; - struct xfs_extent *extp; - struct xfs_buf *agbp; - int error; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - uint next_extent; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - ASSERT(free->xefi_blockcount == 1); - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); - - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); - - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); - if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the EFI and frees the EFD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - - next_extent = efdp->efd_next_extent; - ASSERT(next_extent < efdp->efd_format.efd_nextents); - extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; - efdp->efd_next_extent++; - - kmem_free(free); - return error; -} - - -/* sub-type with special handling for AGFL deferred frees */ -const struct xfs_defer_op_type xfs_agfl_free_defer_type = { - .max_items = XFS_EFI_MAX_FAST_EXTENTS, - .diff_items = xfs_extent_free_diff_items, - .create_intent = xfs_extent_free_create_intent, - .abort_intent = xfs_extent_free_abort_intent, - .log_item = xfs_extent_free_log_item, - .create_done = xfs_extent_free_create_done, - .finish_item = xfs_agfl_free_finish_item, - .cancel_item = xfs_extent_free_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 091eae9f4e74..2e073c1c4614 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -16,12 +16,10 @@ struct xfs_log_vec; void xfs_trans_init(struct xfs_mount *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); -void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, - bool abort); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, - xfs_lsn_t commit_lsn, int aborted); + xfs_lsn_t commit_lsn, bool aborted); /* * AIL traversal cursor. * diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c deleted file mode 100644 index 8d734728dd1b..000000000000 --- a/fs/xfs/xfs_trans_refcount.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_refcount_item.h" -#include "xfs_alloc.h" -#include "xfs_refcount.h" - -/* - * This routine is called to allocate a "refcount update done" - * log item. - */ -struct xfs_cud_log_item * -xfs_trans_get_cud( - struct xfs_trans *tp, - struct xfs_cui_log_item *cuip) -{ - struct xfs_cud_log_item *cudp; - - cudp = xfs_cud_init(tp->t_mountp, cuip); - xfs_trans_add_item(tp, &cudp->cud_item); - return cudp; -} - -/* - * Finish an refcount update and log it to the CUD. Note that the - * transaction is marked dirty regardless of whether the refcount - * update succeeds or fails to support the CUI/CUD lifecycle rules. - */ -int -xfs_trans_log_finish_refcount_update( - struct xfs_trans *tp, - struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, - struct xfs_btree_cur **pcur) -{ - int error; - - error = xfs_refcount_finish_one(tp, type, startblock, - blockcount, new_fsb, new_len, pcur); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the CUI and frees the CUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); - - return error; -} - -/* Sort refcount intents by AG. */ -static int -xfs_refcount_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_refcount_intent *ra; - struct xfs_refcount_intent *rb; - - ra = container_of(a, struct xfs_refcount_intent, ri_list); - rb = container_of(b, struct xfs_refcount_intent, ri_list); - return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - - XFS_FSB_TO_AGNO(mp, rb->ri_startblock); -} - -/* Get an CUI. */ -STATIC void * -xfs_refcount_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_cui_log_item *cuip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - cuip = xfs_cui_init(tp->t_mountp, count); - ASSERT(cuip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &cuip->cui_item); - return cuip; -} - -/* Set the phys extent flags for this reverse mapping. */ -static void -xfs_trans_set_refcount_flags( - struct xfs_phys_extent *refc, - enum xfs_refcount_intent_type type) -{ - refc->pe_flags = 0; - switch (type) { - case XFS_REFCOUNT_INCREASE: - case XFS_REFCOUNT_DECREASE: - case XFS_REFCOUNT_ALLOC_COW: - case XFS_REFCOUNT_FREE_COW: - refc->pe_flags |= type; - break; - default: - ASSERT(0); - } -} - -/* Log refcount updates in the intent item. */ -STATIC void -xfs_refcount_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_cui_log_item *cuip = intent; - struct xfs_refcount_intent *refc; - uint next_extent; - struct xfs_phys_extent *ext; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; - ASSERT(next_extent < cuip->cui_format.cui_nextents); - ext = &cuip->cui_format.cui_extents[next_extent]; - ext->pe_startblock = refc->ri_startblock; - ext->pe_len = refc->ri_blockcount; - xfs_trans_set_refcount_flags(ext, refc->ri_type); -} - -/* Get an CUD so we can process all the deferred refcount updates. */ -STATIC void * -xfs_refcount_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_cud(tp, intent); -} - -/* Process a deferred refcount update. */ -STATIC int -xfs_refcount_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_refcount_intent *refc; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_aglen; - int error; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - error = xfs_trans_log_finish_refcount_update(tp, done_item, - refc->ri_type, - refc->ri_startblock, - refc->ri_blockcount, - &new_fsb, &new_aglen, - (struct xfs_btree_cur **)state); - /* Did we run out of reservation? Requeue what we didn't finish. */ - if (!error && new_aglen > 0) { - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || - refc->ri_type == XFS_REFCOUNT_DECREASE); - refc->ri_startblock = new_fsb; - refc->ri_blockcount = new_aglen; - return -EAGAIN; - } - kmem_free(refc); - return error; -} - -/* Clean up after processing deferred refcounts. */ -STATIC void -xfs_refcount_update_finish_cleanup( - struct xfs_trans *tp, - void *state, - int error) -{ - struct xfs_btree_cur *rcur = state; - - xfs_refcount_finish_one_cleanup(tp, rcur, error); -} - -/* Abort all pending CUIs. */ -STATIC void -xfs_refcount_update_abort_intent( - void *intent) -{ - xfs_cui_release(intent); -} - -/* Cancel a deferred refcount update. */ -STATIC void -xfs_refcount_update_cancel_item( - struct list_head *item) -{ - struct xfs_refcount_intent *refc; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); -} - -const struct xfs_defer_op_type xfs_refcount_update_defer_type = { - .max_items = XFS_CUI_MAX_FAST_EXTENTS, - .diff_items = xfs_refcount_update_diff_items, - .create_intent = xfs_refcount_update_create_intent, - .abort_intent = xfs_refcount_update_abort_intent, - .log_item = xfs_refcount_update_log_item, - .create_done = xfs_refcount_update_create_done, - .finish_item = xfs_refcount_update_finish_item, - .finish_cleanup = xfs_refcount_update_finish_cleanup, - .cancel_item = xfs_refcount_update_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c deleted file mode 100644 index 5c7936b1be13..000000000000 --- a/fs/xfs/xfs_trans_rmap.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_rmap_item.h" -#include "xfs_alloc.h" -#include "xfs_rmap.h" - -/* Set the map extent flags for this reverse mapping. */ -static void -xfs_trans_set_rmap_flags( - struct xfs_map_extent *rmap, - enum xfs_rmap_intent_type type, - int whichfork, - xfs_exntst_t state) -{ - rmap->me_flags = 0; - if (state == XFS_EXT_UNWRITTEN) - rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; - if (whichfork == XFS_ATTR_FORK) - rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; - switch (type) { - case XFS_RMAP_MAP: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP; - break; - case XFS_RMAP_MAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; - break; - case XFS_RMAP_UNMAP: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; - break; - case XFS_RMAP_UNMAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; - break; - case XFS_RMAP_CONVERT: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; - break; - case XFS_RMAP_CONVERT_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; - break; - case XFS_RMAP_ALLOC: - rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; - break; - case XFS_RMAP_FREE: - rmap->me_flags |= XFS_RMAP_EXTENT_FREE; - break; - default: - ASSERT(0); - } -} - -struct xfs_rud_log_item * -xfs_trans_get_rud( - struct xfs_trans *tp, - struct xfs_rui_log_item *ruip) -{ - struct xfs_rud_log_item *rudp; - - rudp = xfs_rud_init(tp->t_mountp, ruip); - xfs_trans_add_item(tp, &rudp->rud_item); - return rudp; -} - -/* - * Finish an rmap update and log it to the RUD. Note that the transaction is - * marked dirty regardless of whether the rmap update succeeds or fails to - * support the RUI/RUD lifecycle rules. - */ -int -xfs_trans_log_finish_rmap_update( - struct xfs_trans *tp, - struct xfs_rud_log_item *rudp, - enum xfs_rmap_intent_type type, - uint64_t owner, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state, - struct xfs_btree_cur **pcur) -{ - int error; - - error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, - startblock, blockcount, state, pcur); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the RUI and frees the RUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); - - return error; -} - -/* Sort rmap intents by AG. */ -static int -xfs_rmap_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_rmap_intent *ra; - struct xfs_rmap_intent *rb; - - ra = container_of(a, struct xfs_rmap_intent, ri_list); - rb = container_of(b, struct xfs_rmap_intent, ri_list); - return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - - XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); -} - -/* Get an RUI. */ -STATIC void * -xfs_rmap_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_rui_log_item *ruip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - ruip = xfs_rui_init(tp->t_mountp, count); - ASSERT(ruip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &ruip->rui_item); - return ruip; -} - -/* Log rmap updates in the intent item. */ -STATIC void -xfs_rmap_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_rui_log_item *ruip = intent; - struct xfs_rmap_intent *rmap; - uint next_extent; - struct xfs_map_extent *map; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; - ASSERT(next_extent < ruip->rui_format.rui_nextents); - map = &ruip->rui_format.rui_extents[next_extent]; - map->me_owner = rmap->ri_owner; - map->me_startblock = rmap->ri_bmap.br_startblock; - map->me_startoff = rmap->ri_bmap.br_startoff; - map->me_len = rmap->ri_bmap.br_blockcount; - xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, - rmap->ri_bmap.br_state); -} - -/* Get an RUD so we can process all the deferred rmap updates. */ -STATIC void * -xfs_rmap_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_rud(tp, intent); -} - -/* Process a deferred rmap update. */ -STATIC int -xfs_rmap_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_rmap_intent *rmap; - int error; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - error = xfs_trans_log_finish_rmap_update(tp, done_item, - rmap->ri_type, - rmap->ri_owner, rmap->ri_whichfork, - rmap->ri_bmap.br_startoff, - rmap->ri_bmap.br_startblock, - rmap->ri_bmap.br_blockcount, - rmap->ri_bmap.br_state, - (struct xfs_btree_cur **)state); - kmem_free(rmap); - return error; -} - -/* Clean up after processing deferred rmaps. */ -STATIC void -xfs_rmap_update_finish_cleanup( - struct xfs_trans *tp, - void *state, - int error) -{ - struct xfs_btree_cur *rcur = state; - - xfs_rmap_finish_one_cleanup(tp, rcur, error); -} - -/* Abort all pending RUIs. */ -STATIC void -xfs_rmap_update_abort_intent( - void *intent) -{ - xfs_rui_release(intent); -} - -/* Cancel a deferred rmap update. */ -STATIC void -xfs_rmap_update_cancel_item( - struct list_head *item) -{ - struct xfs_rmap_intent *rmap; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); -} - -const struct xfs_defer_op_type xfs_rmap_update_defer_type = { - .max_items = XFS_RUI_MAX_FAST_EXTENTS, - .diff_items = xfs_rmap_update_diff_items, - .create_intent = xfs_rmap_update_create_intent, - .abort_intent = xfs_rmap_update_abort_intent, - .log_item = xfs_rmap_update_log_item, - .create_done = xfs_rmap_update_create_done, - .finish_item = xfs_rmap_update_finish_item, - .finish_cleanup = xfs_rmap_update_finish_cleanup, - .cancel_item = xfs_rmap_update_cancel_item, -}; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 9a63016009a1..3123b5aaad2a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -5,15 +5,12 @@ */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_acl.h" #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> |