346 files changed, 4265 insertions, 2293 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314df..cb207c79680d 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index 3e76276bd488..5becb441c3cb 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -562,7 +562,7 @@ The interesting knobs for XFS workqueues are as follows: Zoned Filesystems ================= -For zoned file systems, the following attribute is exposed in: +For zoned file systems, the following attributes are exposed in: /sys/fs/xfs/<dev>/zoned/ @@ -572,23 +572,10 @@ For zoned file systems, the following attribute is exposed in: is limited by the capabilities of the backing zoned device, file system size and the max_open_zones mount option. -Zoned Filesystems -================= - -For zoned file systems, the following attributes are exposed in: - - /sys/fs/xfs/<dev>/zoned/ - - max_open_zones (Min: 1 Default: Varies Max: UINTMAX) - This read-only attribute exposes the maximum number of open zones - available for data placement. The value is determined at mount time and - is limited by the capabilities of the backing zoned device, file system - size and the max_open_zones mount option. - - zonegc_low_space (Min: 0 Default: 0 Max: 100) - Define a percentage for how much of the unused space that GC should keep - available for writing. A high value will reclaim more of the space - occupied by unused blocks, creating a larger buffer against write - bursts at the cost of increased write amplification. Regardless - of this value, garbage collection will always aim to free a minimum - amount of blocks to keep max_open_zones open for data placement purposes. + zonegc_low_space (Min: 0 Default: 0 Max: 100) + Define a percentage for how much of the unused space that GC should keep + available for writing. A high value will reclaim more of the space + occupied by unused blocks, creating a larger buffer against write + bursts at the cost of increased write amplification. Regardless + of this value, garbage collection will always aim to free a minimum + amount of blocks to keep max_open_zones open for data placement purposes. diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst index 1565b9546e38..a8f307a3b499 100644 --- a/Documentation/arch/openrisc/openrisc_port.rst +++ b/Documentation/arch/openrisc/openrisc_port.rst @@ -7,10 +7,10 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k). 
For information about OpenRISC processors and ongoing development: - ======= ============================= + ======= ============================== website https://openrisc.io - email openrisc@lists.librecores.org - ======= ============================= + email linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -27,11 +27,11 @@ Toolchain binaries can be obtained from openrisc.io or our github releases page. Instructions for building the different toolchains can be found on openrisc.io or Stafford's toolchain build and release scripts. - ========== ================================================= - binaries https://github.com/openrisc/or1k-gcc/releases + ========== ========================================================== + binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases toolchains https://openrisc.io/software building https://github.com/stffrdhrn/or1k-toolchain-build - ========== ================================================= + ========== ========================================================== 2) Building diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index de27e1620821..0acb4c9b8d90 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted into the Linux kernel, please implement support into LLVM's BPF back end. See LLVM_ section below for further information. +Q: What "BPF_INTERNAL" symbol namespace is for? +----------------------------------------------- +A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure +like preload kernel modules with light skeleton. Most symbols outside +of BPF_INTERNAL are not expected to be used by code outside of BPF either. +Symbols may lack the designation because they predate the namespaces, +or due to an oversight. + Stable submission ================= diff --git a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml index 8b3826243ddd..38e3ad50ff4f 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml @@ -27,7 +27,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 - maximum: 7 + maximum: 31 description: Offset in bit within the address range specified by reg. 
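The BPF_INTERNAL answer above describes the namespace in prose; here is a minimal sketch of the mechanics. The symbol name bpf_example_helper is made up for illustration; only the EXPORT_SYMBOL_NS_GPL()/MODULE_IMPORT_NS() pattern is the kernel's:

	#include <linux/module.h>

	/* Exporting side: place a symbol in the BPF_INTERNAL namespace. */
	int bpf_example_helper(void)
	{
		return 0;
	}
	EXPORT_SYMBOL_NS_GPL(bpf_example_helper, "BPF_INTERNAL");

	/* Importing side: a consumer module must opt in explicitly;
	 * without this, modpost complains at build time and the symbol
	 * will not resolve for the module.
	 */
	MODULE_IMPORT_NS("BPF_INTERNAL");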
- minimum: 1 diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 39c209249c9c..3f6dc6a3a9f1 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -19,6 +19,7 @@ properties: - enum: - qcom,apq8064-qfprom - qcom,apq8084-qfprom + - qcom,ipq5018-qfprom - qcom,ipq5332-qfprom - qcom,ipq5424-qfprom - qcom,ipq6018-qfprom @@ -28,6 +29,8 @@ properties: - qcom,msm8226-qfprom - qcom,msm8916-qfprom - qcom,msm8917-qfprom + - qcom,msm8937-qfprom + - qcom,msm8960-qfprom - qcom,msm8974-qfprom - qcom,msm8976-qfprom - qcom,msm8996-qfprom @@ -51,6 +54,7 @@ properties: - qcom,sm8450-qfprom - qcom,sm8550-qfprom - qcom,sm8650-qfprom + - qcom,x1e80100-qfprom - const: qcom,qfprom reg: diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index a44d44b32809..dc89020b0950 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -14,6 +14,7 @@ properties: enum: - rockchip,px30-otp - rockchip,rk3308-otp + - rockchip,rk3576-otp - rockchip,rk3588-otp reg: @@ -62,6 +63,8 @@ allOf: properties: clocks: maxItems: 3 + clock-names: + maxItems: 3 resets: maxItems: 1 reset-names: @@ -73,11 +76,33 @@ allOf: compatible: contains: enum: + - rockchip,rk3576-otp + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: otp + - const: apb + + - if: + properties: + compatible: + contains: + enum: - rockchip,rk3588-otp then: properties: clocks: minItems: 4 + clock-names: + minItems: 4 resets: minItems: 3 reset-names: diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index bccd00a1ddd0..53d00ca643b3 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -56,19 +56,18 @@ properties: enum: - snps,dw-apb-ssi - snps,dwc-ssi-1.01a - - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller - items: - - enum: - - mscc,ocelot-spi - - mscc,jaguar2-spi - - const: snps,dw-apb-ssi - description: Microchip Sparx5 SoC SPI Controller const: microchip,sparx5-spi - description: Amazon Alpine SPI Controller const: amazon,alpine-dw-apb-ssi - - description: Renesas RZ/N1 SPI Controller + - description: Vendor controllers which use snps,dw-apb-ssi as fallback items: - - const: renesas,rzn1-spi + - enum: + - mscc,ocelot-spi + - mscc,jaguar2-spi + - renesas,rzn1-spi + - sophgo,sg2042-spi + - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller const: intel,keembay-ssi @@ -88,10 +87,6 @@ properties: - renesas,r9a06g032-spi # RZ/N1D - renesas,r9a06g033-spi # RZ/N1S - const: renesas,rzn1-spi # RZ/N1 - - description: T-HEAD TH1520 SoC SPI Controller - items: - - const: thead,th1520-spi - - const: snps,dw-apb-ssi reg: minItems: 1 diff --git a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst index cadc580fa23b..d728e4db0b85 100644 --- a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 关于OpenRISC处理器和正在进行中的开发的信息: - ======= ============================= + 
======= ============================== 网站 https://openrisc.io - 邮箱 openrisc@lists.librecores.org - ======= ============================= + 邮箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具链和Linux的构建指南 工具链的构建指南可以在openrisc.io或Stafford的工具链构建和发布脚本 中找到。 - ====== ================================================= - 二进制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二进制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具链 https://openrisc.io/software 构建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 构建 diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst index 422fe9f7a3f2..a1e4517dc601 100644 --- a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 關於OpenRISC處理器和正在進行中的開發的信息: - ======= ============================= + ======= ============================== 網站 https://openrisc.io - 郵箱 openrisc@lists.librecores.org - ======= ============================= + 郵箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具鏈和Linux的構建指南 工具鏈的構建指南可以在openrisc.io或Stafford的工具鏈構建和發佈腳本 中找到。 - ====== ================================================= - 二進制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二進制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具鏈 https://openrisc.io/software 構建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 構建 diff --git a/MAINTAINERS b/MAINTAINERS index f31aeb6b452e..3cbf9ac0d83f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3873,8 +3873,9 @@ AUXILIARY BUS DRIVER M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> R: Dave Ertman <david.m.ertman@intel.com> R: Ira Weiny <ira.weiny@intel.com> +R: Leon Romanovsky <leon@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h @@ -7233,7 +7234,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: "Rafael J. 
Wysocki" <rafael@kernel.org> M: Danilo Krummrich <dakr@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ @@ -10463,14 +10464,20 @@ S: Supported F: drivers/infiniband/hw/hfi1 HFS FILESYSTEM +M: Viacheslav Dubeyko <slava@dubeyko.com> +M: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> +M: Yangtao Li <frank.li@vivo.com> L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfs.rst F: fs/hfs/ HFSPLUS FILESYSTEM +M: Viacheslav Dubeyko <slava@dubeyko.com> +M: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> +M: Yangtao Li <frank.li@vivo.com> L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfsplus.rst F: fs/hfsplus/ @@ -13118,7 +13125,7 @@ KERNFS M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: Tejun Heo <tj@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: fs/kernfs/ F: include/linux/kernfs.h @@ -18697,7 +18704,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi <lpieralisi@kernel.org> M: Krzysztof Wilczyński <kw@linux.com> -R: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> +M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> R: Rob Herring <robh@kernel.org> L: linux-pci@vger.kernel.org S: Supported @@ -18750,6 +18757,16 @@ F: include/asm-generic/pci* F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* + +PCI SUBSYSTEM [RUST] +M: Danilo Krummrich <dakr@kernel.org> +R: Bjorn Helgaas <bhelgaas@google.com> +R: Krzysztof Wilczyński <kwilczynski@kernel.org> +L: linux-pci@vger.kernel.org +S: Maintained +C: irc://irc.oftc.net/linux-pci +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git +F: rust/helpers/pci.c F: rust/kernel/pci.rs F: samples/rust/rust_driver_pci.rs @@ -25193,9 +25210,13 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS +M: Badhri Jagan Sridharan <badhri@google.com> L: linux-usb@vger.kernel.org -S: Orphan -F: drivers/usb/typec/tcpm/ +S: Maintained +F: drivers/usb/typec/tcpm/tcpci.c +F: drivers/usb/typec/tcpm/tcpm.c +F: include/linux/usb/tcpci.h +F: include/linux/usb/tcpm.h USB TYPEC TUSB1046 MUX DRIVER M: Romain Gantois <romain.gantois@bootlin.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1052,13 +1052,6 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) -#Currently, disable -Wstringop-overflow for GCC 11, globally. 
-KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - -#Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) - # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a6..08ba91e6fb03 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994b..6e8aa8e72601 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. - */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f705750..6b0ad5070d3e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f279..2004b4f41ade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b..c6650cfe706c 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Eratum 27456 detection, which makes it + * impossible to use non-global mappings. 
+ */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95fee..19ca57def629 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 067c0b994648..1a2cf012b8f2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -73,6 +73,7 @@ config LOONGARCH select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_BPF_JIT diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 3177674228f8..45514f314664 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,22 +22,29 @@ struct sigcontext; #define kernel_fpu_available() cpu_has_fpu -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); - -extern void _init_fpu(unsigned int); -extern void _save_fp(struct loongarch_fpu *); -extern void _restore_fp(struct loongarch_fpu *); - -extern void _save_lsx(struct loongarch_fpu *fpu); -extern void _restore_lsx(struct loongarch_fpu *fpu); -extern void _init_lsx_upper(void); -extern void _restore_lsx_upper(struct loongarch_fpu *fpu); - -extern void _save_lasx(struct loongarch_fpu *fpu); -extern void _restore_lasx(struct loongarch_fpu *fpu); -extern void _init_lasx_upper(void); -extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +asmlinkage void _init_fpu(unsigned int); +asmlinkage void _save_fp(struct loongarch_fpu *); +asmlinkage void _restore_fp(struct loongarch_fpu *); +asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +asmlinkage void _save_lsx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lsx(struct loongarch_fpu *fpu); +asmlinkage void _init_lsx_upper(void); +asmlinkage void _restore_lsx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); + +asmlinkage void _save_lasx(struct loongarch_fpu *fpu); +asmlinkage void 
_restore_lasx(struct loongarch_fpu *fpu); +asmlinkage void _init_lasx_upper(void); +asmlinkage void _restore_lasx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); static inline void enable_lsx(void); static inline void disable_lsx(void); diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h index e671978bf552..38566574e562 100644 --- a/arch/loongarch/include/asm/lbt.h +++ b/arch/loongarch/include/asm/lbt.h @@ -12,9 +12,13 @@ #include <asm/loongarch.h> #include <asm/processor.h> -extern void _init_lbt(void); -extern void _save_lbt(struct loongarch_lbt *); -extern void _restore_lbt(struct loongarch_lbt *); +asmlinkage void _init_lbt(void); +asmlinkage void _save_lbt(struct loongarch_lbt *); +asmlinkage void _restore_lbt(struct loongarch_lbt *); +asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _save_ftop_context(void __user *ftop); +asmlinkage int _restore_ftop_context(void __user *ftop); static inline int is_lbt_enabled(void) { diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index f3ddaed9ef7f..a5b63c84f854 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -33,9 +33,9 @@ struct pt_regs { unsigned long __last[]; } __aligned(8); -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { - return arch_irqs_disabled_flags(regs->csr_prmd); + return !(regs->csr_prmd & CSR_PRMD_PIE); } static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 6ab640101457..28caf416ae36 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -458,6 +458,7 @@ SYM_FUNC_START(_save_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_fp_context) +EXPORT_SYMBOL_GPL(_save_fp_context) /* * a0: fpregs @@ -471,6 +472,7 @@ SYM_FUNC_START(_restore_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_fp_context) +EXPORT_SYMBOL_GPL(_restore_fp_context) /* * a0: fpregs @@ -484,6 +486,7 @@ SYM_FUNC_START(_save_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lsx_context) +EXPORT_SYMBOL_GPL(_save_lsx_context) /* * a0: fpregs @@ -497,6 +500,7 @@ SYM_FUNC_START(_restore_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lsx_context) +EXPORT_SYMBOL_GPL(_restore_lsx_context) /* * a0: fpregs @@ -510,6 +514,7 @@ SYM_FUNC_START(_save_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lasx_context) +EXPORT_SYMBOL_GPL(_save_lasx_context) /* * a0: fpregs @@ -523,6 +528,7 @@ SYM_FUNC_START(_restore_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lasx_context) +EXPORT_SYMBOL_GPL(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 001f061d226a..71678912d24c 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -90,6 +90,7 @@ SYM_FUNC_START(_save_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lbt_context) +EXPORT_SYMBOL_GPL(_save_lbt_context) /* * a0: scr @@ -110,6 +111,7 @@ SYM_FUNC_START(_restore_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lbt_context) 
+EXPORT_SYMBOL_GPL(_restore_lbt_context) /* * a0: ftop @@ -120,6 +122,7 @@ SYM_FUNC_START(_save_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_ftop_context) +EXPORT_SYMBOL_GPL(_save_ftop_context) /* * a0: ftop @@ -150,6 +153,7 @@ SYM_FUNC_START(_restore_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_ftop_context) +EXPORT_SYMBOL_GPL(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7a555b600171..4740cb5b2388 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -51,27 +51,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -/* Assembly functions to move context to/from the FPU */ -extern asmlinkage int -_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); - -#ifdef CONFIG_CPU_HAS_LBT -extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _save_ftop_context(void __user *ftop); -extern asmlinkage int _restore_ftop_context(void __user *ftop); -#endif - struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 2ec3106c0da3..47fc2de6d150 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); #else + bool pie = regs_irqs_disabled(regs); unsigned int *pc; - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); @@ -582,7 +583,7 @@ sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); #endif irqentry_exit(regs, state); @@ -621,12 +622,13 @@ static void bug_handler(struct pt_regs *regs) asmlinkage void noinstr do_bce(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned long era = exception_era(regs); u64 badv = 0, lower = 0, upper = ULONG_MAX; union loongarch_instruction insn; irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); current->thread.trap_nr = read_csr_excode(); @@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -710,11 +712,12 @@ bad_era: asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = 
regs_irqs_disabled(regs); unsigned int opcode, bcode; unsigned long era = exception_era(regs); irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (__get_inst(&opcode, (u32 *)era, user)) @@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) } out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -1015,6 +1018,7 @@ static void init_restore_lbt(void) asmlinkage void noinstr do_lbt(struct pt_regs *regs) { + bool pie = regs_irqs_disabled(regs); irqentry_state_t state = irqentry_enter(regs); /* @@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) * (including the user using 'MOVGR2GCSR' to turn on TM, which * will not trigger the BTE), we need to check PRMD first. */ - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (!cpu_has_lbt) { @@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) preempt_enable(); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 93f4acd44523..fe734dc062ed 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -111,7 +111,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { - kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); return ret; } /* Construct the mask by scanning the bit 27-30 */ @@ -127,7 +127,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) - kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); return ret; } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index d165cd38c6bb..80ea63d465b8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -296,10 +296,10 @@ int kvm_arch_enable_virtualization_cpu(void) /* * Enable virtualization features granting guest direct control of * certain features: - * GCI=2: Trap on init or unimplement cache instruction. + * GCI=2: Trap on init or unimplemented cache instruction. * TORU=0: Trap on Root Unimplement. * CACTRL=1: Root control cache. - * TOP=0: Trap on Previlege. + * TOP=0: Trap on Privilege. * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. 
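The traps.c hunks above all apply one pattern: read the trapped context's interrupt-enable state once, then reuse that snapshot so the enable/disable calls stay balanced. A condensed sketch of the shape (do_example is a placeholder; the handler body is elided):

	asmlinkage void noinstr do_example(struct pt_regs *regs)
	{
		bool pie = regs_irqs_disabled(regs);	/* snapshot CSR_PRMD_PIE once */
		irqentry_state_t state = irqentry_enter(regs);

		if (!pie)
			local_irq_enable();

		/* ... handle the exception ... */

		if (!pie)
			local_irq_disable();
		irqentry_exit(regs, state);
	}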
*/ diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8e427b379661..5af32ec62cb1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -294,6 +294,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { + kvm_lose_pmu(vcpu); /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -902,6 +903,13 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); + + /* + * When vCPU reset, clear the ESTAT and GINTC registers + * Other CSR registers are cleared with function _kvm_setcsr(). + */ + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0); + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0); break; default: ret = -EINVAL; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index e4068906143b..cea84d7f2b91 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return (pte_t *) pmd; + return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index fdb7f73ad160..06f11d9e4ec1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -65,9 +65,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h index 984c331ff5f4..0e60af486ec1 100644 --- a/arch/openrisc/include/asm/cacheflush.h +++ b/arch/openrisc/include/asm/cacheflush.h @@ -23,6 +23,9 @@ */ extern void local_dcache_page_flush(struct page *page); extern void local_icache_page_inv(struct page *page); +extern void local_dcache_range_flush(unsigned long start, unsigned long end); +extern void local_dcache_range_inv(unsigned long start, unsigned long end); +extern void local_icache_range_inv(unsigned long start, unsigned long end); /* * Data cache flushing always happen on the local cpu. Instruction cache @@ -39,6 +42,20 @@ extern void smp_icache_page_inv(struct page *page); #endif /* CONFIG_SMP */ /* + * Even if the actual block size is larger than L1_CACHE_BYTES, paddr + * can be incremented by L1_CACHE_BYTES. When paddr is written to the + * invalidate register, the entire cache line encompassing this address + * is invalidated. Each subsequent reference to the same cache line will + * not affect the invalidation process. + */ +#define local_dcache_block_flush(addr) \ + local_dcache_range_flush(addr, addr + L1_CACHE_BYTES) +#define local_dcache_block_inv(addr) \ + local_dcache_range_inv(addr, addr + L1_CACHE_BYTES) +#define local_icache_block_inv(addr) \ + local_icache_range_inv(addr, addr + L1_CACHE_BYTES) + +/* * Synchronizes caches. Whenever a cpu writes executable code to memory, this * should be called to make sure the processor sees the newly written code. 
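The comment above states the invariant the new block macros rely on: writing any address inside a cache line to a flush/invalidate SPR acts on the whole line, so stepping by L1_CACHE_BYTES can never skip a line even when the hardware block size is larger. A sketch of the resulting walk, assuming the same SPR interface (cache_loop() later in this diff is the real version):

	static void dcache_flush_range_sketch(unsigned long paddr, unsigned long end)
	{
		/* A larger real block size only means some mtspr() writes
		 * land in a line the previous write already flushed.
		 */
		while (paddr < end) {
			mtspr(SPR_DCBFR, paddr);
			paddr += L1_CACHE_BYTES;
		}
	}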
*/ diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 5e4744153d0e..3cfc4cf0b019 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -15,16 +15,21 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H +#include <asm/spr.h> +#include <asm/spr_defs.h> + +struct cache_desc { + u32 size; + u32 sets; + u32 block_size; + u32 ways; +}; + struct cpuinfo_or1k { u32 clock_frequency; - u32 icache_size; - u32 icache_block_size; - u32 icache_ways; - - u32 dcache_size; - u32 dcache_block_size; - u32 dcache_ways; + struct cache_desc icache; + struct cache_desc dcache; u16 coreid; }; @@ -32,4 +37,9 @@ struct cpuinfo_or1k { extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; extern void setup_cpuinfo(void); +/* + * Check if the cache component exists. + */ +extern bool cpu_cache_is_present(const unsigned int cache_type); + #endif /* __ASM_OPENRISC_CPUINFO_H */ diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 79129161f3e0..e4c7d9bdd598 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := vmlinux.lds obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ - sys_call_table.o unwinder.o + sys_call_table.o unwinder.o cacheinfo.o obj-$(CONFIG_SMP) += smp.o sync-timer.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c new file mode 100644 index 000000000000..61230545e4ff --- /dev/null +++ b/arch/openrisc/kernel/cacheinfo.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OpenRISC cacheinfo support + * + * Based on work done for MIPS and LoongArch. All original copyrights + * apply as per the original source declaration. + * + * OpenRISC implementation: + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me> + */ + +#include <linux/cacheinfo.h> +#include <asm/cpuinfo.h> +#include <asm/spr.h> +#include <asm/spr_defs.h> + +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, struct cache_desc *cache, int cpu) +{ + this_leaf->type = type; + this_leaf->level = level; + this_leaf->coherency_line_size = cache->block_size; + this_leaf->number_of_sets = cache->sets; + this_leaf->ways_of_associativity = cache->ways; + this_leaf->size = cache->size; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); +} + +int init_cache_level(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int leaves = 0, levels = 0; + unsigned long upr = mfspr(SPR_UPR); + unsigned long iccfgr, dccfgr; + + if (!(upr & SPR_UPR_UP)) { + printk(KERN_INFO + "-- no UPR register... 
unable to detect configuration\n"); + return -ENOENT; + } + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + dccfgr = mfspr(SPR_DCCFGR); + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache.size = + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; + leaves += 1; + printk(KERN_INFO + "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->dcache.size, cpuinfo->dcache.block_size, + cpuinfo->dcache.sets, cpuinfo->dcache.ways); + } else + printk(KERN_INFO "-- dcache disabled\n"); + + if (cpu_cache_is_present(SPR_UPR_ICP)) { + iccfgr = mfspr(SPR_ICCFGR); + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache.size = + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; + leaves += 1; + printk(KERN_INFO + "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->icache.size, cpuinfo->icache.block_size, + cpuinfo->icache.sets, cpuinfo->icache.ways); + } else + printk(KERN_INFO "-- icache disabled\n"); + + if (!leaves) + return -ENOENT; + + levels = 1; + + this_cpu_ci->num_leaves = leaves; + this_cpu_ci->num_levels = levels; + + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + int level = 1; + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu); + this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ? 
+ CACHE_WRITE_BACK : CACHE_WRITE_THROUGH; + this_leaf++; + } + + if (cpu_cache_is_present(SPR_UPR_ICP)) + ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu); + + this_cpu_ci->cpu_map_populated = true; + + return 0; +} diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index b3edbb33b621..3a7b5baaa450 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -17,6 +17,7 @@ #include <linux/pagewalk.h> #include <asm/cpuinfo.h> +#include <asm/cacheflush.h> #include <asm/spr_defs.h> #include <asm/tlbflush.h> @@ -24,9 +25,6 @@ static int page_set_nocache(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - pte_val(*pte) |= _PAGE_CI; /* @@ -36,8 +34,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(__pa(addr), __pa(next)); return 0; } @@ -98,21 +95,14 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) void arch_sync_dma_for_device(phys_addr_t addr, size_t size, enum dma_data_direction dir) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - switch (dir) { case DMA_TO_DEVICE: /* Flush the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(addr, addr + size); break; case DMA_FROM_DEVICE: /* Invalidate the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBIR, cl); + local_dcache_range_inv(addr, addr + size); break; default: /* diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index be56eaafc8b9..a9fb9cc6779e 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -113,21 +113,6 @@ static void print_cpuinfo(void) return; } - if (upr & SPR_UPR_DCP) - printk(KERN_INFO - "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->dcache_size, cpuinfo->dcache_block_size, - cpuinfo->dcache_ways); - else - printk(KERN_INFO "-- dcache disabled\n"); - if (upr & SPR_UPR_ICP) - printk(KERN_INFO - "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->icache_size, cpuinfo->icache_block_size, - cpuinfo->icache_ways); - else - printk(KERN_INFO "-- icache disabled\n"); - if (upr & SPR_UPR_DMP) printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), @@ -155,8 +140,6 @@ static void print_cpuinfo(void) void __init setup_cpuinfo(void) { struct device_node *cpu; - unsigned long iccfgr, dccfgr; - unsigned long cache_set_size; int cpu_id = smp_processor_id(); struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; @@ -164,20 +147,6 @@ void __init setup_cpuinfo(void) if (!cpu) panic("Couldn't find CPU%d in device tree...\n", cpu_id); - iccfgr = mfspr(SPR_ICCFGR); - cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); - cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo->icache_size = - cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size; - - dccfgr = mfspr(SPR_DCCFGR); - cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); - cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - 
cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo->dcache_size = - cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size; - if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo->clock_frequency)) { printk(KERN_WARNING @@ -294,14 +263,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int vr, cpucfgr; unsigned int avr; unsigned int version; +#ifdef CONFIG_SMP struct cpuinfo_or1k *cpuinfo = v; + seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); +#endif vr = mfspr(SPR_VR); cpucfgr = mfspr(SPR_CPUCFGR); -#ifdef CONFIG_SMP - seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); -#endif if (vr & SPR_VR_UVRP) { vr = mfspr(SPR_VR2); version = vr & SPR_VR2_VER; @@ -320,14 +289,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); } seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size); - seq_printf(m, "dcache block size\t: %d bytes\n", - cpuinfo->dcache_block_size); - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways); - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size); - seq_printf(m, "icache block size\t: %d bytes\n", - cpuinfo->icache_block_size); - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways); seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index eb43b73f3855..0f265b8e73ec 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -14,31 +14,70 @@ #include <asm/spr_defs.h> #include <asm/cache.h> #include <asm/cacheflush.h> +#include <asm/cpuinfo.h> #include <asm/tlbflush.h> -static __always_inline void cache_loop(struct page *page, const unsigned int reg) +/* + * Check if the cache component exists. 
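The check that follows packs two conditions into one mask test: both the unit-present bit and the specific cache-present bit must be set in UPR. An equivalent, more explicit form of the same test (sketch only):

	static bool cache_present_sketch(const unsigned int cache_type)
	{
		unsigned long upr = mfspr(SPR_UPR);
		unsigned long mask = SPR_UPR_UP | cache_type;

		/* identical to !((upr & mask) ^ mask) in the helper below */
		return (upr & mask) == mask;
	}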
+ */ +bool cpu_cache_is_present(const unsigned int cache_type) { - unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; - unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); + unsigned long upr = mfspr(SPR_UPR); + unsigned long mask = SPR_UPR_UP | cache_type; + + return !((upr & mask) ^ mask); +} + +static __always_inline void cache_loop(unsigned long paddr, unsigned long end, + const unsigned short reg, const unsigned int cache_type) +{ + if (!cpu_cache_is_present(cache_type)) + return; - while (line < paddr + PAGE_SIZE) { - mtspr(reg, line); - line += L1_CACHE_BYTES; + while (paddr < end) { + mtspr(reg, paddr); + paddr += L1_CACHE_BYTES; } } +static __always_inline void cache_loop_page(struct page *page, const unsigned short reg, + const unsigned int cache_type) +{ + unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; + unsigned long end = paddr + PAGE_SIZE; + + paddr &= ~(L1_CACHE_BYTES - 1); + + cache_loop(paddr, end, reg, cache_type); +} + void local_dcache_page_flush(struct page *page) { - cache_loop(page, SPR_DCBFR); + cache_loop_page(page, SPR_DCBFR, SPR_UPR_DCP); } EXPORT_SYMBOL(local_dcache_page_flush); void local_icache_page_inv(struct page *page) { - cache_loop(page, SPR_ICBIR); + cache_loop_page(page, SPR_ICBIR, SPR_UPR_ICP); } EXPORT_SYMBOL(local_icache_page_inv); +void local_dcache_range_flush(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBFR, SPR_UPR_DCP); +} + +void local_dcache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBIR, SPR_UPR_DCP); +} + +void local_icache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_ICBIR, SPR_UPR_ICP); +} + void update_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { @@ -58,4 +97,3 @@ void update_cache(struct vm_area_struct *vma, unsigned long address, sync_icache_dcache(folio_page(folio, nr)); } } - diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index be1c2eb8bb94..e4904ca6f0a0 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -35,6 +35,7 @@ #include <asm/fixmap.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/cacheflush.h> int mem_init_done; @@ -176,8 +177,8 @@ void __init paging_init(void) barrier(); /* Invalidate instruction caches after code modification */ - mtspr(SPR_ICBIR, 0x900); - mtspr(SPR_ICBIR, 0xa00); + local_icache_block_inv(0x900); + local_icache_block_inv(0xa00); /* New TLB miss handlers and kernel page tables are in now place. * Make sure that page flags get updated for all pages in TLB by diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1db60fe13802..3d8dc822282a 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! 
ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 34a5aec4908f..126bf3b06ab7 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -258,10 +258,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, break; } } - if (i == hdr->e_shnum) { - pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); - return -ENOEXEC; - } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 311e2112d782..9f764bc42b8c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -976,7 +976,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, return 0; } - +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { if (radix_enabled()) @@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) return false; } +#endif int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) @@ -1120,6 +1121,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decided whether + * we should use altmap or RAM based backing memory allocation. Also + * the address need to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here beacause + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1145,8 +1159,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. 
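The alignment reasoning in the radix vmemmap hunk above reduces to a small predicate; a hypothetical restatement using the same helpers the hunk calls (vmemmap_use_pmd_sketch is illustrative, not from the patch):

	static bool vmemmap_use_pmd_sketch(unsigned long addr,
					   struct vmem_altmap *altmap)
	{
		/* Only a PMD-aligned address may take the huge mapping... */
		if (!IS_ALIGNED(addr, PMD_SIZE))
			return false;
		/* ...and an altmap-backed allocation must not cross the
		 * device boundary; otherwise fall back to base pages.
		 */
		if (altmap && altmap_cross_boundary(altmap, addr, PMD_SIZE))
			return false;
		return true;
	}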
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 3fbe0295ce14..95d7ba73d43d 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,7 +17,7 @@ config PPC_POWERNV select MMU_NOTIFIER select FORCE_SMP select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a934c2a262f6..fa3c2fff082a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,7 +23,7 @@ config PPC_PSERIES select FORCE_SMP select SWIOTLB select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config PARAVIRT diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa3..b59ffeb668d6 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) flush_dcache_folio(page_folio(page)); } -/* - * RISC-V doesn't have an instruction to flush parts of the instruction cache, - * so instead we just flush the whole thing. - */ -#define flush_icache_range(start, end) flush_icache_all() #define flush_icache_user_page(vma, pg, addr, len) \ do { \ if (vma->vm_flags & VM_EXEC) \ @@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range flush_icache_range +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + flush_icache_all(); +} + extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; void riscv_init_cbo_blocksizes(void); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77..cc15f7ca6cc1 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. 
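The riscv cacheflush change above swaps a bare macro for the usual asm-generic override idiom: define the name to itself so the generic header keeps its fallback out of the way, then provide a typed inline. A simplified sketch of the two sides (the asm-generic half is paraphrased from the generic header, not quoted):

	/* arch side: mark the name as provided, then implement it */
	#define flush_icache_range flush_icache_range
	static inline void flush_icache_range(unsigned long start, unsigned long end)
	{
		flush_icache_all();	/* no partial I-cache flush on RISC-V */
	}

	/* asm-generic side (simplified): only used if the arch stayed silent */
	#ifndef flush_icache_range
	static inline void flush_icache_range(unsigned long start, unsigned long end)
	{
	}
	#endif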
- */ - flush_dcache_page(page); } diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 81f55da81967..640fcac3af74 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -59,7 +59,7 @@ KBUILD_CFLAGS += $(CONFIG_CC_IMPLICIT_FALLTHROUGH) $(obj)/bzImage: asflags-y := $(SVGA_MODE) quiet_cmd_image = BUILD $@ - cmd_image = cp $< $@; truncate -s %4K $@; cat $(obj)/vmlinux.bin >>$@ + cmd_image = (dd if=$< bs=4k conv=sync status=none; cat $(filter-out $<,$(real-prereqs))) >$@ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin FORCE $(call if_changed,image) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6866cc5acb0b..3a4f031d2f44 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -629,7 +629,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3bdae454a959..7bc174a1f1cb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include <asm/mtrr.h> #include <asm/msr-index.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> #include <asm/reboot.h> @@ -2423,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a33147520044..c88691b15f3c 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,8 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +#include <asm/cpufeature.h> + #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include <asm-generic/pgalloc.h> @@ -29,16 +31,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9d8dd8deb2a7..9920122018a0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1299,6 +1299,14 @@ void __init e820__memblock_setup(void) memblock_add(entry->addr, entry->size); } + /* + * 32-bit systems are limited to 4BG of memory even with HIGHMEM and + * to even less without it. + * Discard memory after max_pfn - the actual limit detected at runtime. 
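pgd_allocation_order() above replaces the compile-time PGD_ALLOCATION_ORDER constant with a runtime X86_FEATURE_PTI check, so callers simply pair the same order on both ends of an allocation, exactly as the machine_kexec_32.c hunk below does. A minimal usage sketch:

	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					       pgd_allocation_order());
	if (!pgd)
		return -ENOMEM;
	/* ... populate and use the page tables ... */
	free_pages((unsigned long)pgd, pgd_allocation_order());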
+ */ + if (IS_ENABLED(CONFIG_X86_32)) + memblock_remove(PFN_PHYS(max_pfn), -1); + /* Throw away partial pages: */ memblock_trim_memory(PAGE_SIZE); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 80265162aeff..1f325304c4a8 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -42,7 +42,7 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { - free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)image->arch.pgd, pgd_allocation_order()); image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); @@ -59,7 +59,7 @@ static void machine_kexec_free_page_tables(struct kimage *image) static int machine_kexec_alloc_page_tables(struct kimage *image) { image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PGD_ALLOCATION_ORDER); + pgd_allocation_order()); #ifdef CONFIG_X86_PAE image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 65fd245a9953..7338879d1c0c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) struct amd_svm_iommu_ir *ir; u64 entry; + if (WARN_ON_ONCE(!pi->ir_data)) + return -EINVAL; + /** * In some cases, the existing irte is updated and re-set, * so we need to check here if it's already been * added * to the ir_list. */ - if (pi->ir_data && (pi->prev_ga_tag != 0)) { + if (pi->prev_ga_tag) { struct kvm *kvm = svm->vcpu.kvm; u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); @@ -820,7 +823,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,10 +899,10 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) return 0; pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", @@ -933,6 +936,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +956,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. 
If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +971,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ccda95e53f62..ba736cbb0587 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -11,6 +11,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#ifdef CREATE_TRACE_POINTS +#define tracing_kvm_rip_read(vcpu) ({ \ + typeof(vcpu) __vcpu = vcpu; \ + __vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \ + }) +#endif + /* * Tracepoint for guest mode entry. */ @@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->rip = kvm_rip_read(vcpu); + __entry->rip = tracing_kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, @@ -319,7 +326,7 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->guest_rip = kvm_rip_read(vcpu); \ + __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ @@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 51116fe69a50..d70e5b90087d 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -335,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. 
- */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -357,11 +345,12 @@ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + enable_remapped_mode = false; + + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -369,6 +358,9 @@ } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3712dde0bf9d..df5b99ea1f18 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11098,7 +11098,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * Profile KVM exit RIPs: */ - if (unlikely(prof_on == KVM_PROFILING)) { + if (unlikely(prof_on == KVM_PROFILING && + !vcpu->arch.guest_state_protected)) { unsigned long rip = kvm_rip_read(vcpu); profile_hit(KVM_PROFILING, (void *)rip); } @@ -13556,25 +13557,27 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_arch_has_irq_bypass(void) -{ - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13584,9 +13587,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When the producer of a consumer is unregistered, we change back to * remapped mode, so we can enable the irq again, * when the irq is masked/disabled or the consumer side (KVM * in this case) doesn't want to receive the interrupts.
*/ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } @@ -13612,7 +13621,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a05fcddfc811..f7ae44d3dd9e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -360,7 +360,7 @@ static inline pgd_t *_pgd_alloc(struct mm_struct *mm) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); /* * Now PAE kernel is not running as a Xen domain. We can allocate @@ -380,7 +380,7 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac57259a432b..a4b4ebd41b8f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -73,7 +73,7 @@ int __init efi_alloc_page_tables(void) gfp_t gfp_mask; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); if (!efi_pgd) goto fail; @@ -96,7 +96,7 @@ free_p4d: if (pgtable_l5_enabled()) free_page((unsigned long)pgd_page_vaddr(*pgd)); free_pgd: - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); fail: return -ENOMEM; } diff --git a/block/bdev.c b/block/bdev.c index 6a34179192c9..889ec6e002d7 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev) get_order(bsize)); } -int set_blocksize(struct file *file, int size) +/** + * bdev_validate_blocksize - check that this block size is acceptable + * @bdev: blockdevice to check + * @block_size: block size to check + * + * For block device users that do not use buffer heads or the block device + * page cache, make sure that this block size can be used with the device. 
+ * + * Return: On success zero is returned, negative error code on failure. + */ +int bdev_validate_blocksize(struct block_device *bdev, int block_size) { - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = I_BDEV(inode); - - if (blk_validate_block_size(size)) + if (blk_validate_block_size(block_size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_logical_block_size(bdev)) + if (block_size < bdev_logical_block_size(bdev)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(bdev_validate_blocksize); + +int set_blocksize(struct file *file, int size) +{ + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = I_BDEV(inode); + int ret; + + ret = bdev_validate_blocksize(bdev, size); + if (ret) + return ret; + if (!file->private_data) return -EINVAL; /* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { + /* + * Flush and truncate the pagecache before we reconfigure the + * mapping geometry because folio sizes are variable now. If a + * reader has already allocated a folio whose size is smaller + * than the new min_order but invokes readahead after the new + * min_order becomes visible, readahead will think there are + * "zero" blocks per folio and crash. Take the inode and + * invalidation locks to avoid racing with + * read/write/fallocate. + */ + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + sync_blockdev(bdev); + kill_bdev(bdev); + inode->i_blkbits = blksize_bits(size); mapping_set_folio_min_order(inode->i_mapping, get_order(size)); kill_bdev(bdev); + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); } return 0; } @@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part) blkdev_put_whole(whole); } -struct block_device *blkdev_get_no_open(dev_t dev) +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) { struct block_device *bdev; struct inode *inode; inode = ilookup(blockdev_superblock, dev); - if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { + if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); if (inode) @@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) return ERR_PTR(ret); - bdev = blkdev_get_no_open(dev); + bdev = blkdev_get_no_open(dev, true); if (!bdev) return ERR_PTR(-ENXIO); @@ -1274,18 +1312,15 @@ void sync_bdevs(bool wait) */ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask) { - struct inode *backing_inode; struct block_device *bdev; - backing_inode = d_backing_inode(path->dentry); - /* - * Note that backing_inode is the inode of a block device node file, - * not the block device's internal inode. Therefore it is *not* valid - * to use I_BDEV() here; the block device has to be looked up by i_rdev + * Note that d_backing_inode() returns the block device node inode, not + * the block device's internal inode. Therefore it is *not* valid to + * use I_BDEV() here; the block device has to be looked up by i_rdev * instead. 
*/ - bdev = blkdev_get_no_open(backing_inode->i_rdev); + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); if (!bdev) return; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5905f277057b..ce93706555c5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) return -EINVAL; input = skip_spaces(input); - bdev = blkdev_get_no_open(MKDEV(major, minor)); + bdev = blkdev_get_no_open(MKDEV(major, minor), false); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 6b2dbe645d23..4817e7ca03f8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, /* * For read-ahead of large files to be effective, we need to read ahead * at least twice the optimal I/O size. + * + * There is no hardware limitation for the read-ahead size and the user + * might have increased the read-ahead size through sysfs, so don't ever + * decrease it. */ - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + bdi->ra_pages = max3(bdi->ra_pages, + lim->io_opt * 2 / PAGE_SIZE, + VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c77244a35c9..8f15d1aa6eb8 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) @@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: - if (cmd == BLKRESETZONE) + if (cmd == BLKRESETZONE) { filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + } return ret; } diff --git a/block/blk.h b/block/blk.h index 006e3be433d2..328075787814 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done) wait_for_completion_io(done); } +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); +void blkdev_put_no_open(struct block_device *bdev); + #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); diff --git a/block/fops.c b/block/fops.c index be9f1dbea9ce..82b672d15ea4 100644 --- a/block/fops.c +++ b/block/fops.c @@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (ret) return ret; - bdev = blkdev_get_no_open(inode->i_rdev); + bdev = blkdev_get_no_open(inode->i_rdev, true); if (!bdev) return -ENXIO; @@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = direct_write_fallback(iocb, from, ret, blkdev_buffered_write(iocb, from)); } else { + /* + * Take i_rwsem and invalidate_lock to avoid racing with + * set_blocksize changing i_blkbits/folio order and punching + * out the pagecache. 
+ */ + inode_lock_shared(bd_inode); ret = blkdev_buffered_write(iocb, from); + inode_unlock_shared(bd_inode); } if (ret > 0) @@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); loff_t size = bdev_nr_bytes(bdev); loff_t pos = iocb->ki_pos; @@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; } + /* + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize + * changing i_blkbits/folio order and punching out the pagecache. + */ + inode_lock_shared(bd_inode); ret = filemap_read(iocb, to, ret); + inode_unlock_shared(bd_inode); reexpand: if (unlikely(shorted)) @@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); /* @@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, fail: filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); return error; } diff --git a/block/ioctl.c b/block/ioctl.c index faa40f383e27..e472cc1030c6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (err) return err; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) @@ -174,6 +175,7 @@ out_unplug: blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) @@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } diff --git a/crypto/scompress.c b/crypto/scompress.c index 36934c78d127..ffeedcf20b0f 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -163,11 +163,10 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) if (ret) goto unlock; } - if (!scomp_scratch_users) { + if (!scomp_scratch_users++) { ret = crypto_scomp_alloc_scratches(); if (ret) - goto unlock; - scomp_scratch_users++; + scomp_scratch_users--; } unlock: mutex_unlock(&scomp_lock); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 5e1d709c6a46..ccaaf6c100c0 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -544,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ 
boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -646,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 300f749971d4..d2d82651976d 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -14,7 +14,7 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7ab..ac0e22454596 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -428,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +446,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +468,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 76052006bd87..5fc2c8ee61b1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6373,7 +6373,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd offset %lx\n", buffer->data_size, buffer->offsets_size, - proc->alloc.vm_start - buffer->user_data); + buffer->user_data - proc->alloc.vm_start); } static void print_binder_work_ilocked(struct seq_file *m, diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2796c0da8257..c0eb8c67a9ff 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2453,8 +2453,8 @@ static unsigned int ata_msense_control_ata_feature(struct ata_device *dev, */ put_unaligned_be16(ATA_FEATURE_SUB_MPAGE_LEN - 4, &buf[2]); - if 
(dev->flags & ATA_DFLAG_CDL) - buf[4] = 0x02; /* Support T2A and T2B pages */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + buf[4] = 0x02; /* T2A and T2B pages enabled */ else buf[4] = 0; @@ -3886,12 +3886,11 @@ static int ata_mselect_control_spg0(struct ata_queued_cmd *qc, } /* - * Translate MODE SELECT control mode page, sub-pages f2h (ATA feature mode + * Translate MODE SELECT control mode page, sub-page f2h (ATA feature mode * page) into a SET FEATURES command. */ -static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, - const u8 *buf, int len, - u16 *fp) +static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, + const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; struct ata_taskfile *tf = &qc->tf; @@ -3909,17 +3908,27 @@ static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL */ + /* Disable CDL if it is enabled */ + if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) + return 0; + ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: - /* Enable CDL T2A/T2B: NCQ priority must be disabled */ + /* + * Enable CDL if not already enabled. Since this is mutually + * exclusive with NCQ priority, allow this only if NCQ priority + * is disabled. + */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); return -EINVAL; } + ata_dev_dbg(dev, "Enabling CDL\n"); cdl_action = 1; dev->flags |= ATA_DFLAG_CDL_ENABLED; break; diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index afa4df4c5a3f..95717d509ca9 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -156,6 +156,16 @@ * }, * .ops = my_custom_ops, * }; + * + * Please note that such a custom ops approach is valid, but it is hard to implement + * correctly without per-device global locks to protect against auxiliary_drv removal + * during calls to those ops. In addition, this implementation lacks proper module + * dependencies, which causes load/unload races between the auxiliary parent and device + * modules. + * + * The easiest way to provide these ops reliably without needing to + * hold a lock is to EXPORT_SYMBOL*() them and rely on the already existing + * module infrastructure for validity and correct dependency chains. */ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, diff --git a/drivers/base/base.h b/drivers/base/base.h index 0042e4774b0c..123031a757d9 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); } +struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { @@ -180,6 +181,22 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + /* + * Most (all?) read accesses to dev->driver happen either + * while holding the device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read.
+ * However, when reading the device's uevent file, we read the driver pointer + * without taking the device lock (so we do not block there for an + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ + // FIXME - this cast should not be needed "soon" + WRITE_ONCE(dev->driver, (struct device_driver *)drv); +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 5ea3b03af9ba..5e75e1bce551 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; diff --git a/drivers/base/core.c b/drivers/base/core.c index d2f9d3a59d6b..cbc0099d8ef2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2624,6 +2624,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; } +/* + * Try filling the "DRIVER=<name>" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe; at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyway). Unbinding is more + * risky as the driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that the driver structure is not going to disappear from under us + * we take the bus's drivers klist lock. The assumption is that only a registered + * driver can be bound to a device, and that bus code takes the same lock + * to unregister a driver.
+ */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2655,8 +2684,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2726,11 +2755,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; @@ -3700,7 +3726,7 @@ done: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885..b526e0e0f52d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -550,7 +550,7 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); @@ -629,8 +629,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) } re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1013,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 6dd1a8860f1c..31bfb3194b4c 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -296,7 +296,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -304,13 +304,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -321,20 +321,16 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; - struct kstat stat; - struct path p; + struct inode *inode; int deleted = 0; - int err; + int err = 0; dentry = kern_path_locked(nodename, &parent); if 
(IS_ERR(dentry)) return PTR_ERR(dentry); - p.mnt = parent.mnt; - p.dentry = dentry; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { struct iattr newattrs; /* * before unlinking this node, reset permissions @@ -342,7 +338,7 @@ static int handle_remove(const char *nodename, struct device *dev) */ newattrs.ia_uid = GLOBAL_ROOT_UID; newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_mode = inode->i_mode & ~0777; newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; inode_lock(d_inode(dentry)); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8f3a41d9bfaa..19469e7f88c2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -816,21 +816,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int __init add_boot_memory_block(unsigned long base_section_nr) -{ - unsigned long nr; - - for_each_present_section_nr(base_section_nr, nr) { - if (nr >= (base_section_nr + sections_per_block)) - break; - - return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); - } - - return 0; -} - static int add_hotplug_memory_block(unsigned long block_id, struct vmem_altmap *altmap, struct memory_group *group) @@ -957,7 +942,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -970,15 +955,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. 
*/ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_boot_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } diff --git a/drivers/base/module.c b/drivers/base/module.c index 5bc71bea883a..218aaa096455 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index b1726a3515f6..5c78fa6ae772 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1080,6 +1080,7 @@ void software_node_notify(struct device *dev) if (!swnode) return; + kobject_get(&swnode->kobj); ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; @@ -1089,8 +1090,6 @@ void software_node_notify(struct device *dev) sysfs_remove_link(&dev->kobj, "software_node"); return; } - - kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2de7b2bd409d..f9032076bc06 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,15 +201,10 @@ struct ublk_params_header { static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); -static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) -{ - return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); -} - static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -609,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub) ublk_dev_param_zoned_apply(ub); } +static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { - return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); + return ubq->flags & UBLK_F_USER_COPY; } static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); } static inline bool ublk_need_req_ref(const struct 
ublk_queue *ubq) @@ -624,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) { /* * read()/write() is involved in user copy, so request reference * has to be grabbed + * + * for zero copy, the request buffer needs to be registered with the + * io_uring buffer table, so a reference is needed */ - return ublk_support_user_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -1683,14 +1686,31 @@ static void ublk_start_cancel(struct ublk_queue *ubq) ublk_put_disk(disk); } -static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, +static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, unsigned int issue_flags) { + struct ublk_io *io = &ubq->ios[tag]; + struct ublk_device *ub = ubq->dev; + struct request *req; bool done; if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) return; + /* + * Don't try to cancel this command if the request has been started, + * to avoid a race between io_uring_cmd_done() and + * io_uring_cmd_complete_in_task(). + * + * Either the started request will be aborted via __ublk_abort_rq(), + * then this uring_cmd is canceled next time, or it will be done in + * the task work function ublk_dispatch_req() because io_uring guarantees + * that ublk_dispatch_req() is always called. + */ + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + if (req && blk_mq_request_started(req)) + return; + spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); if (!done) @@ -1722,7 +1742,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) return; @@ -1737,9 +1756,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (!ubq->canceling) ublk_start_cancel(ubq); - io = &ubq->ios[pdu->tag]; - WARN_ON_ONCE(io->cmd != cmd); - ublk_cancel_cmd(ubq, io, issue_flags); + WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); + ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1752,7 +1770,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) int i; for (i = 0; i < ubq->q_depth; i++) - ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); + ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); } /* Cancel all pending commands, must be called after del_gendisk() returns */ @@ -1886,15 +1904,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } } -static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag) -{ - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - - ublk_queue_cmd(ubq, req); -} - static inline int ublk_check_cmd_op(u32 cmd_op) { u32 ioc_type = _IOC_TYPE(cmd_op); @@ -1940,13 +1949,20 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; + const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1962,8 +1978,17 @@ static int
ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { + const struct ublk_io *io = &ubq->ios[tag]; + + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + return io_buffer_unregister_bvec(cmd, index, issue_flags); } @@ -2067,7 +2092,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); if (ret) @@ -2103,8 +2128,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); - break; + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + ublk_dispatch_req(ubq, req, issue_flags); + return -EIOCBQUEUED; default: goto out; } @@ -2118,13 +2144,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, int tag, size_t offset) { struct request *req; - if (!ublk_need_req_ref(ubq)) - return NULL; - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; @@ -2238,6 +2261,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (!ubq) return ERR_PTR(-EINVAL); + if (!ublk_support_user_copy(ubq)) + return ERR_PTR(-EACCES); + if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); @@ -2776,13 +2802,18 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | UBLK_F_URING_CMD_COMP_IN_TASK; - /* GET_DATA isn't needed any more with USER_COPY */ - if (ublk_dev_is_user_copy(ub)) + /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; - /* Zoned storage support requires user copy feature */ + /* + * Zoned storage support requires reusing `ublksrv_io_cmd->addr` for + * returning write_append_lba, which is only allowed with + * user copy or zero copy + */ if (ublk_dev_is_zoned(ub) && - (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags & + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) { ret = -EINVAL; goto out_free_dev_number; } diff --git a/drivers/char/misc.c b/drivers/char/misc.c index f7dd455dd0dd..dda466f9181a 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -315,7 +315,7 @@ static int __init misc_init(void) goto fail_remove; err = -EIO; - if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) + if (__register_chrdev(MISC_MAJOR, 0, MINORMASK + 1, "misc", &misc_fops)) goto fail_printk; return 0; diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index cdc842b32bab..75dce1ff2419 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -758,7 +758,7 @@ static void jr3_pci_detach(struct comedi_device *dev) struct jr3_pci_dev_private *devpriv =
dev->private; if (devpriv) - timer_delete_sync(&devpriv->timer); + timer_shutdown_sync(&devpriv->timer); comedi_pci_detach(dev); } diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f9cb943d945..0d46402e3094 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST - default y + default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ tristate "Calxeda Highbank-based" depends on ARCH_HIGHBANK || COMPILE_TEST depends on CPUFREQ_DT && REGULATOR && PL320_MBOX - default m + default m if ARCH_HIGHBANK help This adds the CPUFreq driver for Calxeda Highbank SoC based boards. @@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ config ARM_MEDIATEK_CPUFREQ_HW tristate "MediaTek CPUFreq HW driver" depends on ARCH_MEDIATEK || COMPILE_TEST - default m + default m if ARCH_MEDIATEK help Support for the CPUFreq HW driver. Some MediaTek chipsets have a HW engine to offload the steps @@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" depends on CPU_S3C6410 || COMPILE_TEST - default y + default CPU_S3C6410 help This adds the CPUFreq driver for Samsung S3C6410 SoC. @@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" depends on CPU_S5PV210 || COMPILE_TEST - default y + default CPU_S5PV210 help This adds the CPUFreq driver for Samsung S5PV210 and S5PC110 SoCs. @@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help This adds the CPUFreq driver support for SPEAr SOCs. @@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra20/30 SOCs. @@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra124 SOCs. @@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default y + default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC help This adds CPU frequency driver support for Tegra194 SOCs. config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST - default y + default ARCH_OMAP2PLUS || ARCH_K3 help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. 
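The Kconfig.arm hunks above all apply one pattern: a driver guarded by "depends on ARCH_FOO || COMPILE_TEST" no longer defaults to enabled unconditionally, but only when the platform symbol itself is selected, so COMPILE_TEST-only configurations stop pulling the driver in by default. A minimal sketch of the pattern (ARCH_FOO and ARM_FOO_CPUFREQ are hypothetical symbols, not taken from this patch):

config ARM_FOO_CPUFREQ
	tristate "Foo SoC CPUfreq driver"
	depends on ARCH_FOO || COMPILE_TEST
	# The default tracks the platform symbol: y when ARCH_FOO is set,
	# n otherwise, so a COMPILE_TEST-only build must opt in explicitly.
	default ARCH_FOO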
Enable this in order to diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 924314cdeebc..d26b610e4f24 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,8 +909,19 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) - policy->boost_supported = true; + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). + */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } return result; diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 4994c86feb57..b1d29b7af232 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct apple_cpu_priv *priv; struct cpufreq_frequency_table *p; unsigned int pstate; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + if (priv->info->cur_pstate_mask) { u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index b3d74f9adcf0..cb93f00bafdb 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -747,7 +747,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09..a010da0f6337 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index acf19b0042bb..f45ded62b0e0 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -536,14 +536,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -577,8 +581,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = 
max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2397,8 +2400,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2727,8 +2730,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e78..0e65d37c9231 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c03a91502f84..35de513af6c9 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -115,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -147,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f41ed0b9e610..ba9bf06f1c77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -598,6 +598,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c310aeebc8f3..944e899eb1be 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ 
b/drivers/cpufreq/scmi-cpufreq.c @@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scmi_data *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct scmi_data *priv; unsigned long rate; int ret; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); if (ret) return 0; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 17cda84f00df..dcbb0ae7dd47 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops; static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scpi_data *priv = policy->driver_data; - unsigned long rate = clk_get_rate(priv->clk); + struct cpufreq_policy *policy; + struct scpi_data *priv; + unsigned long rate; + + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + rate = clk_get_rate(priv->clk); return rate / 1000; } diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 47d6840b3489..744312a44279 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void) struct nvmem_cell *speedbin_nvmem; const struct of_device_id *match; struct device *cpu_dev; - u32 *speedbin; + void *speedbin_ptr; + u32 speedbin = 0; + size_t len; int ret; cpu_dev = get_cpu_device(0); @@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void) return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), "Could not get nvmem cell\n"); - speedbin = nvmem_cell_read(speedbin_nvmem, NULL); + speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); - if (IS_ERR(speedbin)) - return PTR_ERR(speedbin); + if (IS_ERR(speedbin_ptr)) + return PTR_ERR(speedbin_ptr); - ret = opp_data->efuse_xlate(*speedbin); + if (len <= 4) + memcpy(&speedbin, speedbin_ptr, len); + speedbin = le32_to_cpu(speedbin); - kfree(speedbin); + ret = opp_data->efuse_xlate(speedbin); + + kfree(speedbin_ptr); return ret; }; diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 15699299dc11..17b692eb3257 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -119,7 +119,7 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); +int cxl_gpf_port_setup(struct cxl_dport *dport); int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size); diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index f4daefe3180e..1498e2369c37 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -528,13 +528,13 @@ static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, feat_in->version, feat_in->feat_data, data_size, flags, offset, &return_code); + *out_len = sizeof(*rpc_out); if (rc) { rpc_out->retval = return_code; return no_free_ptr(rpc_out); } rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; - *out_len = sizeof(*rpc_out); return no_free_ptr(rpc_out); } @@ -677,7 +677,7 @@ static void 
free_memdev_fwctl(void *_fwctl_dev) fwctl_put(fwctl_dev); } -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_features_state *cxlfs; @@ -700,7 +700,7 @@ int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) if (rc) return rc; - return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl, + return devm_add_action_or_reset(host, free_memdev_fwctl, no_free_ptr(fwctl_dev)); } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 96fecb799cbc..3b80e9a76ba8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,14 +1072,20 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +u16 cxl_gpf_get_dvsec(struct device *dev) { + struct pci_dev *pdev; + bool is_port = true; u16 dvsec; if (!dev_is_pci(dev)) return 0; - dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + pdev = to_pci_dev(dev); + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) + is_port = false; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", @@ -1128,26 +1134,24 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) return rc; } -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +int cxl_gpf_port_setup(struct cxl_dport *dport) { - struct pci_dev *pdev; - - if (!port) + if (!dport) return -EINVAL; - if (!port->gpf_dvsec) { + if (!dport->gpf_dvsec) { + struct pci_dev *pdev; int dvsec; - dvsec = cxl_gpf_get_dvsec(dport_dev, true); + dvsec = cxl_gpf_get_dvsec(dport->dport_dev); if (!dvsec) return -EINVAL; - port->gpf_dvsec = dvsec; + dport->gpf_dvsec = dvsec; + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); } - pdev = to_pci_dev(dport_dev); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); - return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0fd6646c1a2e..726bd4a7de27 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1678,7 +1678,7 @@ retry: if (rc && rc != -EBUSY) return rc; - cxl_gpf_port_setup(dport_dev, port); + cxl_gpf_port_setup(dport); /* Any more ports to add between this one and the root? 
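* (Aside on the cxl_gpf_port_setup(dport) call above: with the DVSEC
* offset now cached in the dport, the GPF phase 1/2 timeouts are
* programmed only the first time a dport is discovered instead of on
* every re-add. A condensed sketch, using the helpers from this series:
*
*	if (!dport->gpf_dvsec) {
*		struct pci_dev *pdev = to_pci_dev(dport->dport_dev);
*		int dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
*
*		if (!dvsec)
*			return -EINVAL;
*		dport->gpf_dvsec = dvsec;
*		update_gpf_port_dvsec(pdev, dvsec, 1);
*		update_gpf_port_dvsec(pdev, dvsec, 2);
*	}
* )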
*/ if (!dev_is_cxl_root_child(&port->dev)) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 117c2e94c761..5ca7b0eed568 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -581,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri resource_size_t rcrb = ri->base; void __iomem *addr; u32 bar0, bar1; - u16 cmd; u32 id; if (which == CXL_RCRB_UPSTREAM) @@ -603,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri } id = readl(addr + PCI_VENDOR_ID); - cmd = readw(addr + PCI_COMMAND); bar0 = readl(addr + PCI_BASE_ADDRESS_0); bar1 = readl(addr + PCI_BASE_ADDRESS_1); iounmap(addr); @@ -618,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri dev_err(dev, "Failed to access Downstream Port RCRB\n"); return CXL_RESOURCE_NONE; } - if (!(cmd & PCI_COMMAND_MEMORY)) - return CXL_RESOURCE_NONE; /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) return CXL_RESOURCE_NONE; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be8a7dc77719..a9ab46eb0610 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -592,7 +592,6 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds - * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -616,7 +615,6 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; - int gpf_dvsec; }; /** @@ -664,6 +662,7 @@ struct cxl_rcrb_info { * @regs: Dport parsed register blocks * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_dport { struct device *dport_dev; @@ -675,6 +674,7 @@ struct cxl_dport { struct cxl_regs regs; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; + int gpf_dvsec; }; /** @@ -910,6 +910,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); +u16 cxl_gpf_get_dvsec(struct device *dev); #endif /* __CXL_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 7b14a154463c..785aa2af5eaa 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1018,7 +1018,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index d061fe3d2b86..e197883690ef 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,7 +108,7 @@ static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) return; } - if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + if (!cxl_gpf_get_dvsec(cxlds->dev)) return; if (cxl_get_dirty_count(mds, &count)) { diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 3c52cb73237a..e3f990d888d7 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -1224,22 +1224,28 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->intel_svc_fcs) { dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); ret = -ENOMEM; - goto err_unregister_dev; + goto err_unregister_rsu_dev; } ret = 
platform_device_add(svc->intel_svc_fcs); if (ret) { platform_device_put(svc->intel_svc_fcs); - goto err_unregister_dev; + goto err_unregister_rsu_dev; } + ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); + if (ret) + goto err_unregister_fcs_dev; + dev_set_drvdata(dev, svc); pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_dev: +err_unregister_fcs_dev: + platform_device_unregister(svc->intel_svc_fcs); +err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); err_free_kfifo: kfifo_free(&controller->svc_fifo); @@ -1253,6 +1259,8 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) struct stratix10_svc *svc = dev_get_drvdata(&pdev->dev); struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); + of_platform_depopulate(ctrl->dev); + platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c..f01925ed8176 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d..54cde090c3f4 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -288,6 +287,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); } static const struct drm_crtc_funcs adp_crtc_funcs = { @@ -482,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -501,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -512,8 +516,7 @@ static int adp_drm_bind(struct 
device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { @@ -567,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 667080cc9ae1..44e120f9f764 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -77,22 +102,32 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); - u32 domains = bo->preferred_domains; + u32 domains = bo->allowed_domains; dma_resv_assert_held(dmabuf->resv); - /* - * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP * support if all attachments can do P2P. If any attachment can't do * P2P just pin into GTT instead. + * + * To avoid conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notifiers are enabled. 
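*
* Condensed, the selection below reduces to this sketch (same flags and
* list walk as in the surrounding hunk, not a separate helper):
*
*	u32 domains = bo->allowed_domains;
*
*	if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY))
*		domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
*	else
*		list_for_each_entry(attach, &dmabuf->attachments, node)
*			if (!attach->peer2peer)
*				domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
*
* so VRAM pinning is only attempted when move notifiers are built in and
* every importer is P2P capable; anything else falls back to GTT.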
*/ - list_for_each_entry(attach, &dmabuf->attachments, node) - if (!attach->peer2peer) - domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (WARN_ON(!domains)) + return -EINVAL; + return amdgpu_bo_pin(bo, domains); } @@ -164,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); @@ -470,6 +510,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec1..bed5ef4d8788 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d9..e0e84ef7f568 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -503,6 +503,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) } /** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? 
"VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + +/** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * * @vinst: VCN instance @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9fed4471405f..536f73131c2d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1920,26 +1920,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): case IP_VERSION(3, 6, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - * IPS2 in idle. - * We want them to use IPS2 only in display off cases. 
- */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; @@ -3355,16 +3335,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); @@ -6521,12 +6501,12 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, const struct drm_display_mode *native_mode, bool scale_enabled) { - if (scale_enabled) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); - } else if (native_mode->clock == drm_mode->clock && - native_mode->htotal == drm_mode->htotal && - native_mode->vtotal == drm_mode->vtotal) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); + if (scale_enabled || ( + native_mode->clock == drm_mode->clock && + native_mode->htotal == drm_mode->htotal && + native_mode->vtotal == drm_mode->vtotal)) { + if (native_mode->crtc_clock) + copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); } else { /* no scaling nor amdgpu inserted, no need to patch */ } @@ -11043,6 +11023,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. 
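* (With the check added above, a commit that may modeset while the
* device is in GPU reset now also forces a full plane reset:
*
*	if (amdgpu_in_reset(adev) && state->allow_modeset)
*		return true;
*
* complementing the allow_modeset early-out a few lines earlier.)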
*/ if (old_plane_state->crtc != new_plane_state->crtc) return true; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b463..8f22ad966543 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da..1395a748d726 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -918,7 +918,7 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) { struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); - unsigned char start = block * EDID_LENGTH; + unsigned short start = block * EDID_LENGTH; struct edid *edid; int r; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 92f0a099d089..d9159ca55412 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 0, + .do_urgent_latency_adjustment = 1, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 34d2e097ca2e..5a5d48fadbf2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -35,6 +35,17 @@ #define DC_LOGGER \ link->ctx->logger +static void get_default_8b_10b_lttpr_aux_rd_interval( + union training_aux_rd_interval *training_rd_interval) +{ + /* LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX + * read reply to this register. Since old sinks with DPCD rev 1.1 + * and earlier may not support this register, assume the mandatory + * value is programmed by the LTTPR to avoid AUX timeout issues. + */ + training_rd_interval->raw = 0x4; +} + static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, const struct dc_link_settings *link_settings, enum lttpr_mode lttpr_mode) @@ -43,17 +54,22 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, uint32_t wait_in_micro_secs = 100; memset(&training_rd_interval, 0, sizeof(training_rd_interval)); - if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); - if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) - wait_in_micro_secs = 400; - if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) - wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); + + if (training_rd_interval.raw != 0) { + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + } } return wait_in_micro_secs; } @@ -71,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval( DP_128B132B_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); - } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); + } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t 
*)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); } switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) { diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f7..cf2463090d3a 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e7..de424e670995 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1469,9 +1469,9 @@ map_pages: } i += 1 << order; num_dma_mapped = i; + range->flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { range->flags.has_devmem_pages = true; range->dpagemap = dpagemap; diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1d..a4cd476f9b30 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 5170f72b0830..f91daefa9d2b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -43,13 +43,13 @@ struct decon_data { unsigned int wincon_burstlen_shift; }; -static struct decon_data exynos7_decon_data = { +static const struct decon_data exynos7_decon_data = { .vidw_buf_start_base = 0x80, .shadowcon_win_protect_shift = 10, .wincon_burstlen_shift = 11, }; -static struct decon_data exynos7870_decon_data = { +static const struct decon_data exynos7870_decon_data = { .vidw_buf_start_base = 0x880, .shadowcon_win_protect_shift = 8, .wincon_burstlen_shift = 10, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index f313ae7bc3a3..6cc7bf77bcac 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -355,8 +355,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static struct platform_driver exynos_drm_platform_driver = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index b150cfd92f6e..09e33a26caaf 100644 --- 
a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -908,7 +908,7 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, u32 buf_num; u32 cfg; - DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueu[%d]\n", buf_id, enqueue); + DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueue[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1ad87584b1c2..c394cc702d7d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -731,7 +731,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, /* * Setting dma-burst to 16Word causes permanent tearing for very small * buffers, e.g. cursor buffer. Burst Mode switching which based on - * plane size is not recommended as plane size varies alot towards the + * plane size is not recommended as plane size varies a lot towards the * end of the screen and rapid movement causes unstable DMA, but it is * still better to change dma-burst than displaying garbage. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 08cf79a62025..e644e2382d77 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -312,9 +312,6 @@ static int vidi_get_modes(struct drm_connector *connector) else drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info)); - if (!drm_edid) - return 0; - drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da3..4969d3de2bac 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 81d2ee37e773..49ff9f1f16d3 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31..be4b0e4df6e1 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c 
b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 6d1c9262a2cf..7752d8ac85f0 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; @@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..3325580d885d 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,10 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define PIXEL_FREQ_1000_1001(_freq) \ + DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) +#define PHY_FREQ_1000_1001(_freq) \ + (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) /* VID PLL Dividers */ enum { @@ -360,11 +363,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long 
long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +375,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +387,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +399,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +411,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +423,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +435,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +447,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +459,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long 
pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) 
+meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", + DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", + PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + PHY_FREQ_1000_1001(params[i].phy_freq)); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && + vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 
0x8148 : 0x10000; break; @@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd..7ac55744e574 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cece..edddfc036c6d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); 
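/*
 * Context for the change below: dma_fence_set_error() is only valid on
 * a fence that has not signaled yet, so the teardown path must skip
 * fences that already completed. The guarded pattern as added here
 * (the _locked variant is used because this path already runs under
 * the fence lock):
 *
 *	if (error && !dma_fence_is_signaled_locked(&fence->base))
 *		dma_fence_set_error(&fence->base, error);
 */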
- if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 7d68a8acfe2e..eb0f8373258c 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982..925fbc2cda70 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50d..5bf3c969907c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BOs with a low watermark threshold */ + /** @evict_low: Whether to evict if we cannot swap a BO when @hit_low is false (first pass) */ bool hit_low, evict_low; }; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -208,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; @@ -378,7 +383,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(&gt->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +401,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(&gt->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +516,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; 
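/*
 * Sketch of the overflow-reporting protocol implemented here, with
 * drop_seen standing in for the bitmap_weight() test above: the first
 * read after a drop fails once with -EIO under the new per-stream
 * lock, and the next read returns data again.
 *
 *	mutex_lock(&stream->xecore_buf_lock);
 *	if (drop_seen && !stream->data_drop.reported_to_user) {
 *		stream->data_drop.reported_to_user = true;
 *		mutex_unlock(&stream->xecore_buf_lock);
 *		return -EIO;
 *	}
 *	stream->data_drop.reported_to_user = false;
 */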
xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +534,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +591,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +727,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f..0b6547c06961 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -79,7 +79,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index b3b11be11650..59792e00cecf 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -307,7 +307,7 @@ void __init hv_get_partition_id(void) local_irq_save(flags); output = *this_cpu_ptr(hyperv_pcpu_input_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output); pt_id = output->partition_id; local_irq_restore(flags); @@ -566,9 +566,11 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). 
*/ - synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); - kfree(*synic_eventring_tail); - *synic_eventring_tail = NULL; + if (hv_root_partition()) { + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + } return 0; } diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig index 4b6359326ede..4f7d2b6d79e2 100644 --- a/drivers/hwtracing/intel_th/Kconfig +++ b/drivers/hwtracing/intel_th/Kconfig @@ -60,6 +60,7 @@ config INTEL_TH_STH config INTEL_TH_MSU tristate "Intel(R) Trace Hub Memory Storage Unit" + depends on MMU help Memory Storage Unit (MSU) trace output device enables storing STP traces to system memory. It supports single diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index bf99d79a4192..7163950eb371 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/workqueue.h> #include <linux/dma-mapping.h> +#include <linux/pfn_t.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -976,7 +977,6 @@ static void msc_buffer_contig_free(struct msc *msc) for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off); - page->mapping = NULL; __free_page(page); } @@ -1158,9 +1158,6 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i; for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = msc_sg_page(sg); - - page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, sg_virt(sg), sg_dma_address(sg)); } @@ -1601,22 +1598,10 @@ static void msc_mmap_close(struct vm_area_struct *vma) { struct msc_iter *iter = vma->vm_file->private_data; struct msc *msc = iter->msc; - unsigned long pg; if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) return; - /* drop page _refcounts */ - for (pg = 0; pg < msc->nr_pages; pg++) { - struct page *page = msc_buffer_get_page(msc, pg); - - if (WARN_ON_ONCE(!page)) - continue; - - if (page->mapping) - page->mapping = NULL; - } - /* last mapping -- drop user_count */ atomic_dec(&msc->user_count); mutex_unlock(&msc->buf_mutex); @@ -1626,16 +1611,14 @@ static vm_fault_t msc_mmap_fault(struct vm_fault *vmf) { struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; + struct page *page; - vmf->page = msc_buffer_get_page(msc, vmf->pgoff); - if (!vmf->page) + page = msc_buffer_get_page(msc, vmf->pgoff); + if (!page) return VM_FAULT_SIGBUS; - get_page(vmf->page); - vmf->page->mapping = vmf->vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - return 0; + get_page(page); + return vmf_insert_mixed(vmf->vma, vmf->address, page_to_pfn_t(page)); } static const struct vm_operations_struct msc_mmap_ops = { @@ -1676,7 +1659,7 @@ out: atomic_dec(&msc->user_count); vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP); vma->vm_ops = &msc_mmap_ops; return ret; } diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b718..14aa0d77df26 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3664,6 +3664,14 @@ found: while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command 
line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index be8761bbef0f..f34209b08b4c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3869,6 +3869,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3879,21 +3882,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. - */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7c..980cc6b33c43 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -411,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4c21aaed126..48d910399a1b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3388,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3398,11 +3399,21 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); - ret = -EINVAL; + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } @@ -4429,6 +4440,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features 
|= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b..cb0b993bebb4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,6 +3785,22 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + /* * The PCIe spec, in its wisdom, declares that the behaviour of the * device is undefined if you enable PASID support after ATS support. @@ -3792,22 +3808,12 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) * we can't yet know if we're ever going to use it. */ if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) iommu_enable_pci_ats(info); iommu_enable_pci_pri(info); - - return &iommu->iommu; -free_table: - intel_pasid_free_table(dev); -clear_rbtree: - device_rbtree_remove(info); -free: - kfree(info); - - return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) @@ -4391,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, @@ -4432,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4509,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c69894861866..dc98c39d2b20 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; diff --git a/drivers/mcb/mcb-parse.c 
b/drivers/mcb/mcb-parse.c index 02a680c73979..bf0d7d58c8b0 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -96,7 +96,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, ret = mcb_device_register(bus, mdev); if (ret < 0) - goto err; + return ret; return 0; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c8ed65cd87e..f0b5a6931161 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2424,7 +2426,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a283feb3319..cc3d3897ef42 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5164,7 +5164,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 35100a435c88..9e175c5e0634 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -523,9 +523,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); @@ -1049,7 +1050,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; - bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1074,8 +1074,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); - - mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index 04756302b878..98d3d123004c 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -37,6 +37,7 @@ struct pci1xxxx_gpio { struct auxiliary_device *aux_dev; void __iomem *reg_base; + raw_spinlock_t wa_lock; struct gpio_chip gpio; spinlock_t lock; int irq_base; @@ -167,7 +168,7 @@ static void pci1xxxx_gpio_irq_ack(struct irq_data *data) unsigned long flags; spin_lock_irqsave(&priv->lock, flags); - pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true); + writel(BIT(gpio % 32), priv->reg_base + INTR_STAT_OFFSET(gpio)); spin_unlock_irqrestore(&priv->lock, flags); } @@ -257,6 +258,7 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) struct pci1xxxx_gpio *priv = dev_id; struct gpio_chip *gc = &priv->gpio; unsigned long 
int_status = 0; + unsigned long wa_flags; unsigned long flags; u8 pincount; int bit; @@ -280,7 +282,9 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); spin_unlock_irqrestore(&priv->lock, flags); irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32))); - handle_nested_irq(irq); + raw_spin_lock_irqsave(&priv->wa_lock, wa_flags); + generic_handle_irq(irq); + raw_spin_unlock_irqrestore(&priv->wa_lock, wa_flags); } } spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index a5f88ec97df7..bc40b940ae21 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,7 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ /* diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d6ff9d82ae94..3f9c60b579ae 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, /* required last entry */ diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 7be1649b1972..da26a080916c 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -36,20 +36,24 @@ #define VSC_TP_XFER_TIMEOUT_BYTES 700 #define VSC_TP_PACKET_PADDING_SIZE 1 #define VSC_TP_PACKET_SIZE(pkt) \ - (sizeof(struct vsc_tp_packet) + le16_to_cpu((pkt)->len) + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + le16_to_cpu((pkt)->hdr.len) + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_PACKET_SIZE \ - (sizeof(struct vsc_tp_packet) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_XFER_SIZE \ (VSC_TP_MAX_PACKET_SIZE + VSC_TP_XFER_TIMEOUT_BYTES) #define VSC_TP_NEXT_XFER_LEN(len, offset) \ - (len + sizeof(struct vsc_tp_packet) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) + (len + sizeof(struct vsc_tp_packet_hdr) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) -struct vsc_tp_packet { +struct vsc_tp_packet_hdr { __u8 sync; __u8 cmd; __le16 len; __le32 seq; - __u8 buf[] __counted_by(len); +}; + +struct vsc_tp_packet { + struct vsc_tp_packet_hdr hdr; + __u8 buf[VSC_TP_MAX_XFER_SIZE - sizeof(struct vsc_tp_packet_hdr)]; }; struct vsc_tp { @@ -67,8 +71,8 @@ struct vsc_tp { u32 seq; /* command buffer */ - void *tx_buf; - void *rx_buf; + struct vsc_tp_packet *tx_buf; + struct vsc_tp_packet *rx_buf; atomic_t assert_cnt; wait_queue_head_t xfer_wait; @@ -158,12 +162,12 @@ static int vsc_tp_dev_xfer(struct vsc_tp *tp, void *obuf, void *ibuf, size_t len static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, void *ibuf, u16 ilen) { - int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet); + int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet_hdr); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; - u8 *src, *crc_src, *rx_buf = tp->rx_buf; + u8 *src, *crc_src, *rx_buf = (u8 *)tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; - struct vsc_tp_packet ack; + struct vsc_tp_packet_hdr ack; u8 *dst = (u8 *)&ack; bool synced = false; @@ -280,10 +284,10 @@ int 
vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen, guard(mutex)(&tp->mutex); - pkt->sync = VSC_TP_PACKET_SYNC; - pkt->cmd = cmd; - pkt->len = cpu_to_le16(olen); - pkt->seq = cpu_to_le32(++tp->seq); + pkt->hdr.sync = VSC_TP_PACKET_SYNC; + pkt->hdr.cmd = cmd; + pkt->hdr.len = cpu_to_le16(olen); + pkt->hdr.seq = cpu_to_le32(++tp->seq); memcpy(pkt->buf, obuf, olen); crc = ~crc32(~0, (u8 *)pkt, sizeof(pkt) + olen); @@ -320,7 +324,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex); /* rom xfer is big endian */ - cpu_to_be32_array(tp->tx_buf, obuf, words); + cpu_to_be32_array((u32 *)tp->tx_buf, obuf, words); ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -336,7 +340,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; if (ibuf) - be32_to_cpu_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, (u32 *)tp->rx_buf, words); return ret; } @@ -490,11 +494,11 @@ static int vsc_tp_probe(struct spi_device *spi) if (!tp) return -ENOMEM; - tp->tx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->tx_buf = devm_kzalloc(dev, sizeof(*tp->tx_buf), GFP_KERNEL); if (!tp->tx_buf) return -ENOMEM; - tp->rx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->rx_buf = devm_kzalloc(dev, sizeof(*tp->rx_buf), GFP_KERNEL); if (!tp->rx_buf) return -ENOMEM; diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index d294850a35a1..c4e5e2c977be 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -122,7 +122,6 @@ struct pci_endpoint_test { struct pci_endpoint_test_data { enum pci_barno test_reg_bar; size_t alignment; - int irq_type; }; static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, @@ -948,7 +947,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, test_reg_bar = data->test_reg_bar; test->test_reg_bar = test_reg_bar; test->alignment = data->alignment; - test->irq_type = data->irq_type; } init_completion(&test->irq_raised); @@ -970,10 +968,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - ret = pci_endpoint_test_alloc_irq_vectors(test, test->irq_type); - if (ret) - goto err_disable_irq; - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); @@ -1009,10 +1003,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, goto err_ida_remove; } - ret = pci_endpoint_test_request_irq(test); - if (ret) - goto err_kfree_test_name; - pci_endpoint_test_get_capabilities(test); misc_device = &test->miscdev; @@ -1020,7 +1010,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, misc_device->name = kstrdup(name, GFP_KERNEL); if (!misc_device->name) { ret = -ENOMEM; - goto err_release_irq; + goto err_kfree_test_name; } misc_device->parent = &pdev->dev; misc_device->fops = &pci_endpoint_test_fops; @@ -1036,9 +1026,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, err_kfree_name: kfree(misc_device->name); -err_release_irq: - pci_endpoint_test_release_irq(test); - err_kfree_test_name: kfree(test->name); @@ -1051,8 +1038,6 @@ err_iounmap: pci_iounmap(pdev, test->bar[bar]); } -err_disable_irq: - pci_endpoint_test_free_irq_vectors(test); pci_release_regions(pdev); err_disable_pdev: @@ -1092,23 +1077,19 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) static const struct pci_endpoint_test_data 
default_data = { .test_reg_bar = BAR_0, .alignment = SZ_4K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data am654_data = { .test_reg_bar = BAR_2, .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data j721e_data = { .alignment = 256, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data rk3588_data = { .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; /* diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 6824131b69b1..264e11fa58ea 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -691,8 +691,8 @@ config MMC_TMIO_CORE config MMC_SDHI tristate "Renesas SDHI SD/SDIO controller support" depends on SUPERH || ARCH_RENESAS || COMPILE_TEST + depends on (RESET_CONTROLLER && REGULATOR) || !OF select MMC_TMIO_CORE - select RESET_CONTROLLER if ARCH_RENESAS help This provides support for the SDHI SD/SDIO controller found in Renesas SuperH, ARM and ARM64 based SoCs diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fa6526be3638..8c83e203c516 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1179,7 +1179,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(rdev)) { dev_err(dev, "regulator register failed err=%ld", PTR_ERR(rdev)); ret = PTR_ERR(rdev); - goto efree; + goto edisclk; } priv->rdev = rdev; } @@ -1243,26 +1243,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1274,8 +1274,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d47dfa80fb95..4d64b6935bb9 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -102,6 +102,7 @@ config NVME_TCP_TLS depends on NVME_TCP select NET_HANDSHAKE select KEYS + select TLS help Enables TLS encryption for NVMe TCP using the netlink handshake API. 
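The renesas_sdhi_probe() hunk above retires the eirq label, which wrongly called tmio_mmc_host_remove() on a host that had never completed probing, and routes every failure after clock enable (including the regulator registration error) to edisclk instead. A minimal sketch of the corrected unwind order, with hypothetical example_* helpers standing in for the driver's real ones:

    #include <linux/err.h>
    #include <linux/platform_device.h>

    /* Hypothetical stand-ins for the driver's own helpers. */
    struct example_host;
    struct example_host *example_host_alloc(struct platform_device *pdev);
    void example_host_free(struct example_host *host);
    int example_clk_enable(struct example_host *host);
    void example_clk_disable(struct example_host *host);
    int example_request_irqs(struct platform_device *pdev,
                             struct example_host *host);
    int example_host_probe(struct example_host *host);

    static int example_probe(struct platform_device *pdev)
    {
            struct example_host *host;
            int ret;

            host = example_host_alloc(pdev);
            if (IS_ERR(host))
                    return PTR_ERR(host);

            ret = example_clk_enable(host);
            if (ret)
                    goto efree;

            /* From here on, failures must disable the clock first. */
            ret = example_request_irqs(pdev, host);
            if (ret)
                    goto edisclk;   /* not probed yet: nothing to remove */

            ret = example_host_probe(host);
            if (ret)
                    goto edisclk;

            return 0;

    edisclk:
            example_clk_disable(host);
    efree:
            example_host_free(host);
            return ret;
    }

Each label undoes exactly one earlier setup step, so wherever probe fails, only the work actually done so far gets unwound.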
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b178d52eac1b..2e30e9be7408 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3575,7 +3575,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } @@ -3623,6 +3623,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, + { PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, @@ -3646,6 +3649,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ .driver_data = NVME_QUIRK_BROKEN_MSI }, + { PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */ + .driver_data = NVME_QUIRK_BROKEN_MSI | + NVME_QUIRK_NO_DEEPEST_PS }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 72d260201d8c..aba365f97cf6 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1946,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1965,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -2032,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d682..4c253b433bf7 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP select NET_HANDSHAKE + select TLS help Enables TLS encryption for the NVMe TCP target using 
the netlink handshake API. diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index cef8d77f477b..9429b8218408 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); tls_key = NULL; - kfree_sensitive(tls_psk); } if (sq->ctrl->tls_key) key_put(sq->ctrl->tls_key); sq->ctrl->tls_key = tls_key; #endif - + kfree_sensitive(tls_psk); out_free_digest: kfree_sensitive(digest); out_free_psk: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 71f8d06998d6..245475c43127 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port) lockdep_assert_held(&nvmet_config_sem); + if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) + return -EINVAL; + ops = nvmet_transports[port->disc_addr.trtype]; if (!ops) { up_write(&nvmet_config_sem); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f2d0c920269b..12a5cb8641ca 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index fff85bbf0ecd..e206efc29a00 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -594,9 +594,11 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, cell->nbits = info->nbits; cell->np = info->np; - if (cell->nbits) + if (cell->nbits) { cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, BITS_PER_BYTE); + cell->raw_len = ALIGN(cell->bytes, nvmem->word_size); + } if (!IS_ALIGNED(cell->offset, nvmem->stride)) { dev_err(&nvmem->dev, @@ -605,6 +607,18 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, return -EINVAL; } + if (!IS_ALIGNED(cell->raw_len, nvmem->word_size)) { + dev_err(&nvmem->dev, + "cell %s raw len %zd unaligned to nvmem word size %d\n", + cell->name ?: "<unknown>", cell->raw_len, + nvmem->word_size); + + if (info->raw_len) + return -EINVAL; + + cell->raw_len = ALIGN(cell->raw_len, nvmem->word_size); + } + return 0; } @@ -837,7 +851,9 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod if (addr && len == (2 * sizeof(u32))) { info.bit_offset = be32_to_cpup(addr++); info.nbits = be32_to_cpup(addr); - if (info.bit_offset >= BITS_PER_BYTE || info.nbits < 1) { + if (info.bit_offset >= BITS_PER_BYTE * info.bytes || + info.nbits < 1 || + info.bit_offset + info.nbits > BITS_PER_BYTE * info.bytes) { dev_err(dev, "nvmem: invalid bits on %pOF\n", child); of_node_put(child); return -EINVAL; @@ -1630,21 +1646,29 @@ EXPORT_SYMBOL_GPL(nvmem_cell_put); static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; - int i, extra, bit_offset = cell->bit_offset; + int i, extra, bytes_offset; + int bit_offset = cell->bit_offset; p = b = buf; - if (bit_offset) { + + bytes_offset = bit_offset / BITS_PER_BYTE; + b += bytes_offset; + bit_offset %= BITS_PER_BYTE; + + if (bit_offset % BITS_PER_BYTE) { /* First shift */ - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; /* setup rest of 
the bytes if any */ for (i = 1; i < cell->bytes; i++) { /* Get bits from next byte and shift them towards msb */ - *p |= *b << (BITS_PER_BYTE - bit_offset); + *p++ |= *b << (BITS_PER_BYTE - bit_offset); - p = b; - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; } + } else if (p != b) { + memmove(p, b, cell->bytes - bytes_offset); + p += cell->bytes - 1; } else { /* point to the msb */ p += cell->bytes - 1; diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index 116a39e804c7..a872c640b8c5 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -321,19 +321,32 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { struct qfprom_priv *priv = context; - u8 *val = _val; - int i = 0, words = bytes; + u32 *val = _val; void __iomem *base = priv->qfpcorrected; + int words = DIV_ROUND_UP(bytes, sizeof(u32)); + int i; if (read_raw_data && priv->qfpraw) base = priv->qfpraw; - while (words--) - *val++ = readb(base + reg + i++); + for (i = 0; i < words; i++) + *val++ = readl(base + reg + i * sizeof(u32)); return 0; } +/* Align reads to word boundary */ +static void qfprom_fixup_dt_cell_info(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell) +{ + unsigned int byte_offset = cell->offset % sizeof(u32); + + cell->bit_offset += byte_offset * BITS_PER_BYTE; + cell->offset -= byte_offset; + if (byte_offset && !cell->nbits) + cell->nbits = cell->bytes * BITS_PER_BYTE; +} + static void qfprom_runtime_disable(void *data) { pm_runtime_disable(data); @@ -358,10 +371,11 @@ static int qfprom_probe(struct platform_device *pdev) struct nvmem_config econfig = { .name = "qfprom", .add_legacy_fixed_of_cells = true, - .stride = 1, - .word_size = 1, + .stride = 4, + .word_size = 4, .id = NVMEM_DEVID_AUTO, .reg_read = qfprom_reg_read, + .fixup_dt_cell_info = qfprom_fixup_dt_cell_info, }; struct device *dev = &pdev->dev; struct resource *res; diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index ebc3f0b24166..d88f12c53242 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -59,7 +59,6 @@ #define RK3588_OTPC_AUTO_EN 0x08 #define RK3588_OTPC_INT_ST 0x84 #define RK3588_OTPC_DOUT0 0x20 -#define RK3588_NO_SECURE_OFFSET 0x300 #define RK3588_NBYTES 4 #define RK3588_BURST_NUM 1 #define RK3588_BURST_SHIFT 8 @@ -69,6 +68,7 @@ struct rockchip_data { int size; + int read_offset; const char * const *clks; int num_clks; nvmem_reg_read_t reg_read; @@ -196,7 +196,7 @@ static int rk3588_otp_read(void *context, unsigned int offset, addr_start = round_down(offset, RK3588_NBYTES) / RK3588_NBYTES; addr_end = round_up(offset + bytes, RK3588_NBYTES) / RK3588_NBYTES; addr_len = addr_end - addr_start; - addr_start += RK3588_NO_SECURE_OFFSET; + addr_start += otp->data->read_offset / RK3588_NBYTES; buf = kzalloc(array_size(addr_len, RK3588_NBYTES), GFP_KERNEL); if (!buf) @@ -274,12 +274,21 @@ static const struct rockchip_data px30_data = { .reg_read = px30_otp_read, }; +static const struct rockchip_data rk3576_data = { + .size = 0x100, + .read_offset = 0x700, + .clks = px30_otp_clocks, + .num_clks = ARRAY_SIZE(px30_otp_clocks), + .reg_read = rk3588_otp_read, +}; + static const char * const rk3588_otp_clocks[] = { "otp", "apb_pclk", "phy", "arb", }; static const struct rockchip_data rk3588_data = { .size = 0x400, + .read_offset = 0xc00, .clks = rk3588_otp_clocks, .num_clks = ARRAY_SIZE(rk3588_otp_clocks), .reg_read = rk3588_otp_read, @@ -295,6 +304,10 @@ static const struct of_device_id rockchip_otp_match[] = { .data = &px30_data, }, 
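	/* The rk3576 entry added below reuses rk3588_otp_read(), with its
	 * own OTP size and non-secure read offset and the px30 clock set. */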
{ + .compatible = "rockchip,rk3576-otp", + .data = &rk3576_data, + }, + { .compatible = "rockchip,rk3588-otp", .data = &rk3588_data, }, diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 54d6f4fa3ce1..e994c546422c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -187,6 +187,9 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) panic("%s: kzalloc() failed!\n", __func__); tmp->res = r; tmp->dev = dev; + tmp->start = r->start; + tmp->end = r->end; + tmp->flags = r->flags; /* Fallback is smallest one or list is empty */ n = head; @@ -545,6 +548,7 @@ assign: pci_dbg(dev, "%s %pR: releasing\n", res_name, res); release_resource(res); + restore_dev_resource(dev_res); } /* Restore start/end/flags from saved list */ list_for_each_entry(save_res, &save_head, list) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 842a1e6cbfc4..18de31328540 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -37,16 +37,16 @@ static inline const struct group_desc *imx_pinctrl_find_group_by_name( struct pinctrl_dev *pctldev, const char *name) { - const struct group_desc *grp = NULL; + const struct group_desc *grp; int i; for (i = 0; i < pctldev->num_groups; i++) { grp = pinctrl_generic_get_group(pctldev, i); if (grp && !strcmp(grp->grp.name, name)) - break; + return grp; } - return grp; + return NULL; } static void imx_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index ced4ee509b5b..b4eb2beab691 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -449,7 +449,7 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, return -EOPNOTSUPP; virq = irq_find_mapping(eint->domain, eint_num); - eint_offset = (eint_num % 4) * 8; + eint_offset = (idx % 4) * 8; d = irq_get_irq_data(virq); set_offset = (idx / 4) * 4 + eint->regs->dbnc_set; diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 547a798b71c8..5d84a778683d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -6,6 +6,7 @@ */ #include <dt-bindings/pinctrl/mt65xx.h> +#include <linux/bitfield.h> #include <linux/bits.h> #include <linux/cleanup.h> #include <linux/gpio/driver.h> @@ -112,39 +113,19 @@ #define REG_LAN_LED1_MAPPING 0x0280 #define LAN4_LED_MAPPING_MASK GENMASK(18, 16) -#define LAN4_PHY4_LED_MAP BIT(18) -#define LAN4_PHY2_LED_MAP BIT(17) -#define LAN4_PHY1_LED_MAP BIT(16) -#define LAN4_PHY0_LED_MAP 0 -#define LAN4_PHY3_LED_MAP GENMASK(17, 16) +#define LAN4_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN4_LED_MAPPING_MASK, (_n)) #define LAN3_LED_MAPPING_MASK GENMASK(14, 12) -#define LAN3_PHY4_LED_MAP BIT(14) -#define LAN3_PHY2_LED_MAP BIT(13) -#define LAN3_PHY1_LED_MAP BIT(12) -#define LAN3_PHY0_LED_MAP 0 -#define LAN3_PHY3_LED_MAP GENMASK(13, 12) +#define LAN3_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN3_LED_MAPPING_MASK, (_n)) #define LAN2_LED_MAPPING_MASK GENMASK(10, 8) -#define LAN2_PHY4_LED_MAP BIT(12) -#define LAN2_PHY2_LED_MAP BIT(11) -#define LAN2_PHY1_LED_MAP BIT(10) -#define LAN2_PHY0_LED_MAP 0 -#define LAN2_PHY3_LED_MAP GENMASK(11, 10) +#define LAN2_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN2_LED_MAPPING_MASK, (_n)) #define LAN1_LED_MAPPING_MASK GENMASK(6, 4) -#define LAN1_PHY4_LED_MAP BIT(6) -#define LAN1_PHY2_LED_MAP BIT(5) -#define 
LAN1_PHY1_LED_MAP BIT(4) -#define LAN1_PHY0_LED_MAP 0 -#define LAN1_PHY3_LED_MAP GENMASK(5, 4) +#define LAN1_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN1_LED_MAPPING_MASK, (_n)) #define LAN0_LED_MAPPING_MASK GENMASK(2, 0) -#define LAN0_PHY4_LED_MAP BIT(3) -#define LAN0_PHY2_LED_MAP BIT(2) -#define LAN0_PHY1_LED_MAP BIT(1) -#define LAN0_PHY0_LED_MAP 0 -#define LAN0_PHY3_LED_MAP GENMASK(2, 1) +#define LAN0_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN0_LED_MAPPING_MASK, (_n)) /* CONF */ #define REG_I2C_SDA_E2 0x001c @@ -1476,8 +1457,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1491,8 +1472,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1506,8 +1487,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1521,8 +1502,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1540,8 +1521,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1555,8 +1536,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1570,8 +1551,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1585,8 +1566,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1604,8 +1585,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1619,8 +1600,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1634,8 +1615,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1649,8 +1630,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { 
AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1668,8 +1649,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1683,8 +1664,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1698,8 +1679,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1713,8 +1694,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, @@ -1732,8 +1713,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1747,8 +1728,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1762,8 +1743,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1777,8 +1758,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1796,8 +1777,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1811,8 +1792,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1826,8 +1807,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1841,8 +1822,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1860,8 +1841,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + 
LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1875,8 +1856,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1890,8 +1871,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1905,8 +1886,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1924,8 +1905,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1939,8 +1920,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1954,8 +1935,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1969,8 +1950,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 91edb539925a..8596f3541265 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1015,9 +1015,15 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) if (!pctl->eint) return -ENOMEM; - pctl->eint->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pctl->eint->base)) - return PTR_ERR(pctl->eint->base); + pctl->eint->nbase = 1; + /* mtk-eint expects an array */ + pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); + if (!pctl->eint->base) + return -ENOMEM; + + pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pctl->eint->base[0])) + return PTR_ERR(pctl->eint->base[0]); pctl->eint->irq = irq_of_parse_and_map(np, 0); if (!pctl->eint->irq) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 253a0cc57e39..e5a32a0532ee 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -487,7 +487,7 @@ static int meson_pinconf_get(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: if (meson_pinconf_get_pull(pc, pin) == param) - arg = 1; + arg = 60000; else return -EINVAL; break; diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index 1af11cd95fb0..b94fb4ee0ec3 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -46,7 
+46,9 @@ .out_bit = 1, \ .intr_enable_bit = 0, \ .intr_status_bit = 0, \ - .intr_target_bit = 5, \ + .intr_wakeup_present_bit = 6, \ + .intr_wakeup_enable_bit = 7, \ + .intr_target_bit = 8, \ .intr_target_kpss_val = 3, \ .intr_raw_status_bit = 4, \ .intr_polarity_bit = 1, \ diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index d789d6cab794..0329fafe14eb 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -644,10 +644,9 @@ static void amd_pmc_s2idle_check(void) struct smu_metrics table; int rc; - /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ - if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && - table.s0i3_last_entry_status) - usleep_range(10000, 20000); + /* Avoid triggering OVP */ + if (!get_metrics_table(pdev, &table) && table.s0i3_last_entry_status) + msleep(2500); /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 38ef778e8c19..0c697b46f436 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -304,6 +304,7 @@ struct asus_wmi { u32 kbd_rgb_dev; bool kbd_rgb_state_available; + bool oobe_state_available; u8 throttle_thermal_policy_mode; u32 throttle_thermal_policy_dev; @@ -1826,7 +1827,7 @@ static int asus_wmi_led_init(struct asus_wmi *asus) goto error; } - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE)) { + if (asus->oobe_state_available) { /* * Disable OOBE state, so that e.g. the keyboard backlight * works. @@ -4723,6 +4724,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); + asus->oobe_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) && dmi_check_system(asus_ally_mcu_quirk); @@ -4970,6 +4972,13 @@ static int asus_hotk_restore(struct device *device) } if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) kbd_led_update(asus); + if (asus->oobe_state_available) { + /* + * Disable OOBE state, so that e.g. the keyboard backlight + * works. 
+ */ + asus_wmi_set_devstate(ASUS_WMI_DEVID_OOBE, 1, NULL); + } if (asus_wmi_has_fnlock_key(asus)) asus_wmi_fnlock_update(asus); diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 0c3be03385f8..08b82c151e07 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -70,6 +70,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { .driver_data = &generic_quirks, }, { + .ident = "Alienware m15 R7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R7"), + }, + .driver_data = &generic_quirks, + }, + { .ident = "Alienware m16 R1", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), @@ -655,12 +663,10 @@ static int thermal_profile_probe(void *drvdata, unsigned long *choices) for (u32 i = 0; i < sys_desc[3]; i++) { ret = wmax_thermal_information(priv->wdev, WMAX_OPERATION_LIST_IDS, i + first_mode, &out_data); - - if (ret == -EIO) - return ret; - if (ret == -EBADRQC) break; + if (ret) + return ret; if (!is_wmax_thermal_code(out_data)) continue; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 17a09b7784ed..ede483573fe0 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1294,6 +1294,16 @@ static const struct key_entry ideapad_keymap[] = { /* Specific to some newer models */ { KE_KEY, 0x3e | IDEAPAD_WMI_KEY, { KEY_MICMUTE } }, { KE_KEY, 0x3f | IDEAPAD_WMI_KEY, { KEY_RFKILL } }, + /* Star- (User Assignable Key) */ + { KE_KEY, 0x44 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, + /* Eye */ + { KE_KEY, 0x45 | IDEAPAD_WMI_KEY, { KEY_PROG3 } }, + /* Performance toggle also Fn+Q, handled inside ideapad_wmi_notify() */ + { KE_KEY, 0x3d | IDEAPAD_WMI_KEY, { KEY_PROG4 } }, + /* shift + prtsc */ + { KE_KEY, 0x2d | IDEAPAD_WMI_KEY, { KEY_CUT } }, + { KE_KEY, 0x29 | IDEAPAD_WMI_KEY, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 0x2a | IDEAPAD_WMI_KEY, { KEY_ROOT_MENU } }, { KE_END }, }; @@ -2080,6 +2090,12 @@ static void ideapad_wmi_notify(struct wmi_device *wdev, union acpi_object *data) dev_dbg(&wdev->dev, "WMI fn-key event: 0x%llx\n", data->integer.value); + /* performance button triggered by 0x3d */ + if (data->integer.value == 0x3d && priv->dytc) { + platform_profile_cycle(); + break; + } + /* 0x02 FnLock, 0x03 Esc */ if (data->integer.value == 0x02 || data->integer.value == 0x03) ideapad_fn_lock_led_notify(priv, data->integer.value == 0x02); diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 88a1a9ff2f34..0b5e43444ed6 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -44,16 +44,17 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); static const struct acpi_device_id intel_hid_ids[] = { - {"INT33D5", 0}, - {"INTC1051", 0}, - {"INTC1054", 0}, - {"INTC1070", 0}, - {"INTC1076", 0}, - {"INTC1077", 0}, - {"INTC1078", 0}, - {"INTC107B", 0}, - {"INTC10CB", 0}, - {"", 0}, + { "INT33D5" }, + { "INTC1051" }, + { "INTC1054" }, + { "INTC1070" }, + { "INTC1076" }, + { "INTC1077" }, + { "INTC1078" }, + { "INTC107B" }, + { "INTC10CB" }, + { "INTC10CC" }, + { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 40bbf8e45fa4..bdee5d00f30b 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ 
b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c
@@ -146,15 +146,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0;
@@ -163,7 +161,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->die_id = topology_die_id(cpu); data->domain_id = UNCORE_DOMAIN_ID_INVALID; - return uncore_freq_add_entry(data, cpu); + ret = uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu)
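The uncore-frequency hunk above reorders publication after initialization: the CPU is added to uncore_cpu_mask only once uncore_freq_add_entry() has succeeded, so a failed entry can no longer leave a control CPU behind that has no sysfs entry. A minimal sketch of the same publish-last pattern; my_cpu_online(), my_add_entry() and my_ctl_mask are invented stand-ins, not driver code:

    #include <linux/cpumask.h>

    static struct cpumask my_ctl_mask;      /* stand-in for uncore_cpu_mask */

    /* Hypothetical per-die setup, standing in for uncore_freq_add_entry(). */
    static int my_add_entry(unsigned int cpu)
    {
            return 0;       /* may fail in the real driver */
    }

    /*
     * Publish-last: the CPU becomes visible as this die's control CPU
     * only after every fallible step has succeeded.
     */
    static int my_cpu_online(unsigned int cpu)
    {
            int ret = my_add_entry(cpu);

            if (ret)
                    return ret;     /* nothing published, nothing to unwind */

            cpumask_set_cpu(cpu, &my_ctl_mask);     /* publish on success only */
            return 0;
    }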
diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c index 1d5ffe055463..de00a85bfafa 100644
--- a/drivers/pps/generators/pps_gen_tio.c
+++ b/drivers/pps/generators/pps_gen_tio.c
@@ -230,7 +230,7 @@ static int pps_gen_tio_probe(struct platform_device *pdev) hrtimer_setup(&tio->timer, hrtimer_callback, CLOCK_REALTIME, HRTIMER_MODE_ABS); spin_lock_init(&tio->lock); - platform_set_drvdata(pdev, &tio); + platform_set_drvdata(pdev, tio); return 0; }
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 3fcb1ad3b070..1d7901a8f0e4 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -174,6 +174,9 @@ static void mpi3mr_print_event_data(struct mpi3mr_ioc *mrioc, char *desc = NULL; u16 event; + if (!(mrioc->logging_level & MPI3_DEBUG_EVENT)) + return; + event = event_reply->event; switch (event) {
@@ -451,6 +454,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) return 0; } + atomic_set(&mrioc->admin_pend_isr, 0); reply_desc = (struct mpi3_default_reply_descriptor *)mrioc->admin_reply_base + admin_reply_ci;
@@ -565,7 +569,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc, WRITE_ONCE(op_req_q->ci, le16_to_cpu(reply_desc->request_queue_ci)); mpi3mr_process_op_reply_desc(mrioc, reply_desc, &reply_dma, reply_qidx); - atomic_dec(&op_reply_q->pend_ios); + if (reply_dma) mpi3mr_repost_reply_buf(mrioc, reply_dma); num_op_reply++;
@@ -2925,6 +2929,7 @@ static int mpi3mr_setup_admin_qpair(struct mpi3mr_ioc *mrioc) mrioc->admin_reply_ci = 0; mrioc->admin_reply_ephase = 1; atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (!mrioc->admin_req_base) { mrioc->admin_req_base = dma_alloc_coherent(&mrioc->pdev->dev,
@@ -4653,6 +4658,7 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) if (mrioc->admin_reply_base) memset(mrioc->admin_reply_base, 0, mrioc->admin_reply_q_sz); atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (mrioc->init_cmds.reply) { memset(mrioc->init_cmds.reply, 0, sizeof(*mrioc->init_cmds.reply));
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index dc4bd422b601..486db5b2f05d 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -891,7 +891,7 @@ static bool myrb_enable_mmio(struct myrb_hba *cb, mbox_mmio_init_t mmio_init_fn) status = mmio_init_fn(pdev, base, &mbox); if (status != MYRB_STATUS_SUCCESS) { dev_err(&pdev->dev, - "Failed to enable mailbox, statux %02X\n", + "Failed to enable mailbox, status %02X\n", status); return false; }
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 53daf923ad8e..518a252eb6aa 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -707,26 +707,23 @@ void scsi_cdl_check(struct scsi_device *sdev) */ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) { - struct scsi_mode_data data; - struct scsi_sense_hdr sshdr; - struct scsi_vpd *vpd; - bool is_ata = false; char buf[64]; + bool is_ata; int ret; if (!sdev->cdl_supported) return -EOPNOTSUPP; rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (vpd) - is_ata = true; + is_ata = rcu_dereference(sdev->vpd_pg89); rcu_read_unlock(); /* * For ATA devices, CDL needs to be enabled with a SET FEATURES command. */ if (is_ata) { + struct scsi_mode_data data; + struct scsi_sense_hdr sshdr; char *buf_data; int len;
@@ -735,16 +732,30 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) if (ret) return -EINVAL; - /* Enable CDL using the ATA feature page */ + /* Enable or disable CDL using the ATA feature page */ len = min_t(size_t, sizeof(buf), data.length - data.header_length - data.block_descriptor_length); buf_data = buf + data.header_length + data.block_descriptor_length; + + /* + * If we want to enable CDL and CDL is already enabled on the + * device, do nothing. This avoids needlessly resetting the CDL + * statistics on the device as that is implied by the CDL enable + * action. Similar to this, there is no need to do anything if + * we want to disable CDL and CDL is already disabled. + */ + if (enable) { + if ((buf_data[4] & 0x03) == 0x02) + goto out; + buf_data[4] &= ~0x03; + buf_data[4] |= 0x02; + } else { + if ((buf_data[4] & 0x03) == 0x00) + goto out; + buf_data[4] &= ~0x03; + } ret = scsi_mode_select(sdev, 1, 0, buf_data, len, 5 * HZ, 3, &data, &sshdr);
@@ -756,6 +767,7 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) } } +out: sdev->cdl_enable = enable; return 0;
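The scsi.c hunk above keys the enable/disable decision off the low two bits of byte 4 of the ATA feature mode page (0x02 means CDL enabled) and skips the MODE SELECT when the device is already in the requested state, so the device's CDL statistics are not needlessly reset. A small sketch of just that byte-4 logic; cdl_mode_update() is a hypothetical helper, not part of the patch:

    #include <stdbool.h>
    #include <stdint.h>

    #define CDL_MODE_MASK   0x03    /* low two bits of byte 4 select the CDL mode */
    #define CDL_MODE_ENABLE 0x02    /* value used by the ATA feature page */

    /*
     * Return true when a MODE SELECT is required and update *byte4
     * accordingly; returning false means the device is already in the
     * requested state and its CDL statistics are left untouched.
     */
    static bool cdl_mode_update(uint8_t *byte4, bool enable)
    {
            uint8_t cur = *byte4 & CDL_MODE_MASK;
            uint8_t want = enable ? CDL_MODE_ENABLE : 0x00;

            if (cur == want)
                    return false;           /* nothing to send */

            *byte4 = (*byte4 & ~CDL_MODE_MASK) | want;
            return true;
    }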
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0d29470e86b0..1b43013d72c0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request); */ static void scsi_cleanup_rq(struct request *rq) { + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + cmd->flags = 0; + if (rq->rq_flags & RQF_DONTPREP) { - scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); + scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } }
diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 5ea6399e6c9b..10a602d4843a 100644
--- a/drivers/soundwire/intel_auxdevice.c
+++ b/drivers/soundwire/intel_auxdevice.c
@@ -353,9 +353,6 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret);
@@ -640,7 +637,10 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { + /* Prevent runtime PM from racing with the code below. */ + pm_runtime_disable(dev); + + if (pm_runtime_status_suspended(dev)) { dev_dbg(dev, "pm_runtime status: suspended\n"); clock_stop_quirks = sdw->link_res->clock_stop_quirks;
@@ -648,7 +648,7 @@ static int __maybe_unused intel_suspend(struct device *dev) if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || !clock_stop_quirks) { - if (pm_runtime_suspended(dev->parent)) { + if (pm_runtime_status_suspended(dev->parent)) { /* * paranoia check: this should not happen with the .prepare * resume to full power
@@ -715,7 +715,6 @@ static int __maybe_unused intel_resume(struct device *dev) struct sdw_cdns *cdns = dev_get_drvdata(dev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; - int link_flags; int ret; if (bus->prop.hw_disabled || !sdw->startup_done) {
@@ -724,23 +723,6 @@ static int __maybe_unused intel_resume(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - pm_runtime_resume(bus->dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret);
@@ -761,6 +743,14 @@ static int __maybe_unused intel_resume(struct device *dev) } /* + * Runtime PM has been disabled in intel_suspend(), so set the status + * to active because the device has just been resumed and re-enable + * runtime PM. + */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + /* * after system resume, the pm_runtime suspend() may kick in * during the enumeration, before any children device force the * master device to remain active. Using pm_runtime_get()
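The soundwire change above replaces the racy pm_runtime_suspended() check with pm_runtime_disable() followed by pm_runtime_status_suspended() in the system-suspend path, and has resume mark the device active before re-enabling runtime PM. A minimal sketch of that suspend/resume pairing, assuming hypothetical my_power_down()/my_power_up() helpers rather than the driver's code:

    #include <linux/device.h>
    #include <linux/pm_runtime.h>

    /* Hypothetical power helpers standing in for the driver's own code. */
    static int my_power_down(struct device *dev) { return 0; }
    static int my_power_up(struct device *dev) { return 0; }

    static int my_suspend(struct device *dev)
    {
            /* Keep runtime PM from changing the status underneath us. */
            pm_runtime_disable(dev);

            if (pm_runtime_status_suspended(dev))
                    return 0;       /* already powered down by runtime PM */

            return my_power_down(dev);
    }

    static int my_resume(struct device *dev)
    {
            int ret = my_power_up(dev);

            if (ret)
                    return ret;

            /*
             * Runtime PM is still disabled from my_suspend(); the device
             * is now powered, so correct the status before re-enabling.
             */
            pm_runtime_set_active(dev);
            pm_runtime_enable(dev);
            return 0;
    }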
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a31a1db07aa4..5db0639d3b01 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -596,7 +596,11 @@ u64 spi_mem_calc_op_duration(struct spi_mem_op *op) ns_per_cycles = 1000000000 / op->max_freq; ncycles += ((op->cmd.nbytes * 8) / op->cmd.buswidth) / (op->cmd.dtr ? 2 : 1); ncycles += ((op->addr.nbytes * 8) / op->addr.buswidth) / (op->addr.dtr ? 2 : 1); - ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + + /* Dummy bytes are optional for some SPI flash memory operations */ + if (op->dummy.nbytes) + ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + ncycles += ((op->data.nbytes * 8) / op->data.buswidth) / (op->data.dtr ? 2 : 1); return ncycles * ns_per_cycles;
diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 17eb67e19132..94948c8781e8 100644
--- a/drivers/spi/spi-qpic-snand.c
+++ b/drivers/spi/spi-qpic-snand.c
@@ -142,7 +142,7 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc, else if (reg == NAND_READ_LOCATION_1) snandc->regs->read_location1 = locreg_val; else if (reg == NAND_READ_LOCATION_2) - snandc->regs->read_location1 = locreg_val; + snandc->regs->read_location2 = locreg_val; else if (reg == NAND_READ_LOCATION_3) snandc->regs->read_location3 = locreg_val; }
@@ -1307,8 +1307,7 @@ static int qcom_spi_send_cmdaddr(struct qcom_nand_controller *snandc, snandc->qspi->addr1 = cpu_to_le32(s_op.addr1_reg << 16); snandc->qspi->addr2 = cpu_to_le32(s_op.addr2_reg); snandc->qspi->cmd = cpu_to_le32(cmd); - qcom_spi_block_erase(snandc); - return 0; + return qcom_spi_block_erase(snandc); default: break; }
diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 668022098b1e..9ec9823409cc 100644
--- a/drivers/spi/spi-stm32-ospi.c
+++ b/drivers/spi/spi-stm32-ospi.c
@@ -960,6 +960,10 @@ err_pm_resume: err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); + if (ospi->dma_chtx) + dma_release_channel(ospi->dma_chtx); + if (ospi->dma_chrx) + dma_release_channel(ospi->dma_chrx); return ret; }
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3822d7c8d8ed..2a8bb798e95b 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1244ef3aa86c..620ba6e0ab07 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4263,8 +4263,8 @@ int iscsit_close_connection( spin_unlock(&iscsit_global->ts_bitmap_lock); iscsit_stop_timers_for_cmds(conn); - iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + iscsit_stop_nopin_response_timer(conn); if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn);
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 1b137e068444..3449945493ce 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -1746,6 +1746,12 @@ msm_serial_early_console_setup_dm(struct earlycon_device *device, if (!device->port.membase) return -ENODEV; + /* Disable DM / single-character modes */ + msm_write(&device->port, 0, UARTDM_DMEN); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_RX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_TX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_TX_ENABLE, MSM_UART_CR); + device->con->write = msm_serial_early_write_dm; return 0; }
diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 5904a2d4cefa..054a8e630ace 100644
--- a/drivers/tty/serial/sifive.c
+++ b/drivers/tty/serial/sifive.c
@@ -563,8 +563,11 @@ static void sifive_serial_break_ctl(struct uart_port *port,
int break_state) static int sifive_serial_startup(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_enable_rxwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); return 0; } @@ -572,9 +575,12 @@ static int sifive_serial_startup(struct uart_port *port) static void sifive_serial_shutdown(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_disable_rxwm(ssp); __ssp_disable_txwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); } /** diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 0bd6544e30a6..791e2f1f7c0b 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -193,13 +193,12 @@ int set_selection_user(const struct tiocl_selection __user *sel, return -EFAULT; /* - * TIOCL_SELCLEAR, TIOCL_SELPOINTER and TIOCL_SELMOUSEREPORT are OK to - * use without CAP_SYS_ADMIN as they do not modify the selection. + * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without + * CAP_SYS_ADMIN as they do not modify the selection. */ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: - case TIOCL_SELMOUSEREPORT: break; default: if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 240ce135bbfb..f1294c29f484 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -677,13 +677,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) unsigned long flags; int err; - if (!ufshcd_cmd_inflight(lrbp->cmd)) { - dev_err(hba->dev, - "%s: skip abort. cmd at tag %d already completed.\n", - __func__, tag); - return FAILED; - } - /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", @@ -692,6 +685,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) { + dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n", + __func__, tag); + return FAILED; + } if (ufshcd_mcq_sqe_search(hba, hwq, tag)) { /* diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 44156041d88f..7735421e3991 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -278,6 +278,7 @@ static const struct ufs_dev_quirk ufs_fixups[] = { .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | + UFS_DEVICE_QUIRK_PA_HIBER8TIME | UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS }, { .wmanufacturerid = UFS_VENDOR_SKHYNIX, .model = UFS_ANY_MODEL, @@ -5677,6 +5678,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba, continue; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + continue; if (force_compl) { ufshcd_mcq_compl_all_cqes_lock(hba, hwq); @@ -7262,8 +7265,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } - ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, - (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); return err; } @@ -8470,6 +8471,31 @@ out: return ret; } +/** + * ufshcd_quirk_override_pa_h8time - Ensures proper adjustment of PA_HIBERN8TIME. + * @hba: per-adapter instance + * + * Some UFS devices require specific adjustments to the PA_HIBERN8TIME parameter + * to ensure proper hibernation timing. 
This function retrieves the current + * PA_HIBERN8TIME value and increments it by 100us. + */ +static void ufshcd_quirk_override_pa_h8time(struct ufs_hba *hba) +{ + u32 pa_h8time; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &pa_h8time); + if (ret) { + dev_err(hba->dev, "Failed to get PA_HIBERN8TIME: %d\n", ret); + return; + } + + /* Increment by 1 to increase hibernation time by 100 µs */ + ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), pa_h8time + 1); + if (ret) + dev_err(hba->dev, "Failed updating PA_HIBERN8TIME: %d\n", ret); +} + static void ufshcd_tune_unipro_params(struct ufs_hba *hba) { ufshcd_vops_apply_dev_quirks(hba); @@ -8480,6 +8506,9 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_HIBER8TIME) + ufshcd_quirk_override_pa_h8time(hba); } static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 1b37449fbffc..c0761ccc1381 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -33,6 +33,10 @@ ((((c) >> 16) & MCQ_QCFGPTR_MASK) * MCQ_QCFGPTR_UNIT) #define MCQ_QCFG_SIZE 0x40 +/* De-emphasis for gear-5 */ +#define DEEMPHASIS_3_5_dB 0x04 +#define NO_DEEMPHASIS 0x0 + enum { TSTBUS_UAWM, TSTBUS_UARM, @@ -795,6 +799,23 @@ static int ufs_qcom_icc_update_bw(struct ufs_qcom_host *host) return ufs_qcom_icc_set_bw(host, bw_table.mem_bw, bw_table.cfg_bw); } +static void ufs_qcom_set_tx_hs_equalizer(struct ufs_hba *hba, u32 gear, u32 tx_lanes) +{ + u32 equalizer_val; + int ret, i; + + /* Determine the equalizer value based on the gear */ + equalizer_val = (gear == 5) ? 
DEEMPHASIS_3_5_dB : NO_DEEMPHASIS; + + for (i = 0; i < tx_lanes; i++) { + ret = ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(TX_HS_EQUALIZER, i), + equalizer_val); + if (ret) + dev_err(hba->dev, "%s: failed equalizer lane %d\n", + __func__, i); + } +} + static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, const struct ufs_pa_layer_attr *dev_max_params, @@ -846,6 +867,11 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, dev_req_params->gear_tx, PA_INITIAL_ADAPT); } + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING) + ufs_qcom_set_tx_hs_equalizer(hba, + dev_req_params->gear_tx, dev_req_params->lane_tx); + break; case POST_CHANGE: if (ufs_qcom_cfg_timers(hba, false)) { @@ -893,6 +919,16 @@ static int ufs_qcom_quirk_host_pa_saveconfigtime(struct ufs_hba *hba) (pa_vs_config_reg1 | (1 << 12))); } +static void ufs_qcom_override_pa_tx_hsg1_sync_len(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TX_HSG1_SYNC_LENGTH), + PA_TX_HSG1_SYNC_LENGTH_VAL); + if (err) + dev_err(hba->dev, "Failed (%d) set PA_TX_HSG1_SYNC_LENGTH\n", err); +} + static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) { int err = 0; @@ -900,6 +936,9 @@ static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME) err = ufs_qcom_quirk_host_pa_saveconfigtime(hba); + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH) + ufs_qcom_override_pa_tx_hsg1_sync_len(hba); + return err; } @@ -914,6 +953,10 @@ static struct ufs_dev_quirk ufs_qcom_dev_fixups[] = { { .wmanufacturerid = UFS_VENDOR_WDC, .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE }, + { .wmanufacturerid = UFS_VENDOR_SAMSUNG, + .model = UFS_ANY_MODEL, + .quirk = UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH | + UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING }, {} }; diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h index d0e6ec9128e7..05d4cb569c50 100644 --- a/drivers/ufs/host/ufs-qcom.h +++ b/drivers/ufs/host/ufs-qcom.h @@ -122,8 +122,11 @@ enum { TMRLUT_HW_CGC_EN | OCSC_HW_CGC_EN) /* QUniPro Vendor specific attributes */ +#define PA_TX_HSG1_SYNC_LENGTH 0x1552 #define PA_VS_CONFIG_REG1 0x9000 #define DME_VS_CORE_CLK_CTRL 0xD002 +#define TX_HS_EQUALIZER 0x0037 + /* bit and mask definitions for DME_VS_CORE_CLK_CTRL attribute */ #define CLK_1US_CYCLES_MASK_V4 GENMASK(27, 16) #define CLK_1US_CYCLES_MASK GENMASK(7, 0) @@ -141,6 +144,21 @@ enum { #define UNIPRO_CORE_CLK_FREQ_201_5_MHZ 202 #define UNIPRO_CORE_CLK_FREQ_403_MHZ 403 +/* TX_HSG1_SYNC_LENGTH attr value */ +#define PA_TX_HSG1_SYNC_LENGTH_VAL 0x4A + +/* + * Some ufs device vendors need a different TSync length. + * Enable this quirk to give an additional TX_HS_SYNC_LENGTH. + */ +#define UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH BIT(16) + +/* + * Some ufs device vendors need a different Deemphasis setting. + * Enable this quirk to tune TX Deemphasis parameters. 
+ */ +#define UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING BIT(17) + /* ICE allocator type to share AES engines among TX stream and RX stream */ #define ICE_ALLOCATOR_TYPE 2 diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 694aa1457739..d9d8dc05b235 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1963,6 +1963,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) unsigned int bit; unsigned long reg; + local_bh_disable(); spin_lock_irqsave(&priv_dev->lock, flags); reg = readl(&priv_dev->regs->usb_ists); @@ -2004,6 +2005,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) irqend: writel(~0, &priv_dev->regs->ep_ien); spin_unlock_irqrestore(&priv_dev->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 1a7fc638213e..4f8bfd242b59 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -336,6 +336,13 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) return ret; } +static void ci_hdrc_imx_disable_regulator(void *arg) +{ + struct ci_hdrc_imx_data *data = arg; + + regulator_disable(data->hsic_pad_regulator); +} + static int ci_hdrc_imx_probe(struct platform_device *pdev) { struct ci_hdrc_imx_data *data; @@ -394,6 +401,13 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) "Failed to enable HSIC pad regulator\n"); goto err_put; } + ret = devm_add_action_or_reset(dev, + ci_hdrc_imx_disable_regulator, data); + if (ret) { + dev_err(dev, + "Failed to add regulator devm action\n"); + goto err_put; + } } } @@ -432,11 +446,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_get_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = imx_prepare_enable_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = clk_prepare_enable(data->clk_wakeup); if (ret) @@ -470,7 +484,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) { pdata.flags |= CI_HDRC_OVERRIDE_PHY_CONTROL; data->override_phy_control = true; - usb_phy_init(pdata.usb_phy); + ret = usb_phy_init(pdata.usb_phy); + if (ret) { + dev_err(dev, "Failed to init phy\n"); + goto err_clk; + } } if (pdata.flags & CI_HDRC_SUPPORTS_RUNTIME_PM) @@ -479,7 +497,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_usbmisc_init(data->usbmisc_data); if (ret) { dev_err(dev, "usbmisc init failed, ret=%d\n", ret); - goto err_clk; + goto phy_shutdown; } data->ci_pdev = ci_hdrc_add_device(dev, @@ -488,7 +506,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->ci_pdev)) { ret = PTR_ERR(data->ci_pdev); dev_err_probe(dev, ret, "ci_hdrc_add_device failed\n"); - goto err_clk; + goto phy_shutdown; } if (data->usbmisc_data) { @@ -522,19 +540,20 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); +phy_shutdown: + if (data->override_phy_control) + usb_phy_shutdown(data->phy); err_clk: clk_disable_unprepare(data->clk_wakeup); err_wakeup_clk: imx_disable_unprepare_clks(dev); -disable_hsic_regulator: - if (data->hsic_pad_regulator) - /* don't overwrite original ret (cf. 
EPROBE_DEFER) */ - regulator_disable(data->hsic_pad_regulator); +qos_remove_request: if (pdata.flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); data->ci_pdev = NULL; err_put: - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); return ret; }
@@ -556,10 +575,9 @@ static void ci_hdrc_imx_remove(struct platform_device *pdev) clk_disable_unprepare(data->clk_wakeup); if (data->plat_data->flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); - if (data->hsic_pad_regulator) - regulator_disable(data->hsic_pad_regulator); } - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); } static void ci_hdrc_imx_shutdown(struct platform_device *pdev)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 86ee39db013f..16e7fa4d488d 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -726,7 +726,7 @@ static int wdm_open(struct inode *inode, struct file *file) rv = -EBUSY; goto out; } - + smp_rmb(); /* ordered against wdm_wwan_port_stop() */ rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv);
@@ -829,6 +829,7 @@ static struct usb_class_driver wdm_class = { static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); + int rv; /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. If chardev is already open, just fail
@@ -848,7 +849,15 @@ static int wdm_wwan_port_start(struct wwan_port *port) wwan_port_txon(port); /* Start getting events */ - return usb_submit_urb(desc->validity, GFP_KERNEL); + rv = usb_submit_urb(desc->validity, GFP_KERNEL); + if (rv < 0) { + wwan_port_txoff(port); + desc->manage_power(desc->intf, 0); + /* this must be last lest we race with chardev open */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); + } + + return rv; } static void wdm_wwan_port_stop(struct wwan_port *port)
@@ -859,8 +868,10 @@ static void wdm_wwan_port_stop(struct wwan_port *port) poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); - clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); + smp_wmb(); /* ordered against wdm_open() */ + /* this must be last lest we open a poisoned device */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); } static void wdm_wwan_port_tx_complete(struct urb *urb)
@@ -868,7 +879,7 @@ static void wdm_wwan_port_tx_complete(struct urb *urb) struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; - usb_autopm_put_interface(desc->intf); + usb_autopm_put_interface_async(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); }
@@ -898,7 +909,7 @@ static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; - req->wIndex = desc->inum; + req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc;
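The cdc-wdm change above clears WDM_WWAN_IN_USE only after the URBs have been unpoisoned, with an smp_wmb()/smp_rmb() pair ordering the flag against the state it guards, so an opener that sees the bit clear also sees the restored device state. A sketch of the same ordering with generic names; my_stop() and my_open() are not the driver's functions:

    #include <linux/bitops.h>
    #include <linux/errno.h>

    #define IN_USE_BIT      0

    static void my_stop(unsigned long *flags)
    {
            /* ...unpoison URBs, drop power... */
            smp_wmb();                      /* order state writes before the flag */
            clear_bit(IN_USE_BIT, flags);   /* must be last */
    }

    static int my_open(unsigned long *flags)
    {
            if (test_and_set_bit(IN_USE_BIT, flags))
                    return -EBUSY;          /* still in use */

            smp_rmb();      /* pairs with smp_wmb() in my_stop() */
            /* state written before the flag was cleared is visible here */
            return 0;
    }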
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8efbacc5bc34..36d3df7d040c 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -369,6 +369,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* SanDisk Corp. SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM },
@@ -383,6 +386,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Silicon Motion Flash Drive */ + { USB_DEVICE(0x090c, 0x1000), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_IGNORE },
@@ -539,6 +545,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* VLI disk */ + { USB_DEVICE(0x2109, 0x0711), .driver_info = USB_QUIRK_NO_LPM }, + /* Raydium Touchscreen */ { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM },
diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index a33a42ba0249..4ca7f6240d07 100644
--- a/drivers/usb/dwc3/dwc3-xilinx.c
+++ b/drivers/usb/dwc3/dwc3-xilinx.c
@@ -207,15 +207,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) skip_usb3_phy: /* ulpi reset via gpio-modepin or gpio-framework driver */ - reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset_gpio)) { return dev_err_probe(dev, PTR_ERR(reset_gpio), "Failed to request reset GPIO\n"); } if (reset_gpio) { - /* Toggle ulpi to reset the phy. */ - gpiod_set_value_cansleep(reset_gpio, 1); usleep_range(5000, 10000); gpiod_set_value_cansleep(reset_gpio, 0); usleep_range(5000, 10000);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 47e73c4ed62d..8c30d86cc4e3 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4617,6 +4617,12 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) if (!count) return IRQ_NONE; + if (count > evt->length) { + dev_err_ratelimited(dwc->dev, "invalid count(%u) > evt->length(%u)\n", + count, evt->length); + return IRQ_NONE; + } + evt->count = count; evt->flags |= DWC3_EVENT_PENDING;
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 900ea0d368e0..9f0a6b27e47c 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -165,6 +165,25 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_loongson(struct usb_hcd *hcd) +{ + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + /* + * Loongson's LS7A OHCI controller (rev 0x02) has a + * flaw. The MMIO registers at offsets 0x60/0x64 are treated + * as a legacy PS2-compatible keyboard/mouse interface. + * Since OHCI only uses a 4KB BAR resource, LS7A OHCI's + * 32KB BAR is wrapped around (the 2nd 4KB BAR space + * is the same as the 1st 4KB internally). So add a 4KB + * offset (0x1000) to the OHCI registers as a quirk.
+ */ + if (pdev->revision == 0x2) + hcd->regs += SZ_4K; /* SZ_4K = 0x1000 */ + + return 0; +} + static int ohci_quirk_qemu(struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci(hcd); @@ -225,6 +244,10 @@ static const struct pci_device_id ohci_pci_quirks[] = { .driver_data = (unsigned long)ohci_quirk_amd700, }, { + PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a24), + .driver_data = (unsigned long)ohci_quirk_loongson, + }, + { .vendor = PCI_VENDOR_ID_APPLE, .device = 0x003f, .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c0f226584a40..486347776cb2 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1878,9 +1878,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int max_ports, port_index; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1896,17 +1897,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1974,11 +1978,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5d64c297721c..b906bc2eea5f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -561,8 +561,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, * pointer command pending because the device can choose to start any * stream once the endpoint is on the HW schedule. 
*/ - if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED | - EP_CLEARING_TT | EP_STALLED)) + if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) || + (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT)) return; trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id)); @@ -2573,9 +2573,6 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET); return; - case COMP_STALL_ERROR: - ep->ep_state |= EP_STALLED; - break; default: /* do nothing */ break; @@ -2916,7 +2913,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (xhci_spurious_success_tx_event(xhci, ep_ring)) { xhci_dbg(xhci, "Spurious event dma %pad, comp_code %u after %u\n", &ep_trb_dma, trb_comp_code, ep_ring->old_trb_comp_code); - ep_ring->old_trb_comp_code = trb_comp_code; + ep_ring->old_trb_comp_code = 0; return 0; } @@ -3780,7 +3777,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, * enqueue a No Op TRB, this can prevent the Setup and Data Stage * TRB to be breaked by the Link TRB. */ - if (trb_is_link(ep_ring->enqueue + 1)) { + if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue + 1)) { field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state; queue_trb(xhci, ep_ring, false, 0, 0, TRB_INTR_TARGET(0), field); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0452b8d65832..90eb491267b5 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -322,7 +322,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } -static int xhci_enable_interrupter(struct xhci_interrupter *ir) +int xhci_enable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -335,7 +335,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir) return 0; } -static int xhci_disable_interrupter(struct xhci_interrupter *ir) +int xhci_disable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -1605,11 +1605,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto free_priv; } - /* Class driver might not be aware ep halted due to async URB giveback */ - if (*ep_state & EP_STALLED) - dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n", - urb); - switch (usb_endpoint_type(&urb->ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: @@ -1770,8 +1765,8 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) goto done; } - /* In these cases no commands are pending but the endpoint is stopped */ - if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) { + /* In this case no commands are pending but the endpoint is stopped */ + if (ep->ep_state & EP_CLEARING_TT) { /* and cancelled TDs can be given back right away */ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n", urb->dev->slot_id, ep_index, ep->ep_state); @@ -3209,11 +3204,8 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, ep = &vdev->eps[ep_index]; - spin_lock_irqsave(&xhci->lock, flags); - - ep->ep_state &= ~EP_STALLED; - /* Bail out if toggle is already being cleared by a endpoint reset */ + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; spin_unlock_irqrestore(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 37860f1e3aba..242ab9fbc8ae 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -664,7 +664,7 @@ struct xhci_virt_ep { unsigned int err_count; unsigned int 
ep_state; #define SET_DEQ_PENDING (1 << 0) -#define EP_HALTED (1 << 1) /* Halted host ep handling */ +#define EP_HALTED (1 << 1) /* For stall handling */ #define EP_STOP_CMD_PENDING (1 << 2) /* For URB cancellation */ /* Transitioning the endpoint to using streams, don't enqueue URBs */ #define EP_GETTING_STREAMS (1 << 3) @@ -675,7 +675,6 @@ struct xhci_virt_ep { #define EP_SOFT_CLEAR_TOGGLE (1 << 7) /* usb_hub_clear_tt_buffer is in progress */ #define EP_CLEARING_TT (1 << 8) -#define EP_STALLED (1 << 9) /* For stall handling */ /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; struct xhci_hcd *xhci; @@ -1891,6 +1890,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct usb_tt *tt, gfp_t mem_flags); int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, u32 imod_interval); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9b34e23b7091..6ac7a0a5cf07 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1093,6 +1093,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, + /* Abacus Electrics */ + { USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 52be47d684ea..9acb6f837327 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -443,6 +443,11 @@ #define LINX_FUTURE_2_PID 0xF44C /* Linx future device */ /* + * Abacus Electrics + */ +#define ABACUS_OPTICAL_PROBE_PID 0xf458 /* ABACUS ELECTRICS Optical Probe */ + +/* * Oceanic product ids */ #define FTDI_OCEANIC_PID 0xF460 /* Oceanic dive instrument */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5cd26dac2069..27879cc57536 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -611,6 +611,7 @@ static void option_instat_callback(struct urb *urb); /* Sierra Wireless products */ #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 +#define SIERRA_PRODUCT_EM9291 0x90e3 /* UNISOC (Spreadtrum) products */ #define UNISOC_VENDOR_ID 0x1782 @@ -2432,6 +2433,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2c12449ff60c..a0afaf254d12 100644 --- 
a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -100,6 +100,11 @@ DEVICE(nokia, NOKIA_IDS); { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ DEVICE_N(novatel_gps, NOVATEL_IDS, 3); +/* OWON electronic test and measurement equipment driver */ +#define OWON_IDS() \ + { USB_DEVICE(0x5345, 0x1234) } /* HDS200 oscilloscopes and others */ +DEVICE(owon, OWON_IDS); + /* Siemens USB/MPI adapter */ #define SIEMENS_IDS() \ { USB_DEVICE(0x908, 0x0004) } @@ -134,6 +139,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &motorola_tetra_device, &nokia_device, &novatel_gps_device, + &owon_device, &siemens_mpi_device, &suunto_device, &vivopay_device, @@ -153,6 +159,7 @@ static const struct usb_device_id id_table[] = { MOTOROLA_TETRA_IDS(), NOKIA_IDS(), NOVATEL_IDS(), + OWON_IDS(), SIEMENS_IDS(), SUUNTO_IDS(), VIVOPAY_IDS(), diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 1f8c9b16a0fb..d460d71b4257 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -83,6 +83,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), +/* Reported-by: Oliver Neukum <oneukum@suse.com> */ +UNUSUAL_DEV(0x125f, 0xa94a, 0x0160, 0x0160, + "ADATA", + "Portable HDD CH94", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9c76c3d0c6cf..67a533e35150 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1052,9 +1052,11 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->usb_mode = USB_MODE_USB3; } + mutex_lock(&port->partner_link_lock); ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); + mutex_unlock(&port->partner_link_lock); put_device(&partner->dev); return ERR_PTR(ret); } @@ -1063,6 +1065,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, typec_partner_link_device(partner, port->usb2_dev); if (port->usb3_dev) typec_partner_link_device(partner, port->usb3_dev); + mutex_unlock(&port->partner_link_lock); return partner; } @@ -1083,12 +1086,18 @@ void typec_unregister_partner(struct typec_partner *partner) port = to_typec_port(partner->dev.parent); - if (port->usb2_dev) + mutex_lock(&port->partner_link_lock); + if (port->usb2_dev) { typec_partner_unlink_device(partner, port->usb2_dev); - if (port->usb3_dev) + port->usb2_dev = NULL; + } + if (port->usb3_dev) { typec_partner_unlink_device(partner, port->usb3_dev); + port->usb3_dev = NULL; + } device_unregister(&partner->dev); + mutex_unlock(&port->partner_link_lock); } EXPORT_SYMBOL_GPL(typec_unregister_partner); @@ -2041,10 +2050,11 @@ static struct typec_partner *typec_get_partner(struct typec_port *port) static void typec_partner_attach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; struct usb_device *udev = to_usb_device(dev); enum usb_mode usb_mode; + mutex_lock(&port->partner_link_lock); if (udev->speed < USB_SPEED_SUPER) { usb_mode = USB_MODE_USB2; port->usb2_dev = dev; @@ -2053,18 +2063,22 @@ static void typec_partner_attach(struct typec_connector *con, struct device *dev 
port->usb3_dev = dev; } + partner = typec_get_partner(port); if (partner) { typec_partner_set_usb_mode(partner, usb_mode); typec_partner_link_device(partner, dev); put_device(&partner->dev); } + mutex_unlock(&port->partner_link_lock); } static void typec_partner_deattach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; + mutex_lock(&port->partner_link_lock); + partner = typec_get_partner(port); if (partner) { typec_partner_unlink_device(partner, dev); put_device(&partner->dev); @@ -2074,6 +2088,7 @@ static void typec_partner_deattach(struct typec_connector *con, struct device *d port->usb2_dev = NULL; else if (port->usb3_dev == dev) port->usb3_dev = NULL; + mutex_unlock(&port->partner_link_lock); } /** @@ -2614,6 +2629,7 @@ struct typec_port *typec_register_port(struct device *parent, ida_init(&port->mode_ids); mutex_init(&port->port_type_lock); + mutex_init(&port->partner_link_lock); port->id = id; port->ops = cap->ops; diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index b3076a24ad2e..db2fe96c48ff 100644 --- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -59,6 +59,7 @@ struct typec_port { enum typec_port_type port_type; enum usb_mode usb_mode; struct mutex port_type_lock; + struct mutex partner_link_lock; enum typec_orientation orientation; struct typec_switch *sw; diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 7c930ef77380..effafc3e0ced 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1425,6 +1425,8 @@ alloc_done: open_bucket_for_each(c, &wp->ptrs, ob, i) wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 69ec6a012898..4c1e33cf57c0 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); + ob_push(c, ob->sectors_free < block_sectors(c) + ? &ptrs + : &keep, ob); wp->ptrs = keep; mutex_unlock(&wp->lock); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a3db328dee31..d6e4a496f02b 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct bkey_i key; __u64 key ## _pad[pad] +enum bch_bkey_type_flags { + BKEY_TYPE_strict_btree_checks = BIT(0), +}; + /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. 
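The bcachefs hunk below adds a flags column to the BCH_BKEY_TYPES() x-macro, so each expansion site picks the columns it needs via #define x(name, nr, ...) or #define x(name, nr, flags). A compilable miniature of the pattern, with invented names rather than bcachefs code:

    #include <stdio.h>

    #define ITEM_FLAG_strict        (1 << 0)

    /* One list, several generated tables. */
    #define ITEMS()                         \
            x(alpha,  0, 0)                 \
            x(beta,   1, ITEM_FLAG_strict)  \
            x(gamma,  2, ITEM_FLAG_strict)

    enum item_type {
    #define x(name, nr, ...) ITEM_##name = nr,
            ITEMS()
    #undef x
            ITEM_MAX,
    };

    static const char * const item_names[] = {
    #define x(name, nr, ...) #name,
            ITEMS()
    #undef x
    };

    static const unsigned item_flags[] = {
    #define x(name, nr, flags) [ITEM_##name] = flags,
            ITEMS()
    #undef x
    };

    int main(void)
    {
            for (int i = 0; i < ITEM_MAX; i++)
                    printf("%s: flags=%u\n", item_names[i], item_flags[i]);
            return 0;
    }

Consumers that do not care about the new column simply swallow it with a variadic x(), which is why the existing enum and name-table expansions only needed their parameter lists widened.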
@@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k) * * - WHITEOUT: for hash table btrees */ -#define BCH_BKEY_TYPES() \ - x(deleted, 0) \ - x(whiteout, 1) \ - x(error, 2) \ - x(cookie, 3) \ - x(hash_whiteout, 4) \ - x(btree_ptr, 5) \ - x(extent, 6) \ - x(reservation, 7) \ - x(inode, 8) \ - x(inode_generation, 9) \ - x(dirent, 10) \ - x(xattr, 11) \ - x(alloc, 12) \ - x(quota, 13) \ - x(stripe, 14) \ - x(reflink_p, 15) \ - x(reflink_v, 16) \ - x(inline_data, 17) \ - x(btree_ptr_v2, 18) \ - x(indirect_inline_data, 19) \ - x(alloc_v2, 20) \ - x(subvolume, 21) \ - x(snapshot, 22) \ - x(inode_v2, 23) \ - x(alloc_v3, 24) \ - x(set, 25) \ - x(lru, 26) \ - x(alloc_v4, 27) \ - x(backpointer, 28) \ - x(inode_v3, 29) \ - x(bucket_gens, 30) \ - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ - x(accounting, 34) \ - x(inode_alloc_cursor, 35) +#define BCH_BKEY_TYPES() \ + x(deleted, 0, 0) \ + x(whiteout, 1, 0) \ + x(error, 2, 0) \ + x(cookie, 3, 0) \ + x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \ + x(extent, 6, BKEY_TYPE_strict_btree_checks) \ + x(reservation, 7, BKEY_TYPE_strict_btree_checks) \ + x(inode, 8, BKEY_TYPE_strict_btree_checks) \ + x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \ + x(dirent, 10, BKEY_TYPE_strict_btree_checks) \ + x(xattr, 11, BKEY_TYPE_strict_btree_checks) \ + x(alloc, 12, BKEY_TYPE_strict_btree_checks) \ + x(quota, 13, BKEY_TYPE_strict_btree_checks) \ + x(stripe, 14, BKEY_TYPE_strict_btree_checks) \ + x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \ + x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \ + x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \ + x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \ + x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \ + x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \ + x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \ + x(set, 25, 0) \ + x(lru, 26, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \ + x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \ + x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \ + x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \ + x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \ + x(accounting, 34, BKEY_TYPE_strict_btree_checks) \ + x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks) enum bch_bkey_type { -#define x(name, nr) KEY_TYPE_##name = nr, +#define x(name, nr, ...) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, @@ -863,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); +LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 15c93576b5c2..00d05ccfaf73 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -21,7 +21,7 @@ #include "xattr.h" const char * const bch2_bkey_types[] = { -#define x(name, nr) #name, +#define x(name, nr, ...) 
#name, BCH_BKEY_TYPES() #undef x NULL @@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ }) const struct bkey_ops bch2_bkey_ops[] = { -#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, +#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name, BCH_BKEY_TYPES() #undef x }; @@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = { #undef x }; +static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = { +#define x(name, nr, flags) [KEY_TYPE_##name] = flags, + BCH_BKEY_TYPES() +#undef x +}; + const char *bch2_btree_node_type_str(enum btree_node_type type) { return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); @@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && - (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && + enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX + ? bch2_bkey_type_flags[k.k->type] + : 0; + + bool strict_key_type_allowed = + (from.flags & BCH_VALIDATE_commit) || + type == BKEY_TYPE_btree || + (from.btree < BTREE_ID_NR && + (bkey_flags & BKEY_TYPE_strict_btree_checks)); + + bkey_fsck_err_on(strict_key_type_allowed && + k.k->type < KEY_TYPE_MAX && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7b98ba2dec64..37b69d89341f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -47,6 +47,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns true if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -534,8 +555,10 @@ reconstruct_root: r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? 
FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index e34e9598ef25..59fa527ac685 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct struct bpos end) { if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && - !bkey_eq(iter->pos, POS_MAX)) { + !bkey_eq(iter->pos, POS_MAX) && + !((iter->flags & BTREE_ITER_is_extents) && + iter->pos.offset == U64_MAX)) { + /* * bkey_start_pos(), for extents, is not monotonically * increasing until after filtering for snapshots: @@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode); int ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 7d6c971db23c..ade3b5addd75 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 44b5fe430370..00307356d7c8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1389,7 +1389,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1411,7 +1411,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1421,11 +1422,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1559,11 +1564,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - 
btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1573,8 +1578,12 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1607,8 +1616,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1654,7 +1665,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1662,7 +1675,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1809,15 +1824,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4ef261e8db4f..31fbc2716d8b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -604,6 +604,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); @@ -1307,13 +1314,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } 
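/*
 * A minimal sketch of the resize-copy pattern used above (identifiers
 * here are illustrative, not from the patch): bound the copy by the
 * smaller of the old and new element counts, scale by element size,
 * and leave the new array's advertised capacity alone:
 *
 *	size_t nr = min(new_nr, old_nr);
 *	memcpy(new_tbl, old_tbl, nr * sizeof(new_tbl[0]));
 *
 * The removed lines instead clamped bucket_gens->nbuckets to the old
 * size, so growing the device would leave the new array claiming fewer
 * buckets than were actually allocated.
 */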
rcu_assign_pointer(ca->bucket_gens, bucket_gens); diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 8488a7578115..8a680e52c1ed 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,8 +13,8 @@ #include <linux/dcache.h> -static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) +int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); @@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info * #endif } -static inline int bch2_maybe_casefold(struct btree_trans *trans, - const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) -{ - if (likely(!info->cf_encoding)) { - *out_cf = *str; - return 0; - } else { - return bch2_casefold(trans, info, str, out_cf); - } -} - static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) @@ -697,7 +685,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv vfs_d_type(d.v->d_type)); if (ret) ctx->pos = d.k->p.offset + 1; - return ret; + return !ret; } int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) @@ -722,7 +710,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) if (ret2 > 0) continue; - ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target)); + ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); }))); bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 0880772b80a9..9838a7ba7ed1 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,6 +23,21 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, + const struct qstr *, struct qstr *); + +static inline int bch2_maybe_casefold(struct btree_trans *trans, + const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + if (likely(!info->cf_encoding)) { + *out_cf = *str; + return 0; + } else { + return bch2_casefold(trans, info, str, out_cf); + } +} + struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1186280b29e9..2ca3cbf12b71 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -470,23 +470,22 @@ inval: int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - struct bch_member *mi; - int ret, v = -1; + lockdep_assert_held(&c->sb_lock); - if (!strlen(name) || !strcmp(name, "none")) - return 0; - v = bch2_disk_path_find_or_create(&c->disk_sb, name); - if (v < 0) - return v; + if (!strlen(name) || !strcmp(name, "none")) { + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, 0); + } else { + int v = bch2_disk_path_find_or_create(&c->disk_sb, name); + if (v < 0) + return v; - ret = bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, v + 1); + } - mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_GROUP(mi, v + 1); - return 0; + return bch2_sb_disk_groups_to_cpu(c); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev 
*ca, const char *name) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a396865e8b17..fff58b78327c 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2204,10 +2204,10 @@ void bch2_fs_ec_stop(struct bch_fs *c) static bool bch2_fs_ec_flush_done(struct bch_fs *c) { - bool ret; + sched_annotate_sleep(); mutex_lock(&c->ec_stripe_new_lock); - ret = list_empty(&c->ec_stripe_new_list); + bool ret = list_empty(&c->ec_stripe_new_list); mutex_unlock(&c->ec_stripe_new_lock); return ret; diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index baf5dfb32298..6b8695b1349c 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, { struct fsck_err_state *s; - if (!test_bit(BCH_FS_fsck_running, &c->flags)) - return NULL; - list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->id == id) { /* @@ -481,7 +478,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; @@ -639,14 +638,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return ret; } -void bch2_flush_fsck_errs(struct bch_fs *c) +static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; mutex_lock(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { - if (s->ratelimited && s->last_msg) + if (print && s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); @@ -657,6 +656,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_unlock(&c->fsck_error_msgs_lock); } +void bch2_flush_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, true); +} + +void bch2_free_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, false); +} + int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum, u64 offset) { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index d0d024dc714b..4a364fd44abe 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, _flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__) void bch2_flush_fsck_errs(struct bch_fs *); +void bch2_free_fsck_errs(struct bch_fs *); #define fsck_err_wrap(_do) \ ({ \ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 65c2c33d253d..9657144666b8 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -144,10 +144,25 @@ int __must_check bch2_write_inode_size(struct bch_fs *c, void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); + if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + 
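/*
 * Added commentary, not part of the original patch: the old
 * bch2_fs_inconsistent_on() assertion is replaced by counting the
 * underflow as the vfs_inode_i_blocks_underflow fsck error (marked
 * FSCK_AUTOFIX in sb-errors_format.h further down) and clamping the
 * in-memory update so i_blocks lands on 0 instead of wrapping:
 *
 *	if (sectors < 0)
 *		sectors = -inode->v.i_blocks;
 *	else
 *		sectors = 0;
 */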
bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (sectors < 0) + sectors = -inode->v.i_blocks; + else + sectors = 0; + } + inode->v.i_blocks += sectors; #ifdef CONFIG_BCACHEFS_QUOTA @@ -502,11 +517,22 @@ int bchfs_truncate(struct mnt_idmap *idmap, goto err; } - bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal), c, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, - inode->ei_inode.bi_sectors); + if (unlikely(!inode->v.i_size && inode->v.i_blocks && + !bch2_journal_error(&c->journal))) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + } ret = bch2_setattr_nonsize(idmap, inode, iattr); err: diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 14886e1d4d6d..a82dfce9e4ad 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -21,206 +21,6 @@ #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ -struct flags_set { - unsigned mask; - unsigned flags; - - unsigned projid; - - bool set_projinherit; - bool projinherit; -}; - -static int bch2_inode_flags_set(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - /* - * We're relying on btree locking here for exclusion with other ioctl - * calls - use the flags in the btree (@bi), not inode->i_flags: - */ - struct flags_set *s = p; - unsigned newflags = s->flags; - unsigned oldflags = bi->bi_flags & s->mask; - - if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - if (!S_ISREG(bi->bi_mode) && - !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) - return -EINVAL; - - if ((newflags ^ oldflags) & BCH_INODE_casefolded) { -#ifdef CONFIG_UNICODE - int ret = 0; - /* Not supported on individual files. */ - if (!S_ISDIR(bi->bi_mode)) - return -EOPNOTSUPP; - - /* - * Make sure the dir is empty, as otherwise we'd need to - * rehash everything and update the dirent keys. 
- */ - ret = bch2_empty_dir_trans(trans, inode_inum(inode)); - if (ret < 0) - return ret; - - ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); - if (ret) - return ret; - - bch2_check_set_feature(c, BCH_FEATURE_casefolding); -#else - printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); - return -EOPNOTSUPP; -#endif - } - - if (s->set_projinherit) { - bi->bi_fields_set &= ~(1 << Inode_opt_project); - bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); - } - - bi->bi_flags &= ~s->mask; - bi->bi_flags |= newflags; - - bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); - return 0; -} - -static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -{ - unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); - - return put_user(flags, arg); -} - -static int bch2_ioc_setflags(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - void __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; - unsigned uflags; - int ret; - - if (get_user(uflags, (int __user *) arg)) - return -EFAULT; - - s.flags = map_flags_rev(bch_flags_to_uflags, uflags); - if (uflags) - return -EOPNOTSUPP; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto setflags_out; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_write_inode(c, inode, bch2_inode_flags_set, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - -setflags_out: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - -static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct fsxattr fa = { 0 }; - - fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); - - if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) - fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; - - fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - - return 0; -} - -static int fssetxattr_inode_update_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct flags_set *s = p; - - if (s->projid != bi->bi_project) { - bi->bi_fields_set |= 1U << Inode_opt_project; - bi->bi_project = s->projid; - } - - return bch2_inode_flags_set(trans, inode, bi, p); -} - -static int bch2_ioc_fssetxattr(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; - struct fsxattr fa; - int ret; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - s.set_projinherit = true; - s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; - fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; - - s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); - if (fa.fsx_xflags) - return -EOPNOTSUPP; - - if (fa.fsx_projid >= U32_MAX) - return -EINVAL; - - /* - * inode fields accessible via the xattr interface are stored with a +1 - * bias, so that 0 means unset: - */ - s.projid = fa.fsx_projid + 1; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto err; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, 
inode->ei_inum.subvol) ?: - bch2_set_projid(c, inode, fa.fsx_projid) ?: - bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); -err: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - static int bch2_reinherit_attrs_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) long ret; switch (cmd) { - case FS_IOC_GETFLAGS: - ret = bch2_ioc_getflags(inode, (int __user *) arg); - break; - - case FS_IOC_SETFLAGS: - ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); - break; - - case FS_IOC_FSGETXATTR: - ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); - break; - - case FS_IOC_FSSETXATTR: - ret = bch2_ioc_fssetxattr(c, file, inode, - (void __user *) arg); - break; - case BCHFS_IOC_REINHERIT_ATTRS: ret = bch2_ioc_reinherit_attrs(c, file, inode, (void __user *) arg); diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index ecd3bfdcde21..a657e4994b71 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -2,81 +2,6 @@ #ifndef _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H -/* Inode flags: */ - -/* bcachefs inode flags -> vfs inode flags: */ -static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_sync] = S_SYNC, - [__BCH_INODE_immutable] = S_IMMUTABLE, - [__BCH_INODE_append] = S_APPEND, - [__BCH_INODE_noatime] = S_NOATIME, - [__BCH_INODE_casefolded] = S_CASEFOLD, -}; - -/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_sync] = FS_SYNC_FL, - [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, - [__BCH_INODE_append] = FS_APPEND_FL, - [__BCH_INODE_nodump] = FS_NODUMP_FL, - [__BCH_INODE_noatime] = FS_NOATIME_FL, - [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, -}; - -/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_sync] = FS_XFLAG_SYNC, - [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_append] = FS_XFLAG_APPEND, - [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, - [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, - //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -}; - -#define set_flags(_map, _in, _out) \ -do { \ - unsigned _i; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & (1 << _i)) \ - (_out) |= _map[_i]; \ - else \ - (_out) &= ~_map[_i]; \ -} while (0) - -#define map_flags(_map, _in) \ -({ \ - unsigned _out = 0; \ - \ - set_flags(_map, _in, _out); \ - _out; \ -}) - -#define map_flags_rev(_map, _in) \ -({ \ - unsigned _i, _out = 0; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & _map[_i]) { \ - (_out) |= 1 << _i; \ - (_in) &= ~_map[_i]; \ - } \ - (_out); \ -}) - -#define map_defined(_map) \ -({ \ - unsigned _in = ~0; \ - \ - map_flags_rev(_map, _in); \ -}) - -/* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -{ - set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -} - long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5a41b1a8e54f..113db85b6ef9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -33,6 +33,7 @@ #include <linux/backing-dev.h> #include <linux/exportfs.h> #include <linux/fiemap.h> +#include <linux/fileattr.h> 
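/*
 * Hedged note on the include added above (based on the generic VFS
 * fileattr API, not spelled out in this patch): <linux/fileattr.h>
 * provides struct fileattr and fileattr_fill_xflags(), which back the
 * ->fileattr_get()/->fileattr_set() inode operations added below; with
 * those hooks in place the VFS services FS_IOC_GETFLAGS/SETFLAGS and
 * FS_IOC_FSGETXATTR/FSSETXATTR itself, which is why the hand-rolled
 * ioctl handlers are deleted from fs-ioctl.c above.
 */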
#include <linux/fs_context.h> #include <linux/module.h> #include <linux/pagemap.h> @@ -51,6 +52,24 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) +{ + static const __maybe_unused unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + }; + + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); + + if (bch2_inode_casefold(c, &inode->ei_inode)) + inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; +} + void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -79,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -631,13 +650,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); @@ -825,6 +849,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) + d_invalidate(dentry); err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -1235,10 +1262,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1291,110 +1328,223 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - u64 start, u64 len) +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. 
+ */ +static int +bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, + bool nonblock) { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; - bool have_extent = false; - int ret = 0; + loff_t dstart, dend; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); - if (ret) + dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); + if (dstart < 0) + return dstart; + + if (dstart == *end) { + *start = dstart; + return 0; + } + + dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); + if (dend < 0) + return dend; + + /* race */ + BUG_ON(dstart == dend); + + *start = dstart; + *end = dend; + return 0; +} + +/* + * Scan a range of pagecache that corresponds to a file mapping hole in the + * extent btree. If data is found, fake up an extent key so it looks like a + * delalloc extent to the rest of the fiemap processing code. + */ +static int +bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode, + u64 start, u64 end, struct bch_fiemap_extent *cur) +{ + struct bch_fs *c = trans->c; + struct bkey_i_extent *delextent; + struct bch_extent_ptr ptr = {}; + loff_t dstart = start << 9, dend = end << 9; + int ret; + + /* + * We hold btree locks here so we cannot block on folio locks without + * dropping trans locks first. Run a nonblocking scan for the common + * case of no folios over holes and fall back on failure. + * + * Note that dropping locks like this is technically racy against + * writeback inserting to the extent tree, but a non-sync fiemap scan is + * fundamentally racy with writeback anyway. Therefore, just report the + * range as delalloc regardless of whether we have to cycle trans locks. + */ + ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true); + if (ret == -EAGAIN) + ret = drop_locks_do(trans, + bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false)); + if (ret < 0) return ret; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); - if (start + len < start) - return -EINVAL; + /* + * Create a fake extent key in the buffer. We have to add a dummy extent + * pointer for the fill code to add an extent entry. It's explicitly + * zeroed to reflect delayed allocation (i.e. phys offset 0). 
+ */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; + bch2_bkey_append_ptr(&delextent->k_i, ptr); - start >>= 9; + cur->flags = FIEMAP_EXTENT_DELALLOC; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); - trans = bch2_trans_get(c); + return 0; +} + +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + SPOS(inode->ei_inum.inum, start, snapshot), 0); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; - bch2_trans_begin(trans); + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur); + if (ret) + goto err; - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); - if (ret) - continue; + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. 
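 *
 * (Added reading of the logic below, not part of the original comment:
 * the btree key takes precedence whenever it starts at or before the
 * synthesized pagecache extent - the bkey_le() check - and otherwise
 * the delalloc key is cut back to where the btree key begins.)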
+ */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } - k = bch2_btree_iter_peek_max(trans, &iter, end); - ret = bkey_err(k); + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) - continue; + goto err; - if (!k.k) - break; + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(trans, &iter); - continue; - } + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_trans *trans; + struct bch_fiemap_extent cur, prev; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, 0); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + start >>= 9; + u64 end = (start + len) >> 9; - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); + bkey_init(&prev.kbuf.k->k); - bch2_bkey_buf_reassemble(&cur, c, k); + trans = bch2_trans_get(c); - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + while (start < end) { + ret = lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; + goto err; - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + if (bkey_start_offset(&cur.kbuf.k->k) == end) + break; - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.k, cur.k); - have_extent = true; - - bch2_btree_iter_set_pos(trans, &iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); + prev.flags = cur.flags; } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret < 0 ? 
ret : 0; + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); + + return bch2_err_class(ret < 0 ? ret : 0); } static const struct vm_operations_struct bch_vm_ops = { @@ -1449,6 +1599,165 @@ static int bch2_open(struct inode *vinode, struct file *file) return generic_file_open(vinode, file); } +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const __maybe_unused unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const __maybe_unused unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, +}; + +static int bch2_fileattr_get(struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); + + if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) + fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + + if (bch2_inode_casefold(c, &inode->ei_inode)) + fa->flags |= FS_CASEFOLD_FL; + + fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; + return 0; +} + +struct flags_set { + unsigned mask; + unsigned flags; + unsigned projid; + bool set_project; + bool set_casefold; + bool casefold; +}; + +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = trans->c; + struct flags_set *s = p; + + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && + (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) + return -EINVAL; + + if (s->casefold != bch2_inode_casefold(c, bi)) { +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. 
+ */ + ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = s->casefold + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + +#else + printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); + return -EOPNOTSUPP; +#endif + } + + if (s->set_project) { + bi->bi_project = s->projid; + bi->bi_fields_set |= BIT(Inode_opt_project); + } + + bi->bi_flags &= ~s->mask; + bi->bi_flags |= s->flags; + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); + return 0; +} + +static int bch2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct flags_set s = {}; + int ret; + + if (fa->fsx_valid) { + fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT; + + s.mask = map_defined(bch_flags_to_xflags); + s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); + if (fa->fsx_xflags) + return -EOPNOTSUPP; + + if (fa->fsx_projid >= U32_MAX) + return -EINVAL; + + /* + * inode fields accessible via the xattr interface are stored with a +1 + * bias, so that 0 means unset: + */ + if ((inode->ei_inode.bi_project || + fa->fsx_projid) && + inode->ei_inode.bi_project != fa->fsx_projid + 1) { + s.projid = fa->fsx_projid + 1; + s.set_project = true; + } + } + + if (fa->flags_valid) { + s.mask = map_defined(bch_flags_to_uflags); + + s.set_casefold = true; + s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0; + fa->flags &= ~FS_CASEFOLD_FL; + + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); + if (fa->flags) + return -EOPNOTSUPP; + } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + (s.set_project + ? 
bch2_set_projid(c, inode, fa->fsx_projid) + : 0) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ATTR_CTIME); + mutex_unlock(&inode->ei_update_lock); + return ret; +} + static const struct file_operations bch_file_operations = { .open = bch2_open, .llseek = bch2_llseek, @@ -1476,6 +1785,8 @@ static const struct inode_operations bch_file_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_dir_inode_operations = { @@ -1496,6 +1807,8 @@ static const struct inode_operations bch_dir_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct file_operations bch_dir_file_operations = { @@ -1518,6 +1831,8 @@ static const struct inode_operations bch_symlink_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_special_inode_operations = { @@ -1528,6 +1843,8 @@ static const struct inode_operations bch_special_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct address_space_operations bch_address_space_operations = { @@ -2254,6 +2571,11 @@ got_sb: if (ret) goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); bch_err_msg(c, ret, "mounting: error getting root inode"); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f82cfbf460d0..c74af15b14f2 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k) } } +static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi) +{ + /* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */ + return bi->bi_casefold + ? bi->bi_casefold - 1 + : c->opts.casefold; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 117110af1e3f..87e193e8ed25 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -103,7 +103,8 @@ struct bch_inode_generation { x(bi_parent_subvol, 32) \ x(bi_nocow, 8) \ x(bi_depth, 32) \ - x(bi_inodes_32bit, 8) + x(bi_inodes_32bit, 8) \ + x(bi_casefold, 8) /* subset of BCH_INODE_FIELDS */ #define BCH_INODE_OPTS() \ @@ -117,7 +118,8 @@ struct bch_inode_generation { x(background_target, 16) \ x(erasure_code, 16) \ x(nocow, 8) \ - x(inodes_32bit, 8) + x(inodes_32bit, 8) \ + x(casefold, 8) enum inode_opt_id { #define x(name, ...) 
\ @@ -137,8 +139,7 @@ enum inode_opt_id { x(i_sectors_dirty, 6) \ x(unlinked, 7) \ x(backptr_untrusted, 8) \ - x(has_child_snapshot, 9) \ - x(casefolded, 10) + x(has_child_snapshot, 9) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index a418fa62f09d..c1237da079ed 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -255,6 +255,27 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, } if (i_sectors_delta) { + s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); + if (unlikely(bi_sectors + i_sectors_delta < 0)) { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + extent_iter->pos.inode, bi_sectors, i_sectors_delta); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (i_sectors_delta < 0) + i_sectors_delta = -bi_sectors; + else + i_sectors_delta = 0; + } + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); inode_update_flags = 0; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d8f74b6d0a75..bb45d3634194 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; - BUG_ON(sectors > buf->sectors); + if (unlikely(sectors > buf->sectors)) { + struct printbuf err = PRINTBUF; + err.atomic++; + + prt_printf(&err, "journal entry overran reserved space: %u > %u\n", + sectors, buf->sectors); + prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n", + le32_to_cpu(buf->data->u64s), buf->u64s_reserved, + j->cur_entry_u64s, + c->block_bits); + prt_printf(&err, "fatal error - emergency read only"); + bch2_journal_halt_locked(j); + + bch_err(c, "%s", err.buf); + printbuf_exit(&err); + return; + } + buf->sectors = sectors; /* @@ -1462,8 +1479,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); - - set_bit(JOURNAL_running, &j->flags); j->last_flush_write = jiffies; j->reservations.idx = journal_cur_seq(j); @@ -1474,6 +1489,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return 0; } +void bch2_journal_set_replay_done(struct journal *j) +{ + /* + * journal_space_available must happen before setting JOURNAL_running + * JOURNAL_running must happen before JOURNAL_replay_done + */ + spin_lock(&j->lock); + bch2_journal_space_available(j); + + set_bit(JOURNAL_need_flush_write, &j->flags); + set_bit(JOURNAL_running, &j->flags); + set_bit(JOURNAL_replay_done, &j->flags); + spin_unlock(&j->lock); +} + /* init/exit: */ void bch2_dev_journal_exit(struct bch_dev *ca) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 47828771f9c2..641e20c05a14 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j) struct bch_dev; -static inline void bch2_journal_set_replay_done(struct journal *j) -{ - BUG_ON(!test_bit(JOURNAL_running, &j->flags)); - set_bit(JOURNAL_replay_done, &j->flags); -} - void bch2_journal_unblock(struct journal *); void bch2_journal_block(struct journal *); struct journal_buf 
*bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); @@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); int bch2_fs_journal_start(struct journal *, u64); +void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2a54ac79189b..63cdf885c9e2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1782,7 +1782,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device %u for journal write", ptr->dev); continue; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 5d1547aa118a..ea670c3c43d8 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j) bch2_journal_set_watermark(j); out: - j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; + j->cur_entry_sectors = !ret + ? round_down(j->space[journal_space_discarded].next_entry, + block_sectors(c)) + : 0; j->cur_entry_error = ret; if (!ret) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 159410c50861..96873372b516 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h index ea181fef5bc9..d1885cf67a45 100644 --- a/fs/bcachefs/movinggc.h +++ b/fs/bcachefs/movinggc.h @@ -5,6 +5,15 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); +static inline void bch2_copygc_wakeup(struct bch_fs *c) +{ + rcu_read_lock(); + struct task_struct *p = rcu_dereference(c->copygc_thread); + if (p) + wake_up_process(p); + rcu_read_unlock(); +} + void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 0d65ea96f7a2..52c58c6d53d2 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - /* Inherit casefold state from parent. 
*/ - if (S_ISDIR(mode)) - new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded; - if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); @@ -347,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 4d06313076ff..dfb14810124c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -228,6 +228,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_ERASURE_CODE, false, \ NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(casefold, u8, \ + OPT_FS|OPT_INODE|OPT_FORMAT, \ + OPT_BOOL(), \ + BCH_SB_CASEFOLD, false, \ + NULL, "Dirent lookups are casefolded") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index c63fa53f30d2..4ccdfc1f34aa 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -262,7 +262,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, writepoint_ptr(&c->rebalance_write_point), true); @@ -664,7 +671,7 @@ void bch2_rebalance_stop(struct bch_fs *c) c->rebalance.thread = NULL; if (p) { - /* for sychronizing with rebalance_wakeup() */ + /* for synchronizing with bch2_rebalance_wakeup() */ synchronize_rcu(); kthread_stop(p); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 62a3859d3823..e5e8eb4a2dd1 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); int bch2_set_fs_needs_rebalance(struct bch_fs *); -static inline void rebalance_wakeup(struct bch_fs *c) +static inline void bch2_rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 606d684e6f23..d6c4ef819d40 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -18,6 +18,7 @@ #include "journal_seq_blacklist.h" #include "logged_ops.h" #include "move.h" +#include "movinggc.h" #include "namei.h" #include "quota.h" #include "rebalance.h" @@ -1129,13 +1130,13 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); - ret = bch2_fs_read_write_early(c); if (ret) goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_journal_set_replay_done(&c->journal); + for_each_member_device(c, ca) { ret = bch2_dev_usage_init(ca, false); if (ret) { @@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c) c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 
1; + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 593ff142530d..22f72bb5b853 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -12,6 +12,7 @@ #include "journal.h" #include "lru.h" #include "logged_ops.h" +#include "movinggc.h" #include "rebalance.h" #include "recovery.h" #include "recovery_passes.h" @@ -262,49 +263,52 @@ int bch2_run_recovery_passes(struct bch_fs *c) */ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { - c->next_recovery_pass = c->curr_recovery_pass + 1; + spin_lock_irq(&c->recovery_pass_lock); - spin_lock_irq(&c->recovery_pass_lock); + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { + unsigned prev_done = c->recovery_pass_done; unsigned pass = c->curr_recovery_pass; + c->next_recovery_pass = pass + 1; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) { - spin_unlock_irq(&c->recovery_pass_lock); + c->curr_recovery_pass > c->opts.recovery_pass_last) break; - } - if (!should_run_recovery_pass(c, pass)) { - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, pass); + if (should_run_recovery_pass(c, pass)) { spin_unlock_irq(&c->recovery_pass_lock); - continue; - } - spin_unlock_irq(&c->recovery_pass_lock); - - ret = bch2_run_recovery_pass(c, pass) ?: - bch2_journal_flush(&c->journal); - - if (!ret && !test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, pass); - - spin_lock_irq(&c->recovery_pass_lock); - if (c->next_recovery_pass < c->curr_recovery_pass) { - /* - * bch2_run_explicit_recovery_pass() was called: we - * can't always catch -BCH_ERR_restart_recovery because - * it may have been called from another thread (btree - * node read completion) - */ - ret = 0; - c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); - } else { - c->recovery_passes_complete |= BIT_ULL(pass); - c->recovery_pass_done = max(c->recovery_pass_done, pass); + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + if (!ret && !test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); + spin_lock_irq(&c->recovery_pass_lock); + + if (c->next_recovery_pass < c->curr_recovery_pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + ret = 0; + c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); + } else { + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); + } } + c->curr_recovery_pass = c->next_recovery_pass; - spin_unlock_irq(&c->recovery_pass_lock); + + if (prev_done <= BCH_RECOVERY_PASS_check_snapshots && + c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) { + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + } } + spin_unlock_irq(&c->recovery_pass_lock); + return ret; } diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index acb5d845841e..badd0e17ada5 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) 
*/ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index dc53d25c7cbb..3b69a924086f 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ @@ -205,9 +205,9 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ @@ -236,6 +236,9 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -317,7 +320,9 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ + x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ + x(MAX, 314, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815..72779912939b 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,9 +15,11 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) 
[n] = #t, diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 06bb41a3f360..42786657522c 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -249,20 +249,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b7de29aed839..fec569c7deb1 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) u32 subvol = 0, s; rcu_read_lock(); - while (id) { + while (id && bch2_snapshot_exists(c, id)) { s = snapshot_t(c, id)->subvol; if (s && (!subvol || s < subvol)) diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 09a354a26c3b..0c1a00539bd1 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; - struct unicode_map *cf_encoding; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as the key. @@ -44,11 +44,10 @@ struct bch_hash_info { static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { - /* XXX ick */ struct bch_hash_info info = { .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? 
c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 5537283d0bea..d0209f7658bb 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -44,8 +45,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 25b6bce05c3c..cb5d960aed92 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1102,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); - return -BCH_ERR_sb_not_downgraded; + ret = -BCH_ERR_sb_not_downgraded; + goto out; } darray_for_each(online_devices, ca) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e8a17ed1615d..27943082c093 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) return ret; } -static int bch2_fs_read_write_late(struct bch_fs *c) -{ - int ret; - - /* - * Data move operations can't run until after check_snapshots has - * completed, and bch2_snapshot_is_ancestor() is available. - * - * Ideally we'd start copygc/rebalance earlier instead of waiting for - * all of recovery/fsck to complete: - */ - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - - ret = bch2_rebalance_start(c); - if (ret) { - bch_err(c, "error starting rebalance thread"); - return ret; - } - - return 0; -} - static int __bch2_fs_read_write(struct bch_fs *c, bool early) { int ret; @@ -466,29 +440,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) clear_bit(BCH_FS_clean_shutdown, &c->flags); + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { + bch2_dev_allocator_add(c, ca); + percpu_ref_reinit(&ca->io_ref[WRITE]); + } + bch2_recalc_capacity(c); + /* * First journal write must be a flush write: after a clean shutdown we * don't read the journal, so the first journal write may end up * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ + spin_lock(&c->journal.lock); set_bit(JOURNAL_need_flush_write, &c->journal.flags); set_bit(JOURNAL_running, &c->journal.flags); - - __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { - bch2_dev_allocator_add(c, ca); - percpu_ref_reinit(&ca->io_ref[WRITE]); - } - bch2_recalc_capacity(c); + bch2_journal_space_available(&c->journal); + spin_unlock(&c->journal.lock); ret = bch2_fs_mark_dirty(c); if (ret) goto err; - spin_lock(&c->journal.lock); - bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); - ret = bch2_journal_reclaim_start(&c->journal); if (ret) goto err; @@ -504,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) atomic_long_inc(&c->writes[i]); } #endif - if (!early) { - ret = bch2_fs_read_write_late(c); - if (ret) - goto err; + + ret = bch2_copygc_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting copygc thread"); + goto 
err; + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting rebalance thread"); + goto err; } bch2_do_discards(c); @@ -551,8 +531,13 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_free_fsck_errs(c); bch2_fs_accounting_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); @@ -842,25 +827,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - pr_uuid(&name, c->sb.user_uuid.b); ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0; if (ret) @@ -960,6 +926,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. 
Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; @@ -1023,6 +1012,40 @@ static void print_mount_opts(struct bch_fs *c) printbuf_exit(&p); } +static bool bch2_fs_may_start(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i, flags = 0; + + if (c->opts.very_degraded) + flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + + if (c->opts.degraded) + flags |= BCH_FORCE_IF_DEGRADED; + + if (!c->opts.degraded && + !c->opts.very_degraded) { + mutex_lock(&c->sb_lock); + + for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + + ca = bch2_dev_locked(c, i); + + if (!bch2_dev_is_online(ca) && + (ca->mi.state == BCH_MEMBER_STATE_rw || + ca->mi.state == BCH_MEMBER_STATE_ro)) { + mutex_unlock(&c->sb_lock); + return false; + } + } + mutex_unlock(&c->sb_lock); + } + + return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); +} + int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); @@ -1030,6 +1053,9 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); + if (!bch2_fs_may_start(c)) + return -BCH_ERR_insufficient_devices_to_start; + down_write(&c->state_lock); mutex_lock(&c->sb_lock); @@ -1082,13 +1108,10 @@ int bch2_fs_start(struct bch_fs *c) wake_up(&c->ro_ref_wait); down_write(&c->state_lock); - if (c->opts.read_only) { + if (c->opts.read_only) bch2_fs_read_only(c); - } else { - ret = !test_bit(BCH_FS_rw, &c->flags) - ? 
bch2_fs_read_write(c) - : bch2_fs_read_write_late(c); - } + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); up_write(&c->state_lock); err: @@ -1500,7 +1523,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) printbuf_exit(&name); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return 0; } @@ -1559,40 +1582,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, } } -static bool bch2_fs_may_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i, flags = 0; - - if (c->opts.very_degraded) - flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; - - if (c->opts.degraded) - flags |= BCH_FORCE_IF_DEGRADED; - - if (!c->opts.degraded && - !c->opts.very_degraded) { - mutex_lock(&c->sb_lock); - - for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_member_exists(c->disk_sb.sb, i)) - continue; - - ca = bch2_dev_locked(c, i); - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); - return false; - } - } - mutex_unlock(&c->sb_lock); - } - - return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); -} - static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { bch2_dev_io_ref_stop(ca, WRITE); @@ -1646,7 +1635,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -2228,11 +2217,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - if (!bch2_fs_may_start(c)) { - ret = -BCH_ERR_insufficient_devices_to_start; - goto err_print; - } - if (!c->opts.nostart) { ret = bch2_fs_start(c); if (ret) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e5f003c29369..82ee333ddd21 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, bch2_set_rebalance_needs_scan(c, 0); if (v && id == Opt_rebalance_enabled) - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); - if (v && id == Opt_copygc_enabled && - c->copygc_thread) - wake_up_process(c->copygc_thread); + if (v && id == Opt_copygc_enabled) + bch2_copygc_wakeup(c); if (id == Opt_discard && !ca) { mutex_lock(&c->sb_lock); diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index c265b102267a..782a05fe7656 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static int test_peek_end(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; @@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6ba5071ab6dd..3e52c7f8ddd2 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len) *--dst = *src++; } +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +#define map_flags(_map, _in) \ +({ \ + unsigned _out = 0; \ + \ + set_flags(_map, 
_in, _out); \ _out; \ }) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define map_defined(_map) \ +({ \ + unsigned _in = ~0; \ + \ + map_flags_rev(_map, _in); \ +}) + #endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h index c7916011ef34..67426e33d04e 100644 --- a/fs/bcachefs/xattr_format.h +++ b/fs/bcachefs/xattr_format.h @@ -13,7 +13,13 @@ struct bch_xattr { __u8 x_type; __u8 x_name_len; __le16 x_val_len; - __u8 x_name[] __counted_by(x_name_len); + /* + * x_name contains the name and value counted by + * x_name_len + x_val_len. The introduction of + * __counted_by(x_name_len) caused a false positive + * detection of an out of bounds write. + */ + __u8 x_name[]; } __packed __aligned(8); #endif /* _BCACHEFS_XATTR_FORMAT_H */ diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 197f5e51c474..8515c31f563b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2047,7 +2047,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc) subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock(&folio->mapping->i_private_lock); - bit_start++; + bit_start += sectors_per_node; continue; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cc67d1a2d611..bdafe4d4c4a5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2129,12 +2129,13 @@ next_slot: /* * If the found extent starts after requested offset, then - * adjust extent_end to be right before this extent begins + * adjust cur_offset to be right before this extent begins. */ if (found_key.offset > cur_offset) { - extent_end = found_key.offset; - extent_type = 0; - goto must_cow; + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = found_key.offset; + goto next_slot; } /* @@ -5681,8 +5682,10 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) return inode; path = btrfs_alloc_path(); - if (!path) + if (!path) { + iget_failed(&inode->vfs_inode); return ERR_PTR(-ENOMEM); + } ret = btrfs_read_locked_inode(inode, path); btrfs_free_path(path); diff --git a/fs/buffer.c b/fs/buffer.c index c7abb4a029dc..7be23ff20b27 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_write_sync); -/* - * Various filesystems appear to want __find_get_block to be non-blocking. - * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the blockdev mapping's - * i_private_lock. - * - * Hack idea: for the blockdev mapping, i_private_lock contention - * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_private_lock. - */ static struct buffer_head * -__find_get_block_slow(struct block_device *bdev, sector_t block) +__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; @@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (IS_ERR(folio)) goto out; - spin_lock(&bd_mapping->i_private_lock); + /* + * Folio lock protects the buffers. Callers that cannot block + * will fall back to serializing vs try_to_free_buffers() via + * the i_private_lock.
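+ * + * A minimal usage sketch (the call sites are illustrative; only the helper names are the ones introduced by this change): sleepable contexts take the folio lock via + * + * bh = __find_get_block_nonatomic(bdev, block, size); + * + * while atomic contexts keep using + * + * bh = __find_get_block(bdev, block, size); + * + * which serializes via i_private_lock and may return NULL for a buffer whose folio is under noref migration.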
+ */ + if (atomic) + spin_lock(&bd_mapping->i_private_lock); + else + folio_lock(folio); + head = folio_buffers(folio); if (!head) goto out_unlock; + /* + * Upon a noref migration, the folio lock serializes here; + * otherwise bail. + */ + if (test_bit_acquire(BH_Migrate, &head->b_state)) { + WARN_ON(!atomic); + goto out_unlock; + } + bh = head; do { if (!buffer_mapped(bh)) @@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) 1 << blkbits); } out_unlock: - spin_unlock(&bd_mapping->i_private_lock); + if (atomic) + spin_unlock(&bd_mapping->i_private_lock); + else + folio_unlock(folio); folio_put(folio); out: return ret; @@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { - struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); + struct buffer_head *bh; + + bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); @@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return - * NULL + * NULL. Atomic context callers may also return NULL if the buffer is being + * migrated; similarly the page is not marked accessed either. */ -struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +static struct buffer_head * +find_get_block_common(struct block_device *bdev, sector_t block, + unsigned size, bool atomic) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ - bh = __find_get_block_slow(bdev, block); + bh = __find_get_block_slow(bdev, block, atomic); if (bh) bh_lru_install(bh); } else @@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) return bh; } + +struct buffer_head * +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +{ + return find_get_block_common(bdev, block, size, true); +} EXPORT_SYMBOL(__find_get_block); +/* same as __find_get_block() but allows sleeping contexts */ +struct buffer_head * +__find_get_block_nonatomic(struct block_device *bdev, sector_t block, + unsigned size) +{ + return find_get_block_common(bdev, block, size, false); +} +EXPORT_SYMBOL(__find_get_block_nonatomic); + /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. 
@@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + struct buffer_head *bh; + + if (gfpflags_allow_blocking(gfp)) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6ac2bd555e86..06cd2963e41e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2367,7 +2367,7 @@ static int fill_fscrypt_truncate(struct inode *inode, /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { - loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; + loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1; ret = filemap_write_and_wait_range(inode->i_mapping, orig_pos, lend); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38bc8d74f4cc..e7ecc7c8a729 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (!bh || !buffer_uptodate(bh)) /* * If the block is not in the buffer cache, then it - * must have been written out. + * must have been written out, or, most unlikely, is + * being migrated - false failure should be OK here. */ goto out; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f88424c28194..1e98c5be4e0a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6642,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, for (i = 0; i < count; i++) { cond_resched(); if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); + bh = sb_find_get_block_nonatomic(inode->i_sb, + block + i); ext4_forget(handle, is_metadata, inode, bh, block + i); } } diff --git a/fs/file.c b/fs/file.c index dc3f7e120e3e..3a3146664cf3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,7 +26,7 @@ #include "internal.h" -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) +static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) { /* * If the reference count was already in the dead zone, then this diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0cf0fddbee81..1467f6790747 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, bh = bh_in; if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } @@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, /* If there is a different buffer_head lying around in * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh2 = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) @@ -464,7 +466,8 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * state machine will get very upset later on. 
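* (An aside, stated as an assumption drawn from the surrounding hunks: revoke processing runs in sleepable context, so the lookups in this file can use bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, bh->b_size); and block on the folio lock instead of taking the atomic, migration-sensitive path.)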
*/ if (need_cancel) { struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); @@ -492,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); + bh = __find_get_block_nonatomic(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); diff --git a/fs/namespace.c b/fs/namespace.c index d9ca80dcc544..98a5cd756e9a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,56 +2826,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); + struct path under = {}; for (;;) { - struct mount *m; + struct mount *m = real_mount(mnt); if (beneath) { - m = real_mount(mnt); + path_put(&under); read_seqlock_excl(&mount_lock); - dentry = dget(m->mnt_mountpoint); + under.mnt = mntget(&m->mnt_parent->mnt); + under.dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); + dentry = under.dentry; } else { dentry = path->dentry; } inode_lock(dentry->d_inode); - if (unlikely(cant_mount(dentry))) { - inode_unlock(dentry->d_inode); - goto out; - } - namespace_lock(); - if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { + if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) + break; // not to be mounted on + + if (beneath && unlikely(m->mnt_mountpoint != dentry || + &m->mnt_parent->mnt != under.mnt)) { namespace_unlock(); inode_unlock(dentry->d_inode); - goto out; + continue; // got moved } mnt = lookup_mnt(path); - if (likely(!mnt)) + if (unlikely(mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + continue; // got overmounted + } + mp = get_mountpoint(dentry); + if (IS_ERR(mp)) break; - - namespace_unlock(); - inode_unlock(dentry->d_inode); - if (beneath) - dput(dentry); - path_put(path); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - } - - mp = get_mountpoint(dentry); - if (IS_ERR(mp)) { - namespace_unlock(); - inode_unlock(dentry->d_inode); + if (beneath) { + /* + * @under duplicates the references that will stay + * at least until namespace_unlock(), so the path_put() + * below is safe (and OK to do under namespace_lock - + * we are not dropping the final references here). 
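+ * (Reasoning sketch: under.mnt and under.dentry were pinned with mntget()/dget() at the top of the loop, and the mount tree keeps its own references to both until namespace_unlock(), so the path_put() here cannot drop the final reference.)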
+ */ + path_put(&under); + } + return mp; } - -out: + namespace_unlock(); + inode_unlock(dentry->d_inode); if (beneath) - dput(dentry); - + path_put(&under); return mp; } @@ -2886,14 +2892,11 @@ static inline struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { - struct dentry *dentry = where->m_dentry; - + inode_unlock(where->m_dentry->d_inode); read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); - namespace_unlock(); - inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded..87f861e9004f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1961,12 +1961,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f1b4b3e611cb..c7a9729dc9d0 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1249,7 +1249,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 18d67ab113f0..1b79fe07476f 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1266,10 +1266,9 @@ typedef struct smb_com_query_information_rsp { typedef struct smb_com_setattr_req { struct smb_hdr hdr; /* wct = 8 */ __le16 attr; - __le16 time_low; - __le16 time_high; + __le32 last_write_time; __le16 reserved[5]; /* must be zero */ - __u16 ByteCount; + __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 59f6fdfe560e..ecf774a8f1ca 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -395,6 +395,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon); extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); +extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e5..f55457b4b82e 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5171,6 +5171,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb) +{ + SETATTR_REQ *pSMB; + SETATTR_RSP *pSMBr; + struct 
timespec64 ts; + int bytes_returned; + int name_len; + int rc; + + cifs_dbg(FYI, "In %s path %s\n", __func__, fileName); + +retry: + rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUTF16((__le16 *) pSMB->fileName, + fileName, PATH_MAX, nls_codepage, + cifs_remap(cifs_sb)); + name_len++; /* trailing null */ + name_len *= 2; + } else { + name_len = copy_path_name(pSMB->fileName, fileName); + } + /* Only a few attributes can be set by this command; others are not accepted by Win9x. */ + pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) & + (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE)); + /* A zero write time value (in both NT and SETATTR formats) means not to change it. */ + if (le64_to_cpu(write_time) != 0) { + ts = cifs_NTtimeToUnix(write_time); + pSMB->last_write_time = cpu_to_le32(ts.tv_sec); + } + pSMB->BufferFormat = 0x04; + name_len++; /* account for buffer type byte */ + inc_rfc1001_len(pSMB, (__u16)name_len); + pSMB->ByteCount = cpu_to_le16(name_len); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) + cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); + + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto retry; + + return rc; +} + /* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index df976ce6aed9..6bf04d9a5491 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3753,28 +3753,7 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) } } - /* - * Clamp the rsize/wsize mount arguments if they are too big for the server - * and set the rsize/wsize to the negotiated values if not passed in by - * the user on mount - */ - if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { - cifs_sb->ctx->wsize = - round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); - /* - * in the very unlikely event that the server sent a max write size under PAGE_SIZE, - * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 - */ - if (cifs_sb->ctx->wsize == 0) { - cifs_sb->ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); - } - } - if ((cifs_sb->ctx->rsize == 0) || - (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) - cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); - + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); /* * The cookie is initialized from volume info returned above.
* Inside cifs_fscache_get_super_cookie it checks diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9e8f404b9e56..851b74f557c1 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -160,10 +160,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), - cifs_sb->ctx); + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2980941b9667..a634a34d4086 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1021,6 +1021,7 @@ static int smb3_reconfigure(struct fs_context *fc) struct dentry *root = fc->root; struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; + unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; int rc; @@ -1103,11 +1104,8 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, iocharset); /* if rsize or wsize not passed in on remount, use previous values */ - if (ctx->rsize == 0) - ctx->rsize = cifs_sb->ctx->rsize; - if (ctx->wsize == 0) - ctx->wsize = cifs_sb->ctx->wsize; - + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); @@ -1312,7 +1310,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, __func__); goto cifs_parse_mount_err; } - ctx->bsize = result.uint_32; + ctx->bsize = CIFS_ALIGN_BSIZE(fc, result.uint_32); ctx->got_bsize = true; break; case Opt_rasize: @@ -1336,24 +1334,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->rasize = result.uint_32; break; case Opt_rsize: - ctx->rsize = result.uint_32; + ctx->rsize = CIFS_ALIGN_RSIZE(fc, result.uint_32); ctx->got_rsize = true; ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: - ctx->wsize = result.uint_32; + ctx->wsize = CIFS_ALIGN_WSIZE(fc, result.uint_32); ctx->got_wsize = true; - if (ctx->wsize % PAGE_SIZE != 0) { - ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); - if (ctx->wsize == 0) { - ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); - } else { - cifs_dbg(VFS, - "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", - ctx->wsize, PAGE_SIZE); - } - } ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index d1d29249bcdb..9e83302ce4b8 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -20,6 +20,21 @@ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \ } while (0) +static inline size_t cifs_io_align(struct fs_context *fc, + const char *name, size_t size) +{ + if (!size || !IS_ALIGNED(size, PAGE_SIZE)) { + cifs_errorf(fc, "unaligned %s, making it a multiple of %lu bytes\n", + name, PAGE_SIZE); + size = umax(round_down(size, PAGE_SIZE), PAGE_SIZE); + } + return size; +} + +#define CIFS_ALIGN_WSIZE(_fc, _size) cifs_io_align(_fc, "wsize", _size) +#define CIFS_ALIGN_RSIZE(_fc, _size) cifs_io_align(_fc, "rsize", _size) +#define CIFS_ALIGN_BSIZE(_fc, _size) cifs_io_align(_fc, "bsize", _size) + enum smb_version { Smb_1 = 1, Smb_20, @@ -361,4 +376,36 @@ static inline void cifs_mount_unlock(void) mutex_unlock(&cifs_mount_mutex); } +static inline void cifs_negotiate_rsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_rsize(tcon, ctx), PAGE_SIZE); + if (ctx->rsize) + size = umax(umin(ctx->rsize, size), PAGE_SIZE); + ctx->rsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_wsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + if (ctx->wsize) + size = umax(umin(ctx->wsize, size), PAGE_SIZE); + ctx->wsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_iosize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + cifs_negotiate_rsize(server, ctx, tcon); + cifs_negotiate_wsize(server, ctx, tcon); +} + #endif diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0adeec652dc1..b27a182629ec 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -432,7 +432,7 @@ cifs_negotiate(const unsigned int xid, } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -467,7 +467,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -543,24 +543,104 @@ static int cifs_query_path_info(const unsigned int xid, const char *full_path, struct cifs_open_info_data *data) { - int rc; + int rc = -EOPNOTSUPP; FILE_ALL_INFO fi = {}; + struct cifs_search_info search_info = {}; + bool non_unicode_wildcard = false; data->reparse_point = false; data->adjust_tz = false; - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls, - cifs_remap(cifs_sb)); /* - * BB optimize code so we do not make the above call when server claims - * no NT SMB support and the above call failed at least once - set flag - * in tcon or mount. + * First try the CIFSSMBQPathInfo() function, which returns more info + * (NumberOfLinks) than the CIFSFindFirst() fallback. + * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over + * TRANS2_QUERY_PATH_INFORMATION, but support it with a filehandle over + * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo()). The SMB + * Open command on non-NT servers works only for files, not for + * directories. Moreover, the Win9x SMB server returns bogus data in the + * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers, + * do not use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() at all.
*/ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + + /* + * The non-UNICODE variants of the fallback functions below expand wildcards, + * so they cannot be used to query paths containing wildcard characters. + */ + if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><")) + non_unicode_wildcard = true; + + /* + * Then fall back to CIFSFindFirst(), which also works with non-NT servers + * but does not provide NumberOfLinks. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && + !non_unicode_wildcard) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) + search_info.info_level = SMB_FIND_FILE_INFO_STANDARD; + else + search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL, + CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END, + &search_info, false); + if (rc == 0) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) { + FIND_FILE_STANDARD_INFO *di; + int offset = tcon->ses->server->timeAdj; + + di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start; + fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->CreationDate, di->CreationTime, offset))); + fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastAccessDate, di->LastAccessTime, offset))); + fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastWriteDate, di->LastWriteTime, offset))); + fi.ChangeTime = fi.LastWriteTime; + fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes)); + fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize)); + fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize)); + } else { + FILE_FULL_DIRECTORY_INFO *di; + + di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start; + fi.CreationTime = di->CreationTime; + fi.LastAccessTime = di->LastAccessTime; + fi.LastWriteTime = di->LastWriteTime; + fi.ChangeTime = di->ChangeTime; + fi.Attributes = di->ExtFileAttributes; + fi.AllocationSize = di->AllocationSize; + fi.EndOfFile = di->EndOfFile; + fi.EASize = di->EaSize; + } + fi.NumberOfLinks = cpu_to_le32(1); + fi.DeletePending = 0; + fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY); + cifs_buf_release(search_info.ntwrk_buf_start); + } else if (!full_path[0]) { + /* + * CIFSFindFirst() does not work on the root path if the + * root path was exported on the server from the top-level + * path (drive letter). + */ + rc = -EOPNOTSUPP; + } + } + + /* + * If everything failed, fall back to the legacy SMB command + * SMB_COM_QUERY_INFORMATION, which works with all servers but + * provides only limited information. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); data->adjust_tz = true; + } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) { + /* A path with a non-UNICODE wildcard character cannot exist. */ + rc = -ENOENT; } if (!rc) { @@ -639,6 +719,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; FILE_ALL_INFO fi = {}; + /* + * CIFSSMBQFileInfo() for non-NT servers returns bogus data in the + * Attributes field. So do not use this command for non-NT servers.
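+ * The same CAP_NT_SMBS capability bit gates CIFSSMBQPathInfo() in + * cifs_query_path_info() above; here the query is simply refused + * with -EOPNOTSUPP.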
+ */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return -EOPNOTSUPP; + if (cfile->symlink_target) { data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!data->symlink_target) @@ -809,6 +896,9 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifsFileInfo *open_file; + FILE_BASIC_INFO new_buf; + struct cifs_open_info_data query_data; + __le64 write_time = buf->LastWriteTime; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; @@ -816,20 +906,58 @@ smb_set_file_info(struct inode *inode, const char *full_path, /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; tcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; + } else { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); } - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + /* + * Non-NT servers interpret a zero time value in SMB_SET_FILE_BASIC_INFO + * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers + * interpret a zero time value as "do not change the existing value on + * the server". The API of the ->set_file_info() callback expects that a + * zero time value has the NT meaning - do not change. Therefore, if the + * server is non-NT and some time values in "buf" are zero, fetch the + * missing time values. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS) && + (!buf->CreationTime || !buf->LastAccessTime || + !buf->LastWriteTime || !buf->ChangeTime)) { + rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data); + if (rc) { + if (open_file) { + cifsFileInfo_put(open_file); + open_file = NULL; + } + goto out; + } + /* + * The original write_time from buf->LastWriteTime is preserved, + * as SMBSetInformation() interprets zero as "do not change". + */ + new_buf = *buf; + buf = &new_buf; + if (!buf->CreationTime) + buf->CreationTime = query_data.fi.CreationTime; + if (!buf->LastAccessTime) + buf->LastAccessTime = query_data.fi.LastAccessTime; + if (!buf->LastWriteTime) + buf->LastWriteTime = query_data.fi.LastWriteTime; + if (!buf->ChangeTime) + buf->ChangeTime = query_data.fi.ChangeTime; } - tcon = tlink_tcon(tlink); + + if (open_file) + goto set_via_filehandle; rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb); @@ -850,8 +978,45 @@ smb_set_file_info(struct inode *inode, const char *full_path, .fid = &fid, }; - cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) { + /* Opening a directory path is not possible on non-NT servers. */ + rc = -EOPNOTSUPP; + } else { + /* + * Use cifs_open_file() instead of CIFS_open(), as + * cifs_open_file() selects the correct open function, which + * also works on non-NT servers. + */ + rc = cifs_open_file(xid, &oparms, &oplock, NULL); + /* + * Opening a path for writing on non-NT servers is not + * possible when the read-only attribute is already set; + * the server in this case returns -EACCES. For those + * servers the only way to clear the read-only + * bit is via the SMB_COM_SETATTR command.
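+ * + * A minimal sketch of that fallback call (the attribute value is + * illustrative; the real call below passes the caller's buffer): + * + * rc = SMBSetInformation(xid, tcon, full_path, + * cpu_to_le32(ATTR_ARCHIVE), // illustrative new attrs + * 0, // zero == do not change write time + * cifs_sb->local_nls, cifs_sb);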
+ */ + if (rc == -EACCES && + (cinode->cifsAttrs & ATTR_READONLY) && + le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */ + !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) && + !(tcon->ses->capabilities & CAP_NT_SMBS)) + rc = -EOPNOTSUPP; + } + + /* Fall back to the SMB_COM_SETATTR command when absolutely needed. */ + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); + rc = SMBSetInformation(xid, tcon, full_path, + buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs), + write_time, + cifs_sb->local_nls, cifs_sb); + if (rc == 0) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + else + rc = -EACCES; + goto out; + } + if (rc != 0) { if (rc == -EIO) rc = -EINVAL; @@ -859,6 +1024,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, } netpid = current->tgid; + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n"); set_via_filehandle: rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid); @@ -869,6 +1035,21 @@ set_via_filehandle: CIFSSMBClose(xid, tcon, fid.netfid); else cifsFileInfo_put(open_file); + + /* + * Setting the read-only bit is not honored on non-NT servers when done + * via open semantics. So for setting it, use the SMB_COM_SETATTR + * command. This command works only after the file is closed, so use it + * only when the operation was called without a filehandle. + */ + if (open_file == NULL && + !(tcon->ses->capabilities & CAP_NT_SMBS) && + le32_to_cpu(buf->Attributes) & ATTR_READONLY) { + SMBSetInformation(xid, tcon, full_path, + buf->Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); + } out: if (tlink != NULL) cifs_put_tlink(tlink); @@ -1161,8 +1342,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, - .negotiate_wsize = cifs_negotiate_wsize, - .negotiate_rsize = cifs_negotiate_rsize, + .negotiate_wsize = smb1_negotiate_wsize, + .negotiate_rsize = smb1_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c4d52bebd37d..0b35816d551f 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2921,6 +2921,7 @@ replay_again: req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } @@ -4092,12 +4093,8 @@ static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, return; spin_lock(&tcon->sb_list_lock); - list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tcon, cifs_sb->ctx); - cifs_sb->ctx->wsize = - server->ops->negotiate_wsize(tcon, cifs_sb->ctx); - } + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); spin_unlock(&tcon->sb_list_lock); } diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 83caa3849749..b3d121052408 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -550,7 +550,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH,
"different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 3f45f28f6f0f..9dec4c2940bc 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -59,10 +59,12 @@ static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess) struct ksmbd_session_rpc *entry; long index; + down_write(&sess->rpc_lock); xa_for_each(&sess->rpc_handle_list, index, entry) { xa_erase(&sess->rpc_handle_list, index); __session_rpc_close(sess, entry); } + up_write(&sess->rpc_lock); xa_destroy(&sess->rpc_handle_list); } @@ -92,7 +94,7 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; - int method; + int method, id; method = __rpc_method(rpc_name); if (!method) @@ -102,26 +104,29 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) if (!entry) return -ENOMEM; + down_read(&sess->rpc_lock); entry->method = method; - entry->id = ksmbd_ipc_id_alloc(); - if (entry->id < 0) + entry->id = id = ksmbd_ipc_id_alloc(); + if (id < 0) goto free_entry; - old = xa_store(&sess->rpc_handle_list, entry->id, entry, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP); if (xa_is_err(old)) goto free_id; - resp = ksmbd_rpc_open(sess, entry->id); + resp = ksmbd_rpc_open(sess, id); if (!resp) goto erase_xa; + up_read(&sess->rpc_lock); kvfree(resp); - return entry->id; + return id; erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); + up_read(&sess->rpc_lock); return -EINVAL; } @@ -129,9 +134,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; + down_write(&sess->rpc_lock); entry = xa_erase(&sess->rpc_handle_list, id); if (entry) __session_rpc_close(sess, entry); + up_write(&sess->rpc_lock); } int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) @@ -439,6 +446,7 @@ static struct ksmbd_session *__session_create(int protocol) sess->sequence_number = 1; rwlock_init(&sess->tree_conns_lock); atomic_set(&sess->refcnt, 2); + init_rwsem(&sess->rpc_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f21348381d59..c5749d6ec715 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -63,6 +63,7 @@ struct ksmbd_session { rwlock_t tree_conns_lock; atomic_t refcnt; + struct rw_semaphore rpc_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 57839f9708bb..46aa08245742 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1445,7 +1445,7 @@ static int ntlm_authenticate(struct ksmbd_work *work, { struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; - struct channel *chann = NULL; + struct channel *chann = NULL, *old; struct ksmbd_user *user; u64 prev_id; int sz, rc; @@ -1557,7 +1557,12 @@ binding_session: return -ENOMEM; chann->conn = conn; - xa_store(&sess->ksmbd_chann_list, (long)conn, chann, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->ksmbd_chann_list, (long)conn, chann, + KSMBD_DEFAULT_GFP); + if 
(xa_is_err(old)) { + kfree(chann); + return xa_err(old); + } } } @@ -1602,11 +1607,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { @@ -2249,10 +2249,6 @@ int smb2_session_logoff(struct ksmbd_work *work) sess->state = SMB2_SESSION_EXPIRED; up_write(&conn->session_lock); - if (sess->user) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP); rsp->StructureSize = cpu_to_le16(4); diff --git a/fs/splice.c b/fs/splice.c index 90d464241f15..4d6df083e0c0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -45,7 +45,7 @@ * here if set to avoid blocking other users of this pipe if splice is * being done on it. */ -static noinline void noinline pipe_clear_nowait(struct file *file) +static noinline void pipe_clear_nowait(struct file *file) { fmode_t fmode = READ_ONCE(file->f_mode); diff --git a/fs/xattr.c b/fs/xattr.c index 02bee149ad96..fabb2a04501e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) error = -EBADF; @@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 8c541ca71872..81c94dd1d596 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -170,7 +170,8 @@ bool xfs_zoned_need_gc( struct xfs_mount *mp) { - s64 available, free; + s64 available, free, threshold; + s32 remainder; if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE)) return false; @@ -183,7 +184,12 @@ xfs_zoned_need_gc( return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); - if (available < mult_frac(free, mp->m_zonegc_low_space, 100)) + + threshold = div_s64_rem(free, 100, &remainder); + threshold = threshold * mp->m_zonegc_low_space + + remainder * div_s64(mp->m_zonegc_low_space, 100); + + if (available < threshold) return true; return false; diff --git a/include/cxl/features.h b/include/cxl/features.h index a3bb34694c06..5f7f842765a5 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -66,7 +66,7 @@ struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) { @@ -78,7 +78,8 @@ static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) return -EOPNOTSUPP; } -static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +static inline int devm_cxl_setup_fwctl(struct device *host, + struct cxl_memdev *cxlmd) { return -EOPNOTSUPP; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2b8875a82ff8..9a1f0ee40b56 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1637,6 +1637,7 
@@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1693,10 +1694,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a4ad7839b6..0029ff880e27 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -222,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -397,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a45..50b14a5661c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5..7a5b391dcc01 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct 
cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int 
cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..85ab710ec0e7 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 7db62fbc0500..31551e4cb8f3 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,7 +61,6 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } -bool 
__file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -178,20 +177,14 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) */ static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) { - long old, new; + long old; old = atomic_long_read(&ref->refcnt); - do { - if (unlikely(old < 0)) - return __file_ref_put_badval(ref, old); - - if (old == FILE_REF_ONEREF) - new = FILE_REF_DEAD; - else - new = old - 1; - } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); - - return new == FILE_REF_DEAD; + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); } /** diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8b..fc27b53c58c2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -907,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6fa0a268d538..097be89487bf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a..3a8d35d41fda 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ diff --git a/include/linux/module.h b/include/linux/module.h index d94b196d5a34..b3329110d668 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d9d0cb827117..f030adc462ee 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -53,43 +53,70 @@ struct landlock_ruleset_attr { __u64 scoped; }; -/* - * sys_landlock_create_ruleset() flags: +/** + * DOC: landlock_create_ruleset_flags + * + * **Flags** * - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. + * %LANDLOCK_CREATE_RULESET_VERSION + * Get the highest supported Landlock ABI version (starting at 1). + * + * %LANDLOCK_CREATE_RULESET_ERRATA + * Get a bitmask of fixed issues for the current Landlock ABI version. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) #define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ -/* - * sys_landlock_restrict_self() flags: - * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF: Do not create any log related to the - * enforced restrictions. This should only be set by tools launching unknown - * or untrusted programs (e.g. a sandbox tool, container runtime, system - * service manager). Because programs sandboxing themselves should fix any - * denied access, they should not set this flag to be aware of potential - * issues reported by system's logs (i.e. audit). - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON: Explicitly ask to continue - * logging denied access requests even after an :manpage:`execve(2)` call. - * This flag should only be set if all the programs than can legitimately be - * executed will not try to request a denied access (which could spam audit - * logs). 
- * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF: Do not create any log related - * to the enforced restrictions coming from future nested domains created by - * the caller or its descendants. This should only be set according to a - * runtime configuration (i.e. not hardcoded) by programs launching other - * unknown or untrusted programs that may create their own Landlock domains - * and spam logs. The main use case is for container runtimes to enable users - * to mute buggy sandboxed programs for a specific container image. Other use - * cases include sandboxer tools and init systems. Unlike - * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, - * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF does not impact the requested - * restriction (if any) but only the future nested domains. +/** + * DOC: landlock_restrict_self_flags + * + * **Flags** + * + * By default, denied accesses originating from programs that sandbox themselves + * are logged via the audit subsystem. Such events typically indicate unexpected + * behavior, such as bugs or exploitation attempts. However, to avoid excessive + * logging, access requests denied by a domain not created by the originating + * program are not logged by default. The rationale is that programs should know + * their own behavior, but not necessarily the behavior of other programs. This + * default configuration is suitable for most programs that sandbox themselves. + * For specific use cases, the following flags allow programs to modify this + * default logging behavior. + * + * The %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF and + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON flags apply to the newly created + * Landlock domain. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * Disables logging of denied accesses originating from the thread creating + * the Landlock domain, as well as its children, as long as they continue + * running the same executable code (i.e., without an intervening + * :manpage:`execve(2)` call). This is intended for programs that execute + * unknown code without invoking :manpage:`execve(2)`, such as script + * interpreters. Programs that only sandbox themselves should not set this + * flag, so users can be notified of unauthorized access attempts via system + * logs. + * + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * Enables logging of denied accesses after an :manpage:`execve(2)` call, + * providing visibility into unauthorized access attempts by newly executed + * programs within the created Landlock domain. This flag is recommended + * only when all potential executables in the domain are expected to comply + * with the access restrictions, as excessive audit log entries could make + * it more difficult to identify critical events. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * Disables logging of denied accesses originating from nested Landlock + * domains created by the caller or its descendants. This flag should be set + * according to runtime configuration, not hardcoded, to avoid suppressing + * important security events. It is useful for container runtimes or + * sandboxing tools that may launch programs which themselves create + * Landlock domains and could otherwise generate excessive logs. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a + * domain. 
*/ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db..f52de5ed1b3b 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */ diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f60d0a9d505e..9414ca6d101c 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -123,11 +123,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c6209fe44cb1..a2b256e96d5d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -872,10 +872,15 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - __io_cq_lock(ctx); - posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + if (!ctx->lockless_cq) { + spin_lock(&ctx->completion_lock); + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + spin_unlock(&ctx->completion_lock); + } else { + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + } + ctx->submit_state.cq_flush = true; - __io_cq_unlock_post(ctx); return posted; } @@ -1078,21 +1083,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync) while (node) { req = container_of(node, struct io_kiocb, io_task_work.node); node = node->next; - if (sync && last_ctx != req->ctx) { + if (last_ctx != req->ctx) { if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } last_ctx = req->ctx; percpu_ref_get(&last_ctx->refs); } - if (llist_add(&req->io_task_work.node, - &req->ctx->fallback_llist)) - schedule_delayed_work(&req->ctx->fallback_work, 1); + if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) + schedule_delayed_work(&last_ctx->fallback_work, 1); } if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5a5adc66b8e2..92b606d60020 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2189,7 +2189,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ b = &htab->buckets[i]; rcu_read_lock(); head = &b->head; - hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { key = elem->key; if (is_percpu) { /* current cpu 
value for percpu map */ diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 2fdf3c978db1..774e5a538811 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -89,5 +89,6 @@ static void __exit fini(void) } late_initcall(load); module_exit(fini); +MODULE_IMPORT_NS("BPF_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs"); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9794446bc8c6..64c3393e8270 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1583,7 +1583,7 @@ struct bpf_map *bpf_map_get(u32 ufd) return map; } -EXPORT_SYMBOL(bpf_map_get); +EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { @@ -3364,7 +3364,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) bpf_link_inc(link); return link; } -EXPORT_SYMBOL(bpf_link_get_from_fd); +EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); static void bpf_tracing_link_release(struct bpf_link *link) { @@ -6020,7 +6020,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) return ____bpf_sys_bpf(cmd, attr, size); } } -EXPORT_SYMBOL(kern_sys_bpf); +EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL"); static const struct bpf_func_proto bpf_sys_bpf_proto = { .func = bpf_sys_bpf, diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 3b2bdca9f1d4..77c8d9487a9a 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -336,16 +336,22 @@ static phys_addr_t dma_reserved_default_memory_size __initdata; static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - if (!rmem->priv) { - struct dma_coherent_mem *mem; + struct dma_coherent_mem *mem = rmem->priv; + if (!mem) { mem = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, true); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; } - dma_assign_coherent_memory(dev, rmem->priv); + + /* Warn if the device potentially can't use the reserved memory */ + if (mem->device_base + rmem->size - 1 > + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit)) + dev_warn(dev, "reserved memory is beyond device's set DMA address range\n"); + + dma_assign_coherent_memory(dev, mem); return 0; } diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 055da410ac71..8df0dfaaca18 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area; * Users, who want to set the size of global CMA area for their system * should use cma= kernel parameter. 
*/ -static const phys_addr_t size_bytes __initconst = - (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) static phys_addr_t size_cmdline __initdata = -1; static phys_addr_t base_cmdline __initdata; static phys_addr_t limit_cmdline __initdata; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index cda127027e48..051a32988040 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -910,6 +910,19 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +static bool __dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops) || use_dma_iommu(dev)) + return false; + return !dma_direct_all_ram_mapped(dev); +} + /** * dma_addressing_limited - return if the device is addressing limited * @dev: device to check @@ -920,15 +933,11 @@ EXPORT_SYMBOL(dma_set_coherent_mask); */ bool dma_addressing_limited(struct device *dev) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev)) - return true; - - if (unlikely(ops) || use_dma_iommu(dev)) + if (!__dma_addressing_limited(dev)) return false; - return !dma_direct_all_ram_mapped(dev); + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; } EXPORT_SYMBOL_GPL(dma_addressing_limited); diff --git a/kernel/events/core.c b/kernel/events/core.c index e93c19565914..95e703891b24 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3943,7 +3943,7 @@ static int merge_sched_in(struct perf_event *event, void *data) perf_event_set_state(event, PERF_EVENT_STATE_ERROR); if (*perf_event_fasync(event)) - event->pending_kill = POLL_HUP; + event->pending_kill = POLL_ERR; perf_event_wakeup(event); } else { @@ -6075,7 +6075,7 @@ static __poll_t perf_poll(struct file *file, poll_table *wait) if (unlikely(READ_ONCE(event->state) == PERF_EVENT_STATE_ERROR && event->attr.pinned)) - return events; + return EPOLLERR; /* * Pin the event->rb by taking event->mmap_mutex; otherwise diff --git a/kernel/params.c b/kernel/params.c index 2509f216c9f3..e668fc90b83e 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,38 +760,35 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init locate_module_kobject(const char *name) +struct module_kobject __modinit * lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; int err; kobj = kset_find_obj(module_kset, name); - if (kobj) { - mk = to_module_kobject(kobj); - } else { - mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); - BUG_ON(!mk); - - mk->mod = THIS_MODULE; - mk->kobj.kset = module_kset; - err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, - "%s", name); -#ifdef CONFIG_MODULES - if (!err) - err = sysfs_create_file(&mk->kobj, &module_uevent.attr); -#endif - if (err) { - kobject_put(&mk->kobj); - pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", - name, err); - return NULL; - } + if (kobj) + return to_module_kobject(kobj); - /* So that we hold reference in both cases. 
*/ - kobject_get(&mk->kobj); + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + if (!mk) + return NULL; + + mk->mod = THIS_MODULE; + mk->kobj.kset = module_kset; + err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); + if (IS_ENABLED(CONFIG_MODULES) && !err) + err = sysfs_create_file(&mk->kobj, &module_uevent.attr); + if (err) { + kobject_put(&mk->kobj); + pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", + name, err); + return NULL; } + /* So that we hold reference in both cases. */ + kobject_get(&mk->kobj); + return mk; } @@ -802,7 +799,7 @@ static void __init kernel_add_sysfs_param(const char *name, struct module_kobject *mk; int err; - mk = locate_module_kobject(name); + mk = lookup_or_create_module_kobject(name); if (!mk) return; @@ -873,7 +870,7 @@ static void __init version_sysfs_builtin(void) int err; for (vattr = __start___modver; vattr < __stop___modver; vattr++) { - mk = locate_module_kobject(vattr->module_name); + mk = lookup_or_create_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e43993a4e580..0fb9bf995a47 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7081,9 +7081,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) h_nr_idle = task_has_idle_policy(p); if (task_sleep || task_delayed || !se->sched_delayed) h_nr_runnable = 1; - } else { - cfs_rq = group_cfs_rq(se); - slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7093,6 +7090,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (p && &p->se == se) return -1; + slice = cfs_rq_min_slice(cfs_rq); break; } diff --git a/mm/memblock.c b/mm/memblock.c index 0a53db4d9f7b..d3509414b8c3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2183,11 +2183,14 @@ static void __init memmap_init_reserved_pages(void) struct memblock_region *region; phys_addr_t start, end; int nid; + unsigned long max_reserved; /* * set nid on all reserved pages and also treat struct * pages for the NOMAP regions as PageReserved */ +repeat: + max_reserved = memblock.reserved.max; for_each_mem_region(region) { nid = memblock_get_region_node(region); start = region->base; @@ -2196,8 +2199,15 @@ static void __init memmap_init_reserved_pages(void) if (memblock_is_nomap(region)) reserve_bootmem_region(start, end, nid); - memblock_set_node(start, end, &memblock.reserved, nid); + memblock_set_node(start, region->size, &memblock.reserved, nid); } + /* + * If 'max' has changed, the memblock.reserved array has been + * doubled, which may have added a new reserved region before the + * current 'start'. Repeat the procedure to set its node id.
+ */ + if (max_reserved != memblock.reserved.max) + goto repeat; /* * initialize struct pages for reserved regions that don't have diff --git a/mm/migrate.c b/mm/migrate.c index f3ee6d8d5e2e..676d9cfc7059 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping, return -EAGAIN; if (check_refs) { - bool busy; + bool busy, migrating; bool invalidated = false; + migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state); + VM_WARN_ON_ONCE(migrating); recheck_buffers: busy = false; spin_lock(&mapping->i_private_lock); @@ -859,12 +861,12 @@ recheck_buffers: } bh = bh->b_this_page; } while (bh != head); + spin_unlock(&mapping->i_private_lock); if (busy) { if (invalidated) { rc = -EAGAIN; goto unlock_buffers; } - spin_unlock(&mapping->i_private_lock); invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; @@ -883,7 +885,7 @@ recheck_buffers: unlock_buffers: if (check_refs) - spin_unlock(&mapping->i_private_lock); + clear_bit_unlock(BH_Migrate, &head->b_state); bh = head; do { unlock_buffer(bh); diff --git a/mm/slub.c b/mm/slub.c index dc9e729e1d26..be8b09e09d30 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2028,8 +2028,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2049,18 +2048,6 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; - - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; -} - #else /* CONFIG_SLAB_OBJ_EXT */ static inline void init_slab_obj_exts(struct slab *slab) @@ -2077,11 +2064,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2129,7 +2111,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2601,8 +2583,12 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. 
+ */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b24afec24138..6664ea73ccf8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -220,16 +220,6 @@ void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); -void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); -} -EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); - #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -297,19 +287,6 @@ static void osd_req_op_cls_request_info_pagelist( ceph_osd_data_pagelist_init(osd_data, pagelist); } -void osd_req_op_cls_request_data_pagelist( - struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, cls, request_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); - osd_req->r_ops[which].cls.indata_len += pagelist->length; - osd_req->r_ops[which].indata_len += pagelist->length; -} -EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); - void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 004cdb59f010..7ce5e28a6c03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,13 +1536,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. - * - * If someone frequently calls the flush interface, we should - * immediately clean the corresponding cache_detail instead of - * continuously accumulating nextcheck. */ - if (cd->flush_time >= now && cd->flush_time < (now + 5)) + if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index f04b058b09b2..2494c96e105f 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) -use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use crate::{bindings, device::Device, error::Error, error::Result, ffi, str::CStr}; use core::ptr::NonNull; /// # Invariants @@ -12,7 +12,11 @@ use core::ptr::NonNull; /// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, /// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. 
struct FwFunc( - unsafe extern "C" fn(*mut *const bindings::firmware, *const u8, *mut bindings::device) -> i32, + unsafe extern "C" fn( + *mut *const bindings::firmware, + *const ffi::c_char, + *mut bindings::device, + ) -> i32, ); impl FwFunc { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5b632635e00d..95a4fa1f1e44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 2d6e59561c9d..d88acdf40855 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -8,6 +8,7 @@ # Default set of warnings, always enabled KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -Werror=implicit-function-declaration KBUILD_CFLAGS += -Werror=implicit-int @@ -56,6 +57,13 @@ KBUILD_CFLAGS += -Wno-pointer-sign # globally built with -Wcast-function-type. KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) + # The allocators already balk at large sizes, so silence the compiler # warnings for bounds checks involving those possible values. 
While # -Wno-alloc-size-larger-than would normally be used here, earlier versions @@ -82,7 +90,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused # diff --git a/security/landlock/domain.c b/security/landlock/domain.c index bae2e9909013..a647b68e8d06 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -16,6 +16,7 @@ #include <linux/path.h> #include <linux/pid.h> #include <linux/sched.h> +#include <linux/signal.h> #include <linux/uidgid.h> #include "access.h" @@ -99,8 +100,7 @@ static struct landlock_details *get_current_details(void) return ERR_PTR(-ENOMEM); memcpy(details->exe_path, path_str, path_size); - WARN_ON_ONCE(current_cred() != current_real_cred()); - details->pid = get_pid(task_pid(current)); + details->pid = get_pid(task_tgid(current)); details->uid = from_kuid(&init_user_ns, current_uid()); get_task_comm(details->comm, current); return details; diff --git a/security/landlock/domain.h b/security/landlock/domain.h index ed0d348e214c..7fb70b25f85a 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -130,7 +130,7 @@ int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); static inline void landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy) { - if (WARN_ON_ONCE(!hierarchy || !hierarchy->details)) + if (!hierarchy || !hierarchy->details) return; put_pid(hierarchy->details->pid); diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 54a9f29e6ebb..b9561e3417ae 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -169,20 +169,16 @@ const int landlock_abi_version = 7; * the new ruleset. * @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). - * @flags: Supported value: + * @flags: Supported values: + * * - %LANDLOCK_CREATE_RULESET_VERSION * - %LANDLOCK_CREATE_RULESET_ERRATA * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. * - * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is - * 0, then the returned value is the highest supported Landlock ABI version - * (starting at 1). - * - * If @flags is %LANDLOCK_CREATE_RULESET_ERRATA and @attr is NULL and @size is - * 0, then the returned value is a bitmask of fixed issues for the current - * Landlock ABI version. + * If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is + * set, then @attr must be NULL and @size must be 0. * * Possible returned errors are: * @@ -191,6 +187,9 @@ const int landlock_abi_version = 7; * - %E2BIG: @attr or @size inconsistencies; * - %EFAULT: @attr or @size inconsistencies; * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_create_ruleset_flags */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, @@ -452,18 +451,15 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * @ruleset_fd: File descriptor tied to the ruleset to merge with the target. 
* @flags: Supported values: * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF * * This system call enables to enforce a Landlock ruleset on the current * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * - * It is allowed to only pass the %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF - * flag with a @ruleset_fd value of -1. - * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; @@ -475,6 +471,9 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * %CAP_SYS_ADMIN in its namespace. * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_restrict_self_flags */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9..b9ce3aab15fe 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include <sys/poll.h> #include <sys/utsname.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. 
*/ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. 
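The hv_kvp_daemon rework above replaces the awk pipelines with in-process parsing of ip route show output: kvp_extract_routes() pulls out each token that follows "via " and keeps it only if kvp_verify_ip_address() accepts it. A minimal standalone sketch of that dual-family inet_pton() check (illustrative only, not part of the patch; the sample strings are hypothetical):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns true if 's' is a well-formed IPv4 or IPv6 address; a buffer
 * sized for struct in6_addr is large enough for either family. */
static bool is_ip_address(const char *s)
{
	char buf[sizeof(struct in6_addr)];

	if (inet_pton(AF_INET, s, buf) == 1)
		return true;
	return inet_pton(AF_INET6, s, buf) == 1;
}

int main(void)
{
	printf("%d\n", is_ip_address("192.168.1.1")); /* 1 */
	printf("%d\n", is_ip_address("fe80::1"));     /* 1 */
	printf("%d\n", is_ip_address("default"));     /* 0 */
	return 0;
}

Validating each candidate this way lets the daemon accept whatever follows "via " without depending on the fixed column layout the old awk '/default/ {print $3 }' pipelines assumed.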
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe..bf9caa908f89 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..7bb885b0c32d 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -43,6 +43,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0..01e836fba488 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally split the range between nodes 0 and 1 */ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across the node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") + * uses the following loop to set the nid on each memblock.reserved + * region, but it may miss some regions if memblock_set_node() + * doubles the array. + * + * By checking 'max', we make sure every region's nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has a valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions occupies a range of memory allocated + * from dummy_physical_memory_init(). After that memory is freed, we + * must not use it. So restore the original memory region to make sure + * later tests run as normal and are not affected by the doubled array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9..5fea3209566e 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ out: for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..0a99fd404f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f5465..863df7c0fdd0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index
000000000000..82307166f789 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd1..59a71f22fb11 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata, static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +69,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify) self->root_id = get_mnt_id(_metadata, "/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. 
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c9..18a6014920b5 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ out: return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403..cfc571afd0eb 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include <errno.h> #include <limits.h> #include <linux/landlock.h> +#include <pthread.h> #include <stdlib.h> #include <sys/mount.h> #include <sys/prctl.h> @@ -40,7 +41,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +60,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +75,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + 
ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +92,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 allocated_dom = 2; @@ -107,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -115,7 +114,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,23 +141,143 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. */ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. */ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. 
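+ * Landlock domains hang off credentials, and the allocation record reports
+ * the thread group leader, so a domain enforced from a second thread must
+ * still be logged with the TGID, not the TID.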
*/ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread. */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -273,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records.
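 * The allocation record must carry the test's TGID and the domain ID
 * recorded from the earlier denial.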
*/ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a8798..73729382d40f 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47af..277f92f9d753 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index ec4624a283bc..f34ac0bac696 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -9,6 +9,7 @@ TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 759f06637146..842b40736a9b 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -536,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -658,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -1237,7 +1245,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? 
"recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1313,7 +1321,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1351,9 +1359,8 @@ int main(int argc, char *argv[]) ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; break; case 'g': - value = strtol(optarg, NULL, 10); - if (value) - ctx.flags |= UBLK_F_NEED_GET_DATA; + ctx.flags |= UBLK_F_NEED_GET_DATA; + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 29571eb296f1..44ee1e4ac55b 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -86,9 +86,6 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; - /* fault_inject */ - long long delay_us; - int _evtfd; int _shmid; @@ -118,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 9fc111f64576..a81210ca3e99 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,8 +17,8 @@ _get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 714630b4b329..3bb00a347402 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -3,7 +3,7 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh -TID="generic_04" +TID="generic_05" ERR_CODE=0 ublk_run_recover_test() diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..cba86451fa5e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? 
+fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index a7071b10224d..88601b48f1cd 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -47,15 +47,15 @@ _create_backfile 0 256M _create_backfile 1 256M for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & wait done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & wait done fi
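The "-g 1" to "-g" switch in these scripts mirrors the kublk.c getopt change above: "g" now appears in the option string without a trailing colon, so it is a bare flag and optarg is never set for it. A minimal standalone sketch of the two option styles (illustrative variable names, not kublk code):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	int need_get_data = 0, recovery = 0, opt;

	/* "g" takes no argument; "r:" requires one (optarg is set). */
	while ((opt = getopt(argc, argv, "gr:")) != -1) {
		switch (opt) {
		case 'g':	/* bare flag, e.g. "-g" */
			need_get_data = 1;
			break;
		case 'r':	/* valued flag, e.g. "-r 1" */
			recovery = (int)strtol(optarg, NULL, 10);
			break;
		default:
			return 1;
		}
	}
	printf("get_data=%d recovery=%d\n", need_get_data, recovery);
	return 0;
}

Run as "prog -g -r 1" this prints "get_data=1 recovery=1"; with the old "-g 1" form, the "1" would now be treated as a stray positional argument rather than a value for -g.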