299 files changed, 3494 insertions, 2093 deletions
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index 3e76276bd488..5becb441c3cb 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -562,7 +562,7 @@ The interesting knobs for XFS workqueues are as follows: Zoned Filesystems ================= -For zoned file systems, the following attribute is exposed in: +For zoned file systems, the following attributes are exposed in: /sys/fs/xfs/<dev>/zoned/ @@ -572,23 +572,10 @@ For zoned file systems, the following attribute is exposed in: is limited by the capabilities of the backing zoned device, file system size and the max_open_zones mount option. -Zoned Filesystems -================= - -For zoned file systems, the following attributes are exposed in: - - /sys/fs/xfs/<dev>/zoned/ - - max_open_zones (Min: 1 Default: Varies Max: UINTMAX) - This read-only attribute exposes the maximum number of open zones - available for data placement. The value is determined at mount time and - is limited by the capabilities of the backing zoned device, file system - size and the max_open_zones mount option. - - zonegc_low_space (Min: 0 Default: 0 Max: 100) - Define a percentage for how much of the unused space that GC should keep - available for writing. A high value will reclaim more of the space - occupied by unused blocks, creating a larger buffer against write - bursts at the cost of increased write amplification. Regardless - of this value, garbage collection will always aim to free a minimum - amount of blocks to keep max_open_zones open for data placement purposes. + zonegc_low_space (Min: 0 Default: 0 Max: 100) + Define a percentage for how much of the unused space that GC should keep + available for writing. A high value will reclaim more of the space + occupied by unused blocks, creating a larger buffer against write + bursts at the cost of increased write amplification. Regardless + of this value, garbage collection will always aim to free a minimum + amount of blocks to keep max_open_zones open for data placement purposes. diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst index 1565b9546e38..a8f307a3b499 100644 --- a/Documentation/arch/openrisc/openrisc_port.rst +++ b/Documentation/arch/openrisc/openrisc_port.rst @@ -7,10 +7,10 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k). For information about OpenRISC processors and ongoing development: - ======= ============================= + ======= ============================== website https://openrisc.io - email openrisc@lists.librecores.org - ======= ============================= + email linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -27,11 +27,11 @@ Toolchain binaries can be obtained from openrisc.io or our github releases page. Instructions for building the different toolchains can be found on openrisc.io or Stafford's toolchain build and release scripts. 
- ========== ================================================= - binaries https://github.com/openrisc/or1k-gcc/releases + ========== ========================================================== + binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases toolchains https://openrisc.io/software building https://github.com/stffrdhrn/or1k-toolchain-build - ========== ================================================= + ========== ========================================================== 2) Building diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index de27e1620821..0acb4c9b8d90 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted into the Linux kernel, please implement support into LLVM's BPF back end. See LLVM_ section below for further information. +Q: What "BPF_INTERNAL" symbol namespace is for? +----------------------------------------------- +A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure +like preload kernel modules with light skeleton. Most symbols outside +of BPF_INTERNAL are not expected to be used by code outside of BPF either. +Symbols may lack the designation because they predate the namespaces, +or due to an oversight. + Stable submission ================= diff --git a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml index 8b3826243ddd..38e3ad50ff4f 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml @@ -27,7 +27,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 - maximum: 7 + maximum: 31 description: Offset in bit within the address range specified by reg. 
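The widened "bits" range above lets a fixed cell's payload start at any bit offset within the first 32 bits of the range covered by reg; the nvmem core shifts the payload into place before consumers see it. A minimal consumer-side sketch, assuming a hypothetical cell id "calibration" and an already-bound struct device:

#include <linux/err.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Hypothetical helper: read a u32 from a fixed cell that uses a bit offset. */
static int read_cal_cell(struct device *dev, u32 *out)
{
	struct nvmem_cell *cell;
	size_t len;
	void *buf;

	cell = nvmem_cell_get(dev, "calibration");
	if (IS_ERR(cell))
		return PTR_ERR(cell);

	/* The core has already applied the cell's bit offset and width. */
	buf = nvmem_cell_read(cell, &len);
	nvmem_cell_put(cell);
	if (IS_ERR(buf))
		return PTR_ERR(buf);
	if (len < sizeof(*out)) {
		kfree(buf);
		return -EINVAL;
	}

	*out = *(u32 *)buf;
	kfree(buf);
	return 0;
}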
- minimum: 1 diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 39c209249c9c..3f6dc6a3a9f1 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -19,6 +19,7 @@ properties: - enum: - qcom,apq8064-qfprom - qcom,apq8084-qfprom + - qcom,ipq5018-qfprom - qcom,ipq5332-qfprom - qcom,ipq5424-qfprom - qcom,ipq6018-qfprom @@ -28,6 +29,8 @@ properties: - qcom,msm8226-qfprom - qcom,msm8916-qfprom - qcom,msm8917-qfprom + - qcom,msm8937-qfprom + - qcom,msm8960-qfprom - qcom,msm8974-qfprom - qcom,msm8976-qfprom - qcom,msm8996-qfprom @@ -51,6 +54,7 @@ properties: - qcom,sm8450-qfprom - qcom,sm8550-qfprom - qcom,sm8650-qfprom + - qcom,x1e80100-qfprom - const: qcom,qfprom reg: diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index a44d44b32809..dc89020b0950 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -14,6 +14,7 @@ properties: enum: - rockchip,px30-otp - rockchip,rk3308-otp + - rockchip,rk3576-otp - rockchip,rk3588-otp reg: @@ -62,6 +63,8 @@ allOf: properties: clocks: maxItems: 3 + clock-names: + maxItems: 3 resets: maxItems: 1 reset-names: @@ -73,11 +76,33 @@ allOf: compatible: contains: enum: + - rockchip,rk3576-otp + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: otp + - const: apb + + - if: + properties: + compatible: + contains: + enum: - rockchip,rk3588-otp then: properties: clocks: minItems: 4 + clock-names: + minItems: 4 resets: minItems: 3 reset-names: diff --git a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst index cadc580fa23b..d728e4db0b85 100644 --- a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 关于OpenRISC处理器和正在进行中的开发的信息: - ======= ============================= + ======= ============================== 网站 https://openrisc.io - 邮箱 openrisc@lists.librecores.org - ======= ============================= + 邮箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具链和Linux的构建指南 工具链的构建指南可以在openrisc.io或Stafford的工具链构建和发布脚本 中找到。 - ====== ================================================= - 二进制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二进制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具链 https://openrisc.io/software 构建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 构建 diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst index 422fe9f7a3f2..a1e4517dc601 100644 --- a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 關於OpenRISC處理器和正在進行中的開發的信息: - ======= ============================= + ======= 
============================== 網站 https://openrisc.io - 郵箱 openrisc@lists.librecores.org - ======= ============================= + 郵箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具鏈和Linux的構建指南 工具鏈的構建指南可以在openrisc.io或Stafford的工具鏈構建和發佈腳本 中找到。 - ====== ================================================= - 二進制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二進制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具鏈 https://openrisc.io/software 構建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 構建 diff --git a/MAINTAINERS b/MAINTAINERS index fa1e04e87d1d..3cbf9ac0d83f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3191,6 +3191,12 @@ M: Dinh Nguyen <dinguyen@kernel.org> S: Maintained F: drivers/clk/socfpga/ +ARM/SOCFPGA DWMAC GLUE LAYER +M: Maxime Chevallier <maxime.chevallier@bootlin.com> +S: Maintained +F: Documentation/devicetree/bindings/net/socfpga-dwmac.txt +F: drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c + ARM/SOCFPGA EDAC BINDINGS M: Matthew Gerlach <matthew.gerlach@altera.com> S: Maintained @@ -3867,8 +3873,9 @@ AUXILIARY BUS DRIVER M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> R: Dave Ertman <david.m.ertman@intel.com> R: Ira Weiny <ira.weiny@intel.com> +R: Leon Romanovsky <leon@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h @@ -7227,7 +7234,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: "Rafael J. 
Wysocki" <rafael@kernel.org> M: Danilo Krummrich <dakr@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ @@ -10457,14 +10464,20 @@ S: Supported F: drivers/infiniband/hw/hfi1 HFS FILESYSTEM +M: Viacheslav Dubeyko <slava@dubeyko.com> +M: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> +M: Yangtao Li <frank.li@vivo.com> L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfs.rst F: fs/hfs/ HFSPLUS FILESYSTEM +M: Viacheslav Dubeyko <slava@dubeyko.com> +M: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de> +M: Yangtao Li <frank.li@vivo.com> L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfsplus.rst F: fs/hfsplus/ @@ -13112,7 +13125,7 @@ KERNFS M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: Tejun Heo <tj@kernel.org> S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: fs/kernfs/ F: include/linux/kernfs.h @@ -16812,6 +16825,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: drivers/ptp/ +F: drivers/s390/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h @@ -16821,6 +16835,7 @@ F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h +F: include/linux/ism.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h @@ -18689,7 +18704,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi <lpieralisi@kernel.org> M: Krzysztof Wilczyński <kw@linux.com> -R: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> +M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> R: Rob Herring <robh@kernel.org> L: linux-pci@vger.kernel.org S: Supported @@ -18742,6 +18757,16 @@ F: include/asm-generic/pci* F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* + +PCI SUBSYSTEM [RUST] +M: Danilo Krummrich <dakr@kernel.org> +R: Bjorn Helgaas <bhelgaas@google.com> +R: Krzysztof Wilczyński <kwilczynski@kernel.org> +L: linux-pci@vger.kernel.org +S: Maintained +C: irc://irc.oftc.net/linux-pci +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git +F: rust/helpers/pci.c F: rust/kernel/pci.rs F: samples/rust/rust_driver_pci.rs @@ -21312,6 +21337,7 @@ L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported F: drivers/s390/net/ +F: include/linux/ism.h S390 PCI SUBSYSTEM M: Niklas Schnelle <schnelle@linux.ibm.com> @@ -25184,9 +25210,13 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS +M: Badhri Jagan Sridharan <badhri@google.com> L: linux-usb@vger.kernel.org -S: Orphan -F: drivers/usb/typec/tcpm/ +S: Maintained +F: drivers/usb/typec/tcpm/tcpci.c +F: drivers/usb/typec/tcpm/tcpm.c +F: include/linux/usb/tcpci.h +F: include/linux/usb/tcpm.h USB TYPEC TUSB1046 MUX DRIVER M: Romain Gantois <romain.gantois@bootlin.com> @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1053,11 +1053,11 @@ NOSTDINC_FLAGS += -nostdinc KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) #Currently, disable 
-Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) -#Currently, disable -Wunterminated-string-initialization as an error -KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) +#Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a6..08ba91e6fb03 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994b..6e8aa8e72601 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. - */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f705750..6b0ad5070d3e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f279..2004b4f41ade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b..c6650cfe706c 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Eratum 27456 detection, which makes it + * impossible to use non-global mappings. 
+ */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95fee..19ca57def629 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 067c0b994648..1a2cf012b8f2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -73,6 +73,7 @@ config LOONGARCH select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_BPF_JIT diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 3177674228f8..45514f314664 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,22 +22,29 @@ struct sigcontext; #define kernel_fpu_available() cpu_has_fpu -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); - -extern void _init_fpu(unsigned int); -extern void _save_fp(struct loongarch_fpu *); -extern void _restore_fp(struct loongarch_fpu *); - -extern void _save_lsx(struct loongarch_fpu *fpu); -extern void _restore_lsx(struct loongarch_fpu *fpu); -extern void _init_lsx_upper(void); -extern void _restore_lsx_upper(struct loongarch_fpu *fpu); - -extern void _save_lasx(struct loongarch_fpu *fpu); -extern void _restore_lasx(struct loongarch_fpu *fpu); -extern void _init_lasx_upper(void); -extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +asmlinkage void _init_fpu(unsigned int); +asmlinkage void _save_fp(struct loongarch_fpu *); +asmlinkage void _restore_fp(struct loongarch_fpu *); +asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +asmlinkage void _save_lsx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lsx(struct loongarch_fpu *fpu); +asmlinkage void _init_lsx_upper(void); +asmlinkage void _restore_lsx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); + +asmlinkage void _save_lasx(struct loongarch_fpu *fpu); +asmlinkage void 
_restore_lasx(struct loongarch_fpu *fpu); +asmlinkage void _init_lasx_upper(void); +asmlinkage void _restore_lasx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); static inline void enable_lsx(void); static inline void disable_lsx(void); diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h index e671978bf552..38566574e562 100644 --- a/arch/loongarch/include/asm/lbt.h +++ b/arch/loongarch/include/asm/lbt.h @@ -12,9 +12,13 @@ #include <asm/loongarch.h> #include <asm/processor.h> -extern void _init_lbt(void); -extern void _save_lbt(struct loongarch_lbt *); -extern void _restore_lbt(struct loongarch_lbt *); +asmlinkage void _init_lbt(void); +asmlinkage void _save_lbt(struct loongarch_lbt *); +asmlinkage void _restore_lbt(struct loongarch_lbt *); +asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _save_ftop_context(void __user *ftop); +asmlinkage int _restore_ftop_context(void __user *ftop); static inline int is_lbt_enabled(void) { diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index f3ddaed9ef7f..a5b63c84f854 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -33,9 +33,9 @@ struct pt_regs { unsigned long __last[]; } __aligned(8); -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { - return arch_irqs_disabled_flags(regs->csr_prmd); + return !(regs->csr_prmd & CSR_PRMD_PIE); } static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4853e8b04c6f..f9dcaa60033d 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -21,10 +21,10 @@ obj-$(CONFIG_CPU_HAS_LBT) += lbt.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o -CFLAGS_module.o += $(call cc-option,-Wno-override-init,) -CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) -CFLAGS_traps.o += $(call cc-option,-Wno-override-init,) -CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) +CFLAGS_module.o += $(call cc-disable-warning, override-init) +CFLAGS_syscall.o += $(call cc-disable-warning, override-init) +CFLAGS_traps.o += $(call cc-disable-warning, override-init) +CFLAGS_perf_event.o += $(call cc-disable-warning, override-init) ifdef CONFIG_FUNCTION_TRACER ifndef CONFIG_DYNAMIC_FTRACE diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 6ab640101457..28caf416ae36 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -458,6 +458,7 @@ SYM_FUNC_START(_save_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_fp_context) +EXPORT_SYMBOL_GPL(_save_fp_context) /* * a0: fpregs @@ -471,6 +472,7 @@ SYM_FUNC_START(_restore_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_fp_context) +EXPORT_SYMBOL_GPL(_restore_fp_context) /* * a0: fpregs @@ -484,6 +486,7 @@ SYM_FUNC_START(_save_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lsx_context) +EXPORT_SYMBOL_GPL(_save_lsx_context) /* * a0: fpregs @@ -497,6 +500,7 @@ SYM_FUNC_START(_restore_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lsx_context) +EXPORT_SYMBOL_GPL(_restore_lsx_context) /* * a0: fpregs @@ 
-510,6 +514,7 @@ SYM_FUNC_START(_save_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lasx_context) +EXPORT_SYMBOL_GPL(_save_lasx_context) /* * a0: fpregs @@ -523,6 +528,7 @@ SYM_FUNC_START(_restore_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lasx_context) +EXPORT_SYMBOL_GPL(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 001f061d226a..71678912d24c 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -90,6 +90,7 @@ SYM_FUNC_START(_save_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lbt_context) +EXPORT_SYMBOL_GPL(_save_lbt_context) /* * a0: scr @@ -110,6 +111,7 @@ SYM_FUNC_START(_restore_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lbt_context) +EXPORT_SYMBOL_GPL(_restore_lbt_context) /* * a0: ftop @@ -120,6 +122,7 @@ SYM_FUNC_START(_save_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_ftop_context) +EXPORT_SYMBOL_GPL(_save_ftop_context) /* * a0: ftop @@ -150,6 +153,7 @@ SYM_FUNC_START(_restore_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_ftop_context) +EXPORT_SYMBOL_GPL(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7a555b600171..4740cb5b2388 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -51,27 +51,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -/* Assembly functions to move context to/from the FPU */ -extern asmlinkage int -_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); - -#ifdef CONFIG_CPU_HAS_LBT -extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _save_ftop_context(void __user *ftop); -extern asmlinkage int _restore_ftop_context(void __user *ftop); -#endif - struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 2ec3106c0da3..47fc2de6d150 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); #else + bool pie = regs_irqs_disabled(regs); unsigned int *pc; - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); @@ -582,7 +583,7 @@ sigbus: die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); #endif irqentry_exit(regs, state); @@ -621,12 +622,13 @@ static void 
bug_handler(struct pt_regs *regs) asmlinkage void noinstr do_bce(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned long era = exception_era(regs); u64 badv = 0, lower = 0, upper = ULONG_MAX; union loongarch_instruction insn; irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); current->thread.trap_nr = read_csr_excode(); @@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -710,11 +712,12 @@ bad_era: asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned int opcode, bcode; unsigned long era = exception_era(regs); irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (__get_inst(&opcode, (u32 *)era, user)) @@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) } out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -1015,6 +1018,7 @@ static void init_restore_lbt(void) asmlinkage void noinstr do_lbt(struct pt_regs *regs) { + bool pie = regs_irqs_disabled(regs); irqentry_state_t state = irqentry_enter(regs); /* @@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) * (including the user using 'MOVGR2GCSR' to turn on TM, which * will not trigger the BTE), we need to check PRMD first. */ - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (!cpu_has_lbt) { @@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) preempt_enable(); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index f4c8e35c216a..cb41d9265662 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -21,4 +21,4 @@ kvm-y += intc/eiointc.o kvm-y += intc/pch_pic.o kvm-y += irqfd.o -CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) +CFLAGS_exit.o += $(call cc-disable-warning, override-init) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 93f4acd44523..fe734dc062ed 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -111,7 +111,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { - kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); return ret; } /* Construct the mask by scanning the bit 27-30 */ @@ -127,7 +127,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) - kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); return ret; } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index d165cd38c6bb..80ea63d465b8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -296,10 +296,10 @@ int kvm_arch_enable_virtualization_cpu(void) /* * 
Enable virtualization features granting guest direct control of * certain features: - * GCI=2: Trap on init or unimplement cache instruction. + * GCI=2: Trap on init or unimplemented cache instruction. * TORU=0: Trap on Root Unimplement. * CACTRL=1: Root control cache. - * TOP=0: Trap on Previlege. + * TOP=0: Trap on Privilege. * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8e427b379661..5af32ec62cb1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -294,6 +294,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { + kvm_lose_pmu(vcpu); /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -902,6 +903,13 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); + + /* + * When vCPU reset, clear the ESTAT and GINTC registers + * Other CSR registers are cleared with function _kvm_setcsr(). + */ + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0); + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0); break; default: ret = -EINVAL; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index e4068906143b..cea84d7f2b91 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return (pte_t *) pmd; + return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index fdb7f73ad160..06f11d9e4ec1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -65,9 +65,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h index 984c331ff5f4..0e60af486ec1 100644 --- a/arch/openrisc/include/asm/cacheflush.h +++ b/arch/openrisc/include/asm/cacheflush.h @@ -23,6 +23,9 @@ */ extern void local_dcache_page_flush(struct page *page); extern void local_icache_page_inv(struct page *page); +extern void local_dcache_range_flush(unsigned long start, unsigned long end); +extern void local_dcache_range_inv(unsigned long start, unsigned long end); +extern void local_icache_range_inv(unsigned long start, unsigned long end); /* * Data cache flushing always happen on the local cpu. Instruction cache @@ -39,6 +42,20 @@ extern void smp_icache_page_inv(struct page *page); #endif /* CONFIG_SMP */ /* + * Even if the actual block size is larger than L1_CACHE_BYTES, paddr + * can be incremented by L1_CACHE_BYTES. When paddr is written to the + * invalidate register, the entire cache line encompassing this address + * is invalidated. Each subsequent reference to the same cache line will + * not affect the invalidation process. 
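The comment above states the invariant behind the new range helpers: the flush/invalidate SPRs act on whole cache lines, so stepping by L1_CACHE_BYTES is correct even when the hardware block size is larger. A simplified restatement, illustration only — the patch's real loop is cache_loop() in arch/openrisc/mm/cache.c further below:

/* Sketch: flush every dcache line overlapping [start, end). */
static void dcache_flush_range_sketch(unsigned long start, unsigned long end)
{
	unsigned long paddr = start & ~(L1_CACHE_BYTES - 1);

	while (paddr < end) {
		mtspr(SPR_DCBFR, paddr);	/* operates on the whole line */
		paddr += L1_CACHE_BYTES;	/* a larger real block size only
						   makes some iterations redundant */
	}
}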
+ */ +#define local_dcache_block_flush(addr) \ + local_dcache_range_flush(addr, addr + L1_CACHE_BYTES) +#define local_dcache_block_inv(addr) \ + local_dcache_range_inv(addr, addr + L1_CACHE_BYTES) +#define local_icache_block_inv(addr) \ + local_icache_range_inv(addr, addr + L1_CACHE_BYTES) + +/* * Synchronizes caches. Whenever a cpu writes executable code to memory, this * should be called to make sure the processor sees the newly written code. */ diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 5e4744153d0e..3cfc4cf0b019 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -15,16 +15,21 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H +#include <asm/spr.h> +#include <asm/spr_defs.h> + +struct cache_desc { + u32 size; + u32 sets; + u32 block_size; + u32 ways; +}; + struct cpuinfo_or1k { u32 clock_frequency; - u32 icache_size; - u32 icache_block_size; - u32 icache_ways; - - u32 dcache_size; - u32 dcache_block_size; - u32 dcache_ways; + struct cache_desc icache; + struct cache_desc dcache; u16 coreid; }; @@ -32,4 +37,9 @@ struct cpuinfo_or1k { extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; extern void setup_cpuinfo(void); +/* + * Check if the cache component exists. + */ +extern bool cpu_cache_is_present(const unsigned int cache_type); + #endif /* __ASM_OPENRISC_CPUINFO_H */ diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 79129161f3e0..e4c7d9bdd598 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := vmlinux.lds obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ - sys_call_table.o unwinder.o + sys_call_table.o unwinder.o cacheinfo.o obj-$(CONFIG_SMP) += smp.o sync-timer.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c new file mode 100644 index 000000000000..61230545e4ff --- /dev/null +++ b/arch/openrisc/kernel/cacheinfo.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OpenRISC cacheinfo support + * + * Based on work done for MIPS and LoongArch. All original copyrights + * apply as per the original source declaration. + * + * OpenRISC implementation: + * Copyright (C) 2025 Sahil Siddiq <sahilcdq@proton.me> + */ + +#include <linux/cacheinfo.h> +#include <asm/cpuinfo.h> +#include <asm/spr.h> +#include <asm/spr_defs.h> + +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, struct cache_desc *cache, int cpu) +{ + this_leaf->type = type; + this_leaf->level = level; + this_leaf->coherency_line_size = cache->block_size; + this_leaf->number_of_sets = cache->sets; + this_leaf->ways_of_associativity = cache->ways; + this_leaf->size = cache->size; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); +} + +int init_cache_level(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int leaves = 0, levels = 0; + unsigned long upr = mfspr(SPR_UPR); + unsigned long iccfgr, dccfgr; + + if (!(upr & SPR_UPR_UP)) { + printk(KERN_INFO + "-- no UPR register... 
unable to detect configuration\n"); + return -ENOENT; + } + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + dccfgr = mfspr(SPR_DCCFGR); + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache.size = + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; + leaves += 1; + printk(KERN_INFO + "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->dcache.size, cpuinfo->dcache.block_size, + cpuinfo->dcache.sets, cpuinfo->dcache.ways); + } else + printk(KERN_INFO "-- dcache disabled\n"); + + if (cpu_cache_is_present(SPR_UPR_ICP)) { + iccfgr = mfspr(SPR_ICCFGR); + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache.size = + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; + leaves += 1; + printk(KERN_INFO + "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->icache.size, cpuinfo->icache.block_size, + cpuinfo->icache.sets, cpuinfo->icache.ways); + } else + printk(KERN_INFO "-- icache disabled\n"); + + if (!leaves) + return -ENOENT; + + levels = 1; + + this_cpu_ci->num_leaves = leaves; + this_cpu_ci->num_levels = levels; + + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + int level = 1; + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu); + this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ? 
+ CACHE_WRITE_BACK : CACHE_WRITE_THROUGH; + this_leaf++; + } + + if (cpu_cache_is_present(SPR_UPR_ICP)) + ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu); + + this_cpu_ci->cpu_map_populated = true; + + return 0; +} diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index b3edbb33b621..3a7b5baaa450 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -17,6 +17,7 @@ #include <linux/pagewalk.h> #include <asm/cpuinfo.h> +#include <asm/cacheflush.h> #include <asm/spr_defs.h> #include <asm/tlbflush.h> @@ -24,9 +25,6 @@ static int page_set_nocache(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - pte_val(*pte) |= _PAGE_CI; /* @@ -36,8 +34,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(__pa(addr), __pa(next)); return 0; } @@ -98,21 +95,14 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) void arch_sync_dma_for_device(phys_addr_t addr, size_t size, enum dma_data_direction dir) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - switch (dir) { case DMA_TO_DEVICE: /* Flush the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(addr, addr + size); break; case DMA_FROM_DEVICE: /* Invalidate the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBIR, cl); + local_dcache_range_inv(addr, addr + size); break; default: /* diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index be56eaafc8b9..a9fb9cc6779e 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -113,21 +113,6 @@ static void print_cpuinfo(void) return; } - if (upr & SPR_UPR_DCP) - printk(KERN_INFO - "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->dcache_size, cpuinfo->dcache_block_size, - cpuinfo->dcache_ways); - else - printk(KERN_INFO "-- dcache disabled\n"); - if (upr & SPR_UPR_ICP) - printk(KERN_INFO - "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->icache_size, cpuinfo->icache_block_size, - cpuinfo->icache_ways); - else - printk(KERN_INFO "-- icache disabled\n"); - if (upr & SPR_UPR_DMP) printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), @@ -155,8 +140,6 @@ static void print_cpuinfo(void) void __init setup_cpuinfo(void) { struct device_node *cpu; - unsigned long iccfgr, dccfgr; - unsigned long cache_set_size; int cpu_id = smp_processor_id(); struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; @@ -164,20 +147,6 @@ void __init setup_cpuinfo(void) if (!cpu) panic("Couldn't find CPU%d in device tree...\n", cpu_id); - iccfgr = mfspr(SPR_ICCFGR); - cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); - cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo->icache_size = - cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size; - - dccfgr = mfspr(SPR_DCCFGR); - cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); - cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - 
cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo->dcache_size = - cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size; - if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo->clock_frequency)) { printk(KERN_WARNING @@ -294,14 +263,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int vr, cpucfgr; unsigned int avr; unsigned int version; +#ifdef CONFIG_SMP struct cpuinfo_or1k *cpuinfo = v; + seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); +#endif vr = mfspr(SPR_VR); cpucfgr = mfspr(SPR_CPUCFGR); -#ifdef CONFIG_SMP - seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); -#endif if (vr & SPR_VR_UVRP) { vr = mfspr(SPR_VR2); version = vr & SPR_VR2_VER; @@ -320,14 +289,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); } seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size); - seq_printf(m, "dcache block size\t: %d bytes\n", - cpuinfo->dcache_block_size); - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways); - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size); - seq_printf(m, "icache block size\t: %d bytes\n", - cpuinfo->icache_block_size); - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways); seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index eb43b73f3855..0f265b8e73ec 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -14,31 +14,70 @@ #include <asm/spr_defs.h> #include <asm/cache.h> #include <asm/cacheflush.h> +#include <asm/cpuinfo.h> #include <asm/tlbflush.h> -static __always_inline void cache_loop(struct page *page, const unsigned int reg) +/* + * Check if the cache component exists. 
+ */ +bool cpu_cache_is_present(const unsigned int cache_type) { - unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; - unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); + unsigned long upr = mfspr(SPR_UPR); + unsigned long mask = SPR_UPR_UP | cache_type; + + return !((upr & mask) ^ mask); +} + +static __always_inline void cache_loop(unsigned long paddr, unsigned long end, + const unsigned short reg, const unsigned int cache_type) +{ + if (!cpu_cache_is_present(cache_type)) + return; - while (line < paddr + PAGE_SIZE) { - mtspr(reg, line); - line += L1_CACHE_BYTES; + while (paddr < end) { + mtspr(reg, paddr); + paddr += L1_CACHE_BYTES; } } +static __always_inline void cache_loop_page(struct page *page, const unsigned short reg, + const unsigned int cache_type) +{ + unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; + unsigned long end = paddr + PAGE_SIZE; + + paddr &= ~(L1_CACHE_BYTES - 1); + + cache_loop(paddr, end, reg, cache_type); +} + void local_dcache_page_flush(struct page *page) { - cache_loop(page, SPR_DCBFR); + cache_loop_page(page, SPR_DCBFR, SPR_UPR_DCP); } EXPORT_SYMBOL(local_dcache_page_flush); void local_icache_page_inv(struct page *page) { - cache_loop(page, SPR_ICBIR); + cache_loop_page(page, SPR_ICBIR, SPR_UPR_ICP); } EXPORT_SYMBOL(local_icache_page_inv); +void local_dcache_range_flush(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBFR, SPR_UPR_DCP); +} + +void local_dcache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBIR, SPR_UPR_DCP); +} + +void local_icache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_ICBIR, SPR_UPR_ICP); +} + void update_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { @@ -58,4 +97,3 @@ void update_cache(struct vm_area_struct *vma, unsigned long address, sync_icache_dcache(folio_page(folio, nr)); } } - diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index be1c2eb8bb94..e4904ca6f0a0 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -35,6 +35,7 @@ #include <asm/fixmap.h> #include <asm/tlbflush.h> #include <asm/sections.h> +#include <asm/cacheflush.h> int mem_init_done; @@ -176,8 +177,8 @@ void __init paging_init(void) barrier(); /* Invalidate instruction caches after code modification */ - mtspr(SPR_ICBIR, 0x900); - mtspr(SPR_ICBIR, 0xa00); + local_icache_block_inv(0x900); + local_icache_block_inv(0xa00); /* New TLB miss handlers and kernel page tables are in now place. * Make sure that page flags get updated for all pages in TLB by diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1db60fe13802..3d8dc822282a 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! 
ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 34a5aec4908f..126bf3b06ab7 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -258,10 +258,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, break; } } - if (i == hdr->e_shnum) { - pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); - return -ENOEXEC; - } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 311e2112d782..9f764bc42b8c 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -976,7 +976,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, return 0; } - +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { if (radix_enabled()) @@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) return false; } +#endif int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) @@ -1120,6 +1121,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decided whether + * we should use altmap or RAM based backing memory allocation. Also + * the address need to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here beacause + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1145,8 +1159,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. 
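The reworked condition above means a PMD-sized vmemmap mapping is attempted only when the start address is PMD_SIZE aligned, and an altmap allocation is used only when it stays inside the device. A condensed restatement of that policy, assuming the file's local altmap_cross_boundary() helper — a sketch, not the kernel function itself:

/* Sketch of the mapping-size decision in radix__vmemmap_populate(). */
static bool use_pmd_mapping(unsigned long addr, struct vmem_altmap *altmap)
{
	if (!IS_ALIGNED(addr, PMD_SIZE))
		return false;	/* fall back to base-page mappings */
	if (altmap && altmap_cross_boundary(altmap, addr, PMD_SIZE))
		return false;	/* altmap block would leak past the device */
	return true;
}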
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 3fbe0295ce14..95d7ba73d43d 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,7 +17,7 @@ config PPC_POWERNV select MMU_NOTIFIER select FORCE_SMP select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a934c2a262f6..fa3c2fff082a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,7 +23,7 @@ config PPC_PSERIES select FORCE_SMP select SWIOTLB select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config PARAVIRT diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa3..b59ffeb668d6 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) flush_dcache_folio(page_folio(page)); } -/* - * RISC-V doesn't have an instruction to flush parts of the instruction cache, - * so instead we just flush the whole thing. - */ -#define flush_icache_range(start, end) flush_icache_all() #define flush_icache_user_page(vma, pg, addr, len) \ do { \ if (vma->vm_flags & VM_EXEC) \ @@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range flush_icache_range +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + flush_icache_all(); +} + extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; void riscv_init_cbo_blocksizes(void); diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8d186bfced45..f7480c9c6f8d 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,8 +9,8 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif -CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) -CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_table.o += $(call cc-disable-warning, override-init) +CFLAGS_compat_syscall_table.o += $(call cc-disable-warning, override-init) ifdef CONFIG_KEXEC_CORE AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77..cc15f7ca6cc1 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. 
If - * architecture needs to do something different it can define - * its own version of the function. - */ - flush_dcache_page(page); } diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 81f55da81967..640fcac3af74 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -59,7 +59,7 @@ KBUILD_CFLAGS += $(CONFIG_CC_IMPLICIT_FALLTHROUGH) $(obj)/bzImage: asflags-y := $(SVGA_MODE) quiet_cmd_image = BUILD $@ - cmd_image = cp $< $@; truncate -s %4K $@; cat $(obj)/vmlinux.bin >>$@ + cmd_image = (dd if=$< bs=4k conv=sync status=none; cat $(filter-out $<,$(real-prereqs))) >$@ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin FORCE $(call if_changed,image) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6866cc5acb0b..3a4f031d2f44 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -629,7 +629,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3bdae454a959..7bc174a1f1cb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include <asm/mtrr.h> #include <asm/msr-index.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> #include <asm/reboot.h> @@ -2423,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a33147520044..c88691b15f3c 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,8 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +#include <asm/cpufeature.h> + #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include <asm-generic/pgalloc.h> @@ -29,16 +31,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9d8dd8deb2a7..9920122018a0 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1299,6 +1299,14 @@ void __init e820__memblock_setup(void) memblock_add(entry->addr, entry->size); } + /* + * 32-bit systems are limited to 4GB of memory even with HIGHMEM and + * to even less without it. + * Discard memory after max_pfn - the actual limit detected at runtime. + */ + if (IS_ENABLED(CONFIG_X86_32)) + memblock_remove(PFN_PHYS(max_pfn), -1); + /* Throw away partial pages: */ memblock_trim_memory(PAGE_SIZE); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 80265162aeff..1f325304c4a8 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -42,7 +42,7 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { - free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)image->arch.pgd, pgd_allocation_order()); image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); @@ -59,7 +59,7 @@ static void machine_kexec_free_page_tables(struct kimage *image) static int machine_kexec_alloc_page_tables(struct kimage *image) { image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PGD_ALLOCATION_ORDER); + pgd_allocation_order()); #ifdef CONFIG_X86_PAE image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 65fd245a9953..7338879d1c0c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) struct amd_svm_iommu_ir *ir; u64 entry; + if (WARN_ON_ONCE(!pi->ir_data)) + return -EINVAL; + /** * In some cases, the existing irte is updated and re-set, * so we need to check here if it's already been added * to the ir_list. */ - if (pi->ir_data && (pi->prev_ga_tag != 0)) { + if (pi->prev_ga_tag) { struct kvm *kvm = svm->vcpu.kvm; u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); @@ -820,7 +823,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * added to the per-vcpu ir_list.
*/ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,10 +899,10 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) return 0; pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", @@ -933,6 +936,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +956,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +971,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ccda95e53f62..ba736cbb0587 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -11,6 +11,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#ifdef CREATE_TRACE_POINTS +#define tracing_kvm_rip_read(vcpu) ({ \ + typeof(vcpu) __vcpu = vcpu; \ + __vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \ + }) +#endif + /* * Tracepoint for guest mode entry. 
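tracing_kvm_rip_read() exists because the guest RIP is not readable when a vCPU's register state is protected (vcpu->arch.guest_state_protected, as set for SEV-ES-style guests), so the tracepoints report 0 instead of calling kvm_rip_read(). Reduced to a plain function, the guard is simply:

/* Illustrative sketch only: trace a guest register when the host can
 * actually see it; protected-state guests trace as 0. */
static inline unsigned long traced_rip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(vcpu);
}

The macro form under #ifdef CREATE_TRACE_POINTS keeps the helper confined to the one expansion of this header that instantiates the tracepoints, since trace headers are deliberately included multiple times; the vcpu_enter_guest() hunk below applies the same check to the KVM profiling path.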
*/ @@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->rip = kvm_rip_read(vcpu); + __entry->rip = tracing_kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, @@ -319,7 +326,7 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->guest_rip = kvm_rip_read(vcpu); \ + __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ @@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 51116fe69a50..d70e5b90087d 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -335,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -357,11 +345,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + enable_remapped_mode = false; + + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -369,6 +358,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3712dde0bf9d..df5b99ea1f18 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11098,7 +11098,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * Profile KVM exit RIPs: */ - if (unlikely(prof_on == KVM_PROFILING)) { + if (unlikely(prof_on == KVM_PROFILING && + !vcpu->arch.guest_state_protected)) { unsigned long rip = kvm_rip_read(vcpu); profile_hit(KVM_PROFILING, (void *)rip); } @@ -13556,25 +13557,27 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_arch_has_irq_bypass(void) -{ - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; 
kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13584,9 +13587,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When the producer of a consumer is unregistered, we change back to @@ -13594,12 +13597,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * in this case) doesn't want to receive the interrupts. */ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } @@ -13612,7 +13621,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a05fcddfc811..f7ae44d3dd9e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -360,7 +360,7 @@ static inline pgd_t *_pgd_alloc(struct mm_struct *mm) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); /* * Now the PAE kernel is not running as a Xen domain.
We can allocate @@ -380,7 +380,7 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac57259a432b..a4b4ebd41b8f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -73,7 +73,7 @@ int __init efi_alloc_page_tables(void) gfp_t gfp_mask; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); if (!efi_pgd) goto fail; @@ -96,7 +96,7 @@ free_p4d: if (pgtable_l5_enabled()) free_page((unsigned long)pgd_page_vaddr(*pgd)); free_pgd: - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); fail: return -ENOMEM; } diff --git a/block/bdev.c b/block/bdev.c index 6a34179192c9..889ec6e002d7 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev) get_order(bsize)); } -int set_blocksize(struct file *file, int size) +/** + * bdev_validate_blocksize - check that this block size is acceptable + * @bdev: blockdevice to check + * @block_size: block size to check + * + * For block device users that do not use buffer heads or the block device + * page cache, make sure that this block size can be used with the device. + * + * Return: On success zero is returned, negative error code on failure. + */ +int bdev_validate_blocksize(struct block_device *bdev, int block_size) { - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = I_BDEV(inode); - - if (blk_validate_block_size(size)) + if (blk_validate_block_size(block_size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_logical_block_size(bdev)) + if (block_size < bdev_logical_block_size(bdev)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(bdev_validate_blocksize); + +int set_blocksize(struct file *file, int size) +{ + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = I_BDEV(inode); + int ret; + + ret = bdev_validate_blocksize(bdev, size); + if (ret) + return ret; + if (!file->private_data) return -EINVAL; /* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { + /* + * Flush and truncate the pagecache before we reconfigure the + * mapping geometry because folio sizes are variable now. If a + * reader has already allocated a folio whose size is smaller + * than the new min_order but invokes readahead after the new + * min_order becomes visible, readahead will think there are + * "zero" blocks per folio and crash. Take the inode and + * invalidation locks to avoid racing with + * read/write/fallocate. 
+ */ + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + sync_blockdev(bdev); + kill_bdev(bdev); + inode->i_blkbits = blksize_bits(size); mapping_set_folio_min_order(inode->i_mapping, get_order(size)); kill_bdev(bdev); + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); } return 0; } @@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part) blkdev_put_whole(whole); } -struct block_device *blkdev_get_no_open(dev_t dev) +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) { struct block_device *bdev; struct inode *inode; inode = ilookup(blockdev_superblock, dev); - if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { + if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); if (inode) @@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) return ERR_PTR(ret); - bdev = blkdev_get_no_open(dev); + bdev = blkdev_get_no_open(dev, true); if (!bdev) return ERR_PTR(-ENXIO); @@ -1274,18 +1312,15 @@ void sync_bdevs(bool wait) */ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask) { - struct inode *backing_inode; struct block_device *bdev; - backing_inode = d_backing_inode(path->dentry); - /* - * Note that backing_inode is the inode of a block device node file, - * not the block device's internal inode. Therefore it is *not* valid - * to use I_BDEV() here; the block device has to be looked up by i_rdev + * Note that d_backing_inode() returns the block device node inode, not + * the block device's internal inode. Therefore it is *not* valid to + * use I_BDEV() here; the block device has to be looked up by i_rdev * instead. */ - bdev = blkdev_get_no_open(backing_inode->i_rdev); + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); if (!bdev) return; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5905f277057b..ce93706555c5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) return -EINVAL; input = skip_spaces(input); - bdev = blkdev_get_no_open(MKDEV(major, minor)); + bdev = blkdev_get_no_open(MKDEV(major, minor), false); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 6b2dbe645d23..4817e7ca03f8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, /* * For read-ahead of large files to be effective, we need to read ahead * at least twice the optimal I/O size. + * + * There is no hardware limitation for the read-ahead size and the user + * might have increased the read-ahead size through sysfs, so don't ever + * decrease it. */ - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + bdi->ra_pages = max3(bdi->ra_pages, + lim->io_opt * 2 / PAGE_SIZE, + VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c77244a35c9..8f15d1aa6eb8 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. 
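The new bdev_validate_blocksize() export splits the pure validation step out of set_blocksize() for callers that manage their own caching and only need to know whether a size is usable at all. A hypothetical caller, with invented names, might look like:

#include <linux/blkdev.h>

/* Check a block size for a device driven without buffer heads or the
 * bdev page cache (sketch only). */
static int mydrv_check_block_size(struct block_device *bdev, int bs)
{
	int ret;

	/* rejects sizes outside the generic limits as well as anything
	 * smaller than the device's logical block size */
	ret = bdev_validate_blocksize(bdev, bs);
	if (ret)
		return ret;

	/* bs is acceptable for this device */
	return 0;
}

set_blocksize() itself now calls the same helper first and only then takes i_rwsem plus the invalidate_lock before flipping i_blkbits, for the reason spelled out in its new comment.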
*/ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) @@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: - if (cmd == BLKRESETZONE) + if (cmd == BLKRESETZONE) { filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + } return ret; } diff --git a/block/blk.h b/block/blk.h index 006e3be433d2..328075787814 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done) wait_for_completion_io(done); } +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); +void blkdev_put_no_open(struct block_device *bdev); + #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); diff --git a/block/fops.c b/block/fops.c index be9f1dbea9ce..82b672d15ea4 100644 --- a/block/fops.c +++ b/block/fops.c @@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (ret) return ret; - bdev = blkdev_get_no_open(inode->i_rdev); + bdev = blkdev_get_no_open(inode->i_rdev, true); if (!bdev) return -ENXIO; @@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = direct_write_fallback(iocb, from, ret, blkdev_buffered_write(iocb, from)); } else { + /* + * Take i_rwsem and invalidate_lock to avoid racing with + * set_blocksize changing i_blkbits/folio order and punching + * out the pagecache. + */ + inode_lock_shared(bd_inode); ret = blkdev_buffered_write(iocb, from); + inode_unlock_shared(bd_inode); } if (ret > 0) @@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); loff_t size = bdev_nr_bytes(bdev); loff_t pos = iocb->ki_pos; @@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; } + /* + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize + * changing i_blkbits/folio order and punching out the pagecache. 
+ */ + inode_lock_shared(bd_inode); ret = filemap_read(iocb, to, ret); + inode_unlock_shared(bd_inode); reexpand: if (unlikely(shorted)) @@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); /* @@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, fail: filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); return error; } diff --git a/block/ioctl.c b/block/ioctl.c index faa40f383e27..e472cc1030c6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (err) return err; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) @@ -174,6 +175,7 @@ out_unplug: blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) @@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } diff --git a/crypto/scompress.c b/crypto/scompress.c index 5762fcc63b51..36934c78d127 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -215,8 +215,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) spage = nth_page(spage, soff / PAGE_SIZE); soff = offset_in_page(soff); - n = slen / PAGE_SIZE; - n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE; + n = (slen - 1) / PAGE_SIZE; + n += (offset_in_page(slen - 1) + soff) / PAGE_SIZE; if (PageHighMem(nth_page(spage, n)) && size_add(soff, slen) > PAGE_SIZE) break; @@ -243,9 +243,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) dpage = nth_page(dpage, doff / PAGE_SIZE); doff = offset_in_page(doff); - n = dlen / PAGE_SIZE; - n += (offset_in_page(dlen) + doff - 1) / PAGE_SIZE; - if (PageHighMem(dpage + n) && + n = (dlen - 1) / PAGE_SIZE; + n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE; + if (PageHighMem(nth_page(dpage, n)) && size_add(doff, dlen) > PAGE_SIZE) break; dst = kmap_local_page(dpage) + doff; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index abd609d4c8ef..82977ea25db3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -58,9 +58,6 @@ module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); #endif -/* Multibuffer is unlimited. Set arbitrary limit for testing. 
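The fallocate and ioctl hunks above all grow the same locking bracket: i_rwsem first, then the mapping's invalidate_lock, held across both the page-cache truncation and the device operation, so that a racing set_blocksize() (which takes both locks exclusively) cannot change i_blkbits or the folio order mid-operation. The skeleton of that bracket, with the error handling trimmed:

/* Sketch of the pattern shared by the discard/secure-erase/zeroout
 * paths above; not a drop-in replacement for any of them. */
static int punch_bdev_range(struct block_device *bdev, blk_mode_t mode,
			    loff_t start, loff_t end)
{
	struct inode *inode = bdev->bd_mapping->host;
	int err;

	inode_lock(inode);				/* i_rwsem */
	filemap_invalidate_lock(bdev->bd_mapping);	/* vs. page faults */

	err = truncate_bdev_range(bdev, mode, start, end);
	if (!err) {
		/* ... issue the discard/zeroout while still locked ... */
	}

	filemap_invalidate_unlock(bdev->bd_mapping);
	inode_unlock(inode);
	return err;
}

Readers only need the shared flavour, which is why blkdev_read_iter() and the buffered-write fallback take inode_lock_shared() around filemap_read() and blkdev_buffered_write().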
*/ -#define MAX_MB_MSGS 16 - #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ @@ -3329,48 +3326,27 @@ static int test_acomp(struct crypto_acomp *tfm, int ctcount, int dtcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); - struct scatterlist *src = NULL, *dst = NULL; - struct acomp_req *reqs[MAX_MB_MSGS] = {}; - char *decomp_out[MAX_MB_MSGS] = {}; - char *output[MAX_MB_MSGS] = {}; - struct crypto_wait wait; - struct acomp_req *req; - int ret = -ENOMEM; unsigned int i; + char *output, *decomp_out; + int ret; + struct scatterlist src, dst; + struct acomp_req *req; + struct crypto_wait wait; - src = kmalloc_array(MAX_MB_MSGS, sizeof(*src), GFP_KERNEL); - if (!src) - goto out; - dst = kmalloc_array(MAX_MB_MSGS, sizeof(*dst), GFP_KERNEL); - if (!dst) - goto out; - - for (i = 0; i < MAX_MB_MSGS; i++) { - reqs[i] = acomp_request_alloc(tfm); - if (!reqs[i]) - goto out; - - acomp_request_set_callback(reqs[i], - CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - if (i) - acomp_request_chain(reqs[i], reqs[0]); - - output[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!output[i]) - goto out; + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; - decomp_out[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!decomp_out[i]) - goto out; + decomp_out = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!decomp_out) { + kfree(output); + return -ENOMEM; } for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; void *input_vec; - int j; input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { @@ -3378,61 +3354,70 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(dst + j, output[j], dlen); - acomp_request_set_params(reqs[j], src, dst + j, ilen, dlen); + req = acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + kfree(input_vec); + ret = -ENOMEM; + goto out; } - req = reqs[0]; + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ret = crypto_wait_req(crypto_acomp_compress(req), &wait); if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); kfree(input_vec); + acomp_request_free(req); goto out; } ilen = req->dlen; dlen = COMP_BUF_SIZE; + sg_init_one(&src, output, ilen); + sg_init_one(&dst, decomp_out, dlen); crypto_init_wait(&wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(src + j, output[j], ilen); - sg_init_one(dst + j, decomp_out[j], dlen); - acomp_request_set_params(reqs[j], src + j, dst + j, ilen, dlen); - } - - crypto_wait_req(crypto_acomp_decompress(req), &wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - ret = reqs[j]->base.err; - if (ret) { - pr_err("alg: acomp: compression failed on test %d (%d) for %s: ret=%d\n", - i + 1, j, algo, -ret); - kfree(input_vec); - goto out; - } + acomp_request_set_params(req, &src, &dst, ilen, dlen); - if (reqs[j]->dlen != ctemplate[i].inlen) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s: output len = %d\n", - i + 1, j, algo, reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait); + if (ret) { + pr_err("alg: 
acomp: compression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + kfree(input_vec); + acomp_request_free(req); + goto out; + } - if (memcmp(input_vec, decomp_out[j], reqs[j]->dlen)) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s\n", - i + 1, j, algo); - hexdump(output[j], reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + if (req->dlen != ctemplate[i].inlen) { + pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; + } + + if (memcmp(input_vec, decomp_out, req->dlen)) { + pr_err("alg: acomp: Compression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; } kfree(input_vec); + acomp_request_free(req); } for (i = 0; i < dtcount; i++) { @@ -3446,9 +3431,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); - sg_init_one(dst, output[0], dlen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { @@ -3459,7 +3445,7 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - acomp_request_set_params(req, src, dst, ilen, dlen); + acomp_request_set_params(req, &src, &dst, ilen, dlen); acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); @@ -3481,10 +3467,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - if (memcmp(output[0], dtemplate[i].output, req->dlen)) { + if (memcmp(output, dtemplate[i].output, req->dlen)) { pr_err("alg: acomp: Decompression test %d failed for %s\n", i + 1, algo); - hexdump(output[0], req->dlen); + hexdump(output, req->dlen); ret = -EINVAL; kfree(input_vec); acomp_request_free(req); @@ -3498,13 +3484,8 @@ static int test_acomp(struct crypto_acomp *tfm, ret = 0; out: - acomp_request_free(reqs[0]); - for (i = 0; i < MAX_MB_MSGS; i++) { - kfree(output[i]); - kfree(decomp_out[i]); - } - kfree(dst); - kfree(src); + kfree(decomp_out); + kfree(output); return ret; } diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index b5205d464a8a..2295abbecd14 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 76052006bd87..5fc2c8ee61b1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6373,7 +6373,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd offset %lx\n", buffer->data_size, buffer->offsets_size, - proc->alloc.vm_start - buffer->user_data); + buffer->user_data - proc->alloc.vm_start); } static void print_binder_work_ilocked(struct seq_file *m, diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2796c0da8257..c0eb8c67a9ff 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2453,8 +2453,8 @@ static 
unsigned int ata_msense_control_ata_feature(struct ata_device *dev, */ put_unaligned_be16(ATA_FEATURE_SUB_MPAGE_LEN - 4, &buf[2]); - if (dev->flags & ATA_DFLAG_CDL) - buf[4] = 0x02; /* Support T2A and T2B pages */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + buf[4] = 0x02; /* T2A and T2B pages enabled */ else buf[4] = 0; @@ -3886,12 +3886,11 @@ static int ata_mselect_control_spg0(struct ata_queued_cmd *qc, } /* - * Translate MODE SELECT control mode page, sub-pages f2h (ATA feature mode + * Translate MODE SELECT control mode page, sub-page f2h (ATA feature mode * page) into a SET FEATURES command. */ -static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, - const u8 *buf, int len, - u16 *fp) +static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, + const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; struct ata_taskfile *tf = &qc->tf; @@ -3909,17 +3908,27 @@ static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL */ + /* Disable CDL if it is enabled */ + if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) + return 0; + ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: - /* Enable CDL T2A/T2B: NCQ priority must be disabled */ + /* + * Enable CDL if not already enabled. Since this is mutually + * exclusive with NCQ priority, allow this only if NCQ priority + * is disabled. + */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); return -EINVAL; } + ata_dev_dbg(dev, "Enabling CDL\n"); cdl_action = 1; dev->flags |= ATA_DFLAG_CDL_ENABLED; break; diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index afa4df4c5a3f..95717d509ca9 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -156,6 +156,16 @@ * }, * .ops = my_custom_ops, * }; + * + * Please note that such a custom ops approach is valid, but it is hard to implement + * it correctly without per-device global locks to protect against auxiliary_drv + * removal during a call to those ops. In addition, this implementation lacks proper + * module dependencies, which causes load/unload races between the auxiliary parent + * and device modules. + * + * The easiest way to provide these ops reliably without needing to + * hold a lock is to EXPORT_SYMBOL*() them and rely on the already existing + * module infrastructure for validity and correct dependency chains. */ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, diff --git a/drivers/base/base.h b/drivers/base/base.h index 0042e4774b0c..123031a757d9 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); } +struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { @@ -180,6 +181,22 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + /* + * Most (if not all)
read accesses to dev->driver happen either + * while holding the device lock or in bus/driver code that is only + * invoked when the device is bound to a driver, so there is no + * concern of the pointer changing while it is being read. + * However, when reading a device's uevent file we read the driver + * pointer without taking the device lock (so we do not block there for an + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in the uevent code. + */ + // FIXME - this cast should not be needed "soon" + WRITE_ONCE(dev->driver, (struct device_driver *)drv); +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 5ea3b03af9ba..5e75e1bce551 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; diff --git a/drivers/base/core.c b/drivers/base/core.c index d2f9d3a59d6b..cbc0099d8ef2 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2624,6 +2624,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; } +/* + * Try to fill the "DRIVER=<name>" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe; at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, and it may + * become obsolete as soon as it is generated anyway). Unbinding is + * riskier, as the driver pointer is transitioning to NULL, so READ_ONCE() + * should be used to make sure we are dealing with the same pointer, and to + * ensure that the driver structure does not disappear from under us + * we take the bus' drivers klist lock. The assumption is that only a registered + * driver can be bound to a device, and that bus code will take the same lock + * to unregister a driver.
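device_set_driver() and the dev_driver_uevent() helper that follows are two halves of a single idiom: a pointer normally written under a lock but occasionally read locklessly gets WRITE_ONCE() on every store and READ_ONCE() on the racy load, so the compiler can neither tear the access nor silently reload it. In isolation, with a made-up widget type:

#include <linux/compiler.h>

struct handler {
	const char *name;
};

struct widget {
	struct handler *handler;	/* normally written under a lock */
};

static void widget_set_handler(struct widget *w, struct handler *h)
{
	WRITE_ONCE(w->handler, h);	/* single, untorn store */
}

static const char *widget_handler_name(struct widget *w)
{
	/* lockless reader: snapshot the pointer exactly once */
	struct handler *h = READ_ONCE(w->handler);

	return h ? h->name : NULL;
}

Note what the _ONCE pair does not provide: lifetime. The uevent path still takes the bus' klist_drivers lock so the driver it just observed cannot be unregistered and freed while its name is being copied, which is exactly the argument the comment above makes.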
+ */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2655,8 +2684,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2726,11 +2755,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; @@ -3700,7 +3726,7 @@ done: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885..b526e0e0f52d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -550,7 +550,7 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); @@ -629,8 +629,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) } re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1013,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 6dd1a8860f1c..31bfb3194b4c 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -296,7 +296,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -304,13 +304,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -321,20 +321,16 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; - struct kstat stat; - struct path p; + struct inode *inode; int deleted = 0; - int err; + int err = 0; dentry = kern_path_locked(nodename, &parent); if 
(IS_ERR(dentry)) return PTR_ERR(dentry); - p.mnt = parent.mnt; - p.dentry = dentry; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { struct iattr newattrs; /* * before unlinking this node, reset permissions @@ -342,7 +338,7 @@ static int handle_remove(const char *nodename, struct device *dev) */ newattrs.ia_uid = GLOBAL_ROOT_UID; newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_mode = inode->i_mode & ~0777; newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; inode_lock(d_inode(dentry)); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8f3a41d9bfaa..19469e7f88c2 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -816,21 +816,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int __init add_boot_memory_block(unsigned long base_section_nr) -{ - unsigned long nr; - - for_each_present_section_nr(base_section_nr, nr) { - if (nr >= (base_section_nr + sections_per_block)) - break; - - return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); - } - - return 0; -} - static int add_hotplug_memory_block(unsigned long block_id, struct vmem_altmap *altmap, struct memory_group *group) @@ -957,7 +942,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -970,15 +955,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. 
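The comment above describes the rewritten memory_dev_init() scan that follows: present sections are visited in ascending order, so remembering a single previous block id is enough to register each memory block exactly once, and ULONG_MAX serves as a sentinel that no real block id can match. The same skip pattern stripped of the hotplug specifics, as a self-contained sketch:

#include <stdio.h>

#define INVALID_KEY (~0UL)

/* e.g. 8 sections per memory block */
static unsigned long key_of(unsigned long section)
{
	return section / 8;
}

int main(void)
{
	unsigned long sections[] = { 0, 1, 2, 8, 9, 24 };
	unsigned long last_key = INVALID_KEY;

	for (unsigned int i = 0; i < sizeof(sections) / sizeof(sections[0]); i++) {
		unsigned long key = key_of(sections[i]);

		if (last_key != INVALID_KEY && key == last_key)
			continue;	/* same block as the previous section */
		last_key = key;
		printf("register block %lu\n", key);	/* prints 0, 1, 3 */
	}
	return 0;
}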
*/ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_boot_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index b1726a3515f6..5c78fa6ae772 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1080,6 +1080,7 @@ void software_node_notify(struct device *dev) if (!swnode) return; + kobject_get(&swnode->kobj); ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; @@ -1089,8 +1090,6 @@ void software_node_notify(struct device *dev) sysfs_remove_link(&dev->kobj, "software_node"); return; } - - kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2de7b2bd409d..40f971a66d3e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1683,14 +1683,31 @@ static void ublk_start_cancel(struct ublk_queue *ubq) ublk_put_disk(disk); } -static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, +static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, unsigned int issue_flags) { + struct ublk_io *io = &ubq->ios[tag]; + struct ublk_device *ub = ubq->dev; + struct request *req; bool done; if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) return; + /* + * Don't try to cancel this command if the request has been started, + * to avoid a race between io_uring_cmd_done() and + * io_uring_cmd_complete_in_task().
+ * + * Either the started request will be aborted via __ublk_abort_rq(), + * then this uring_cmd is canceled next time, or it will be done in + * task work function ublk_dispatch_req() because io_uring guarantees + * that ublk_dispatch_req() is always called + */ + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + if (req && blk_mq_request_started(req)) + return; + spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); if (!done) @@ -1722,7 +1739,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) return; @@ -1737,9 +1753,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (!ubq->canceling) ublk_start_cancel(ubq); - io = &ubq->ios[pdu->tag]; - WARN_ON_ONCE(io->cmd != cmd); - ublk_cancel_cmd(ubq, io, issue_flags); + WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); + ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1752,7 +1767,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) int i; for (i = 0; i < ubq->q_depth; i++) - ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); + ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); } /* Cancel all pending commands, must be called after del_gendisk() returns */ @@ -1886,15 +1901,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } } -static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag) -{ - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - - ublk_queue_cmd(ubq, req); -} - static inline int ublk_check_cmd_op(u32 cmd_op) { u32 ioc_type = _IOC_TYPE(cmd_op); @@ -2103,8 +2109,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); - break; + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + ublk_dispatch_req(ubq, req, issue_flags); + return -EIOCBQUEUED; default: goto out; } diff --git a/drivers/char/misc.c b/drivers/char/misc.c index f7dd455dd0dd..dda466f9181a 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -315,7 +315,7 @@ static int __init misc_init(void) goto fail_remove; err = -EIO; - if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) + if (__register_chrdev(MISC_MAJOR, 0, MINORMASK + 1, "misc", &misc_fops)) goto fail_printk; return 0; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 5f04951d0dd4..088182e54deb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1576,8 +1576,8 @@ static void handle_control_message(struct virtio_device *vdev, break; case VIRTIO_CONSOLE_RESIZE: { struct { - __u16 rows; - __u16 cols; + __virtio16 cols; + __virtio16 rows; } size; if (!is_console_port(port)) @@ -1585,7 +1585,8 @@ static void handle_control_message(struct virtio_device *vdev, memcpy(&size, buf->buf + buf->offset + sizeof(*cpkt), sizeof(size)); - set_console_size(port, size.rows, size.cols); + set_console_size(port, virtio16_to_cpu(vdev, size.rows), + virtio16_to_cpu(vdev, size.cols)); port->cons.hvc->irq_requested = 1; resize_console(port); diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index cdc842b32bab..75dce1ff2419 100644 --- 
a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -758,7 +758,7 @@ static void jr3_pci_detach(struct comedi_device *dev) struct jr3_pci_dev_private *devpriv = dev->private; if (devpriv) - timer_delete_sync(&devpriv->timer); + timer_shutdown_sync(&devpriv->timer); comedi_pci_detach(dev); } diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f9cb943d945..0d46402e3094 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST - default y + default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ tristate "Calxeda Highbank-based" depends on ARCH_HIGHBANK || COMPILE_TEST depends on CPUFREQ_DT && REGULATOR && PL320_MBOX - default m + default m if ARCH_HIGHBANK help This adds the CPUFreq driver for Calxeda Highbank SoC based boards. @@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ config ARM_MEDIATEK_CPUFREQ_HW tristate "MediaTek CPUFreq HW driver" depends on ARCH_MEDIATEK || COMPILE_TEST - default m + default m if ARCH_MEDIATEK help Support for the CPUFreq HW driver. Some MediaTek chipsets have a HW engine to offload the steps @@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" depends on CPU_S3C6410 || COMPILE_TEST - default y + default CPU_S3C6410 help This adds the CPUFreq driver for Samsung S3C6410 SoC. @@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" depends on CPU_S5PV210 || COMPILE_TEST - default y + default CPU_S5PV210 help This adds the CPUFreq driver for Samsung S5PV210 and S5PC110 SoCs. @@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help This adds the CPUFreq driver support for SPEAr SOCs. @@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra20/30 SOCs. @@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra124 SOCs. @@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default y + default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC help This adds CPU frequency driver support for Tegra194 SOCs. config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST - default y + default ARCH_OMAP2PLUS || ARCH_K3 help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. 
Enable this in order to diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 4994c86feb57..b1d29b7af232 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct apple_cpu_priv *priv; struct cpufreq_frequency_table *p; unsigned int pstate; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + if (priv->info->cur_pstate_mask) { u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index b3d74f9adcf0..cb93f00bafdb 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -747,7 +747,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09..a010da0f6337 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c310aeebc8f3..944e899eb1be 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scmi_data *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct scmi_data *priv; unsigned long rate; int ret; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); if (ret) return 0; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 17cda84f00df..dcbb0ae7dd47 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops; static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scpi_data *priv = policy->driver_data; - unsigned long rate = clk_get_rate(priv->clk); + struct cpufreq_policy *policy; + struct scpi_data *priv; + unsigned long rate; + + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + rate = clk_get_rate(priv->clk); return rate / 1000; } diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 47d6840b3489..744312a44279 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void) struct nvmem_cell *speedbin_nvmem; const struct of_device_id *match; struct device *cpu_dev; - u32 *speedbin; + void *speedbin_ptr; + u32 speedbin = 0; + 
size_t len; int ret; cpu_dev = get_cpu_device(0); @@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void) return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), "Could not get nvmem cell\n"); - speedbin = nvmem_cell_read(speedbin_nvmem, NULL); + speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); - if (IS_ERR(speedbin)) - return PTR_ERR(speedbin); + if (IS_ERR(speedbin_ptr)) + return PTR_ERR(speedbin_ptr); - ret = opp_data->efuse_xlate(*speedbin); + if (len <= 4) + memcpy(&speedbin, speedbin_ptr, len); + speedbin = le32_to_cpu(speedbin); - kfree(speedbin); + ret = opp_data->efuse_xlate(speedbin); + + kfree(speedbin_ptr); return ret; }; diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 75bebec2c757..0fcf4a39de27 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -163,6 +163,12 @@ static int atmel_sha204a_probe(struct i2c_client *client) i2c_priv->hwrng.name = dev_name(&client->dev); i2c_priv->hwrng.read = atmel_sha204a_rng_read; + /* + * According to review by Bill Cox [1], this HWRNG has very low entropy. + * [1] https://www.metzdowd.com/pipermail/cryptography/2014-December/023858.html + */ + i2c_priv->hwrng.quality = 1; + ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng); if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 15699299dc11..17b692eb3257 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -119,7 +119,7 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); +int cxl_gpf_port_setup(struct cxl_dport *dport); int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size); diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index f4daefe3180e..1498e2369c37 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -528,13 +528,13 @@ static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, feat_in->version, feat_in->feat_data, data_size, flags, offset, &return_code); + *out_len = sizeof(*rpc_out); if (rc) { rpc_out->retval = return_code; return no_free_ptr(rpc_out); } rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; - *out_len = sizeof(*rpc_out); return no_free_ptr(rpc_out); } @@ -677,7 +677,7 @@ static void free_memdev_fwctl(void *_fwctl_dev) fwctl_put(fwctl_dev); } -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_features_state *cxlfs; @@ -700,7 +700,7 @@ int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) if (rc) return rc; - return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl, + return devm_add_action_or_reset(host, free_memdev_fwctl, no_free_ptr(fwctl_dev)); } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 96fecb799cbc..3b80e9a76ba8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,14 +1072,20 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +u16 cxl_gpf_get_dvsec(struct device *dev) { + struct pci_dev 
*pdev; + bool is_port = true; u16 dvsec; if (!dev_is_pci(dev)) return 0; - dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + pdev = to_pci_dev(dev); + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) + is_port = false; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", @@ -1128,26 +1134,24 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) return rc; } -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +int cxl_gpf_port_setup(struct cxl_dport *dport) { - struct pci_dev *pdev; - - if (!port) + if (!dport) return -EINVAL; - if (!port->gpf_dvsec) { + if (!dport->gpf_dvsec) { + struct pci_dev *pdev; int dvsec; - dvsec = cxl_gpf_get_dvsec(dport_dev, true); + dvsec = cxl_gpf_get_dvsec(dport->dport_dev); if (!dvsec) return -EINVAL; - port->gpf_dvsec = dvsec; + dport->gpf_dvsec = dvsec; + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); } - pdev = to_pci_dev(dport_dev); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); - return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0fd6646c1a2e..726bd4a7de27 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1678,7 +1678,7 @@ retry: if (rc && rc != -EBUSY) return rc; - cxl_gpf_port_setup(dport_dev, port); + cxl_gpf_port_setup(dport); /* Any more ports to add between this one and the root? */ if (!dev_is_cxl_root_child(&port->dev)) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 117c2e94c761..5ca7b0eed568 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -581,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri resource_size_t rcrb = ri->base; void __iomem *addr; u32 bar0, bar1; - u16 cmd; u32 id; if (which == CXL_RCRB_UPSTREAM) @@ -603,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri } id = readl(addr + PCI_VENDOR_ID); - cmd = readw(addr + PCI_COMMAND); bar0 = readl(addr + PCI_BASE_ADDRESS_0); bar1 = readl(addr + PCI_BASE_ADDRESS_1); iounmap(addr); @@ -618,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri dev_err(dev, "Failed to access Downstream Port RCRB\n"); return CXL_RESOURCE_NONE; } - if (!(cmd & PCI_COMMAND_MEMORY)) - return CXL_RESOURCE_NONE; /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) return CXL_RESOURCE_NONE; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be8a7dc77719..a9ab46eb0610 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -592,7 +592,6 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds - * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -616,7 +615,6 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; - int gpf_dvsec; }; /** @@ -664,6 +662,7 @@ struct cxl_rcrb_info { * @regs: Dport parsed register blocks * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_dport { struct device *dport_dev; @@ 
-675,6 +674,7 @@ struct cxl_dport { struct cxl_regs regs; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; + int gpf_dvsec; }; /** @@ -910,6 +910,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); +u16 cxl_gpf_get_dvsec(struct device *dev); #endif /* __CXL_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 7b14a154463c..785aa2af5eaa 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1018,7 +1018,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index d061fe3d2b86..e197883690ef 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,7 +108,7 @@ static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) return; } - if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + if (!cxl_gpf_get_dvsec(cxlds->dev)) return; if (cxl_get_dirty_count(mds, &count)) { diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 3c52cb73237a..e3f990d888d7 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -1224,22 +1224,28 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->intel_svc_fcs) { dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); ret = -ENOMEM; - goto err_unregister_dev; + goto err_unregister_rsu_dev; } ret = platform_device_add(svc->intel_svc_fcs); if (ret) { platform_device_put(svc->intel_svc_fcs); - goto err_unregister_dev; + goto err_unregister_rsu_dev; } + ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); + if (ret) + goto err_unregister_fcs_dev; + dev_set_drvdata(dev, svc); pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_dev: +err_unregister_fcs_dev: + platform_device_unregister(svc->intel_svc_fcs); +err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); err_free_kfifo: kfifo_free(&controller->svc_fifo); @@ -1253,6 +1259,8 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) struct stratix10_svc *svc = dev_get_drvdata(&pdev->dev); struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); + of_platform_depopulate(ctrl->dev); + platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 667080cc9ae1..e6913fcf2c7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. 
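[Editorial aside, not part of the patch: the dma_buf_attach_adev() helper documented above (its body follows just below) relies on a common kernel idiom -- recognizing a "known" importer by comparing the attachment's importer_ops pointer against the driver's own ops table before trusting importer_priv. A minimal standalone sketch of that identity-check pattern; struct ops, struct attachment, and struct my_device are inventions for this example, not the real dma-buf API.]

#include <stdio.h>

struct ops { int unused; };
struct my_device { const char *name; };
struct attachment {
	const struct ops *importer_ops;
	void *importer_priv;	/* type is known only if importer_ops matches */
};

static const struct ops my_attach_ops;	/* the address itself is the identity tag */

/* Return the device behind the attachment, or NULL for foreign importers. */
static struct my_device *attach_to_my_device(struct attachment *a)
{
	if (a->importer_ops == &my_attach_ops)
		return a->importer_priv;
	return NULL;
}

int main(void)
{
	struct my_device dev = { .name = "gpu0" };
	struct attachment ours = { &my_attach_ops, &dev };
	struct attachment theirs = { NULL, NULL };

	printf("%s\n", attach_to_my_device(&ours)->name);	/* gpu0 */
	printf("%p\n", (void *)attach_to_my_device(&theirs));	/* NULL */
	return 0;
}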
+ */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -77,22 +102,32 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); - u32 domains = bo->preferred_domains; + u32 domains = bo->allowed_domains; dma_resv_assert_held(dmabuf->resv); - /* - * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP * support if all attachments can do P2P. If any attachment can't do * P2P just pin into GTT instead. + * + * To avoid conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notifiers are enabled. */ - list_for_each_entry(attach, &dmabuf->attachments, node) - if (!attach->peer2peer) - domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (WARN_ON(!domains)) + return -EINVAL; + return amdgpu_bo_pin(bo, domains); } @@ -470,6 +505,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9fed4471405f..536f73131c2d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1920,26 +1920,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): case IP_VERSION(3, 6, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - IPS2 in idle.
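[Editorial aside: the PMFW version gate being removed here compares only the major and minor fields by masking the packed firmware version word before comparing. A hedged sketch of that masking trick; the 0xMMmmpp field layout is inferred from the 0x00FFFF00 mask in the deleted code, not from PMFW documentation.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed packing: major in bits 23:16, minor in bits 15:8, patch in
 * bits 7:0. Masking with 0x00FFFF00 compares major.minor only. */
#define FW_MAJOR_MINOR(v)	((v) & 0x00FFFF00u)

static bool pmfw_has_fix(uint32_t fw_version)
{
	return FW_MAJOR_MINOR(fw_version) >= 0x005D6300u;	/* 93.99.x */
}

int main(void)
{
	printf("%d\n", pmfw_has_fix(0x005D6301));	/* 93.99.1   -> 1 */
	printf("%d\n", pmfw_has_fix(0x005D62FF));	/* 93.98.255 -> 0 */
	return 0;
}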
- * We want them to use IPS2 only in display off cases. - */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; @@ -3355,16 +3335,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); @@ -6521,12 +6501,12 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, const struct drm_display_mode *native_mode, bool scale_enabled) { - if (scale_enabled) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); - } else if (native_mode->clock == drm_mode->clock && - native_mode->htotal == drm_mode->htotal && - native_mode->vtotal == drm_mode->vtotal) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); + if (scale_enabled || ( + native_mode->clock == drm_mode->clock && + native_mode->htotal == drm_mode->htotal && + native_mode->vtotal == drm_mode->vtotal)) { + if (native_mode->crtc_clock) + copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); } else { /* no scaling nor amdgpu inserted, no need to patch */ } @@ -11043,6 +11023,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. 
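[Editorial aside: the dm_gpureset_commit_state() hunk above fixes a classic loop bug -- the loop variable k advances, but every iteration read stream_status->plane_count, i.e. element 0, instead of stream_status[k]. A toy reproduction of the bug class; the types are invented for illustration.]

#include <stdio.h>

struct stream_status { int plane_count; };

static int total_planes(const struct stream_status *status, int streams)
{
	int total = 0;

	for (int k = 0; k < streams; k++)
		total += status[k].plane_count;	/* buggy form: status->plane_count */

	return total;
}

int main(void)
{
	struct stream_status status[] = { { 1 }, { 4 }, { 2 } };

	/* Correct: 7. The buggy form would report 3 (element 0, three times). */
	printf("%d\n", total_planes(status, 3));
	return 0;
}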
*/ if (old_plane_state->crtc != new_plane_state->crtc) return true; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da..1395a748d726 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -918,7 +918,7 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) { struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); - unsigned char start = block * EDID_LENGTH; + unsigned short start = block * EDID_LENGTH; struct edid *edid; int r; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 92f0a099d089..d9159ca55412 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 0, + .do_urgent_latency_adjustment = 1, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 34d2e097ca2e..5a5d48fadbf2 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -35,6 +35,17 @@ #define DC_LOGGER \ link->ctx->logger +static void get_default_8b_10b_lttpr_aux_rd_interval( + union training_aux_rd_interval *training_rd_interval) +{ + /* LTTPRs are required to program DPCD 0000Eh to 0x4 (16ms) in their + * reply to an AUX read of this register. Since old sinks with DPCD rev 1.1 + * and earlier may not support this register, assume the mandatory + * value is programmed by the LTTPR to avoid AUX timeout issues.
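[Editorial aside: with the fallback above, the mandated raw value 0x4 decodes to 16 ms. A standalone sketch of the clock-recovery wait computation, mirroring the arithmetic in get_cr_training_aux_rd_interval() just below; the 7-bit field position is an assumption of this sketch, the real union layout lives in the DC headers.]

#include <stdint.h>
#include <stdio.h>

/* Assumed: TRAINING_AUX_RD_INTERVAL occupies bits 6:0, in 4 ms units. */
static uint32_t cr_wait_time_us(uint8_t raw, int non_transparent_lttpr)
{
	uint32_t wait_us = 100;			/* 100 us spec default */
	uint8_t interval = raw & 0x7f;

	if (raw != 0) {
		if (non_transparent_lttpr)
			wait_us = 400;
		if (interval)
			wait_us = interval * 4000;	/* 4 ms per unit */
	}
	return wait_us;
}

int main(void)
{
	printf("%u us\n", cr_wait_time_us(0x4, 1));	/* 16000 us = 16 ms */
	printf("%u us\n", cr_wait_time_us(0x0, 0));	/* 100 us */
	return 0;
}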
+ */ + training_rd_interval->raw = 0x4; +} + static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, const struct dc_link_settings *link_settings, enum lttpr_mode lttpr_mode) @@ -43,17 +54,22 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, uint32_t wait_in_micro_secs = 100; memset(&training_rd_interval, 0, sizeof(training_rd_interval)); - if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); - if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) - wait_in_micro_secs = 400; - if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) - wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); + + if (training_rd_interval.raw != 0) { + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + } } return wait_in_micro_secs; } @@ -71,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval( DP_128B132B_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); - } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); + } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); } switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) { diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 5170f72b0830..f91daefa9d2b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -43,13 +43,13 @@ struct decon_data { unsigned int wincon_burstlen_shift; }; -static struct decon_data exynos7_decon_data = { +static const struct decon_data exynos7_decon_data = { .vidw_buf_start_base = 0x80, .shadowcon_win_protect_shift = 10, .wincon_burstlen_shift = 11, }; -static struct decon_data exynos7870_decon_data = { +static const struct decon_data exynos7870_decon_data = { .vidw_buf_start_base = 0x880, .shadowcon_win_protect_shift = 8, .wincon_burstlen_shift = 10, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index f313ae7bc3a3..6cc7bf77bcac 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -355,8 +355,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static struct 
platform_driver exynos_drm_platform_driver = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index b150cfd92f6e..09e33a26caaf 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -908,7 +908,7 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, u32 buf_num; u32 cfg; - DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueu[%d]\n", buf_id, enqueue); + DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueue[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1ad87584b1c2..c394cc702d7d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -731,7 +731,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, /* * Setting dma-burst to 16Word causes permanent tearing for very small * buffers, e.g. cursor buffer. Burst Mode switching which based on - * plane size is not recommended as plane size varies alot towards the + * plane size is not recommended as plane size varies a lot towards the * end of the screen and rapid movement causes unstable DMA, but it is * still better to change dma-burst than displaying garbage. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 08cf79a62025..e644e2382d77 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -312,9 +312,6 @@ static int vidi_get_modes(struct drm_connector *connector) else drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info)); - if (!drm_edid) - return 0; - drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 81d2ee37e773..49ff9f1f16d3 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31..be4b0e4df6e1 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 6d1c9262a2cf..7752d8ac85f0 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == 
MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; @@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc..3325580d885d 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,10 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define PIXEL_FREQ_1000_1001(_freq) \ + DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) +#define PHY_FREQ_1000_1001(_freq) \ + (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) /* VID PLL Dividers */ enum { @@ -360,11 +363,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +375,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +387,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - 
.pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +399,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +411,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +423,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +435,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +447,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +459,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier 
and a larger FRAC width */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", + DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", + PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i 
= %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + PHY_FREQ_1000_1001(params[i].phy_freq)); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && + vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 
0x8148 : 0x10000; break; @@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd..7ac55744e574 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 7d68a8acfe2e..eb0f8373258c 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - 
gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d88e390feb4..e32e680c7197 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -128,6 +128,14 @@ static void virtio_gpu_remove(struct virtio_device *vdev) drm_dev_put(dev); } +static void virtio_gpu_shutdown(struct virtio_device *vdev) +{ + /* + * drm does its own synchronization on shutdown. + * Do nothing here, opt out of device reset. + */ +} + static void virtio_gpu_config_changed(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; @@ -162,6 +170,7 @@ static struct virtio_driver virtio_gpu_driver = { .id_table = id_table, .probe = virtio_gpu_probe, .remove = virtio_gpu_remove, + .shutdown = virtio_gpu_shutdown, .config_changed = virtio_gpu_config_changed }; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index b3b11be11650..59792e00cecf 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -307,7 +307,7 @@ void __init hv_get_partition_id(void) local_irq_save(flags); output = *this_cpu_ptr(hyperv_pcpu_input_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output); pt_id = output->partition_id; local_irq_restore(flags); @@ -566,9 +566,11 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ - synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); - kfree(*synic_eventring_tail); - *synic_eventring_tail = NULL; + if (hv_root_partition()) { + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + } return 0; } diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig index 4b6359326ede..4f7d2b6d79e2 100644 --- a/drivers/hwtracing/intel_th/Kconfig +++ b/drivers/hwtracing/intel_th/Kconfig @@ -60,6 +60,7 @@ config INTEL_TH_STH config INTEL_TH_MSU tristate "Intel(R) Trace Hub Memory Storage Unit" + depends on MMU help Memory Storage Unit (MSU) trace output device enables storing STP traces to system memory. 
It supports single diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index bf99d79a4192..7163950eb371 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -19,6 +19,7 @@ #include <linux/io.h> #include <linux/workqueue.h> #include <linux/dma-mapping.h> +#include <linux/pfn_t.h> #ifdef CONFIG_X86 #include <asm/set_memory.h> @@ -976,7 +977,6 @@ static void msc_buffer_contig_free(struct msc *msc) for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off); - page->mapping = NULL; __free_page(page); } @@ -1158,9 +1158,6 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i; for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = msc_sg_page(sg); - - page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, sg_virt(sg), sg_dma_address(sg)); } @@ -1601,22 +1598,10 @@ static void msc_mmap_close(struct vm_area_struct *vma) { struct msc_iter *iter = vma->vm_file->private_data; struct msc *msc = iter->msc; - unsigned long pg; if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) return; - /* drop page _refcounts */ - for (pg = 0; pg < msc->nr_pages; pg++) { - struct page *page = msc_buffer_get_page(msc, pg); - - if (WARN_ON_ONCE(!page)) - continue; - - if (page->mapping) - page->mapping = NULL; - } - /* last mapping -- drop user_count */ atomic_dec(&msc->user_count); mutex_unlock(&msc->buf_mutex); @@ -1626,16 +1611,14 @@ static vm_fault_t msc_mmap_fault(struct vm_fault *vmf) { struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; + struct page *page; - vmf->page = msc_buffer_get_page(msc, vmf->pgoff); - if (!vmf->page) + page = msc_buffer_get_page(msc, vmf->pgoff); + if (!page) return VM_FAULT_SIGBUS; - get_page(vmf->page); - vmf->page->mapping = vmf->vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - return 0; + get_page(page); + return vmf_insert_mixed(vmf->vma, vmf->address, page_to_pfn_t(page)); } static const struct vm_operations_struct msc_mmap_ops = { @@ -1676,7 +1659,7 @@ out: atomic_dec(&msc->user_count); vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP); vma->vm_ops = &msc_mmap_ops; return ret; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index be8761bbef0f..f34209b08b4c 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3869,6 +3869,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3879,21 +3882,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. 
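[Editorial aside: the amd_ir_set_vcpu_affinity() hunk here replaces a silent downgrade (forcing legacy remap when vAPIC is off, as the deleted comment describes) with a hard -EINVAL so the caller sees the misconfiguration. A tiny sketch of that fail-loudly guard pattern; the enum and names are invented for illustration.]

#include <errno.h>
#include <stdio.h>

enum ir_mode { IR_LEGACY, IR_VAPIC };
static enum ir_mode guest_ir = IR_LEGACY;

static int set_vcpu_affinity(void)
{
	/* Reject up front instead of quietly falling back to legacy
	 * remapping; the caller can now react to the error. */
	if (guest_ir != IR_VAPIC)
		return -EINVAL;

	/* ... program the IRTE in guest mode ... */
	return 0;
}

int main(void)
{
	printf("%d\n", set_vcpu_affinity());	/* -22 (-EINVAL) */
	guest_ir = IR_VAPIC;
	printf("%d\n", set_vcpu_affinity());	/* 0 */
	return 0;
}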
- */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c69894861866..dc98c39d2b20 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 02a680c73979..bf0d7d58c8b0 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -96,7 +96,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, ret = mcb_device_register(bus, mdev); if (ret < 0) - goto err; + return ret; return 0; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c8ed65cd87e..f0b5a6931161 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2424,7 +2426,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a283feb3319..cc3d3897ef42 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5164,7 +5164,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 35100a435c88..9e175c5e0634 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -523,9 +523,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); @@ -1049,7 +1050,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; - bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1074,8 +1074,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); - - mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index 
04756302b878..98d3d123004c 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -37,6 +37,7 @@ struct pci1xxxx_gpio { struct auxiliary_device *aux_dev; void __iomem *reg_base; + raw_spinlock_t wa_lock; struct gpio_chip gpio; spinlock_t lock; int irq_base; @@ -167,7 +168,7 @@ static void pci1xxxx_gpio_irq_ack(struct irq_data *data) unsigned long flags; spin_lock_irqsave(&priv->lock, flags); - pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true); + writel(BIT(gpio % 32), priv->reg_base + INTR_STAT_OFFSET(gpio)); spin_unlock_irqrestore(&priv->lock, flags); } @@ -257,6 +258,7 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) struct pci1xxxx_gpio *priv = dev_id; struct gpio_chip *gc = &priv->gpio; unsigned long int_status = 0; + unsigned long wa_flags; unsigned long flags; u8 pincount; int bit; @@ -280,7 +282,9 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); spin_unlock_irqrestore(&priv->lock, flags); irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32))); - handle_nested_irq(irq); + raw_spin_lock_irqsave(&priv->wa_lock, wa_flags); + generic_handle_irq(irq); + raw_spin_unlock_irqrestore(&priv->wa_lock, wa_flags); } } spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index a5f88ec97df7..bc40b940ae21 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,7 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ /* diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d6ff9d82ae94..3f9c60b579ae 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, /* required last entry */ diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 7be1649b1972..da26a080916c 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -36,20 +36,24 @@ #define VSC_TP_XFER_TIMEOUT_BYTES 700 #define VSC_TP_PACKET_PADDING_SIZE 1 #define VSC_TP_PACKET_SIZE(pkt) \ - (sizeof(struct vsc_tp_packet) + le16_to_cpu((pkt)->len) + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + le16_to_cpu((pkt)->hdr.len) + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_PACKET_SIZE \ - (sizeof(struct vsc_tp_packet) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_XFER_SIZE \ (VSC_TP_MAX_PACKET_SIZE + VSC_TP_XFER_TIMEOUT_BYTES) #define VSC_TP_NEXT_XFER_LEN(len, offset) \ - (len + sizeof(struct vsc_tp_packet) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) + (len + sizeof(struct vsc_tp_packet_hdr) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) -struct vsc_tp_packet { +struct vsc_tp_packet_hdr { __u8 sync; __u8 cmd; __le16 len; __le32 seq; - __u8 buf[] __counted_by(len); +}; + +struct vsc_tp_packet { + struct vsc_tp_packet_hdr hdr; + __u8 buf[VSC_TP_MAX_XFER_SIZE - sizeof(struct vsc_tp_packet_hdr)]; }; struct vsc_tp { @@ -67,8 +71,8 @@ struct vsc_tp { u32 seq; /* command buffer */ - void *tx_buf; - void *rx_buf; + struct vsc_tp_packet 
*tx_buf; + struct vsc_tp_packet *rx_buf; atomic_t assert_cnt; wait_queue_head_t xfer_wait; @@ -158,12 +162,12 @@ static int vsc_tp_dev_xfer(struct vsc_tp *tp, void *obuf, void *ibuf, size_t len static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, void *ibuf, u16 ilen) { - int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet); + int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet_hdr); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; - u8 *src, *crc_src, *rx_buf = tp->rx_buf; + u8 *src, *crc_src, *rx_buf = (u8 *)tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; - struct vsc_tp_packet ack; + struct vsc_tp_packet_hdr ack; u8 *dst = (u8 *)&ack; bool synced = false; @@ -280,10 +284,10 @@ int vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen, guard(mutex)(&tp->mutex); - pkt->sync = VSC_TP_PACKET_SYNC; - pkt->cmd = cmd; - pkt->len = cpu_to_le16(olen); - pkt->seq = cpu_to_le32(++tp->seq); + pkt->hdr.sync = VSC_TP_PACKET_SYNC; + pkt->hdr.cmd = cmd; + pkt->hdr.len = cpu_to_le16(olen); + pkt->hdr.seq = cpu_to_le32(++tp->seq); memcpy(pkt->buf, obuf, olen); crc = ~crc32(~0, (u8 *)pkt, sizeof(pkt) + olen); @@ -320,7 +324,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex); /* rom xfer is big endian */ - cpu_to_be32_array(tp->tx_buf, obuf, words); + cpu_to_be32_array((u32 *)tp->tx_buf, obuf, words); ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -336,7 +340,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; if (ibuf) - be32_to_cpu_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, (u32 *)tp->rx_buf, words); return ret; } @@ -490,11 +494,11 @@ static int vsc_tp_probe(struct spi_device *spi) if (!tp) return -ENOMEM; - tp->tx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->tx_buf = devm_kzalloc(dev, sizeof(*tp->tx_buf), GFP_KERNEL); if (!tp->tx_buf) return -ENOMEM; - tp->rx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->rx_buf = devm_kzalloc(dev, sizeof(*tp->rx_buf), GFP_KERNEL); if (!tp->rx_buf) return -ENOMEM; diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index d294850a35a1..c4e5e2c977be 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -122,7 +122,6 @@ struct pci_endpoint_test { struct pci_endpoint_test_data { enum pci_barno test_reg_bar; size_t alignment; - int irq_type; }; static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, @@ -948,7 +947,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, test_reg_bar = data->test_reg_bar; test->test_reg_bar = test_reg_bar; test->alignment = data->alignment; - test->irq_type = data->irq_type; } init_completion(&test->irq_raised); @@ -970,10 +968,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - ret = pci_endpoint_test_alloc_irq_vectors(test, test->irq_type); - if (ret) - goto err_disable_irq; - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); @@ -1009,10 +1003,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, goto err_ida_remove; } - ret = pci_endpoint_test_request_irq(test); - if (ret) - goto err_kfree_test_name; - pci_endpoint_test_get_capabilities(test); misc_device = &test->miscdev; @@ -1020,7 
+1010,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, misc_device->name = kstrdup(name, GFP_KERNEL); if (!misc_device->name) { ret = -ENOMEM; - goto err_release_irq; + goto err_kfree_test_name; } misc_device->parent = &pdev->dev; misc_device->fops = &pci_endpoint_test_fops; @@ -1036,9 +1026,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, err_kfree_name: kfree(misc_device->name); -err_release_irq: - pci_endpoint_test_release_irq(test); - err_kfree_test_name: kfree(test->name); @@ -1051,8 +1038,6 @@ err_iounmap: pci_iounmap(pdev, test->bar[bar]); } -err_disable_irq: - pci_endpoint_test_free_irq_vectors(test); pci_release_regions(pdev); err_disable_pdev: @@ -1092,23 +1077,19 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) static const struct pci_endpoint_test_data default_data = { .test_reg_bar = BAR_0, .alignment = SZ_4K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data am654_data = { .test_reg_bar = BAR_2, .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data j721e_data = { .alignment = 256, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data rk3588_data = { .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; /* diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d70399bce5b9..c5d6628d7980 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2419,6 +2419,9 @@ mt7531_setup_common(struct dsa_switch *ds) struct mt7530_priv *priv = ds->priv; int ret, i; + ds->assisted_learning_on_cpu_port = true; + ds->mtu_enforcement_ingress = true; + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2571,9 +2574,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - ds->assisted_learning_on_cpu_port = true; - ds->mtu_enforcement_ingress = true; - return 0; } diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index c83a0a80d533..506f682d15c1 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -5,11 +5,6 @@ #include "core.h" -struct pdsc_wait_context { - struct pdsc_qcq *qcq; - struct completion wait_completion; -}; - static int pdsc_process_notifyq(struct pdsc_qcq *qcq) { union pds_core_notifyq_comp *comp; @@ -109,10 +104,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) q_info = &q->info[q->tail_idx]; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - /* Copy out the completion data */ - memcpy(q_info->dest, comp, sizeof(*comp)); - - complete_all(&q_info->wc->wait_completion); + if (!completion_done(&q_info->completion)) { + memcpy(q_info->dest, comp, sizeof(*comp)); + complete(&q_info->completion); + } if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; @@ -162,8 +157,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) static int __pdsc_adminq_post(struct pdsc *pdsc, struct pdsc_qcq *qcq, union pds_core_adminq_cmd *cmd, - union pds_core_adminq_comp *comp, - struct pdsc_wait_context *wc) + union pds_core_adminq_comp *comp) { struct pdsc_queue *q = &qcq->q; struct pdsc_q_info *q_info; @@ -205,9 +199,9 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Post the request */ index = q->head_idx; q_info = &q->info[index]; - q_info->wc = wc; q_info->dest = comp; memcpy(q_info->desc, cmd, sizeof(*cmd)); + reinit_completion(&q_info->completion); dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n", q->head_idx, q->tail_idx); @@ -231,16 +225,13 @@ int 
pdsc_adminq_post(struct pdsc *pdsc, union pds_core_adminq_cmd *cmd, union pds_core_adminq_comp *comp, bool fast_poll) { - struct pdsc_wait_context wc = { - .wait_completion = - COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion), - }; unsigned long poll_interval = 1; unsigned long poll_jiffies; unsigned long time_limit; unsigned long time_start; unsigned long time_done; unsigned long remaining; + struct completion *wc; int err = 0; int index; @@ -250,20 +241,19 @@ int pdsc_adminq_post(struct pdsc *pdsc, return -ENXIO; } - wc.qcq = &pdsc->adminqcq; - index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); + index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp); if (index < 0) { err = index; goto err_out; } + wc = &pdsc->adminqcq.q.info[index].completion; time_start = jiffies; time_limit = time_start + HZ * pdsc->devcmd_timeout; do { /* Timeslice the actual wait to catch IO errors etc early */ poll_jiffies = msecs_to_jiffies(poll_interval); - remaining = wait_for_completion_timeout(&wc.wait_completion, - poll_jiffies); + remaining = wait_for_completion_timeout(wc, poll_jiffies); if (remaining) break; @@ -292,9 +282,11 @@ int pdsc_adminq_post(struct pdsc *pdsc, dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n", __func__, jiffies_to_msecs(time_done - time_start)); - /* Check the results */ - if (time_after_eq(time_done, time_limit)) + /* Check the results and clear an uncompleted timeout */ + if (time_after_eq(time_done, time_limit) && !completion_done(wc)) { err = -ETIMEDOUT; + complete(wc); + } dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index); dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index eeb72b1809ea..c9aac27883a3 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -107,9 +107,6 @@ int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev, dev_dbg(pf->dev, "%s: %s opcode %d\n", __func__, dev_name(&padev->aux_dev.dev), req->opcode); - if (pf->state) - return -ENXIO; - /* Wrap the client's request */ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD; cmd.client_request.client_id = cpu_to_le16(padev->client_id); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 1eb0d92786f7..9512aa4083f0 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -167,8 +167,10 @@ static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa) q->base = base; q->base_pa = base_pa; - for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) + for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) { cur->desc = base + (i * q->desc_size); + init_completion(&cur->completion); + } } static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa) @@ -325,10 +327,7 @@ static int pdsc_core_init(struct pdsc *pdsc) size_t sz; int err; - /* Scale the descriptor ring length based on number of CPUs and VFs */ - numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus()); - numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev); - numdescs = roundup_pow_of_two(numdescs); + numdescs = PDSC_ADMINQ_MAX_LENGTH; err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq", PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR, numdescs, diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 0bf320c43083..0b53a1fab46d 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ 
b/drivers/net/ethernet/amd/pds_core/core.h @@ -16,7 +16,7 @@ #define PDSC_WATCHDOG_SECS 5 #define PDSC_QUEUE_NAME_MAX_SZ 16 -#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ +#define PDSC_ADMINQ_MAX_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false #define PDSC_TEARDOWN_REMOVING true @@ -96,7 +96,7 @@ struct pdsc_q_info { unsigned int bytes; unsigned int nbufs; struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS]; - struct pdsc_wait_context *wc; + struct completion completion; void *dest; }; diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index c5c787df61a4..d8dc39da4161 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -105,7 +105,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL, .fw_control.oper = PDS_CORE_FW_GET_LIST, }; - struct pds_core_fw_list_info fw_list; + struct pds_core_fw_list_info fw_list = {}; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; char buf[32]; @@ -118,8 +118,6 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (!err) memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); - if (err && err != -EIO) - return err; listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); for (i = 0; i < listlen; i++) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 2106861463e4..3ee52f4b1166 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, } } +static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } +} + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, struct napi_struct *napi, int work_limit, struct bpf_prog *prog) @@ -1868,11 +1878,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd, *orig_rxbd; - int orig_i, orig_cleaned_cnt; struct xdp_buff xdp_buff; struct sk_buff *skb; + int orig_i, err; u32 bd_status; - int err; rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); @@ -1887,7 +1896,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, break; orig_rxbd = rxbd; - orig_cleaned_cnt = cleaned_cnt; orig_i = i; enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, @@ -1915,15 +1923,21 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, rx_ring->stats.xdp_drops++; break; case XDP_PASS: - rxbd = orig_rxbd; - cleaned_cnt = orig_cleaned_cnt; - i = orig_i; - - skb = enetc_build_skb(rx_ring, bd_status, &rxbd, - &i, &cleaned_cnt, - ENETC_RXB_DMA_SIZE_XDP); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&xdp_buff); + /* Probably under memory pressure, stop NAPI */ + if (unlikely(!skb)) { + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_drops++; goto out; + } + + enetc_get_offloads(rx_ring, orig_rxbd, skb); + + /* These buffers are about to be owned by the stack. + * Update our buffer cache (the rx_swbd array elements) + * with their other page halves. 
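+	 * Each rx_swbd owns one half of a page: the half just attached
+	 * to the skb stays with the stack, while enetc_flip_rx_buff()
+	 * switches the swbd to the twin half, so the descriptors in
+	 * [orig_i, i) are refilled without allocating new pages.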
+ */ + enetc_bulk_flip_buff(rx_ring, orig_i, i); napi_gro_receive(napi, skb); break; @@ -1965,11 +1979,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; } else { - while (orig_i != i) { - enetc_flip_rx_buff(rx_ring, - &rx_ring->rx_swbd[orig_i]); - enetc_bdr_idx_inc(rx_ring, &orig_i); - } + enetc_bulk_flip_buff(rx_ring, orig_i, i); xdp_redirect_frm_cnt++; rx_ring->stats.xdp_redirect++; } @@ -3362,7 +3372,8 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, bdr->buffer_offset = ENETC_RXB_PAD; priv->rx_ring[i] = bdr; - err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); + err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, + ENETC_RXB_DMA_SIZE_XDP); if (err) goto free_vector; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bdb98c9d8b1c..47807b202310 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4043,11 +4043,27 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); if (mtk_is_netsys_v3_or_greater(eth)) { - /* PSE should not drop port1, port8 and port9 packets */ - mtk_w32(eth, 0x00000302, PSE_DROP_CFG); + /* PSE dummy page mechanism */ + mtk_w32(eth, PSE_DUMMY_WORK_GDM(1) | PSE_DUMMY_WORK_GDM(2) | + PSE_DUMMY_WORK_GDM(3) | DUMMY_PAGE_THR, PSE_DUMY_REQ); + + /* PSE free buffer drop threshold */ + mtk_w32(eth, 0x00600009, PSE_IQ_REV(8)); + + /* PSE should not drop port8, port9 and port13 packets from + * WDMA Tx + */ + mtk_w32(eth, 0x00002300, PSE_DROP_CFG); + + /* PSE should drop packets to port8, port9 and port13 on WDMA Rx + * ring full + */ + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(0)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(1)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(2)); /* GDM and CDM Threshold */ - mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES); + mtk_w32(eth, 0x08000707, MTK_CDMW0_THRES); mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES); /* Disable GDM1 RX CRC stripping */ @@ -4064,7 +4080,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x00000300, PSE_DROP_CFG); /* PSE should drop packets to port 8/9 on WDMA Rx ring full */ - mtk_w32(eth, 0x00000300, PSE_PPE0_DROP); + mtk_w32(eth, 0x00000300, PSE_PPE_DROP(0)); /* PSE Free Queue Flow Control */ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 39709649ea8d..88ef2e9c50fc 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -151,7 +151,15 @@ #define PSE_FQFC_CFG1 0x100 #define PSE_FQFC_CFG2 0x104 #define PSE_DROP_CFG 0x108 -#define PSE_PPE0_DROP 0x110 +#define PSE_PPE_DROP(x) (0x110 + ((x) * 0x4)) + +/* PSE Last FreeQ Page Request Control */ +#define PSE_DUMY_REQ 0x10C +/* PSE_DUMY_REQ is not a typo but actually called like that also in + * MediaTek's datasheet + */ +#define PSE_DUMMY_WORK_GDM(x) BIT(16 + (x)) +#define DUMMY_PAGE_THR 0x1 /* PSE Input Queue Reservation Register*/ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index eb3bd9c7f66e..ca9ecec358b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -637,10 +637,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct 
mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -654,7 +650,16 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &inner_ttc_groups[TTC_GROUPS_DEFAULT]; @@ -710,10 +715,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -727,7 +728,16 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &ttc_groups[TTC_GROUPS_DEFAULT]; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 967a16212faf..0c011a47d5a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -320,8 +320,8 @@ enum rtc_control { /* PTP and timestamping registers */ -#define GMAC3_X_ATSNS GENMASK(19, 16) -#define GMAC3_X_ATSNS_SHIFT 16 +#define GMAC3_X_ATSNS GENMASK(29, 25) +#define GMAC3_X_ATSNS_SHIFT 25 #define GMAC_PTP_TCR_ATSFC BIT(24) #define GMAC_PTP_TCR_ATSEN0 BIT(25) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index a8b901cdf5cb..56b76aaa58f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -553,7 +553,7 @@ void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + GMAC_PTP_ATNR); - ns += readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 0f59aa982604..e2840fa241f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -222,7 +222,7 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + PTP_ATNR); - ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 14f361549638..e32013eb0186 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -730,7 +730,7 @@ static int dp83822_phy_reset(struct phy_device *phydev) return phydev->drv->config_init(phydev); } -#ifdef CONFIG_OF_MDIO +#if IS_ENABLED(CONFIG_OF_MDIO) static const u32 tx_amplitude_100base_tx_gain[] = { 80, 82, 83, 85, 87, 88, 90, 92, 93, 95, 97, 98, 100, 102, 103, 105, diff --git a/drivers/net/phy/microchip.c 
b/drivers/net/phy/microchip.c index 0e17cc458efd..93de88c1c8fd 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -37,47 +37,6 @@ static int lan88xx_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, page); } -static int lan88xx_phy_config_intr(struct phy_device *phydev) -{ - int rc; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - /* unmask all source and clear them before enable */ - rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF); - rc = phy_read(phydev, LAN88XX_INT_STS); - rc = phy_write(phydev, LAN88XX_INT_MASK, - LAN88XX_INT_MASK_MDINTPIN_EN_ | - LAN88XX_INT_MASK_LINK_CHANGE_); - } else { - rc = phy_write(phydev, LAN88XX_INT_MASK, 0); - if (rc) - return rc; - - /* Ack interrupts after they have been disabled */ - rc = phy_read(phydev, LAN88XX_INT_STS); - } - - return rc < 0 ? rc : 0; -} - -static irqreturn_t lan88xx_handle_interrupt(struct phy_device *phydev) -{ - int irq_status; - - irq_status = phy_read(phydev, LAN88XX_INT_STS); - if (irq_status < 0) { - phy_error(phydev); - return IRQ_NONE; - } - - if (!(irq_status & LAN88XX_INT_STS_LINK_CHANGE_)) - return IRQ_NONE; - - phy_trigger_machine(phydev); - - return IRQ_HANDLED; -} - static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; @@ -528,8 +487,9 @@ static struct phy_driver microchip_phy_driver[] = { .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, - .config_intr = lan88xx_phy_config_intr, - .handle_interrupt = lan88xx_handle_interrupt, + /* Interrupt handling is broken, do not define related + * functions to force polling. + */ .suspend = lan88xx_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index bd3c9554f6ac..60893691d4c3 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -93,9 +93,8 @@ int phy_led_triggers_register(struct phy_device *phy) if (!phy->phy_num_led_triggers) return 0; - phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, - sizeof(*phy->led_link_trigger), - GFP_KERNEL); + phy->led_link_trigger = kzalloc(sizeof(*phy->led_link_trigger), + GFP_KERNEL); if (!phy->led_link_trigger) { err = -ENOMEM; goto out_clear; @@ -105,10 +104,9 @@ int phy_led_triggers_register(struct phy_device *phy) if (err) goto out_free_link; - phy->phy_led_triggers = devm_kcalloc(&phy->mdio.dev, - phy->phy_num_led_triggers, - sizeof(struct phy_led_trigger), - GFP_KERNEL); + phy->phy_led_triggers = kcalloc(phy->phy_num_led_triggers, + sizeof(struct phy_led_trigger), + GFP_KERNEL); if (!phy->phy_led_triggers) { err = -ENOMEM; goto out_unreg_link; @@ -129,11 +127,11 @@ int phy_led_triggers_register(struct phy_device *phy) out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); - devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); + kfree(phy->phy_led_triggers); out_unreg_link: phy_led_trigger_unregister(phy->led_link_trigger); out_free_link: - devm_kfree(&phy->mdio.dev, phy->led_link_trigger); + kfree(phy->led_link_trigger); phy->led_link_trigger = NULL; out_clear: phy->phy_num_led_triggers = 0; @@ -147,8 +145,13 @@ void phy_led_triggers_unregister(struct phy_device *phy) for (i = 0; i < phy->phy_num_led_triggers; i++) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + kfree(phy->phy_led_triggers); + phy->phy_led_triggers = NULL; - if (phy->led_link_trigger) + if (phy->led_link_trigger) { phy_led_trigger_unregister(phy->led_link_trigger); + 
kfree(phy->led_link_trigger); + phy->led_link_trigger = NULL; + } } EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b68369e2342b..1bdd5d8bb5b0 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -81,6 +81,7 @@ struct phylink { unsigned int pcs_state; bool link_failed; + bool suspend_link_up; bool major_config_failed; bool mac_supports_eee_ops; bool mac_supports_eee; @@ -2545,14 +2546,16 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) /* Stop the resolver bringing the link up */ __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); - /* Disable the carrier, to prevent transmit timeouts, - * but one would hope all packets have been sent. This - * also means phylink_resolve() will do nothing. - */ - if (pl->netdev) - netif_carrier_off(pl->netdev); - else + pl->suspend_link_up = phylink_link_is_up(pl); + if (pl->suspend_link_up) { + /* Disable the carrier, to prevent transmit timeouts, + * but one would hope all packets have been sent. This + * also means phylink_resolve() will do nothing. + */ + if (pl->netdev) + netif_carrier_off(pl->netdev); pl->old_link_state = false; + } /* We do not call mac_link_down() here as we want the * link to remain up to receive the WoL packets. @@ -2603,15 +2606,18 @@ void phylink_resume(struct phylink *pl) if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { /* Wake-on-Lan enabled, MAC handling */ - /* Call mac_link_down() so we keep the overall state balanced. - * Do this under the state_mutex lock for consistency. This - * will cause a "Link Down" message to be printed during - * resume, which is harmless - the true link state will be - * printed when we run a resolve. - */ - mutex_lock(&pl->state_mutex); - phylink_link_down(pl); - mutex_unlock(&pl->state_mutex); + if (pl->suspend_link_up) { + /* Call mac_link_down() so we keep the overall state + * balanced. Do this under the state_mutex lock for + * consistency. This will cause a "Link Down" message + * to be printed during resume, which is harmless - + * the true link state will be printed when we run a + * resolve. + */ + mutex_lock(&pl->state_mutex); + phylink_link_down(pl); + mutex_unlock(&pl->state_mutex); + } /* Re-apply the link parameters so that all the settings get * restored to the MAC. diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e4617216a4b..848fab51dfa1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3342,7 +3342,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +static void __virtnet_rx_pause(struct virtnet_info *vi, + struct receive_queue *rq) { bool running = netif_running(vi->dev); @@ -3352,17 +3353,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) } } -static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +static void virtnet_rx_pause_all(struct virtnet_info *vi) +{ + int i; + + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. 
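	 * refill_work itself toggles napi_disable()/napi_enable() on
	 * these queues, so the teardown order below is what prevents
	 * the deadlock; distilled (per queue):
	 *
	 *	disable_delayed_refill(vi);		no new refills
	 *	cancel_delayed_work_sync(&vi->refill);	flush a running one
	 *	__virtnet_rx_pause(vi, rq);		only now park NAPI
	 *
	 * Parking NAPI first could wait forever on a refill_work that
	 * is in turn waiting to disable the same NAPI instance.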
+ */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + for (i = 0; i < vi->max_queue_pairs; i++) + __virtnet_rx_pause(vi, &vi->rq[i]); +} + +static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +{ + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + __virtnet_rx_pause(vi, rq); +} + +static void __virtnet_rx_resume(struct virtnet_info *vi, + struct receive_queue *rq, + bool refill) { bool running = netif_running(vi->dev); - if (!try_fill_recv(vi, rq, GFP_KERNEL)) + if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); if (running) virtnet_napi_enable(rq); } +static void virtnet_rx_resume_all(struct virtnet_info *vi) +{ + int i; + + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + __virtnet_rx_resume(vi, &vi->rq[i], true); + else + __virtnet_rx_resume(vi, &vi->rq[i], false); + } +} + +static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +{ + enable_delayed_refill(vi); + __virtnet_rx_resume(vi, rq, true); +} + static int virtnet_rx_resize(struct virtnet_info *vi, struct receive_queue *rq, u32 ring_num) { @@ -5959,12 +6006,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); + virtnet_rx_pause_all(vi); + /* Make sure NAPI is not using any XDP TX queues for RX. */ if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_disable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_disable(&vi->sq[i]); - } } if (!prog) { @@ -5996,13 +6043,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_enabled = false; } + virtnet_rx_resume_all(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) { - virtnet_napi_enable(&vi->rq[i]); + if (netif_running(dev)) virtnet_napi_tx_enable(&vi->sq[i]); - } } return 0; @@ -6014,11 +6060,10 @@ err: rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } + virtnet_rx_resume_all(vi); if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_enable(&vi->sq[i]); - } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index fc52d5c4c69b..5091e1fa4a0d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -985,20 +985,27 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_TX: - get_page(pdata); xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + trace_xdp_exception(queue->info->netdev, prog, act); + break; + } + get_page(pdata); err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0); - if (unlikely(!err)) + if (unlikely(err <= 0)) { + if (err < 0) + trace_xdp_exception(queue->info->netdev, prog, act); xdp_return_frame_rx_napi(xdpf); - else if (unlikely(err < 0)) - trace_xdp_exception(queue->info->netdev, prog, act); + } break; case XDP_REDIRECT: get_page(pdata); err = xdp_do_redirect(queue->info->netdev, xdp, prog); *need_xdp_flush = true; - if (unlikely(err)) + if (unlikely(err)) { trace_xdp_exception(queue->info->netdev, prog, act); + 
xdp_return_buff(xdp); + } break; case XDP_PASS: case XDP_DROP: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 71f8d06998d6..245475c43127 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port) lockdep_assert_held(&nvmet_config_sem); + if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) + return -EINVAL; + ops = nvmet_transports[port->disc_addr.trtype]; if (!ops) { up_write(&nvmet_config_sem); diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index fff85bbf0ecd..e206efc29a00 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -594,9 +594,11 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, cell->nbits = info->nbits; cell->np = info->np; - if (cell->nbits) + if (cell->nbits) { cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, BITS_PER_BYTE); + cell->raw_len = ALIGN(cell->bytes, nvmem->word_size); + } if (!IS_ALIGNED(cell->offset, nvmem->stride)) { dev_err(&nvmem->dev, @@ -605,6 +607,18 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, return -EINVAL; } + if (!IS_ALIGNED(cell->raw_len, nvmem->word_size)) { + dev_err(&nvmem->dev, + "cell %s raw len %zd unaligned to nvmem word size %d\n", + cell->name ?: "<unknown>", cell->raw_len, + nvmem->word_size); + + if (info->raw_len) + return -EINVAL; + + cell->raw_len = ALIGN(cell->raw_len, nvmem->word_size); + } + return 0; } @@ -837,7 +851,9 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod if (addr && len == (2 * sizeof(u32))) { info.bit_offset = be32_to_cpup(addr++); info.nbits = be32_to_cpup(addr); - if (info.bit_offset >= BITS_PER_BYTE || info.nbits < 1) { + if (info.bit_offset >= BITS_PER_BYTE * info.bytes || + info.nbits < 1 || + info.bit_offset + info.nbits > BITS_PER_BYTE * info.bytes) { dev_err(dev, "nvmem: invalid bits on %pOF\n", child); of_node_put(child); return -EINVAL; @@ -1630,21 +1646,29 @@ EXPORT_SYMBOL_GPL(nvmem_cell_put); static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; - int i, extra, bit_offset = cell->bit_offset; + int i, extra, bytes_offset; + int bit_offset = cell->bit_offset; p = b = buf; - if (bit_offset) { + + bytes_offset = bit_offset / BITS_PER_BYTE; + b += bytes_offset; + bit_offset %= BITS_PER_BYTE; + + if (bit_offset % BITS_PER_BYTE) { /* First shift */ - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; /* setup rest of the bytes if any */ for (i = 1; i < cell->bytes; i++) { /* Get bits from next byte and shift them towards msb */ - *p |= *b << (BITS_PER_BYTE - bit_offset); + *p++ |= *b << (BITS_PER_BYTE - bit_offset); - p = b; - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; } + } else if (p != b) { + memmove(p, b, cell->bytes - bytes_offset); + p += cell->bytes - 1; } else { /* point to the msb */ p += cell->bytes - 1; diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index 116a39e804c7..a872c640b8c5 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -321,19 +321,32 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { struct qfprom_priv *priv = context; - u8 *val = _val; - int i = 0, words = bytes; + u32 *val = _val; void __iomem *base = priv->qfpcorrected; + int words = DIV_ROUND_UP(bytes, sizeof(u32)); + int i; if (read_raw_data && priv->qfpraw) base = priv->qfpraw; - while (words--) - *val++ = readb(base + reg + i++); + for (i = 0; i < words; i++) + *val++ = 
readl(base + reg + i * sizeof(u32)); return 0; } +/* Align reads to word boundary */ +static void qfprom_fixup_dt_cell_info(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell) +{ + unsigned int byte_offset = cell->offset % sizeof(u32); + + cell->bit_offset += byte_offset * BITS_PER_BYTE; + cell->offset -= byte_offset; + if (byte_offset && !cell->nbits) + cell->nbits = cell->bytes * BITS_PER_BYTE; +} + static void qfprom_runtime_disable(void *data) { pm_runtime_disable(data); @@ -358,10 +371,11 @@ static int qfprom_probe(struct platform_device *pdev) struct nvmem_config econfig = { .name = "qfprom", .add_legacy_fixed_of_cells = true, - .stride = 1, - .word_size = 1, + .stride = 4, + .word_size = 4, .id = NVMEM_DEVID_AUTO, .reg_read = qfprom_reg_read, + .fixup_dt_cell_info = qfprom_fixup_dt_cell_info, }; struct device *dev = &pdev->dev; struct resource *res; diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index ebc3f0b24166..d88f12c53242 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -59,7 +59,6 @@ #define RK3588_OTPC_AUTO_EN 0x08 #define RK3588_OTPC_INT_ST 0x84 #define RK3588_OTPC_DOUT0 0x20 -#define RK3588_NO_SECURE_OFFSET 0x300 #define RK3588_NBYTES 4 #define RK3588_BURST_NUM 1 #define RK3588_BURST_SHIFT 8 @@ -69,6 +68,7 @@ struct rockchip_data { int size; + int read_offset; const char * const *clks; int num_clks; nvmem_reg_read_t reg_read; @@ -196,7 +196,7 @@ static int rk3588_otp_read(void *context, unsigned int offset, addr_start = round_down(offset, RK3588_NBYTES) / RK3588_NBYTES; addr_end = round_up(offset + bytes, RK3588_NBYTES) / RK3588_NBYTES; addr_len = addr_end - addr_start; - addr_start += RK3588_NO_SECURE_OFFSET; + addr_start += otp->data->read_offset / RK3588_NBYTES; buf = kzalloc(array_size(addr_len, RK3588_NBYTES), GFP_KERNEL); if (!buf) @@ -274,12 +274,21 @@ static const struct rockchip_data px30_data = { .reg_read = px30_otp_read, }; +static const struct rockchip_data rk3576_data = { + .size = 0x100, + .read_offset = 0x700, + .clks = px30_otp_clocks, + .num_clks = ARRAY_SIZE(px30_otp_clocks), + .reg_read = rk3588_otp_read, +}; + static const char * const rk3588_otp_clocks[] = { "otp", "apb_pclk", "phy", "arb", }; static const struct rockchip_data rk3588_data = { .size = 0x400, + .read_offset = 0xc00, .clks = rk3588_otp_clocks, .num_clks = ARRAY_SIZE(rk3588_otp_clocks), .reg_read = rk3588_otp_read, @@ -295,6 +304,10 @@ static const struct of_device_id rockchip_otp_match[] = { .data = &px30_data, }, { + .compatible = "rockchip,rk3576-otp", + .data = &rk3576_data, + }, + { .compatible = "rockchip,rk3588-otp", .data = &rk3588_data, }, diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 54d6f4fa3ce1..e994c546422c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -187,6 +187,9 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) panic("%s: kzalloc() failed!\n", __func__); tmp->res = r; tmp->dev = dev; + tmp->start = r->start; + tmp->end = r->end; + tmp->flags = r->flags; /* Fallback is smallest one or list is empty */ n = head; @@ -545,6 +548,7 @@ assign: pci_dbg(dev, "%s %pR: releasing\n", res_name, res); release_resource(res); + restore_dev_resource(dev_res); } /* Restore start/end/flags from saved list */ list_for_each_entry(save_res, &save_head, list) diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c index 1d5ffe055463..de00a85bfafa 100644 --- a/drivers/pps/generators/pps_gen_tio.c +++ 
b/drivers/pps/generators/pps_gen_tio.c @@ -230,7 +230,7 @@ static int pps_gen_tio_probe(struct platform_device *pdev) hrtimer_setup(&tio->timer, hrtimer_callback, CLOCK_REALTIME, HRTIMER_MODE_ABS); spin_lock_init(&tio->lock); - platform_set_drvdata(pdev, &tio); + platform_set_drvdata(pdev, tio); return 0; } diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 3fcb1ad3b070..1d7901a8f0e4 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -174,6 +174,9 @@ static void mpi3mr_print_event_data(struct mpi3mr_ioc *mrioc, char *desc = NULL; u16 event; + if (!(mrioc->logging_level & MPI3_DEBUG_EVENT)) + return; + event = event_reply->event; switch (event) { @@ -451,6 +454,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) return 0; } + atomic_set(&mrioc->admin_pend_isr, 0); reply_desc = (struct mpi3_default_reply_descriptor *)mrioc->admin_reply_base + admin_reply_ci; @@ -565,7 +569,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc, WRITE_ONCE(op_req_q->ci, le16_to_cpu(reply_desc->request_queue_ci)); mpi3mr_process_op_reply_desc(mrioc, reply_desc, &reply_dma, reply_qidx); - atomic_dec(&op_reply_q->pend_ios); + if (reply_dma) mpi3mr_repost_reply_buf(mrioc, reply_dma); num_op_reply++; @@ -2925,6 +2929,7 @@ static int mpi3mr_setup_admin_qpair(struct mpi3mr_ioc *mrioc) mrioc->admin_reply_ci = 0; mrioc->admin_reply_ephase = 1; atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (!mrioc->admin_req_base) { mrioc->admin_req_base = dma_alloc_coherent(&mrioc->pdev->dev, @@ -4653,6 +4658,7 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) if (mrioc->admin_reply_base) memset(mrioc->admin_reply_base, 0, mrioc->admin_reply_q_sz); atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (mrioc->init_cmds.reply) { memset(mrioc->init_cmds.reply, 0, sizeof(*mrioc->init_cmds.reply)); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 53daf923ad8e..518a252eb6aa 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -707,26 +707,23 @@ void scsi_cdl_check(struct scsi_device *sdev) */ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) { - struct scsi_mode_data data; - struct scsi_sense_hdr sshdr; - struct scsi_vpd *vpd; - bool is_ata = false; char buf[64]; + bool is_ata; int ret; if (!sdev->cdl_supported) return -EOPNOTSUPP; rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (vpd) - is_ata = true; + is_ata = rcu_dereference(sdev->vpd_pg89); rcu_read_unlock(); /* * For ATA devices, CDL needs to be enabled with a SET FEATURES command. */ if (is_ata) { + struct scsi_mode_data data; + struct scsi_sense_hdr sshdr; char *buf_data; int len; @@ -735,16 +732,30 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) if (ret) return -EINVAL; - /* Enable CDL using the ATA feature page */ + /* Enable or disable CDL using the ATA feature page */ len = min_t(size_t, sizeof(buf), data.length - data.header_length - data.block_descriptor_length); buf_data = buf + data.header_length + data.block_descriptor_length; - if (enable) - buf_data[4] = 0x02; - else - buf_data[4] = 0; + + /* + * If we want to enable CDL and CDL is already enabled on the + * device, do nothing. This avoids needlessly resetting the CDL + * statistics on the device as that is implied by the CDL enable + * action. Similar to this, there is no need to do anything if + * we want to disable CDL and CDL is already disabled. 
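+	 *
+	 * Bits 1:0 of byte 4 are the CDL control field (0x02 = enabled,
+	 * 0x00 = disabled), so the branches below amount to a guarded
+	 * read-modify-write; with an illustrative local it reads:
+	 *
+	 *	u8 want = enable ? 0x02 : 0x00;
+	 *
+	 *	if ((buf_data[4] & 0x03) == want)
+	 *		goto out;
+	 *	buf_data[4] = (buf_data[4] & ~0x03) | want;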
+ */ + if (enable) { + if ((buf_data[4] & 0x03) == 0x02) + goto out; + buf_data[4] &= ~0x03; + buf_data[4] |= 0x02; + } else { + if ((buf_data[4] & 0x03) == 0x00) + goto out; + buf_data[4] &= ~0x03; + } ret = scsi_mode_select(sdev, 1, 0, buf_data, len, 5 * HZ, 3, &data, &sshdr); @@ -756,6 +767,7 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) } } +out: sdev->cdl_enable = enable; return 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0d29470e86b0..1b43013d72c0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request); */ static void scsi_cleanup_rq(struct request *rq) { + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + cmd->flags = 0; + if (rq->rq_flags & RQF_DONTPREP) { - scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); + scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1244ef3aa86c..620ba6e0ab07 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4263,8 +4263,8 @@ int iscsit_close_connection( spin_unlock(&iscsit_global->ts_bitmap_lock); iscsit_stop_timers_for_cmds(conn); - iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + iscsit_stop_nopin_response_timer(conn); if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn); diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 1b137e068444..3449945493ce 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1746,6 +1746,12 @@ msm_serial_early_console_setup_dm(struct earlycon_device *device, if (!device->port.membase) return -ENODEV; + /* Disable DM / single-character modes */ + msm_write(&device->port, 0, UARTDM_DMEN); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_RX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_TX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_TX_ENABLE, MSM_UART_CR); + device->con->write = msm_serial_early_write_dm; return 0; } diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 5904a2d4cefa..054a8e630ace 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -563,8 +563,11 @@ static void sifive_serial_break_ctl(struct uart_port *port, int break_state) static int sifive_serial_startup(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_enable_rxwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); return 0; } @@ -572,9 +575,12 @@ static int sifive_serial_startup(struct uart_port *port) static void sifive_serial_shutdown(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_disable_rxwm(ssp); __ssp_disable_txwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); } /** diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 0bd6544e30a6..791e2f1f7c0b 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -193,13 +193,12 @@ int set_selection_user(const struct tiocl_selection __user *sel, return -EFAULT; /* - * TIOCL_SELCLEAR, TIOCL_SELPOINTER and TIOCL_SELMOUSEREPORT are OK to - * use without CAP_SYS_ADMIN as they do not modify the selection. 
+ * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without + * CAP_SYS_ADMIN as they do not modify the selection. */ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: - case TIOCL_SELMOUSEREPORT: break; default: if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 240ce135bbfb..f1294c29f484 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -677,13 +677,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) unsigned long flags; int err; - if (!ufshcd_cmd_inflight(lrbp->cmd)) { - dev_err(hba->dev, - "%s: skip abort. cmd at tag %d already completed.\n", - __func__, tag); - return FAILED; - } - /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", @@ -692,6 +685,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) { + dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n", + __func__, tag); + return FAILED; + } if (ufshcd_mcq_sqe_search(hba, hwq, tag)) { /* diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 44156041d88f..5cb6132b8147 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -278,6 +278,7 @@ static const struct ufs_dev_quirk ufs_fixups[] = { .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | + UFS_DEVICE_QUIRK_PA_HIBER8TIME | UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS }, { .wmanufacturerid = UFS_VENDOR_SKHYNIX, .model = UFS_ANY_MODEL, @@ -5677,6 +5678,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba, continue; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + continue; if (force_compl) { ufshcd_mcq_compl_all_cqes_lock(hba, hwq); @@ -8470,6 +8473,31 @@ out: return ret; } +/** + * ufshcd_quirk_override_pa_h8time - Ensures proper adjustment of PA_HIBERN8TIME. + * @hba: per-adapter instance + * + * Some UFS devices require specific adjustments to the PA_HIBERN8TIME parameter + * to ensure proper hibernation timing. This function retrieves the current + * PA_HIBERN8TIME value and increments it by 100us. 
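+ *
+ * PA_HIBERN8TIME counts in 100 us units, so the +1 adds a single
+ * step: a device reporting 3 (300 us) is reprogrammed to 4 (400 us).
+ * Schematically (val is illustrative):
+ *
+ *	ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &val);
+ *	ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), val + 1);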
+ */ +static void ufshcd_quirk_override_pa_h8time(struct ufs_hba *hba) +{ + u32 pa_h8time; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &pa_h8time); + if (ret) { + dev_err(hba->dev, "Failed to get PA_HIBERN8TIME: %d\n", ret); + return; + } + + /* Increment by 1 to increase hibernation time by 100 µs */ + ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), pa_h8time + 1); + if (ret) + dev_err(hba->dev, "Failed updating PA_HIBERN8TIME: %d\n", ret); +} + static void ufshcd_tune_unipro_params(struct ufs_hba *hba) { ufshcd_vops_apply_dev_quirks(hba); @@ -8480,6 +8508,9 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_HIBER8TIME) + ufshcd_quirk_override_pa_h8time(hba); } static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 1b37449fbffc..c0761ccc1381 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -33,6 +33,10 @@ ((((c) >> 16) & MCQ_QCFGPTR_MASK) * MCQ_QCFGPTR_UNIT) #define MCQ_QCFG_SIZE 0x40 +/* De-emphasis for gear-5 */ +#define DEEMPHASIS_3_5_dB 0x04 +#define NO_DEEMPHASIS 0x0 + enum { TSTBUS_UAWM, TSTBUS_UARM, @@ -795,6 +799,23 @@ static int ufs_qcom_icc_update_bw(struct ufs_qcom_host *host) return ufs_qcom_icc_set_bw(host, bw_table.mem_bw, bw_table.cfg_bw); } +static void ufs_qcom_set_tx_hs_equalizer(struct ufs_hba *hba, u32 gear, u32 tx_lanes) +{ + u32 equalizer_val; + int ret, i; + + /* Determine the equalizer value based on the gear */ + equalizer_val = (gear == 5) ? DEEMPHASIS_3_5_dB : NO_DEEMPHASIS; + + for (i = 0; i < tx_lanes; i++) { + ret = ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(TX_HS_EQUALIZER, i), + equalizer_val); + if (ret) + dev_err(hba->dev, "%s: failed equalizer lane %d\n", + __func__, i); + } +} + static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, const struct ufs_pa_layer_attr *dev_max_params, @@ -846,6 +867,11 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, dev_req_params->gear_tx, PA_INITIAL_ADAPT); } + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING) + ufs_qcom_set_tx_hs_equalizer(hba, + dev_req_params->gear_tx, dev_req_params->lane_tx); + break; case POST_CHANGE: if (ufs_qcom_cfg_timers(hba, false)) { @@ -893,6 +919,16 @@ static int ufs_qcom_quirk_host_pa_saveconfigtime(struct ufs_hba *hba) (pa_vs_config_reg1 | (1 << 12))); } +static void ufs_qcom_override_pa_tx_hsg1_sync_len(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TX_HSG1_SYNC_LENGTH), + PA_TX_HSG1_SYNC_LENGTH_VAL); + if (err) + dev_err(hba->dev, "Failed (%d) set PA_TX_HSG1_SYNC_LENGTH\n", err); +} + static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) { int err = 0; @@ -900,6 +936,9 @@ static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME) err = ufs_qcom_quirk_host_pa_saveconfigtime(hba); + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH) + ufs_qcom_override_pa_tx_hsg1_sync_len(hba); + return err; } @@ -914,6 +953,10 @@ static struct ufs_dev_quirk ufs_qcom_dev_fixups[] = { { .wmanufacturerid = UFS_VENDOR_WDC, .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE }, + { .wmanufacturerid = UFS_VENDOR_SAMSUNG, + .model = UFS_ANY_MODEL, + .quirk = UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH | + 
UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING }, {} }; diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h index d0e6ec9128e7..05d4cb569c50 100644 --- a/drivers/ufs/host/ufs-qcom.h +++ b/drivers/ufs/host/ufs-qcom.h @@ -122,8 +122,11 @@ enum { TMRLUT_HW_CGC_EN | OCSC_HW_CGC_EN) /* QUniPro Vendor specific attributes */ +#define PA_TX_HSG1_SYNC_LENGTH 0x1552 #define PA_VS_CONFIG_REG1 0x9000 #define DME_VS_CORE_CLK_CTRL 0xD002 +#define TX_HS_EQUALIZER 0x0037 + /* bit and mask definitions for DME_VS_CORE_CLK_CTRL attribute */ #define CLK_1US_CYCLES_MASK_V4 GENMASK(27, 16) #define CLK_1US_CYCLES_MASK GENMASK(7, 0) @@ -141,6 +144,21 @@ enum { #define UNIPRO_CORE_CLK_FREQ_201_5_MHZ 202 #define UNIPRO_CORE_CLK_FREQ_403_MHZ 403 +/* TX_HSG1_SYNC_LENGTH attr value */ +#define PA_TX_HSG1_SYNC_LENGTH_VAL 0x4A + +/* + * Some ufs device vendors need a different TSync length. + * Enable this quirk to give an additional TX_HS_SYNC_LENGTH. + */ +#define UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH BIT(16) + +/* + * Some ufs device vendors need a different Deemphasis setting. + * Enable this quirk to tune TX Deemphasis parameters. + */ +#define UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING BIT(17) + /* ICE allocator type to share AES engines among TX stream and RX stream */ #define ICE_ALLOCATOR_TYPE 2 diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 694aa1457739..d9d8dc05b235 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1963,6 +1963,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) unsigned int bit; unsigned long reg; + local_bh_disable(); spin_lock_irqsave(&priv_dev->lock, flags); reg = readl(&priv_dev->regs->usb_ists); @@ -2004,6 +2005,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) irqend: writel(~0, &priv_dev->regs->ep_ien); spin_unlock_irqrestore(&priv_dev->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 1a7fc638213e..4f8bfd242b59 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -336,6 +336,13 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) return ret; } +static void ci_hdrc_imx_disable_regulator(void *arg) +{ + struct ci_hdrc_imx_data *data = arg; + + regulator_disable(data->hsic_pad_regulator); +} + static int ci_hdrc_imx_probe(struct platform_device *pdev) { struct ci_hdrc_imx_data *data; @@ -394,6 +401,13 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) "Failed to enable HSIC pad regulator\n"); goto err_put; } + ret = devm_add_action_or_reset(dev, + ci_hdrc_imx_disable_regulator, data); + if (ret) { + dev_err(dev, + "Failed to add regulator devm action\n"); + goto err_put; + } } } @@ -432,11 +446,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_get_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = imx_prepare_enable_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = clk_prepare_enable(data->clk_wakeup); if (ret) @@ -470,7 +484,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) { pdata.flags |= CI_HDRC_OVERRIDE_PHY_CONTROL; data->override_phy_control = true; - usb_phy_init(pdata.usb_phy); + ret = usb_phy_init(pdata.usb_phy); + if (ret) { + dev_err(dev, "Failed to init phy\n"); + goto err_clk; + } } if (pdata.flags & 
CI_HDRC_SUPPORTS_RUNTIME_PM) @@ -479,7 +497,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_usbmisc_init(data->usbmisc_data); if (ret) { dev_err(dev, "usbmisc init failed, ret=%d\n", ret); - goto err_clk; + goto phy_shutdown; } data->ci_pdev = ci_hdrc_add_device(dev, @@ -488,7 +506,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->ci_pdev)) { ret = PTR_ERR(data->ci_pdev); dev_err_probe(dev, ret, "ci_hdrc_add_device failed\n"); - goto err_clk; + goto phy_shutdown; } if (data->usbmisc_data) { @@ -522,19 +540,20 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); +phy_shutdown: + if (data->override_phy_control) + usb_phy_shutdown(data->phy); err_clk: clk_disable_unprepare(data->clk_wakeup); err_wakeup_clk: imx_disable_unprepare_clks(dev); -disable_hsic_regulator: - if (data->hsic_pad_regulator) - /* don't overwrite original ret (cf. EPROBE_DEFER) */ - regulator_disable(data->hsic_pad_regulator); +qos_remove_request: if (pdata.flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); data->ci_pdev = NULL; err_put: - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); return ret; } @@ -556,10 +575,9 @@ static void ci_hdrc_imx_remove(struct platform_device *pdev) clk_disable_unprepare(data->clk_wakeup); if (data->plat_data->flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); - if (data->hsic_pad_regulator) - regulator_disable(data->hsic_pad_regulator); } - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); } static void ci_hdrc_imx_shutdown(struct platform_device *pdev) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 86ee39db013f..16e7fa4d488d 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -726,7 +726,7 @@ static int wdm_open(struct inode *inode, struct file *file) rv = -EBUSY; goto out; } - + smp_rmb(); /* ordered against wdm_wwan_port_stop() */ rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv); @@ -829,6 +829,7 @@ static struct usb_class_driver wdm_class = { static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); + int rv; /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. 
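	 * The flag also publishes URB state: wdm_wwan_port_stop()
	 * unpoisons the URBs, issues smp_wmb(), and only then clears
	 * the bit, pairing with the smp_rmb() behind the busy check in
	 * wdm_open(). Roughly:
	 *
	 *	stop:	unpoison_urbs(desc);
	 *		smp_wmb();
	 *		clear_bit(WDM_WWAN_IN_USE, &desc->flags);
	 *
	 *	open:	if (test_bit(WDM_WWAN_IN_USE, &desc->flags))
	 *			return -EBUSY;
	 *		smp_rmb();	(URBs now guaranteed unpoisoned)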
If chardev is already open, just fail @@ -848,7 +849,15 @@ static int wdm_wwan_port_start(struct wwan_port *port) wwan_port_txon(port); /* Start getting events */ - return usb_submit_urb(desc->validity, GFP_KERNEL); + rv = usb_submit_urb(desc->validity, GFP_KERNEL); + if (rv < 0) { + wwan_port_txoff(port); + desc->manage_power(desc->intf, 0); + /* this must be last lest we race with chardev open */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); + } + + return rv; } static void wdm_wwan_port_stop(struct wwan_port *port) @@ -859,8 +868,10 @@ static void wdm_wwan_port_stop(struct wwan_port *port) poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); - clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); + smp_wmb(); /* ordered against wdm_open() */ + /* this must be last lest we open a poisoned device */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); } static void wdm_wwan_port_tx_complete(struct urb *urb) @@ -868,7 +879,7 @@ static void wdm_wwan_port_tx_complete(struct urb *urb) struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; - usb_autopm_put_interface(desc->intf); + usb_autopm_put_interface_async(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); } @@ -898,7 +909,7 @@ static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; - req->wIndex = desc->inum; + req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8efbacc5bc34..36d3df7d040c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -369,6 +369,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* SanDisk Corp. 
SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, @@ -383,6 +386,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Silicon Motion Flash Drive */ + { USB_DEVICE(0x090c, 0x1000), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, @@ -539,6 +545,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* VLI disk */ + { USB_DEVICE(0x2109, 0x0711), .driver_info = USB_QUIRK_NO_LPM }, + /* Raydium Touchscreen */ { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index a33a42ba0249..4ca7f6240d07 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -207,15 +207,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) skip_usb3_phy: /* ulpi reset via gpio-modepin or gpio-framework driver */ - reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset_gpio)) { return dev_err_probe(dev, PTR_ERR(reset_gpio), "Failed to request reset GPIO\n"); } if (reset_gpio) { - /* Toggle ulpi to reset the phy. */ - gpiod_set_value_cansleep(reset_gpio, 1); usleep_range(5000, 10000); gpiod_set_value_cansleep(reset_gpio, 0); usleep_range(5000, 10000); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 47e73c4ed62d..8c30d86cc4e3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4617,6 +4617,12 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) if (!count) return IRQ_NONE; + if (count > evt->length) { + dev_err_ratelimited(dwc->dev, "invalid count(%u) > evt->length(%u)\n", + count, evt->length); + return IRQ_NONE; + } + evt->count = count; evt->flags |= DWC3_EVENT_PENDING; diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 900ea0d368e0..9f0a6b27e47c 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -165,6 +165,25 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_loongson(struct usb_hcd *hcd) +{ + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + /* + * Loongson's LS7A OHCI controller (rev 0x02) has a + * flaw: the MMIO registers at offset 0x60/0x64 are + * treated as a legacy PS2-compatible keyboard/mouse + * interface. Since OHCI only uses a 4KB BAR resource, + * LS7A OHCI's 32KB BAR is wrapped around (the 2nd 4KB + * BAR space is the same as the 1st 4KB internally). + * So add a 4KB offset (0x1000) to the OHCI registers + * as a quirk.
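+	 * The 32KB BAR aliasing means an access at reg + 0x1000
+	 * reaches the same OHCI register as reg while staying clear
+	 * of the magic 0x60/0x64 window; schematically:
+	 *
+	 *	readl(hcd->regs + 0x60);		intercepted as PS2 I/O
+	 *	readl(hcd->regs + 0x1000 + 0x60);	same register, no trap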
+ */ + if (pdev->revision == 0x2) + hcd->regs += SZ_4K; /* SZ_4K = 0x1000 */ + + return 0; +} + static int ohci_quirk_qemu(struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci(hcd); @@ -225,6 +244,10 @@ static const struct pci_device_id ohci_pci_quirks[] = { .driver_data = (unsigned long)ohci_quirk_amd700, }, { + PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a24), + .driver_data = (unsigned long)ohci_quirk_loongson, + }, + { .vendor = PCI_VENDOR_ID_APPLE, .device = 0x003f, .subvendor = PCI_SUBVENDOR_ID_REDHAT_QUMRANET, diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c0f226584a40..486347776cb2 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1878,9 +1878,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int max_ports, port_index; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1896,17 +1897,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1974,11 +1978,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5d64c297721c..b906bc2eea5f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -561,8 +561,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, * pointer command pending because the device can choose to start any * stream once the endpoint is on the HW schedule. 
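	 * (Context for this hunk and the xhci.c/xhci.h ones below: the
	 * EP_STALLED flag is being backed out, so the doorbell is again
	 * gated only on the four remaining state bits. Separately, the
	 * xhci_bus_resume() change above brackets the manual USB2 port
	 * resume with the now non-static interrupter helpers, roughly:
	 *
	 *	xhci_disable_interrupter(xhci->interrupters[0]);
	 *	... force each suspended port back to U0 via PORTSC ...
	 *	xhci_enable_interrupter(xhci->interrupters[0]);
	 *
	 * so stray port-event IRQs cannot race the resume sequence.)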
 	 */
-	if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED |
-			EP_CLEARING_TT | EP_STALLED))
+	if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) ||
+	    (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT))
 		return;
 
 	trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id));
@@ -2573,9 +2573,6 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 		xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET);
 		return;
-	case COMP_STALL_ERROR:
-		ep->ep_state |= EP_STALLED;
-		break;
 	default:
 		/* do nothing */
 		break;
@@ -2916,7 +2913,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	if (xhci_spurious_success_tx_event(xhci, ep_ring)) {
 		xhci_dbg(xhci, "Spurious event dma %pad, comp_code %u after %u\n",
			 &ep_trb_dma, trb_comp_code, ep_ring->old_trb_comp_code);
-		ep_ring->old_trb_comp_code = trb_comp_code;
+		ep_ring->old_trb_comp_code = 0;
 		return 0;
 	}
@@ -3780,7 +3777,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	 * enqueue a No Op TRB; this prevents the Setup and Data Stage
 	 * TRBs from being broken up by the Link TRB.
 	 */
-	if (trb_is_link(ep_ring->enqueue + 1)) {
+	if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue + 1)) {
 		field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state;
 		queue_trb(xhci, ep_ring, false, 0, 0, TRB_INTR_TARGET(0), field);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0452b8d65832..90eb491267b5 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -322,7 +322,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
 		xhci_info(xhci, "Fault detected\n");
 }
 
-static int xhci_enable_interrupter(struct xhci_interrupter *ir)
+int xhci_enable_interrupter(struct xhci_interrupter *ir)
 {
 	u32 iman;
 
@@ -335,7 +335,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir)
 	return 0;
 }
 
-static int xhci_disable_interrupter(struct xhci_interrupter *ir)
+int xhci_disable_interrupter(struct xhci_interrupter *ir)
 {
 	u32 iman;
 
@@ -1605,11 +1605,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		goto free_priv;
 	}
 
-	/* Class driver might not be aware ep halted due to async URB giveback */
-	if (*ep_state & EP_STALLED)
-		dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n",
-			urb);
-
 	switch (usb_endpoint_type(&urb->ep->desc)) {
 
 	case USB_ENDPOINT_XFER_CONTROL:
@@ -1770,8 +1765,8 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 		goto done;
 	}
 
-	/* In these cases no commands are pending but the endpoint is stopped */
-	if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) {
+	/* In this case no commands are pending but the endpoint is stopped */
+	if (ep->ep_state & EP_CLEARING_TT) {
 		/* and cancelled TDs can be given back right away */
 		xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n",
			 urb->dev->slot_id, ep_index, ep->ep_state);
@@ -3209,11 +3204,8 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
 	ep = &vdev->eps[ep_index];
 
-	spin_lock_irqsave(&xhci->lock, flags);
-
-	ep->ep_state &= ~EP_STALLED;
-
 	/* Bail out if toggle is already being cleared by an endpoint reset */
+	spin_lock_irqsave(&xhci->lock, flags);
 	if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) {
 		ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE;
 		spin_unlock_irqrestore(&xhci->lock, flags);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 37860f1e3aba..242ab9fbc8ae 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -664,7 +664,7 @@ struct xhci_virt_ep {
 	unsigned int
ep_state; #define SET_DEQ_PENDING (1 << 0) -#define EP_HALTED (1 << 1) /* Halted host ep handling */ +#define EP_HALTED (1 << 1) /* For stall handling */ #define EP_STOP_CMD_PENDING (1 << 2) /* For URB cancellation */ /* Transitioning the endpoint to using streams, don't enqueue URBs */ #define EP_GETTING_STREAMS (1 << 3) @@ -675,7 +675,6 @@ struct xhci_virt_ep { #define EP_SOFT_CLEAR_TOGGLE (1 << 7) /* usb_hub_clear_tt_buffer is in progress */ #define EP_CLEARING_TT (1 << 8) -#define EP_STALLED (1 << 9) /* For stall handling */ /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; struct xhci_hcd *xhci; @@ -1891,6 +1890,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct usb_tt *tt, gfp_t mem_flags); int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, u32 imod_interval); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9b34e23b7091..6ac7a0a5cf07 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1093,6 +1093,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, + /* Abacus Electrics */ + { USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 52be47d684ea..9acb6f837327 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -443,6 +443,11 @@ #define LINX_FUTURE_2_PID 0xF44C /* Linx future device */ /* + * Abacus Electrics + */ +#define ABACUS_OPTICAL_PROBE_PID 0xf458 /* ABACUS ELECTRICS Optical Probe */ + +/* * Oceanic product ids */ #define FTDI_OCEANIC_PID 0xF460 /* Oceanic dive instrument */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5cd26dac2069..27879cc57536 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -611,6 +611,7 @@ static void option_instat_callback(struct urb *urb); /* Sierra Wireless products */ #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 +#define SIERRA_PRODUCT_EM9291 0x90e3 /* UNISOC (Spreadtrum) products */ #define UNISOC_VENDOR_ID 0x1782 @@ -2432,6 +2433,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2c12449ff60c..a0afaf254d12 100644 --- 
a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -100,6 +100,11 @@ DEVICE(nokia, NOKIA_IDS); { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ DEVICE_N(novatel_gps, NOVATEL_IDS, 3); +/* OWON electronic test and measurement equipment driver */ +#define OWON_IDS() \ + { USB_DEVICE(0x5345, 0x1234) } /* HDS200 oscilloscopes and others */ +DEVICE(owon, OWON_IDS); + /* Siemens USB/MPI adapter */ #define SIEMENS_IDS() \ { USB_DEVICE(0x908, 0x0004) } @@ -134,6 +139,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &motorola_tetra_device, &nokia_device, &novatel_gps_device, + &owon_device, &siemens_mpi_device, &suunto_device, &vivopay_device, @@ -153,6 +159,7 @@ static const struct usb_device_id id_table[] = { MOTOROLA_TETRA_IDS(), NOKIA_IDS(), NOVATEL_IDS(), + OWON_IDS(), SIEMENS_IDS(), SUUNTO_IDS(), VIVOPAY_IDS(), diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 1f8c9b16a0fb..d460d71b4257 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -83,6 +83,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), +/* Reported-by: Oliver Neukum <oneukum@suse.com> */ +UNUSUAL_DEV(0x125f, 0xa94a, 0x0160, 0x0160, + "ADATA", + "Portable HDD CH94", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9c76c3d0c6cf..67a533e35150 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1052,9 +1052,11 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->usb_mode = USB_MODE_USB3; } + mutex_lock(&port->partner_link_lock); ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); + mutex_unlock(&port->partner_link_lock); put_device(&partner->dev); return ERR_PTR(ret); } @@ -1063,6 +1065,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, typec_partner_link_device(partner, port->usb2_dev); if (port->usb3_dev) typec_partner_link_device(partner, port->usb3_dev); + mutex_unlock(&port->partner_link_lock); return partner; } @@ -1083,12 +1086,18 @@ void typec_unregister_partner(struct typec_partner *partner) port = to_typec_port(partner->dev.parent); - if (port->usb2_dev) + mutex_lock(&port->partner_link_lock); + if (port->usb2_dev) { typec_partner_unlink_device(partner, port->usb2_dev); - if (port->usb3_dev) + port->usb2_dev = NULL; + } + if (port->usb3_dev) { typec_partner_unlink_device(partner, port->usb3_dev); + port->usb3_dev = NULL; + } device_unregister(&partner->dev); + mutex_unlock(&port->partner_link_lock); } EXPORT_SYMBOL_GPL(typec_unregister_partner); @@ -2041,10 +2050,11 @@ static struct typec_partner *typec_get_partner(struct typec_port *port) static void typec_partner_attach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; struct usb_device *udev = to_usb_device(dev); enum usb_mode usb_mode; + mutex_lock(&port->partner_link_lock); if (udev->speed < USB_SPEED_SUPER) { usb_mode = USB_MODE_USB2; port->usb2_dev = dev; @@ -2053,18 +2063,22 @@ static void typec_partner_attach(struct typec_connector *con, struct device *dev 
port->usb3_dev = dev; } + partner = typec_get_partner(port); if (partner) { typec_partner_set_usb_mode(partner, usb_mode); typec_partner_link_device(partner, dev); put_device(&partner->dev); } + mutex_unlock(&port->partner_link_lock); } static void typec_partner_deattach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; + mutex_lock(&port->partner_link_lock); + partner = typec_get_partner(port); if (partner) { typec_partner_unlink_device(partner, dev); put_device(&partner->dev); @@ -2074,6 +2088,7 @@ static void typec_partner_deattach(struct typec_connector *con, struct device *d port->usb2_dev = NULL; else if (port->usb3_dev == dev) port->usb3_dev = NULL; + mutex_unlock(&port->partner_link_lock); } /** @@ -2614,6 +2629,7 @@ struct typec_port *typec_register_port(struct device *parent, ida_init(&port->mode_ids); mutex_init(&port->port_type_lock); + mutex_init(&port->partner_link_lock); port->id = id; port->ops = cap->ops; diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index b3076a24ad2e..db2fe96c48ff 100644 --- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -59,6 +59,7 @@ struct typec_port { enum typec_port_type port_type; enum usb_mode usb_mode; struct mutex port_type_lock; + struct mutex partner_link_lock; enum typec_orientation orientation; struct typec_switch *sw; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f6f5a7ac7894..26bcf3a7f70c 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -627,6 +627,9 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) int ret; llnode = llist_del_all(&svq->completion_list); + + mutex_lock(&svq->vq.mutex); + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -660,6 +663,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_scsi_release_cmd_res(se_cmd); } + mutex_unlock(&svq->vq.mutex); + if (signal) vhost_signal(&svq->vs->dev, &svq->vq); } @@ -994,39 +999,66 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_nexus *nexus, static void vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned int out, u8 status) + struct vhost_scsi_ctx *vc, u8 status) { - struct virtio_scsi_cmd_resp __user *resp; struct virtio_scsi_cmd_resp rsp; + struct iov_iter iov_iter; int ret; memset(&rsp, 0, sizeof(rsp)); rsp.status = status; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, + sizeof(rsp)); + + ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); + + if (likely(ret == sizeof(rsp))) + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_cmd_resp\n"); } +#define TYPE_IO_CMD 0 +#define TYPE_CTRL_TMF 1 +#define TYPE_CTRL_AN 2 + static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned out) + struct vhost_scsi_ctx *vc, int type) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; + union { + struct virtio_scsi_cmd_resp cmd; + struct virtio_scsi_ctrl_tmf_resp tmf; + struct virtio_scsi_ctrl_an_resp an; + } rsp; + struct iov_iter iov_iter; + size_t rsp_size; int ret; memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = 
vq->iov[out].iov_base;
-	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
-	if (!ret)
-		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+
+	if (type == TYPE_IO_CMD) {
+		rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+		rsp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET;
+	} else if (type == TYPE_CTRL_TMF) {
+		rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+		rsp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+	} else {
+		rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+		rsp.an.response = VIRTIO_SCSI_S_BAD_TARGET;
+	}
+
+	iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in,
+		      rsp_size);
+
+	ret = copy_to_iter(&rsp, rsp_size, &iov_iter);
+
+	if (likely(ret == rsp_size))
+		vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
 	else
-		pr_err("Faulted on virtio_scsi_cmd_resp\n");
+		pr_err("Faulted on virtio scsi type=%d\n", type);
 }
 
 static int
@@ -1390,9 +1422,9 @@ err:
 		if (ret == -ENXIO)
 			break;
 		else if (ret == -EIO)
-			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+			vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD);
 		else if (ret == -ENOMEM)
-			vhost_scsi_send_status(vs, vq, vc.head, vc.out,
+			vhost_scsi_send_status(vs, vq, &vc,
					       SAM_STAT_TASK_SET_FULL);
 	} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
 out:
@@ -1432,8 +1464,11 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work)
 	else
 		resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED;
 
+	mutex_lock(&tmf->svq->vq.mutex);
 	vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs,
				 tmf->vq_desc, &tmf->resp_iov, resp_code);
+	mutex_unlock(&tmf->svq->vq.mutex);
+
 	vhost_scsi_release_tmf_res(tmf);
 }
 
@@ -1623,7 +1658,10 @@ err:
 		if (ret == -ENXIO)
 			break;
 		else if (ret == -EIO)
-			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+			vhost_scsi_send_bad_target(vs, vq, &vc,
						   v_req.type == VIRTIO_SCSI_T_TMF ?
						   TYPE_CTRL_TMF :
						   TYPE_CTRL_AN);
 	} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
 out:
 	mutex_unlock(&vq->mutex);
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 150753c3b578..95d5d7993e5b 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -407,6 +407,12 @@ static void virtio_dev_shutdown(struct device *_d)
 	if (!drv)
 		return;
 
+	/* If the driver has its own shutdown method, use that. */
+	if (drv->shutdown) {
+		drv->shutdown(dev);
+		return;
+	}
+
 	/*
 	 * Some devices get wedged if you kick them after they are
 	 * reset. Mark all vqs as broken to make sure we don't.
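
Two things in the hunks above are worth isolating. First, vhost-scsi now builds an iov_iter over the guest-supplied iovecs and copies through copy_to_iter(), so a response that spans several descriptor segments still lands correctly where the old raw __copy_to_user() assumed one contiguous buffer. A trimmed kernel-style sketch; struct my_resp and send_resp() are invented, but the iov_iter calls are the real uio API:

	#include <linux/errno.h>
	#include <linux/uio.h>

	struct my_resp {
		u8 response;
		u8 pad[7];
	};

	/* The guest's writable descriptors start at iov[first_in] and may
	 * split the response across several segments. */
	static int send_resp(struct iovec *iov, unsigned int first_in,
			     unsigned int in_cnt, const struct my_resp *rsp)
	{
		struct iov_iter iter;

		iov_iter_init(&iter, ITER_DEST, &iov[first_in], in_cnt, sizeof(*rsp));

		/* copy_to_iter() walks segment boundaries transparently */
		if (copy_to_iter(rsp, sizeof(*rsp), &iter) != sizeof(*rsp))
			return -EFAULT;
		return 0;
	}

Second, virtio_dev_shutdown() now defers to a driver-provided callback before falling back to the generic mark-vqs-broken path, as the virtio.c hunk below shows:

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 150753c3b578..95d5d7993e5b 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -407,6 +407,12 @@ static void virtio_dev_shutdown(struct device *_d)
 	if (!drv)
 		return;
 
+	/* If the driver has its own shutdown method, use that. */
+	if (drv->shutdown) {
+		drv->shutdown(dev);
+		return;
+	}
+
 	/*
 	 * Some devices get wedged if you kick them after they are
 	 * reset. Mark all vqs as broken to make sure we don't.

A hedged sketch of a driver opting into the new hook; all my_* names are hypothetical, and .shutdown is the member this hunk starts honoring:

	#include <linux/virtio.h>
	#include <linux/virtio_config.h>
	#include <linux/virtio_ids.h>

	static int my_probe(struct virtio_device *vdev) { return 0; }
	static void my_remove(struct virtio_device *vdev) { }

	static void my_shutdown(struct virtio_device *vdev)
	{
		/* device-specific quiesce: stop queues, flush in-flight work... */
		virtio_reset_device(vdev);	/* then reset, as the generic path would */
	}

	static const struct virtio_device_id my_id_table[] = {
		{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
		{ 0 },
	};

	static struct virtio_driver my_driver = {
		.driver.name	= "my-virtio-driver",
		.id_table	= my_id_table,
		.probe		= my_probe,
		.remove		= my_remove,
		.shutdown	= my_shutdown,
	};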
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 5eaade757860..d50fe030d825 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -247,7 +247,7 @@ virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev)
 	sg_init_one(&data_sg, get_data, sizeof(*get_data));
 	sg_init_one(&result_sg, result, sizeof(*result));
 	cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET);
-	cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV);
+	cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF);
 	cmd.data_sg = &data_sg;
 	cmd.result_sg = &result_sg;
 	ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd);
@@ -305,7 +305,7 @@ static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev)
 	sg_init_one(&result_sg, data, sizeof(*data));
 	cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY);
-	cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV);
+	cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF);
 	cmd.result_sg = &result_sg;
 
 	ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fdd2d2b07b5a..b784aab66867 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -2650,7 +2650,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
 	if (vq->event_triggered)
-		vq->event_triggered = false;
+		data_race(vq->event_triggered = false);
 
 	return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
				 virtqueue_enable_cb_delayed_split(_vq);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 7c930ef77380..effafc3e0ced 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1425,6 +1425,8 @@ alloc_done:
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
 		wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
 
+	wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c));
+
 	BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
 
 	return 0;
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 69ec6a012898..4c1e33cf57c0 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ
 	unsigned i;
 
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
-		ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+		ob_push(c, ob->sectors_free < block_sectors(c)
+			? &ptrs
+			: &keep, ob);
 	wp->ptrs = keep;
 
 	mutex_unlock(&wp->lock);
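
The two alloc_foreground hunks above enforce one invariant from both sides: a write point only advertises free space in whole filesystem blocks, and an open bucket with less than one block remaining is retired rather than kept around. A standalone arithmetic sketch of that invariant; plain C, not the bcachefs API:

	/* Plain C model of the invariant; names are invented. */
	#include <assert.h>
	#include <stdint.h>

	static uint32_t rounddown(uint32_t n, uint32_t multiple)
	{
		return n - (n % multiple);
	}

	int main(void)
	{
		uint32_t block_sectors = 8;	/* e.g. 4KB blocks on 512-byte sectors */
		uint32_t sectors_free  = 21;	/* min() over the open buckets */

		/* the write point only advertises whole blocks */
		sectors_free = rounddown(sectors_free, block_sectors);
		assert(sectors_free == 16);	/* the partial trailing block is dropped */

		/* a bucket with less than one block left cannot take another write */
		uint32_t bucket_free = 5;
		assert(bucket_free < block_sectors);
		return 0;
	}

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index a3db328dee31..d6e4a496f02b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k)
 #define __BKEY_PADDED(key, pad) \
	struct bkey_i key; __u64 key ## _pad[pad]
 
+enum bch_bkey_type_flags {
+	BKEY_TYPE_strict_btree_checks = BIT(0),
+};
+
 /*
  * - DELETED keys are used internally to mark keys that should be ignored but
  *   override keys in composition order. Their version number is ignored.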
@@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k) * * - WHITEOUT: for hash table btrees */ -#define BCH_BKEY_TYPES() \ - x(deleted, 0) \ - x(whiteout, 1) \ - x(error, 2) \ - x(cookie, 3) \ - x(hash_whiteout, 4) \ - x(btree_ptr, 5) \ - x(extent, 6) \ - x(reservation, 7) \ - x(inode, 8) \ - x(inode_generation, 9) \ - x(dirent, 10) \ - x(xattr, 11) \ - x(alloc, 12) \ - x(quota, 13) \ - x(stripe, 14) \ - x(reflink_p, 15) \ - x(reflink_v, 16) \ - x(inline_data, 17) \ - x(btree_ptr_v2, 18) \ - x(indirect_inline_data, 19) \ - x(alloc_v2, 20) \ - x(subvolume, 21) \ - x(snapshot, 22) \ - x(inode_v2, 23) \ - x(alloc_v3, 24) \ - x(set, 25) \ - x(lru, 26) \ - x(alloc_v4, 27) \ - x(backpointer, 28) \ - x(inode_v3, 29) \ - x(bucket_gens, 30) \ - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ - x(accounting, 34) \ - x(inode_alloc_cursor, 35) +#define BCH_BKEY_TYPES() \ + x(deleted, 0, 0) \ + x(whiteout, 1, 0) \ + x(error, 2, 0) \ + x(cookie, 3, 0) \ + x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \ + x(extent, 6, BKEY_TYPE_strict_btree_checks) \ + x(reservation, 7, BKEY_TYPE_strict_btree_checks) \ + x(inode, 8, BKEY_TYPE_strict_btree_checks) \ + x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \ + x(dirent, 10, BKEY_TYPE_strict_btree_checks) \ + x(xattr, 11, BKEY_TYPE_strict_btree_checks) \ + x(alloc, 12, BKEY_TYPE_strict_btree_checks) \ + x(quota, 13, BKEY_TYPE_strict_btree_checks) \ + x(stripe, 14, BKEY_TYPE_strict_btree_checks) \ + x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \ + x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \ + x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \ + x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \ + x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \ + x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \ + x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \ + x(set, 25, 0) \ + x(lru, 26, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \ + x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \ + x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \ + x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \ + x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \ + x(accounting, 34, BKEY_TYPE_strict_btree_checks) \ + x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks) enum bch_bkey_type { -#define x(name, nr) KEY_TYPE_##name = nr, +#define x(name, nr, ...) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, @@ -863,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); +LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 15c93576b5c2..00d05ccfaf73 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -21,7 +21,7 @@ #include "xattr.h" const char * const bch2_bkey_types[] = { -#define x(name, nr) #name, +#define x(name, nr, ...) 
#name,
 	BCH_BKEY_TYPES()
 #undef x
 	NULL
@@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_
 })
 
 const struct bkey_ops bch2_bkey_ops[] = {
-#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
+#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
 	BCH_BKEY_TYPES()
 #undef x
 };
@@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = {
 #undef x
 };
 
+static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = {
+#define x(name, nr, flags) [KEY_TYPE_##name] = flags,
+	BCH_BKEY_TYPES()
+#undef x
+};
+
 const char *bch2_btree_node_type_str(enum btree_node_type type)
 {
 	return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1);
@@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
 	if (type >= BKEY_TYPE_NR)
 		return 0;
 
-	bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
-			 (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
+	enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX
+		? bch2_bkey_type_flags[k.k->type]
+		: 0;
+
+	bool strict_key_type_allowed =
+		(from.flags & BCH_VALIDATE_commit) ||
+		type == BKEY_TYPE_btree ||
+		(from.btree < BTREE_ID_NR &&
+		 (bkey_flags & BKEY_TYPE_strict_btree_checks));
+
+	bkey_fsck_err_on(strict_key_type_allowed &&
+			 k.k->type < KEY_TYPE_MAX &&
			 !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
			 c, bkey_invalid_type_for_btree,
			 "invalid key type for btree %s (%s)",
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index e34e9598ef25..59fa527ac685 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
			struct bpos end)
 {
 	if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) &&
-	    !bkey_eq(iter->pos, POS_MAX)) {
+	    !bkey_eq(iter->pos, POS_MAX) &&
+	    !((iter->flags & BTREE_ITER_is_extents) &&
+	      iter->pos.offset == U64_MAX)) {
+
 		/*
 		 * bkey_start_pos(), for extents, is not monotonically
 		 * increasing until after filtering for snapshots:
@@ -2602,7 +2605,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct
 	bch2_trans_verify_not_unlocked_or_in_restart(trans);
 	bch2_btree_iter_verify_entry_exit(iter);
-	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+	EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode);
 
 	int ret = trans_maybe_inject_restart(trans, _RET_IP_);
 	if (unlikely(ret)) {
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 8488a7578115..92ee59d9e00e 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -13,8 +13,8 @@
 
 #include <linux/dcache.h>
 
-static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
-			 const struct qstr *str, struct qstr *out_cf)
+int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+		  const struct qstr *str, struct qstr *out_cf)
 {
 	*out_cf = (struct qstr) QSTR_INIT(NULL, 0);
 
@@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *
 #endif
 }
 
-static inline int bch2_maybe_casefold(struct btree_trans *trans,
-				      const struct bch_hash_info *info,
-				      const struct qstr *str, struct qstr *out_cf)
-{
-	if (likely(!info->cf_encoding)) {
-		*out_cf = *str;
-		return 0;
-	} else {
-		return bch2_casefold(trans, info, str, out_cf);
-	}
-}
-
 static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
 {
 	if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name))
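
The bkey_methods.c hunks above show why BCH_BKEY_TYPES() grew a third column: a single x-macro table now stamps out the name strings, the ops table, and the new per-type flags array in lockstep, so a key type can never be present in one table and missing from another. A self-contained sketch of the idiom with generic names, not the bcachefs definitions:

	#include <stdio.h>

	#define BIT(n) (1u << (n))
	#define STRICT_CHECKS BIT(0)

	/* one master list: x(name, nr, flags) */
	#define MY_TYPES()			\
		x(deleted,	0, 0)		\
		x(cookie,	3, 0)		\
		x(extent,	6, STRICT_CHECKS)

	enum my_type {
	#define x(name, nr, ...) TYPE_##name = nr,
		MY_TYPES()
	#undef x
	};

	static const char * const my_type_names[] = {
	#define x(name, nr, ...) [TYPE_##name] = #name,
		MY_TYPES()
	#undef x
	};

	static const unsigned my_type_flags[] = {
	#define x(name, nr, flags) [TYPE_##name] = flags,
		MY_TYPES()
	#undef x
	};

	int main(void)
	{
		/* all three tables stay in sync by construction */
		printf("%s: flags=%u\n", my_type_names[TYPE_extent],
		       my_type_flags[TYPE_extent]);
		return 0;
	}

Adding an entry to MY_TYPES() updates every generated table at once, which is the property the bcachefs change relies on when threading the new flags column through.

diff --git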
a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 0880772b80a9..9838a7ba7ed1 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,6 +23,21 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, + const struct qstr *, struct qstr *); + +static inline int bch2_maybe_casefold(struct btree_trans *trans, + const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + if (likely(!info->cf_encoding)) { + *out_cf = *str; + return 0; + } else { + return bch2_casefold(trans, info, str, out_cf); + } +} + struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index baf5dfb32298..925b0b54ea2f 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, { struct fsck_err_state *s; - if (!test_bit(BCH_FS_fsck_running, &c->flags)) - return NULL; - list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->id == id) { /* @@ -639,14 +636,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return ret; } -void bch2_flush_fsck_errs(struct bch_fs *c) +static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; mutex_lock(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { - if (s->ratelimited && s->last_msg) + if (print && s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); @@ -657,6 +654,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_unlock(&c->fsck_error_msgs_lock); } +void bch2_flush_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, true); +} + +void bch2_free_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, false); +} + int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum, u64 offset) { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index d0d024dc714b..4a364fd44abe 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, _flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__) void bch2_flush_fsck_errs(struct bch_fs *); +void bch2_free_fsck_errs(struct bch_fs *); #define fsck_err_wrap(_do) \ ({ \ diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 14886e1d4d6d..a82dfce9e4ad 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -21,206 +21,6 @@ #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ -struct flags_set { - unsigned mask; - unsigned flags; - - unsigned projid; - - bool set_projinherit; - bool projinherit; -}; - -static int bch2_inode_flags_set(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - /* - * We're relying on btree locking here for exclusion with other ioctl - * calls - use the flags in the btree (@bi), not inode->i_flags: - */ - struct flags_set *s = p; - unsigned newflags = s->flags; - unsigned oldflags = bi->bi_flags & s->mask; - - if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - if (!S_ISREG(bi->bi_mode) && - !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) 
- return -EINVAL; - - if ((newflags ^ oldflags) & BCH_INODE_casefolded) { -#ifdef CONFIG_UNICODE - int ret = 0; - /* Not supported on individual files. */ - if (!S_ISDIR(bi->bi_mode)) - return -EOPNOTSUPP; - - /* - * Make sure the dir is empty, as otherwise we'd need to - * rehash everything and update the dirent keys. - */ - ret = bch2_empty_dir_trans(trans, inode_inum(inode)); - if (ret < 0) - return ret; - - ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); - if (ret) - return ret; - - bch2_check_set_feature(c, BCH_FEATURE_casefolding); -#else - printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); - return -EOPNOTSUPP; -#endif - } - - if (s->set_projinherit) { - bi->bi_fields_set &= ~(1 << Inode_opt_project); - bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); - } - - bi->bi_flags &= ~s->mask; - bi->bi_flags |= newflags; - - bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); - return 0; -} - -static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -{ - unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); - - return put_user(flags, arg); -} - -static int bch2_ioc_setflags(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - void __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; - unsigned uflags; - int ret; - - if (get_user(uflags, (int __user *) arg)) - return -EFAULT; - - s.flags = map_flags_rev(bch_flags_to_uflags, uflags); - if (uflags) - return -EOPNOTSUPP; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto setflags_out; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_write_inode(c, inode, bch2_inode_flags_set, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - -setflags_out: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - -static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct fsxattr fa = { 0 }; - - fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); - - if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) - fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; - - fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - - return 0; -} - -static int fssetxattr_inode_update_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct flags_set *s = p; - - if (s->projid != bi->bi_project) { - bi->bi_fields_set |= 1U << Inode_opt_project; - bi->bi_project = s->projid; - } - - return bch2_inode_flags_set(trans, inode, bi, p); -} - -static int bch2_ioc_fssetxattr(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; - struct fsxattr fa; - int ret; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - s.set_projinherit = true; - s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; - fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; - - s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); - if (fa.fsx_xflags) - return -EOPNOTSUPP; - - if (fa.fsx_projid >= U32_MAX) - return -EINVAL; - - /* - * inode fields accessible via the xattr interface are stored with a +1 - * bias, so that 0 
means unset: - */ - s.projid = fa.fsx_projid + 1; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto err; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_set_projid(c, inode, fa.fsx_projid) ?: - bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); -err: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - static int bch2_reinherit_attrs_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) long ret; switch (cmd) { - case FS_IOC_GETFLAGS: - ret = bch2_ioc_getflags(inode, (int __user *) arg); - break; - - case FS_IOC_SETFLAGS: - ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); - break; - - case FS_IOC_FSGETXATTR: - ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); - break; - - case FS_IOC_FSSETXATTR: - ret = bch2_ioc_fssetxattr(c, file, inode, - (void __user *) arg); - break; - case BCHFS_IOC_REINHERIT_ATTRS: ret = bch2_ioc_reinherit_attrs(c, file, inode, (void __user *) arg); diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index ecd3bfdcde21..a657e4994b71 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -2,81 +2,6 @@ #ifndef _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H -/* Inode flags: */ - -/* bcachefs inode flags -> vfs inode flags: */ -static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_sync] = S_SYNC, - [__BCH_INODE_immutable] = S_IMMUTABLE, - [__BCH_INODE_append] = S_APPEND, - [__BCH_INODE_noatime] = S_NOATIME, - [__BCH_INODE_casefolded] = S_CASEFOLD, -}; - -/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_sync] = FS_SYNC_FL, - [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, - [__BCH_INODE_append] = FS_APPEND_FL, - [__BCH_INODE_nodump] = FS_NODUMP_FL, - [__BCH_INODE_noatime] = FS_NOATIME_FL, - [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, -}; - -/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_sync] = FS_XFLAG_SYNC, - [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_append] = FS_XFLAG_APPEND, - [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, - [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, - //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -}; - -#define set_flags(_map, _in, _out) \ -do { \ - unsigned _i; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & (1 << _i)) \ - (_out) |= _map[_i]; \ - else \ - (_out) &= ~_map[_i]; \ -} while (0) - -#define map_flags(_map, _in) \ -({ \ - unsigned _out = 0; \ - \ - set_flags(_map, _in, _out); \ - _out; \ -}) - -#define map_flags_rev(_map, _in) \ -({ \ - unsigned _i, _out = 0; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & _map[_i]) { \ - (_out) |= 1 << _i; \ - (_in) &= ~_map[_i]; \ - } \ - (_out); \ -}) - -#define map_defined(_map) \ -({ \ - unsigned _in = ~0; \ - \ - map_flags_rev(_map, _in); \ -}) - -/* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -{ - set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -} - long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); long 
bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5a41b1a8e54f..0f1d61aab90b 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -33,6 +33,7 @@ #include <linux/backing-dev.h> #include <linux/exportfs.h> #include <linux/fiemap.h> +#include <linux/fileattr.h> #include <linux/fs_context.h> #include <linux/module.h> #include <linux/pagemap.h> @@ -51,6 +52,22 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) +{ + static const __maybe_unused unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + }; + + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); + + if (bch2_inode_casefold(c, &inode->ei_inode)) + inode->v.i_flags |= S_CASEFOLD; +} + void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -79,7 +96,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -631,13 +648,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); @@ -825,6 +847,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) { + d_invalidate(dentry); + d_prune_aliases(&inode->v); + } err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -1235,10 +1262,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1291,110 +1328,223 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - u64 start, u64 len) +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. 
+ */
+static int
+bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end,
+			   bool nonblock)
 {
-	struct bch_fs *c = vinode->i_sb->s_fs_info;
-	struct bch_inode_info *ei = to_bch_ei(vinode);
-	struct btree_trans *trans;
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	struct bkey_buf cur, prev;
-	bool have_extent = false;
-	int ret = 0;
+	loff_t dstart, dend;
 
-	ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
-	if (ret)
+	dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock);
+	if (dstart < 0)
+		return dstart;
+
+	if (dstart == *end) {
+		*start = dstart;
+		return 0;
+	}
+
+	dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock);
+	if (dend < 0)
+		return dend;
+
+	/* race */
+	BUG_ON(dstart == dend);
+
+	*start = dstart;
+	*end = dend;
+	return 0;
+}
+
+/*
+ * Scan a range of pagecache that corresponds to a file mapping hole in the
+ * extent btree. If data is found, fake up an extent key so it looks like a
+ * delalloc extent to the rest of the fiemap processing code.
+ */
+static int
+bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode,
+				  u64 start, u64 end, struct bch_fiemap_extent *cur)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i_extent *delextent;
+	struct bch_extent_ptr ptr = {};
+	loff_t dstart = start << 9, dend = end << 9;
+	int ret;
+
+	/*
+	 * We hold btree locks here so we cannot block on folio locks without
+	 * dropping trans locks first. Run a nonblocking scan for the common
+	 * case of no folios over holes and fall back on failure.
+	 *
+	 * Note that dropping locks like this is technically racy against
+	 * writeback inserting to the extent tree, but a non-sync fiemap scan is
+	 * fundamentally racy with writeback anyway. Therefore, just report the
+	 * range as delalloc regardless of whether we have to cycle trans locks.
+	 */
+	ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true);
+	if (ret == -EAGAIN)
+		ret = drop_locks_do(trans,
+			bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false));
+	if (ret < 0)
 		return ret;
 
-	struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
-	if (start + len < start)
-		return -EINVAL;
+	/*
+	 * Create a fake extent key in the buffer. We have to add a dummy extent
+	 * pointer for the fill code to add an extent entry. It's explicitly
+	 * zeroed to reflect delayed allocation (i.e. phys offset 0).
+ */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; + bch2_bkey_append_ptr(&delextent->k_i, ptr); - start >>= 9; + cur->flags = FIEMAP_EXTENT_DELALLOC; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); - trans = bch2_trans_get(c); + return 0; +} + +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + SPOS(inode->ei_inum.inum, start, snapshot), 0); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; - bch2_trans_begin(trans); + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur); + if (ret) + goto err; - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); - if (ret) - continue; + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. 
+ */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } - k = bch2_btree_iter_peek_max(trans, &iter, end); - ret = bkey_err(k); + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) - continue; + goto err; - if (!k.k) - break; + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(trans, &iter); - continue; - } + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_trans *trans; + struct bch_fiemap_extent cur, prev; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, 0); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + start >>= 9; + u64 end = (start + len) >> 9; - bch2_bkey_buf_reassemble(&cur, c, k); + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); + bkey_init(&prev.kbuf.k->k); - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + trans = bch2_trans_get(c); + + while (start < end) { + ret = lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; + goto err; - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + if (bkey_start_offset(&cur.kbuf.k->k) == end) + break; - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.k, cur.k); - have_extent = true; - - bch2_btree_iter_set_pos(trans, &iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); + prev.flags = cur.flags; } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret < 0 ? 
ret : 0; + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); + + return bch2_err_class(ret < 0 ? ret : 0); } static const struct vm_operations_struct bch_vm_ops = { @@ -1449,6 +1599,165 @@ static int bch2_open(struct inode *vinode, struct file *file) return generic_file_open(vinode, file); } +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const __maybe_unused unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const __maybe_unused unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, +}; + +static int bch2_fileattr_get(struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); + + if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) + fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + + if (bch2_inode_casefold(c, &inode->ei_inode)) + fa->flags |= FS_CASEFOLD_FL; + + fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; + return 0; +} + +struct flags_set { + unsigned mask; + unsigned flags; + unsigned projid; + bool set_project; + bool set_casefold; + bool casefold; +}; + +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = trans->c; + struct flags_set *s = p; + + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && + (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) + return -EINVAL; + + if (s->casefold != bch2_inode_casefold(c, bi)) { +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. 
+ */ + ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = s->casefold + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + +#else + printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); + return -EOPNOTSUPP; +#endif + } + + if (s->set_project) { + bi->bi_project = s->projid; + bi->bi_fields_set |= BIT(Inode_opt_project); + } + + bi->bi_flags &= ~s->mask; + bi->bi_flags |= s->flags; + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); + return 0; +} + +static int bch2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct flags_set s = {}; + int ret; + + if (fa->fsx_valid) { + fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT; + + s.mask = map_defined(bch_flags_to_xflags); + s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); + if (fa->fsx_xflags) + return -EOPNOTSUPP; + + if (fa->fsx_projid >= U32_MAX) + return -EINVAL; + + /* + * inode fields accessible via the xattr interface are stored with a +1 + * bias, so that 0 means unset: + */ + if ((inode->ei_inode.bi_project || + fa->fsx_projid) && + inode->ei_inode.bi_project != fa->fsx_projid + 1) { + s.projid = fa->fsx_projid + 1; + s.set_project = true; + } + } + + if (fa->flags_valid) { + s.mask = map_defined(bch_flags_to_uflags); + + s.set_casefold = true; + s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0; + fa->flags &= ~FS_CASEFOLD_FL; + + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); + if (fa->flags) + return -EOPNOTSUPP; + } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + (s.set_project + ? 
bch2_set_projid(c, inode, fa->fsx_projid)
+		 : 0) ?:
+		bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+				 ATTR_CTIME);
+	mutex_unlock(&inode->ei_update_lock);
+	return ret;
+}
+
 static const struct file_operations bch_file_operations = {
 	.open		= bch2_open,
 	.llseek		= bch2_llseek,
@@ -1476,6 +1785,8 @@ static const struct inode_operations bch_file_inode_operations = {
 	.get_inode_acl	= bch2_get_acl,
 	.set_acl	= bch2_set_acl,
 #endif
+	.fileattr_get	= bch2_fileattr_get,
+	.fileattr_set	= bch2_fileattr_set,
 };
 
 static const struct inode_operations bch_dir_inode_operations = {
@@ -1496,6 +1807,8 @@ static const struct inode_operations bch_dir_inode_operations = {
 	.get_inode_acl	= bch2_get_acl,
 	.set_acl	= bch2_set_acl,
 #endif
+	.fileattr_get	= bch2_fileattr_get,
+	.fileattr_set	= bch2_fileattr_set,
 };
 
 static const struct file_operations bch_dir_file_operations = {
@@ -1518,6 +1831,8 @@ static const struct inode_operations bch_symlink_inode_operations = {
 	.get_inode_acl	= bch2_get_acl,
 	.set_acl	= bch2_set_acl,
 #endif
+	.fileattr_get	= bch2_fileattr_get,
+	.fileattr_set	= bch2_fileattr_set,
 };
 
 static const struct inode_operations bch_special_inode_operations = {
@@ -1528,6 +1843,8 @@ static const struct inode_operations bch_special_inode_operations = {
 	.get_inode_acl	= bch2_get_acl,
 	.set_acl	= bch2_set_acl,
 #endif
+	.fileattr_get	= bch2_fileattr_get,
+	.fileattr_set	= bch2_fileattr_set,
 };
 
 static const struct address_space_operations bch_address_space_operations = {
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index f82cfbf460d0..c74af15b14f2 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k)
 	}
 }
 
+static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi)
+{
+	/* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */
+	return bi->bi_casefold
+		? bi->bi_casefold - 1
+		: c->opts.casefold;
+}
+
 /* i_nlink: */
 
 static inline unsigned nlink_bias(umode_t mode)
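
bch2_inode_casefold() above decodes a +1-biased per-inode option: 0 means "unset, fall back to the filesystem-wide option", while n+1 stores an explicit n. A tiny standalone model of that encoding, with invented names:

	/* Standalone model of the +1-biased option; names are invented. */
	#include <assert.h>
	#include <stdbool.h>

	struct fs_opts { bool casefold; };		/* filesystem-wide default */
	struct my_inode { unsigned casefold_biased; };	/* 0 = unset */

	static void set_casefold(struct my_inode *i, bool v)
	{
		i->casefold_biased = v + 1;	/* encode: 1 = false, 2 = true */
	}

	static bool get_casefold(const struct my_inode *i, const struct fs_opts *o)
	{
		return i->casefold_biased ? i->casefold_biased - 1 : o->casefold;
	}

	int main(void)
	{
		struct fs_opts fs = { .casefold = true };
		struct my_inode unset = { 0 };
		struct my_inode off;

		set_casefold(&off, false);
		assert(get_casefold(&unset, &fs) == true);	/* inherits fs opt */
		assert(get_casefold(&off, &fs) == false);	/* explicit value wins */
		return 0;
	}

The inode_format.h diff below adds the biased field and the matching filesystem option that the fallback reads:

diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 117110af1e3f..87e193e8ed25 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -103,7 +103,8 @@ struct bch_inode_generation {
	x(bi_parent_subvol,	32)	\
	x(bi_nocow,		8)	\
	x(bi_depth,		32)	\
-	x(bi_inodes_32bit,	8)
+	x(bi_inodes_32bit,	8)	\
+	x(bi_casefold,		8)
 
 /* subset of BCH_INODE_FIELDS */
 #define BCH_INODE_OPTS()	\
@@ -117,7 +118,8 @@ struct bch_inode_generation {
	x(background_target,	16)	\
	x(erasure_code,		16)	\
	x(nocow,		8)	\
-	x(inodes_32bit,		8)
+	x(inodes_32bit,		8)	\
+	x(casefold,		8)
 
 enum inode_opt_id {
 #define x(name, ...)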
\ @@ -137,8 +139,7 @@ enum inode_opt_id { x(i_sectors_dirty, 6) \ x(unlinked, 7) \ x(backptr_untrusted, 8) \ - x(has_child_snapshot, 9) \ - x(casefolded, 10) + x(has_child_snapshot, 9) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d8f74b6d0a75..bb45d3634194 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; - BUG_ON(sectors > buf->sectors); + if (unlikely(sectors > buf->sectors)) { + struct printbuf err = PRINTBUF; + err.atomic++; + + prt_printf(&err, "journal entry overran reserved space: %u > %u\n", + sectors, buf->sectors); + prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n", + le32_to_cpu(buf->data->u64s), buf->u64s_reserved, + j->cur_entry_u64s, + c->block_bits); + prt_printf(&err, "fatal error - emergency read only"); + bch2_journal_halt_locked(j); + + bch_err(c, "%s", err.buf); + printbuf_exit(&err); + return; + } + buf->sectors = sectors; /* @@ -1462,8 +1479,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); - - set_bit(JOURNAL_running, &j->flags); j->last_flush_write = jiffies; j->reservations.idx = journal_cur_seq(j); @@ -1474,6 +1489,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return 0; } +void bch2_journal_set_replay_done(struct journal *j) +{ + /* + * journal_space_available must happen before setting JOURNAL_running + * JOURNAL_running must happen before JOURNAL_replay_done + */ + spin_lock(&j->lock); + bch2_journal_space_available(j); + + set_bit(JOURNAL_need_flush_write, &j->flags); + set_bit(JOURNAL_running, &j->flags); + set_bit(JOURNAL_replay_done, &j->flags); + spin_unlock(&j->lock); +} + /* init/exit: */ void bch2_dev_journal_exit(struct bch_dev *ca) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 47828771f9c2..641e20c05a14 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j) struct bch_dev; -static inline void bch2_journal_set_replay_done(struct journal *j) -{ - BUG_ON(!test_bit(JOURNAL_running, &j->flags)); - set_bit(JOURNAL_replay_done, &j->flags); -} - void bch2_journal_unblock(struct journal *); void bch2_journal_block(struct journal *); struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); @@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); int bch2_fs_journal_start(struct journal *, u64); +void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 5d1547aa118a..ea670c3c43d8 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j) bch2_journal_set_watermark(j); out: - j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; + j->cur_entry_sectors = !ret + ? 
round_down(j->space[journal_space_discarded].next_entry, + block_sectors(c)) + : 0; j->cur_entry_error = ret; if (!ret) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 159410c50861..96873372b516 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h index ea181fef5bc9..d1885cf67a45 100644 --- a/fs/bcachefs/movinggc.h +++ b/fs/bcachefs/movinggc.h @@ -5,6 +5,15 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); +static inline void bch2_copygc_wakeup(struct bch_fs *c) +{ + rcu_read_lock(); + struct task_struct *p = rcu_dereference(c->copygc_thread); + if (p) + wake_up_process(p); + rcu_read_unlock(); +} + void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 0d65ea96f7a2..46f3c8b100a9 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - /* Inherit casefold state from parent. */ - if (S_ISDIR(mode)) - new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded; - if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 4d06313076ff..dfb14810124c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -228,6 +228,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_ERASURE_CODE, false, \ NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(casefold, u8, \ + OPT_FS|OPT_INODE|OPT_FORMAT, \ + OPT_BOOL(), \ + BCH_SB_CASEFOLD, false, \ + NULL, "Dirent lookups are casefolded") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index c63fa53f30d2..4ccdfc1f34aa 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -262,7 +262,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. 
+ */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, writepoint_ptr(&c->rebalance_write_point), true); @@ -664,7 +671,7 @@ void bch2_rebalance_stop(struct bch_fs *c) c->rebalance.thread = NULL; if (p) { - /* for sychronizing with rebalance_wakeup() */ + /* for sychronizing with bch2_rebalance_wakeup() */ synchronize_rcu(); kthread_stop(p); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 62a3859d3823..e5e8eb4a2dd1 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); int bch2_set_fs_needs_rebalance(struct bch_fs *); -static inline void rebalance_wakeup(struct bch_fs *c) +static inline void bch2_rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 606d684e6f23..d6c4ef819d40 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -18,6 +18,7 @@ #include "journal_seq_blacklist.h" #include "logged_ops.h" #include "move.h" +#include "movinggc.h" #include "namei.h" #include "quota.h" #include "rebalance.h" @@ -1129,13 +1130,13 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); - ret = bch2_fs_read_write_early(c); if (ret) goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_journal_set_replay_done(&c->journal); + for_each_member_device(c, ca) { ret = bch2_dev_usage_init(ca, false); if (ret) { @@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c) c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 593ff142530d..22f72bb5b853 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -12,6 +12,7 @@ #include "journal.h" #include "lru.h" #include "logged_ops.h" +#include "movinggc.h" #include "rebalance.h" #include "recovery.h" #include "recovery_passes.h" @@ -262,49 +263,52 @@ int bch2_run_recovery_passes(struct bch_fs *c) */ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { - c->next_recovery_pass = c->curr_recovery_pass + 1; + spin_lock_irq(&c->recovery_pass_lock); - spin_lock_irq(&c->recovery_pass_lock); + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { + unsigned prev_done = c->recovery_pass_done; unsigned pass = c->curr_recovery_pass; + c->next_recovery_pass = pass + 1; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) { - spin_unlock_irq(&c->recovery_pass_lock); + c->curr_recovery_pass > c->opts.recovery_pass_last) break; - } - if (!should_run_recovery_pass(c, pass)) { - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, pass); + if (should_run_recovery_pass(c, pass)) { spin_unlock_irq(&c->recovery_pass_lock); - continue; - } - spin_unlock_irq(&c->recovery_pass_lock); - - ret = bch2_run_recovery_pass(c, pass) ?: - bch2_journal_flush(&c->journal); - - if (!ret && !test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, pass); - - spin_lock_irq(&c->recovery_pass_lock); 
- if (c->next_recovery_pass < c->curr_recovery_pass) { - /* - * bch2_run_explicit_recovery_pass() was called: we - * can't always catch -BCH_ERR_restart_recovery because - * it may have been called from another thread (btree - * node read completion) - */ - ret = 0; - c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); - } else { - c->recovery_passes_complete |= BIT_ULL(pass); - c->recovery_pass_done = max(c->recovery_pass_done, pass); + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + if (!ret && !test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); + spin_lock_irq(&c->recovery_pass_lock); + + if (c->next_recovery_pass < c->curr_recovery_pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + ret = 0; + c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); + } else { + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); + } } + c->curr_recovery_pass = c->next_recovery_pass; - spin_unlock_irq(&c->recovery_pass_lock); + + if (prev_done <= BCH_RECOVERY_PASS_check_snapshots && + c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) { + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + } } + spin_unlock_irq(&c->recovery_pass_lock); + return ret; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b7de29aed839..fec569c7deb1 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) u32 subvol = 0, s; rcu_read_lock(); - while (id) { + while (id && bch2_snapshot_exists(c, id)) { s = snapshot_t(c, id)->subvol; if (s && (!subvol || s < subvol)) diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 09a354a26c3b..0c1a00539bd1 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; - struct unicode_map *cf_encoding; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as the key. @@ -44,11 +44,10 @@ struct bch_hash_info { static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { - /* XXX ick */ struct bch_hash_info info = { .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? 
c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 25b6bce05c3c..cb5d960aed92 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1102,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); - return -BCH_ERR_sb_not_downgraded; + ret = -BCH_ERR_sb_not_downgraded; + goto out; } darray_for_each(online_devices, ca) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e8a17ed1615d..e4ab0595c0ae 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) return ret; } -static int bch2_fs_read_write_late(struct bch_fs *c) -{ - int ret; - - /* - * Data move operations can't run until after check_snapshots has - * completed, and bch2_snapshot_is_ancestor() is available. - * - * Ideally we'd start copygc/rebalance earlier instead of waiting for - * all of recovery/fsck to complete: - */ - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - - ret = bch2_rebalance_start(c); - if (ret) { - bch_err(c, "error starting rebalance thread"); - return ret; - } - - return 0; -} - static int __bch2_fs_read_write(struct bch_fs *c, bool early) { int ret; @@ -466,29 +440,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) clear_bit(BCH_FS_clean_shutdown, &c->flags); + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { + bch2_dev_allocator_add(c, ca); + percpu_ref_reinit(&ca->io_ref[WRITE]); + } + bch2_recalc_capacity(c); + /* * First journal write must be a flush write: after a clean shutdown we * don't read the journal, so the first journal write may end up * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ + spin_lock(&c->journal.lock); set_bit(JOURNAL_need_flush_write, &c->journal.flags); set_bit(JOURNAL_running, &c->journal.flags); - - __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { - bch2_dev_allocator_add(c, ca); - percpu_ref_reinit(&ca->io_ref[WRITE]); - } - bch2_recalc_capacity(c); + bch2_journal_space_available(&c->journal); + spin_unlock(&c->journal.lock); ret = bch2_fs_mark_dirty(c); if (ret) goto err; - spin_lock(&c->journal.lock); - bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); - ret = bch2_journal_reclaim_start(&c->journal); if (ret) goto err; @@ -504,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) atomic_long_inc(&c->writes[i]); } #endif - if (!early) { - ret = bch2_fs_read_write_late(c); - if (ret) - goto err; + + ret = bch2_copygc_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting copygc thread"); + goto err; + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting rebalance thread"); + goto err; } bch2_do_discards(c); @@ -553,6 +533,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_free_fsck_errs(c); bch2_fs_accounting_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); @@ -1023,6 +1004,40 @@ static void print_mount_opts(struct bch_fs *c) printbuf_exit(&p); } +static bool bch2_fs_may_start(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i, flags = 0; + + if (c->opts.very_degraded) + flags |= 
BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + + if (c->opts.degraded) + flags |= BCH_FORCE_IF_DEGRADED; + + if (!c->opts.degraded && + !c->opts.very_degraded) { + mutex_lock(&c->sb_lock); + + for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + + ca = bch2_dev_locked(c, i); + + if (!bch2_dev_is_online(ca) && + (ca->mi.state == BCH_MEMBER_STATE_rw || + ca->mi.state == BCH_MEMBER_STATE_ro)) { + mutex_unlock(&c->sb_lock); + return false; + } + } + mutex_unlock(&c->sb_lock); + } + + return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); +} + int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); @@ -1030,6 +1045,9 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); + if (!bch2_fs_may_start(c)) + return -BCH_ERR_insufficient_devices_to_start; + down_write(&c->state_lock); mutex_lock(&c->sb_lock); @@ -1082,13 +1100,10 @@ int bch2_fs_start(struct bch_fs *c) wake_up(&c->ro_ref_wait); down_write(&c->state_lock); - if (c->opts.read_only) { + if (c->opts.read_only) bch2_fs_read_only(c); - } else { - ret = !test_bit(BCH_FS_rw, &c->flags) - ? bch2_fs_read_write(c) - : bch2_fs_read_write_late(c); - } + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); up_write(&c->state_lock); err: @@ -1500,7 +1515,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) printbuf_exit(&name); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return 0; } @@ -1559,40 +1574,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, } } -static bool bch2_fs_may_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i, flags = 0; - - if (c->opts.very_degraded) - flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; - - if (c->opts.degraded) - flags |= BCH_FORCE_IF_DEGRADED; - - if (!c->opts.degraded && - !c->opts.very_degraded) { - mutex_lock(&c->sb_lock); - - for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_member_exists(c->disk_sb.sb, i)) - continue; - - ca = bch2_dev_locked(c, i); - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); - return false; - } - } - mutex_unlock(&c->sb_lock); - } - - return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); -} - static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { bch2_dev_io_ref_stop(ca, WRITE); @@ -1646,7 +1627,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -2228,11 +2209,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - if (!bch2_fs_may_start(c)) { - ret = -BCH_ERR_insufficient_devices_to_start; - goto err_print; - } - if (!c->opts.nostart) { ret = bch2_fs_start(c); if (ret) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e5f003c29369..82ee333ddd21 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, bch2_set_rebalance_needs_scan(c, 0); if (v && id == Opt_rebalance_enabled) - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); - if (v && id == Opt_copygc_enabled && - c->copygc_thread) - wake_up_process(c->copygc_thread); + if (v && id == Opt_copygc_enabled) + bch2_copygc_wakeup(c); if (id == Opt_discard && !ca) { mutex_lock(&c->sb_lock); diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c 
index c265b102267a..782a05fe7656 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static int test_peek_end(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; @@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6ba5071ab6dd..3e52c7f8ddd2 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len) *--dst = *src++; } +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +#define map_flags(_map, _in) \ +({ \ + unsigned _out = 0; \ + \ + set_flags(_map, _in, _out); \ + _out; \ +}) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define map_defined(_map) \ +({ \ + unsigned _in = ~0; \ + \ + map_flags_rev(_map, _in); \ +}) + #endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 262a707d8990..71b8a825c447 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * will always return true. * So here we need to do extra page alignment for * filemap_range_has_page(). + * + * And do not decrease page_lockend right now, as it can be 0. */ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); - const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; + const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE); while (1) { truncate_pagecache_range(inode, lockstart, lockend); lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); + /* The same page or adjacent pages. */ + if (page_lockend <= page_lockstart) + break; /* * We can't have ordered extents in the range, nor dirty/writeback * pages, because we have locked the inode's VFS lock in exclusive @@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * we do, unlock the range and retry. 
*/ if (!filemap_range_has_page(inode->i_mapping, page_lockstart, - page_lockend)) + page_lockend - 1)) break; unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f948f4f6431c..e17bcb034595 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3803,7 +3803,7 @@ out: if (ret) { if (inode) iput(&inode->vfs_inode); - inode = ERR_PTR(ret); + return ERR_PTR(ret); } return &inode->vfs_inode; } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 11dbd7be6a3b..c0a0b8b063d0 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -204,7 +204,7 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, btrfs_blocks_per_folio(fs_info, folio); \ \ btrfs_subpage_assert(fs_info, folio, start, len); \ - __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ + __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ __start_bit; \ }) @@ -666,7 +666,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, btrfs_blocks_per_folio(fs_info, folio); \ const struct btrfs_subpage *subpage = folio_get_private(folio); \ \ - ASSERT(blocks_per_folio < BITS_PER_LONG); \ + ASSERT(blocks_per_folio <= BITS_PER_LONG); \ *dst = bitmap_read(subpage->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43979891f7c8..2b66a6130269 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -2235,7 +2235,7 @@ int btrfs_verify_level_key(struct extent_buffer *eb, btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, check->level, found_level); - return -EIO; + return -EUCLEAN; } if (!check->has_first_key) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index fb8b8b29c169..4a3e02b49f29 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1277,7 +1277,7 @@ struct zone_info { static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct zone_info *info, unsigned long *active, - struct btrfs_chunk_map *map) + struct btrfs_chunk_map *map, bool new) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_device *device; @@ -1307,6 +1307,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, return 0; } + ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical)); + /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical); @@ -1319,6 +1321,18 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, * to determine the allocation offset within the zone. 
*/ WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); + + if (new) { + sector_t capacity; + + capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT); + up_read(&dev_replace->rwsem); + info->alloc_offset = 0; + info->capacity = capacity << SECTOR_SHIFT; + + return 0; + } + nofs_flag = memalloc_nofs_save(); ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); @@ -1588,7 +1602,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } for (i = 0; i < map->num_stripes; i++) { - ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); + ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map, new); if (ret) goto out; @@ -1659,7 +1673,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) * stripe. */ cache->alloc_offset = cache->zone_capacity; - ret = 0; } out: diff --git a/fs/buffer.c b/fs/buffer.c index c7abb4a029dc..7be23ff20b27 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_write_sync); -/* - * Various filesystems appear to want __find_get_block to be non-blocking. - * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the blockdev mapping's - * i_private_lock. - * - * Hack idea: for the blockdev mapping, i_private_lock contention - * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_private_lock. - */ static struct buffer_head * -__find_get_block_slow(struct block_device *bdev, sector_t block) +__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; @@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (IS_ERR(folio)) goto out; - spin_lock(&bd_mapping->i_private_lock); + /* + * Folio lock protects the buffers. Callers that cannot block + * will fallback to serializing vs try_to_free_buffers() via + * the i_private_lock. + */ + if (atomic) + spin_lock(&bd_mapping->i_private_lock); + else + folio_lock(folio); + head = folio_buffers(folio); if (!head) goto out_unlock; + /* + * Upon a noref migration, the folio lock serializes here; + * otherwise bail. + */ + if (test_bit_acquire(BH_Migrate, &head->b_state)) { + WARN_ON(!atomic); + goto out_unlock; + } + bh = head; do { if (!buffer_mapped(bh)) @@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) 1 << blkbits); } out_unlock: - spin_unlock(&bd_mapping->i_private_lock); + if (atomic) + spin_unlock(&bd_mapping->i_private_lock); + else + folio_unlock(folio); folio_put(folio); out: return ret; @@ -656,7 +667,9 @@ EXPORT_SYMBOL(generic_buffers_fsync); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { - struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); + struct buffer_head *bh; + + bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); @@ -1386,16 +1399,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return - * NULL + * NULL. 
Atomic context callers may also return NULL if the buffer is being + * migrated; similarly the page is not marked accessed either. */ -struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +static struct buffer_head * +find_get_block_common(struct block_device *bdev, sector_t block, + unsigned size, bool atomic) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ - bh = __find_get_block_slow(bdev, block); + bh = __find_get_block_slow(bdev, block, atomic); if (bh) bh_lru_install(bh); } else @@ -1403,8 +1418,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) return bh; } + +struct buffer_head * +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +{ + return find_get_block_common(bdev, block, size, true); +} EXPORT_SYMBOL(__find_get_block); +/* same as __find_get_block() but allows sleeping contexts */ +struct buffer_head * +__find_get_block_nonatomic(struct block_device *bdev, sector_t block, + unsigned size) +{ + return find_get_block_common(bdev, block, size, false); +} +EXPORT_SYMBOL(__find_get_block_nonatomic); + /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. @@ -1422,7 +1452,12 @@ EXPORT_SYMBOL(__find_get_block); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + struct buffer_head *bh; + + if (gfpflags_allow_blocking(gfp)) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6ac2bd555e86..06cd2963e41e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2367,7 +2367,7 @@ static int fill_fscrypt_truncate(struct inode *inode, /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { - loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; + loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1; ret = filemap_write_and_wait_range(inode->i_mapping, orig_pos, lend); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38bc8d74f4cc..e7ecc7c8a729 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (!bh || !buffer_uptodate(bh)) /* * If the block is not in the buffer cache, then it - * must have been written out. + * must have been written out, or, most unlikely, is + * being migrated - false failure should be OK here. 
*/ goto out; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f88424c28194..1e98c5be4e0a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6642,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, for (i = 0; i < count; i++) { cond_resched(); if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); + bh = sb_find_get_block_nonatomic(inode->i_sb, + block + i); ext4_forget(handle, is_metadata, inode, bh, block + i); } } diff --git a/fs/file.c b/fs/file.c index dc3f7e120e3e..3a3146664cf3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,7 +26,7 @@ #include "internal.h" -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) +static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) { /* * If the reference count was already in the dead zone, then this diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0cf0fddbee81..1467f6790747 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, bh = bh_in; if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } @@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, /* If there is a different buffer_head lying around in * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh2 = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) @@ -464,7 +466,8 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * state machine will get very upset later on. 
*/ if (need_cancel) { struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); @@ -492,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); + bh = __find_get_block_nonatomic(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); diff --git a/fs/namespace.c b/fs/namespace.c index d9ca80dcc544..98a5cd756e9a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,56 +2826,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); + struct path under = {}; for (;;) { - struct mount *m; + struct mount *m = real_mount(mnt); if (beneath) { - m = real_mount(mnt); + path_put(&under); read_seqlock_excl(&mount_lock); - dentry = dget(m->mnt_mountpoint); + under.mnt = mntget(&m->mnt_parent->mnt); + under.dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); + dentry = under.dentry; } else { dentry = path->dentry; } inode_lock(dentry->d_inode); - if (unlikely(cant_mount(dentry))) { - inode_unlock(dentry->d_inode); - goto out; - } - namespace_lock(); - if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { + if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) + break; // not to be mounted on + + if (beneath && unlikely(m->mnt_mountpoint != dentry || + &m->mnt_parent->mnt != under.mnt)) { namespace_unlock(); inode_unlock(dentry->d_inode); - goto out; + continue; // got moved } mnt = lookup_mnt(path); - if (likely(!mnt)) + if (unlikely(mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + continue; // got overmounted + } + mp = get_mountpoint(dentry); + if (IS_ERR(mp)) break; - - namespace_unlock(); - inode_unlock(dentry->d_inode); - if (beneath) - dput(dentry); - path_put(path); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - } - - mp = get_mountpoint(dentry); - if (IS_ERR(mp)) { - namespace_unlock(); - inode_unlock(dentry->d_inode); + if (beneath) { + /* + * @under duplicates the references that will stay + * at least until namespace_unlock(), so the path_put() + * below is safe (and OK to do under namespace_lock - + * we are not dropping the final references here). 
+ */ + path_put(&under); + } + return mp; } - -out: + namespace_unlock(); + inode_unlock(dentry->d_inode); if (beneath) - dput(dentry); - + path_put(&under); return mp; } @@ -2886,14 +2892,11 @@ static inline struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { - struct dentry *dentry = where->m_dentry; - + inode_unlock(where->m_dentry->d_inode); read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); - namespace_unlock(); - inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f1b4b3e611cb..c7a9729dc9d0 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1249,7 +1249,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh) diff --git a/fs/splice.c b/fs/splice.c index 90d464241f15..4d6df083e0c0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -45,7 +45,7 @@ * here if set to avoid blocking other users of this pipe if splice is * being done on it. */ -static noinline void noinline pipe_clear_nowait(struct file *file) +static noinline void pipe_clear_nowait(struct file *file) { fmode_t fmode = READ_ONCE(file->f_mode); diff --git a/fs/xattr.c b/fs/xattr.c index 02bee149ad96..fabb2a04501e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) error = -EBADF; @@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 8c541ca71872..81c94dd1d596 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -170,7 +170,8 @@ bool xfs_zoned_need_gc( struct xfs_mount *mp) { - s64 available, free; + s64 available, free, threshold; + s32 remainder; if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE)) return false; @@ -183,7 +184,12 @@ xfs_zoned_need_gc( return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); - if (available < mult_frac(free, mp->m_zonegc_low_space, 100)) + + threshold = div_s64_rem(free, 100, &remainder); + threshold = threshold * mp->m_zonegc_low_space + + remainder * div_s64(mp->m_zonegc_low_space, 100); + + if (available < threshold) return true; return false; diff --git a/include/cxl/features.h b/include/cxl/features.h index a3bb34694c06..5f7f842765a5 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -66,7 +66,7 @@ struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) { @@ -78,7 +78,8 @@ static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) return -EOPNOTSUPP; } -static inline int devm_cxl_setup_fwctl(struct 
cxl_memdev *cxlmd) +static inline int devm_cxl_setup_fwctl(struct device *host, + struct cxl_memdev *cxlmd) { return -EOPNOTSUPP; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 678dc38442bf..9a1f0ee40b56 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,23 +712,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -736,11 +719,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -847,6 +825,51 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. 
+ */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); @@ -1614,6 +1637,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1670,10 +1694,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a4ad7839b6..0029ff880e27 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -222,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -397,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a45..50b14a5661c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void 
osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..85ab710ec0e7 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 7db62fbc0500..31551e4cb8f3 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,7 +61,6 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -178,20 +177,14 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) */ static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) { - long old, new; + long old; old = atomic_long_read(&ref->refcnt); - do { - if (unlikely(old < 0)) - return __file_ref_put_badval(ref, old); - - if (old == FILE_REF_ONEREF) - new = FILE_REF_DEAD; - else - new = old - 1; - } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); - - return new == FILE_REF_DEAD; + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); } /** diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6fa0a268d538..097be89487bf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> */ diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index bf2bf40d7b18..8d5ac16a9b17 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -102,11 +102,11 @@ do { \ l = (local_lock_t *)this_cpu_ptr(lock); \ tl = (local_trylock_t *)l; \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 0); \ WRITE_ONCE(tl->acquired, 1); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ local_lock_acquire(l); \ } while (0) @@ -171,11 +171,11 @@ do { \ tl = (local_trylock_t *)l; \ local_lock_release(l); \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 1); \ WRITE_ONCE(tl->acquired, 0); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1f5773ab5660..30659b615fca 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -361,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. + * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -385,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -397,9 +406,9 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. 
* - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4d16c13d0df5..64cb4b04be7a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -220,6 +220,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * occurs. * @reset_done: optional function to call after transport specific reset * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. */ struct virtio_driver { struct device_driver driver; @@ -237,6 +239,7 @@ struct virtio_driver { int (*restore)(struct virtio_device *dev); int (*reset_prepare)(struct virtio_device *dev); int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d9d0cb827117..f030adc462ee 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -53,43 +53,70 @@ struct landlock_ruleset_attr { __u64 scoped; }; -/* - * sys_landlock_create_ruleset() flags: +/** + * DOC: landlock_create_ruleset_flags + * + * **Flags** * - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. + * %LANDLOCK_CREATE_RULESET_VERSION + * Get the highest supported Landlock ABI version (starting at 1). + * + * %LANDLOCK_CREATE_RULESET_ERRATA + * Get a bitmask of fixed issues for the current Landlock ABI version. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) #define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ -/* - * sys_landlock_restrict_self() flags: - * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF: Do not create any log related to the - * enforced restrictions. This should only be set by tools launching unknown - * or untrusted programs (e.g. a sandbox tool, container runtime, system - * service manager). Because programs sandboxing themselves should fix any - * denied access, they should not set this flag to be aware of potential - * issues reported by system's logs (i.e. audit). - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON: Explicitly ask to continue - * logging denied access requests even after an :manpage:`execve(2)` call. - * This flag should only be set if all the programs than can legitimately be - * executed will not try to request a denied access (which could spam audit - * logs). - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF: Do not create any log related - * to the enforced restrictions coming from future nested domains created by - * the caller or its descendants. This should only be set according to a - * runtime configuration (i.e. not hardcoded) by programs launching other - * unknown or untrusted programs that may create their own Landlock domains - * and spam logs. The main use case is for container runtimes to enable users - * to mute buggy sandboxed programs for a specific container image. 
Other use - * cases include sandboxer tools and init systems. Unlike - * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, - * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF does not impact the requested - * restriction (if any) but only the future nested domains. +/** + * DOC: landlock_restrict_self_flags + * + * **Flags** + * + * By default, denied accesses originating from programs that sandbox themselves + * are logged via the audit subsystem. Such events typically indicate unexpected + * behavior, such as bugs or exploitation attempts. However, to avoid excessive + * logging, access requests denied by a domain not created by the originating + * program are not logged by default. The rationale is that programs should know + * their own behavior, but not necessarily the behavior of other programs. This + * default configuration is suitable for most programs that sandbox themselves. + * For specific use cases, the following flags allow programs to modify this + * default logging behavior. + * + * The %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF and + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON flags apply to the newly created + * Landlock domain. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * Disables logging of denied accesses originating from the thread creating + * the Landlock domain, as well as its children, as long as they continue + * running the same executable code (i.e., without an intervening + * :manpage:`execve(2)` call). This is intended for programs that execute + * unknown code without invoking :manpage:`execve(2)`, such as script + * interpreters. Programs that only sandbox themselves should not set this + * flag, so users can be notified of unauthorized access attempts via system + * logs. + * + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * Enables logging of denied accesses after an :manpage:`execve(2)` call, + * providing visibility into unauthorized access attempts by newly executed + * programs within the created Landlock domain. This flag is recommended + * only when all potential executables in the domain are expected to comply + * with the access restrictions, as excessive audit log entries could make + * it more difficult to identify critical events. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * Disables logging of denied accesses originating from nested Landlock + * domains created by the caller or its descendants. This flag should be set + * according to runtime configuration, not hardcoded, to avoid suppressing + * important security events. It is useful for container runtimes or + * sandboxing tools that may launch programs which themselves create + * Landlock domains and could otherwise generate excessive logs. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a + * domain. */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b95dd84eef2d..d4b3e2ae1314 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. 
*/ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 8549d4571257..c691ac210ce2 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db..f52de5ed1b3b 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c6209fe44cb1..a2b256e96d5d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -872,10 +872,15 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - __io_cq_lock(ctx); - posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + if (!ctx->lockless_cq) { + spin_lock(&ctx->completion_lock); + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + spin_unlock(&ctx->completion_lock); + } else { + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + } + ctx->submit_state.cq_flush = true; - __io_cq_unlock_post(ctx); return posted; } @@ -1078,21 +1083,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync) while (node) { req = container_of(node, struct io_kiocb, io_task_work.node); node = node->next; - if (sync && last_ctx != req->ctx) { + if (last_ctx != req->ctx) { if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } last_ctx = req->ctx; percpu_ref_get(&last_ctx->refs); } - if (llist_add(&req->io_task_work.node, - &req->ctx->fallback_llist)) - schedule_delayed_work(&req->ctx->fallback_work, 1); + if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) + schedule_delayed_work(&last_ctx->fallback_work, 1); } if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5a5adc66b8e2..92b606d60020 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2189,7 +2189,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ b = &htab->buckets[i]; rcu_read_lock(); head = &b->head; - hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { key = elem->key; if (is_percpu) { /* current cpu value for percpu map */ diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 2fdf3c978db1..774e5a538811 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ 
b/kernel/bpf/preload/bpf_preload_kern.c @@ -89,5 +89,6 @@ static void __exit fini(void) } late_initcall(load); module_exit(fini); +MODULE_IMPORT_NS("BPF_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs"); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9794446bc8c6..64c3393e8270 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1583,7 +1583,7 @@ struct bpf_map *bpf_map_get(u32 ufd) return map; } -EXPORT_SYMBOL(bpf_map_get); +EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { @@ -3364,7 +3364,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) bpf_link_inc(link); return link; } -EXPORT_SYMBOL(bpf_link_get_from_fd); +EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); static void bpf_tracing_link_release(struct bpf_link *link) { @@ -6020,7 +6020,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) return ____bpf_sys_bpf(cmd, attr, size); } } -EXPORT_SYMBOL(kern_sys_bpf); +EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL"); static const struct bpf_func_proto bpf_sys_bpf_proto = { .func = bpf_sys_bpf, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3caf2cd86e65..63e5b90da1f3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -90,7 +90,7 @@ DEFINE_MUTEX(cgroup_mutex); DEFINE_SPINLOCK(css_set_lock); -#ifdef CONFIG_PROVE_RCU +#if (defined CONFIG_PROVE_RCU || defined CONFIG_LOCKDEP) EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif @@ -2353,9 +2353,37 @@ static struct file_system_type cgroup2_fs_type = { }; #ifdef CONFIG_CPUSETS_V1 +enum cpuset_param { + Opt_cpuset_v2_mode, +}; + +static const struct fs_parameter_spec cpuset_fs_parameters[] = { + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + {} +}; + +static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, cpuset_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + return 0; + } + return -EINVAL; +} + static const struct fs_context_operations cpuset_fs_context_ops = { .get_tree = cgroup1_get_tree, .free = cgroup_fs_context_free, + .parse_param = cpuset_parse_param, }; /* @@ -2392,6 +2420,7 @@ static int cpuset_init_fs_context(struct fs_context *fc) static struct file_system_type cpuset_fs_type = { .name = "cpuset", .init_fs_context = cpuset_init_fs_context, + .parameters = cpuset_fs_parameters, .fs_flags = FS_USERNS_MOUNT, }; #endif diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 3b2bdca9f1d4..77c8d9487a9a 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -336,16 +336,22 @@ static phys_addr_t dma_reserved_default_memory_size __initdata; static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - if (!rmem->priv) { - struct dma_coherent_mem *mem; + struct dma_coherent_mem *mem = rmem->priv; + if (!mem) { mem = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, true); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; } - dma_assign_coherent_memory(dev, rmem->priv); + + /* Warn if the device potentially can't use the reserved memory */ + if (mem->device_base + rmem->size - 1 > + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit)) + dev_warn(dev, "reserved memory is beyond device's set DMA address range\n"); + + 
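/* Illustrative numbers, not from this patch: a 1 MiB region with
	 * device_base 0xfff00000 ends at 0xffffffff and still satisfies a
	 * 32-bit coherent_dma_mask; any higher base makes the check above
	 * fire. */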
dma_assign_coherent_memory(dev, mem); return 0; } diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 055da410ac71..8df0dfaaca18 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area; * Users, who want to set the size of global CMA area for their system * should use cma= kernel parameter. */ -static const phys_addr_t size_bytes __initconst = - (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) static phys_addr_t size_cmdline __initdata = -1; static phys_addr_t base_cmdline __initdata; static phys_addr_t limit_cmdline __initdata; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index cda127027e48..051a32988040 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -910,6 +910,19 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +static bool __dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops) || use_dma_iommu(dev)) + return false; + return !dma_direct_all_ram_mapped(dev); +} + /** * dma_addressing_limited - return if the device is addressing limited * @dev: device to check @@ -920,15 +933,11 @@ EXPORT_SYMBOL(dma_set_coherent_mask); */ bool dma_addressing_limited(struct device *dev) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev)) - return true; - - if (unlikely(ops) || use_dma_iommu(dev)) + if (!__dma_addressing_limited(dev)) return false; - return !dma_direct_all_ram_mapped(dev); + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; } EXPORT_SYMBOL_GPL(dma_addressing_limited); diff --git a/kernel/events/core.c b/kernel/events/core.c index e93c19565914..95e703891b24 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3943,7 +3943,7 @@ static int merge_sched_in(struct perf_event *event, void *data) perf_event_set_state(event, PERF_EVENT_STATE_ERROR); if (*perf_event_fasync(event)) - event->pending_kill = POLL_HUP; + event->pending_kill = POLL_ERR; perf_event_wakeup(event); } else { @@ -6075,7 +6075,7 @@ static __poll_t perf_poll(struct file *file, poll_table *wait) if (unlikely(READ_ONCE(event->state) == PERF_EVENT_STATE_ERROR && event->attr.pinned)) - return events; + return EPOLLERR; /* * Pin the event->rb by taking event->mmap_mutex; otherwise diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 66bcd40a28ca..fdbf249d1c68 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -163,7 +163,7 @@ enum scx_ops_flags { /* * CPU cgroup support flags */ - SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */ + SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | @@ -3899,35 +3899,6 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; -static bool cgroup_warned_missing_weight; -static bool cgroup_warned_missing_idle; - -static void scx_cgroup_warn_missing_weight(struct task_group *tg) -{ - if (scx_ops_enable_state() == SCX_OPS_DISABLED || - cgroup_warned_missing_weight) - return; - - if ((scx_ops.flags & SCX_OPS_HAS_CGROUP_WEIGHT) || !tg->css.parent) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.weight\n", 
- scx_ops.name); - cgroup_warned_missing_weight = true; -} - -static void scx_cgroup_warn_missing_idle(struct task_group *tg) -{ - if (!scx_cgroup_enabled || cgroup_warned_missing_idle) - return; - - if (!tg->idle) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.idle\n", - scx_ops.name); - cgroup_warned_missing_idle = true; -} int scx_tg_online(struct task_group *tg) { @@ -3937,8 +3908,6 @@ int scx_tg_online(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_weight(tg); - if (scx_cgroup_enabled) { if (SCX_HAS_OP(cgroup_init)) { struct scx_cgroup_init_args args = @@ -4076,9 +4045,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) void scx_group_set_idle(struct task_group *tg, bool idle) { - percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_idle(tg); - percpu_up_read(&scx_cgroup_rwsem); + /* TODO: Implement ops->cgroup_set_idle() */ } static void scx_cgroup_lock(void) @@ -4272,9 +4239,6 @@ static int scx_cgroup_init(void) percpu_rwsem_assert_held(&scx_cgroup_rwsem); - cgroup_warned_missing_weight = false; - cgroup_warned_missing_idle = false; - /* * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. @@ -4284,9 +4248,6 @@ static int scx_cgroup_init(void) struct task_group *tg = css_tg(css); struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - scx_cgroup_warn_missing_weight(tg); - scx_cgroup_warn_missing_idle(tg); - if ((tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) continue; @@ -4623,7 +4584,7 @@ unlock: static void free_exit_info(struct scx_exit_info *ei) { - kfree(ei->dump); + kvfree(ei->dump); kfree(ei->msg); kfree(ei->bt); kfree(ei); @@ -4639,7 +4600,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len) ei->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(ei->bt[0]), GFP_KERNEL); ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL); - ei->dump = kzalloc(exit_dump_len, GFP_KERNEL); + ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL); if (!ei->bt || !ei->msg || !ei->dump) { free_exit_info(ei); @@ -5252,6 +5213,9 @@ static int validate_ops(const struct sched_ext_ops *ops) return -EINVAL; } + if (ops->flags & SCX_OPS_HAS_CGROUP_WEIGHT) + pr_warn("SCX_OPS_HAS_CGROUP_WEIGHT is deprecated and a noop\n"); + return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e43993a4e580..0fb9bf995a47 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7081,9 +7081,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) h_nr_idle = task_has_idle_policy(p); if (task_sleep || task_delayed || !se->sched_delayed) h_nr_runnable = 1; - } else { - cfs_rq = group_cfs_rq(se); - slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7093,6 +7090,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (p && &p->se == se) return -1; + slice = cfs_rq_min_slice(cfs_rq); break; } diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2ef2e1b80091..2f844c279a3e 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); * @arg: data to be passed to fn and handled_kill * @name: the thread's name * - * This returns a specialized task for use by the vhost layer or NULL on + * This returns a specialized task for use by the vhost layer or ERR_PTR() on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). 
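 *
 * A caller sketch (illustrative; fn, handle_sigkill and arg are
 * placeholders) showing the ERR_PTR() contract documented above:
 *
 *	struct vhost_task *vtsk;
 *
 *	vtsk = vhost_task_create(fn, handle_sigkill, arg, "vhost-worker");
 *	if (IS_ERR(vtsk))
 *		return PTR_ERR(vtsk);	// failure is ERR_PTR(), not NULL
 *	vhost_task_start(vtsk);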
*/ diff --git a/mm/migrate.c b/mm/migrate.c index f3ee6d8d5e2e..676d9cfc7059 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping, return -EAGAIN; if (check_refs) { - bool busy; + bool busy, migrating; bool invalidated = false; + migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state); + VM_WARN_ON_ONCE(migrating); recheck_buffers: busy = false; spin_lock(&mapping->i_private_lock); @@ -859,12 +861,12 @@ recheck_buffers: } bh = bh->b_this_page; } while (bh != head); + spin_unlock(&mapping->i_private_lock); if (busy) { if (invalidated) { rc = -EAGAIN; goto unlock_buffers; } - spin_unlock(&mapping->i_private_lock); invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; @@ -883,7 +885,7 @@ recheck_buffers: unlock_buffers: if (check_refs) - spin_unlock(&mapping->i_private_lock); + clear_bit_unlock(BH_Migrate, &head->b_state); bh = head; do { unlock_buffer(bh); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b24afec24138..6664ea73ccf8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -220,16 +220,6 @@ void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); -void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); -} -EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); - #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -297,19 +287,6 @@ static void osd_req_op_cls_request_info_pagelist( ceph_osd_data_pagelist_init(osd_data, pagelist); } -void osd_req_op_cls_request_data_pagelist( - struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, cls, request_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); - osd_req->r_ops[which].cls.indata_len += pagelist->length; - osd_req->r_ops[which].indata_len += pagelist->length; -} -EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); - void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e39a459540ec..60f27cb4e54f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -333,6 +333,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; int ret; + local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -348,8 +350,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -364,11 +368,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_output); @@ -380,6 +386,8 @@ int lwtunnel_xmit(struct sk_buff *skb) struct dst_entry *dst; int ret; + 
local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -396,8 +404,10 @@ int lwtunnel_xmit(struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -412,11 +422,13 @@ int lwtunnel_xmit(struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_xmit); @@ -428,6 +440,8 @@ int lwtunnel_input(struct sk_buff *skb) struct dst_entry *dst; int ret; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5d7af50fe702..230743bdbb14 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -861,14 +861,17 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) mutex_lock(&priv->lock); + err = 0; netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); - if (!netdev || !netif_device_present(netdev)) { + if (!netdev) { err = -ENODEV; goto err_unlock_sock; } - - if (!netdev_need_ops_lock(netdev)) { + if (!netif_device_present(netdev)) + err = -ENODEV; + else if (!netdev_need_ops_lock(netdev)) err = -EOPNOTSUPP; + if (err) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_DEV_IFINDEX]); goto err_unlock; diff --git a/net/core/selftests.c b/net/core/selftests.c index e99ae983fca9..35f807ea9952 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -100,10 +100,10 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, ehdr->h_proto = htons(ETH_P_IP); if (attr->tcp) { + memset(thdr, 0, sizeof(*thdr)); thdr->source = htons(attr->sport); thdr->dest = htons(attr->dport); thdr->doff = sizeof(struct tcphdr) / 4; - thdr->check = 0; } else { uhdr->source = htons(attr->sport); uhdr->dest = htons(attr->dport); @@ -144,10 +144,18 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, attr->id = net_test_next_id; shdr->id = net_test_next_id++; - if (attr->size) - skb_put(skb, attr->size); - if (attr->max_size && attr->max_size > skb->len) - skb_put(skb, attr->max_size - skb->len); + if (attr->size) { + void *payload = skb_put(skb, attr->size); + + memset(payload, 0, attr->size); + } + + if (attr->max_size && attr->max_size > skb->len) { + size_t pad_len = attr->max_size - skb->len; + void *pad = skb_put(skb, pad_len); + + memset(pad, 0, pad_len); + } skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2cb62f026b1f..a715dcbe0146 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -337,7 +337,11 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) release_sock(sk); - sock_kfree_s(sk, match, sizeof(*match)); + kfree_rcu_mightsleep(match); + /* Adjust sk_omem_alloc like sock_kfree_s() does, to match + * with allocation of this memory by sock_kmemdup() + */ + atomic_sub(sizeof(*match), &sk->sk_omem_alloc); err = 0; out: diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ce5045eea065..6c8ef826cec0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -961,6 +961,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { int old_flags; + int len = 0; if 
(parentid) { if (cl->cl_parent && @@ -991,9 +992,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, cur_time); + if (cl->qdisc->q.qlen != 0) + len = qdisc_peek_len(cl->qdisc); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ if (cl->qdisc->q.qlen != 0) { - int len = qdisc_peek_len(cl->qdisc); - if (cl->cl_flags & HFSC_RSC) { if (old_flags & HFSC_RSC) update_ed(cl, len); @@ -1636,10 +1641,16 @@ hfsc_dequeue(struct Qdisc *sch) if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); - if (realtime) - update_ed(cl, next_len); - else - update_d(cl, next_len); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ + if (cl->qdisc->q.qlen != 0) { + if (realtime) + update_ed(cl, next_len); + else + update_d(cl, next_len); + } } else { /* the class becomes passive */ eltree_remove(cl); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 004cdb59f010..7ce5e28a6c03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,13 +1536,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. - * - * If someone frequently calls the flush interface, we should - * immediately clean the corresponding cache_detail instead of - * continuously accumulating nextcheck. */ - if (cd->flush_time >= now && cd->flush_time < (now + 5)) + if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index e2f19627e43d..b45c5b91bc7a 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net) if (!mon) continue; write_lock_bh(&mon->lock); - mon->self->addr = tipc_own_addr(net); + if (mon->self) + mon->self->addr = tipc_own_addr(net); write_unlock_bh(&mon->lock); } } diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index f04b058b09b2..2494c96e105f 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) -use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use crate::{bindings, device::Device, error::Error, error::Result, ffi, str::CStr}; use core::ptr::NonNull; /// # Invariants @@ -12,7 +12,11 @@ use core::ptr::NonNull; /// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, /// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. 
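///
/// The string parameter below is `*const ffi::c_char` rather than `*const u8`
/// because the signedness of C's `char` varies by architecture, while
/// `ffi::c_char` always matches the C prototype of these functions.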
struct FwFunc( - unsafe extern "C" fn(*mut *const bindings::firmware, *const u8, *mut bindings::device) -> i32, + unsafe extern "C" fn( + *mut *const bindings::firmware, + *const ffi::c_char, + *mut bindings::device, + ) -> i32, ); impl FwFunc { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5b632635e00d..95a4fa1f1e44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index d75897559d18..2d6e59561c9d 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -15,7 +15,7 @@ KBUILD_CFLAGS += -Werror=return-type KBUILD_CFLAGS += -Werror=strict-prototypes KBUILD_CFLAGS += -Wno-format-security KBUILD_CFLAGS += -Wno-trigraphs -KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-disable-warning, frame-address) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += -Wmissing-declarations KBUILD_CFLAGS += -Wmissing-prototypes diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f3e7ac513db3..f99ab1a3b0f0 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -245,7 +245,9 @@ static int process_measurement(struct file *file, const struct cred *cred, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && - (ima_policy_flag & IMA_MEASURE)); + (ima_policy_flag & IMA_MEASURE) && + ((action & IMA_MEASURE) || + (file->f_mode & FMODE_WRITE))); if (!action && !violation_check) return 0; diff --git a/security/landlock/domain.c b/security/landlock/domain.c index bae2e9909013..a647b68e8d06 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -16,6 +16,7 @@ #include <linux/path.h> #include <linux/pid.h> #include <linux/sched.h> +#include <linux/signal.h> #include <linux/uidgid.h> #include "access.h" @@ -99,8 +100,7 @@ static struct landlock_details *get_current_details(void) return ERR_PTR(-ENOMEM); memcpy(details->exe_path, path_str, path_size); - WARN_ON_ONCE(current_cred() != current_real_cred()); - details->pid = get_pid(task_pid(current)); + details->pid = get_pid(task_tgid(current)); details->uid = from_kuid(&init_user_ns, current_uid()); get_task_comm(details->comm, current); return details; diff --git a/security/landlock/domain.h b/security/landlock/domain.h index ed0d348e214c..7fb70b25f85a 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -130,7 +130,7 @@ int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); static inline void landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy) { - if (WARN_ON_ONCE(!hierarchy || !hierarchy->details)) + if (!hierarchy || !hierarchy->details) return; put_pid(hierarchy->details->pid); diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 54a9f29e6ebb..b9561e3417ae 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -169,20 +169,16 @@ const int landlock_abi_version = 7; * the new ruleset. 
* @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). - * @flags: Supported value: + * @flags: Supported values: + * * - %LANDLOCK_CREATE_RULESET_VERSION * - %LANDLOCK_CREATE_RULESET_ERRATA * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. * - * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is - * 0, then the returned value is the highest supported Landlock ABI version - * (starting at 1). - * - * If @flags is %LANDLOCK_CREATE_RULESET_ERRATA and @attr is NULL and @size is - * 0, then the returned value is a bitmask of fixed issues for the current - * Landlock ABI version. + * If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is + * set, then @attr must be NULL and @size must be 0. * * Possible returned errors are: * @@ -191,6 +187,9 @@ const int landlock_abi_version = 7; * - %E2BIG: @attr or @size inconsistencies; * - %EFAULT: @attr or @size inconsistencies; * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_create_ruleset_flags */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, @@ -452,18 +451,15 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * @ruleset_fd: File descriptor tied to the ruleset to merge with the target. * @flags: Supported values: * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF * * This system call enables to enforce a Landlock ruleset on the current * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * - * It is allowed to only pass the %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF - * flag with a @ruleset_fd value of -1. - * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; @@ -475,6 +471,9 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * %CAP_SYS_ADMIN in its namespace. * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. + * + * .. 
kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_restrict_self_flags */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..f5dd84eb55dc 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9..b9ce3aab15fe 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include <sys/poll.h> #include <sys/utsname.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. */ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. 
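 * For example, "default via 192.168.1.1 dev eth0" yields "192.168.1.1",
 * which kvp_extract_routes() validates with inet_pton() and appends to
 * the output buffer with a trailing semicolon.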
*/ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad5..fdc7170639e6 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe..bf9caa908f89 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..7bb885b0c32d 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -43,6 +43,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9..5fea3209566e 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ out: for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } 
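	/* The BPF callback deletes and re-inserts every element while
	 * bpf_for_each_map_elem() walks the map; with the
	 * hlist_nulls_for_each_entry_safe() change in hashtab.c this
	 * traversal is expected to terminate rather than chase re-inserted
	 * entries. */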
+ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..0a99fd404f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f5465..863df7c0fdd0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index 000000000000..82307166f789 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c9..18a6014920b5 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ out: return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403..cfc571afd0eb 100644 --- 
a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include <errno.h> #include <limits.h> #include <linux/landlock.h> +#include <pthread.h> #include <stdlib.h> #include <sys/mount.h> #include <sys/prctl.h> @@ -40,7 +41,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +60,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +75,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +92,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 allocated_dom = 2; @@ -107,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -115,7 +114,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,23 +141,143 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. */ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. 
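 * gettid() returns the per-thread ID while getpid() returns the shared
 * TGID, which is what the audit records are expected to carry.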
*/ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. */ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread. */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -273,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records. 
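 * The allocation record must report this process's TGID and the same
 * domain ID as the preceding denial.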
*/ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a8798..73729382d40f 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4f55477ffe08..e7a75341f0f3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,9 +206,8 @@ chk_dump_one() local token local msg - ss_token="$(ss -inmHMN $ns | grep 'token:' |\ - head -n 1 |\ - sed 's/.*token:\([0-9a-f]*\).*/\1/')" + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ awk -F':[ \t]+' '/^token/ {print $2}')" diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47af..277f92f9d753 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d4ea9cd845a3..e26bbc169783 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -313,5 +313,44 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c3", + "name": "Test HFSC with netem/blackhole - queue emptying during peek operation", + "category": [ + "qdisc", + "hfsc", + "netem", + "blackhole" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms", + "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true", + "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "$TC class del dev $DUMMY parent 3:0 classid 3:1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true" + ], + "cmdUnderTest": "$TC class 
change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 759f06637146..e57a1486bb48 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1354,6 +1354,7 @@ int main(int argc, char *argv[]) value = strtol(optarg, NULL, 10); if (value) ctx.flags |= UBLK_F_NEED_GET_DATA; + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 29571eb296f1..918db5cd633f 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -86,9 +86,6 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; - /* fault_inject */ - long long delay_us; - int _evtfd; int _shmid; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 9fc111f64576..a81210ca3e99 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,8 +17,8 @@ _get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 714630b4b329..3bb00a347402 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -3,7 +3,7 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh -TID="generic_04" +TID="generic_05" ERR_CODE=0 ublk_run_recover_test() |
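
The test_common.sh change above can be sanity-checked by hand with the same
stat calls (a sketch; /dev/ublkb0 is a placeholder for any ublk block device):

	dev=/dev/ublkb0
	major="0x"$(stat -c '%t' "$dev")    # hex major of the device node
	minor="0x"$(stat -c '%T' "$dev")    # hex minor of the device node
	echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))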