diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-07-14 14:19:42 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-07-14 15:27:35 -0700 |
commit | 816cd1688331e0ffa1927889c15e7ed56650a183 (patch) | |
tree | 30cbcb7d5d4e1dd226fda651f25ca094e54714a0 | |
parent | b126047f43f11f61f1dd64802979765d71795dae (diff) | |
parent | db886979683a8360ced9b24ab1125ad0c4d2cf76 (diff) | |
download | linux-stable-816cd1688331e0ffa1927889c15e7ed56650a183.tar.gz linux-stable-816cd1688331e0ffa1927889c15e7ed56650a183.tar.bz2 linux-stable-816cd1688331e0ffa1927889c15e7ed56650a183.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
include/net/sock.h
310731e2f161 ("net: Fix data-races around sysctl_mem.")
e70f3c701276 ("Revert "net: set SK_MEM_QUANTUM to 4096"")
https://lore.kernel.org/all/20220711120211.7c8b7cba@canb.auug.org.au/
net/ipv4/fib_semantics.c
747c14307214 ("ip: fix dflt addr selection for connected nexthop")
d62607c3fe45 ("net: rename reference+tracking helpers")
net/tls/tls.h
include/net/tls.h
3d8c51b25a23 ("net/tls: Check for errors in tls_device_init")
587903142308 ("tls: create an internal header")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
353 files changed, 3652 insertions, 1455 deletions
@@ -64,6 +64,9 @@ Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com> Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com> Ben Gardner <bgardner@wabtec.com> Ben M Cahill <ben.m.cahill@intel.com> +Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net> +Ben Widawsky <bwidawsk@kernel.org> <ben.widawsky@intel.com> +Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com> Björn Steinbrink <B.Steinbrink@gmx.de> Björn Töpel <bjorn@kernel.org> <bjorn.topel@gmail.com> Björn Töpel <bjorn@kernel.org> <bjorn.topel@intel.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2522b11e593f..f2d26cb7e853 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5197,6 +5197,30 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - no mitigation + auto - automatically select a migitation + auto,nosmt - automatically select a mitigation, + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). + ibpb - mitigate short speculation windows on + basic block boundaries too. Safe, highest + perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. + unret,nosmt - like unret, will disable SMT when STIBP + is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. @@ -5568,6 +5592,7 @@ eibrs - enhanced IBRS eibrs,retpoline - enhanced IBRS + Retpolines eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index d6b3f94b9f1f..0793c400d4b0 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -223,7 +223,7 @@ Module Loading Inter Module support -------------------- -Refer to the file kernel/module.c for more information. +Refer to the files in kernel/module/ for more information. Hardware Interfaces =================== diff --git a/Documentation/core-api/symbol-namespaces.rst b/Documentation/core-api/symbol-namespaces.rst index 5ad9e0abe42c..12e4aecdae94 100644 --- a/Documentation/core-api/symbol-namespaces.rst +++ b/Documentation/core-api/symbol-namespaces.rst @@ -51,8 +51,8 @@ namespace ``USB_STORAGE``, use:: The corresponding ksymtab entry struct ``kernel_symbol`` will have the member ``namespace`` set accordingly. A symbol that is exported without a namespace will refer to ``NULL``. There is no default namespace if none is defined. ``modpost`` -and kernel/module.c make use the namespace at build time or module load time, -respectively. +and kernel/module/main.c make use the namespace at build time or module load +time, respectively. 2.2 Using the DEFAULT_SYMBOL_NAMESPACE define ============================================= diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index ff0a5c58d78c..e712444abff1 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -67,7 +67,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml index 8cd0adbf7021..7029cb1f38ff 100644 --- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Atheros ath9k wireless devices Generic Binding maintainers: - - Kalle Valo <kvalo@codeaurora.org> + - Toke Høiland-Jørgensen <toke@toke.dk> description: | This node provides properties for configuring the ath9k wireless device. diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index 8c01fdba134b..a677b056f112 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -9,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices Generic Binding maintainers: - - Kalle Valo <kvalo@codeaurora.org> + - Kalle Valo <kvalo@kernel.org> description: | These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml index e9a533080b32..ef18a572a1ff 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml @@ -25,12 +25,12 @@ properties: - qcom,sc7280-lpass-cpu reg: - minItems: 2 + minItems: 1 maxItems: 6 description: LPAIF core registers reg-names: - minItems: 2 + minItems: 1 maxItems: 6 clocks: @@ -42,12 +42,12 @@ properties: maxItems: 10 interrupts: - minItems: 2 + minItems: 1 maxItems: 4 description: LPAIF DMA buffer interrupt interrupt-names: - minItems: 2 + minItems: 1 maxItems: 4 qcom,adsp: diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 7da6c30ed596..316cfd8b1891 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -466,6 +466,10 @@ overlay filesystem and the value of st_ino for filesystem objects may not be persistent and could change even while the overlay filesystem is mounted, as summarized in the `Inode properties`_ table above. +4) "idmapped mounts" +When the upper or lower layers are idmapped mounts overlayfs will be mounted +without support for POSIX Access Control Lists (ACLs). This limitation will +eventually be lifted. Changes to underlying filesystems --------------------------------- diff --git a/Documentation/livepatch/module-elf-format.rst b/Documentation/livepatch/module-elf-format.rst index dbe9b400e39f..7347638895a0 100644 --- a/Documentation/livepatch/module-elf-format.rst +++ b/Documentation/livepatch/module-elf-format.rst @@ -210,11 +210,11 @@ module->symtab. ===================================== Normally, a stripped down copy of a module's symbol table (containing only "core" symbols) is made available through module->symtab (See layout_symtab() -in kernel/module.c). For livepatch modules, the symbol table copied into memory -on module load must be exactly the same as the symbol table produced when the -patch module was compiled. This is because the relocations in each livepatch -relocation section refer to their respective symbols with their symbol indices, -and the original symbol indices (and thus the symtab ordering) must be +in kernel/module/kallsyms.c). For livepatch modules, the symbol table copied +into memory on module load must be exactly the same as the symbol table produced +when the patch module was compiled. This is because the relocations in each +livepatch relocation section refer to their respective symbols with their symbol +indices, and the original symbol indices (and thus the symtab ordering) must be preserved in order for apply_relocate_add() to find the right symbol. For example, take this particular rela from a livepatch module::: diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 4c8bbf5acfd1..2b329042b38c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1091,7 +1091,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -1185,7 +1185,7 @@ ip_autobind_reuse - BOOLEAN option should only be set by experts. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting diff --git a/Documentation/sound/soc/dai.rst b/Documentation/sound/soc/dai.rst index 009b07e5a0f3..bf8431386d26 100644 --- a/Documentation/sound/soc/dai.rst +++ b/Documentation/sound/soc/dai.rst @@ -10,7 +10,7 @@ AC97 ==== AC97 is a five wire interface commonly found on many PC sound cards. It is -now also popular in many portable devices. This DAI has a reset line and time +now also popular in many portable devices. This DAI has a RESET line and time multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines. The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97 diff --git a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst index 42f5d04e38ec..0f6898860d6d 100644 --- a/Documentation/translations/it_IT/core-api/symbol-namespaces.rst +++ b/Documentation/translations/it_IT/core-api/symbol-namespaces.rst @@ -50,9 +50,9 @@ Di conseguenza, nella tabella dei simboli del kernel ci sarà una voce rappresentata dalla struttura ``kernel_symbol`` che avrà il campo ``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi -di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio -dei nomi, rispettivamente, durante la compilazione e durante il caricamento -di un modulo. +di base. Il programma ``modpost`` e il codice in kernel/module/main.c usano lo +spazio dei nomi, rispettivamente, durante la compilazione e durante il +caricamento di un modulo. 2.2 Usare il simbolo di preprocessore DEFAULT_SYMBOL_NAMESPACE ============================================================== diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst index e45fe80d1cd8..962d31d019d7 100644 --- a/Documentation/translations/zh_CN/core-api/kernel-api.rst +++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst @@ -224,7 +224,7 @@ kernel/kmod.c 模块接口支持 ------------ -更多信息请参考文件kernel/module.c。 +更多信息请参阅kernel/module/目录下的文件。 硬件接口 ======== diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst index 6abf7ed534ca..bb16f0611046 100644 --- a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst +++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst @@ -52,7 +52,7 @@ 相应的 ksymtab 条目结构体 ``kernel_symbol`` 将有相应的成员 ``命名空间`` 集。 导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间,则默认没有。 -``modpost`` 和kernel/module.c分别在构建时或模块加载时使用名称空间。 +``modpost`` 和kernel/module/main.c分别在构建时或模块加载时使用名称空间。 2.2 使用DEFAULT_SYMBOL_NAMESPACE定义 ==================================== diff --git a/MAINTAINERS b/MAINTAINERS index 5fc2af7eb9bd..66738c8330db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -425,7 +425,6 @@ F: drivers/acpi/*thermal* ACPI VIOT DRIVER M: Jean-Philippe Brucker <jean-philippe@linaro.org> L: linux-acpi@vger.kernel.org -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Maintained F: drivers/acpi/viot.c @@ -959,7 +958,6 @@ F: drivers/video/fbdev/geode/ AMD IOMMU (AMD-VI) M: Joerg Roedel <joro@8bytes.org> R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git @@ -1039,6 +1037,7 @@ F: arch/arm64/boot/dts/amd/ AMD XGBE DRIVER M: Tom Lendacky <thomas.lendacky@amd.com> +M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com> L: netdev@vger.kernel.org S: Supported F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi @@ -5102,7 +5101,7 @@ COMPUTE EXPRESS LINK (CXL) M: Alison Schofield <alison.schofield@intel.com> M: Vishal Verma <vishal.l.verma@intel.com> M: Ira Weiny <ira.weiny@intel.com> -M: Ben Widawsky <ben.widawsky@intel.com> +M: Ben Widawsky <bwidawsk@kernel.org> M: Dan Williams <dan.j.williams@intel.com> L: linux-cxl@vger.kernel.org S: Maintained @@ -6054,7 +6053,6 @@ DMA MAPPING HELPERS M: Christoph Hellwig <hch@lst.de> M: Marek Szyprowski <m.szyprowski@samsung.com> R: Robin Murphy <robin.murphy@arm.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git @@ -6067,7 +6065,6 @@ F: kernel/dma/ DMA MAPPING BENCHMARK M: Xiang Chen <chenxiang66@hisilicon.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ @@ -7666,7 +7663,6 @@ F: drivers/gpu/drm/exynos/exynos_dp* EXYNOS SYSMMU (IOMMU) driver M: Marek Szyprowski <m.szyprowski@samsung.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Maintained F: drivers/iommu/exynos-iommu.c @@ -10091,7 +10087,6 @@ F: drivers/hid/intel-ish-hid/ INTEL IOMMU (VT-d) M: David Woodhouse <dwmw2@infradead.org> M: Lu Baolu <baolu.lu@linux.intel.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git @@ -10471,7 +10466,6 @@ F: include/linux/iomap.h IOMMU DRIVERS M: Joerg Roedel <joro@8bytes.org> M: Will Deacon <will@kernel.org> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git @@ -12631,7 +12625,6 @@ F: drivers/i2c/busses/i2c-mt65xx.c MEDIATEK IOMMU DRIVER M: Yong Wu <yong.wu@mediatek.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported @@ -16683,7 +16676,6 @@ F: drivers/i2c/busses/i2c-qcom-cci.c QUALCOMM IOMMU M: Rob Clark <robdclark@gmail.com> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev L: linux-arm-msm@vger.kernel.org S: Maintained @@ -19326,7 +19318,6 @@ F: arch/x86/boot/video* SWIOTLB SUBSYSTEM M: Christoph Hellwig <hch@infradead.org> -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Supported W: http://git.infradead.org/users/hch/dma-mapping.git @@ -22008,7 +21999,6 @@ XEN SWIOTLB SUBSYSTEM M: Juergen Gross <jgross@suse.com> M: Stefano Stabellini <sstabellini@kernel.org> L: xen-devel@lists.xenproject.org (moderated for non-subscribers) -L: iommu@lists.linux-foundation.org L: iommu@lists.linux.dev S: Supported F: arch/x86/xen/*swiotlb* @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index f1d0a7807cd0..41536feb4392 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -112,19 +112,6 @@ static __always_inline void set_domain(unsigned int val) } #endif -#ifdef CONFIG_CPU_USE_DOMAINS -#define modify_domain(dom,type) \ - do { \ - unsigned int domain = get_domain(); \ - domain &= ~domain_mask(dom); \ - domain = domain | domain_val(dom, type); \ - set_domain(domain); \ - } while (0) - -#else -static inline void modify_domain(unsigned dom, unsigned type) { } -#endif - /* * Generate the T (user) versions of the LDR/STR and related * instructions (inline assembly) diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7..2b8970d8e5a2 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 93051e2f402c..1408a6a15d0e 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -163,5 +163,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7aa3ded4af92..6a447ac67d80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -302,6 +302,7 @@ local_restart: b ret_fast_syscall #endif ENDPROC(vector_swi) + .ltorg /* * This is the really slow path. We're going to be doing diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index a3a4589ec73b..fc439c2c16f8 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -631,7 +631,11 @@ config CPU_USE_DOMAINS bool help This option enables or disables the use of domain switching - via the set_fs() function. + using the DACR (domain access control register) to protect memory + domains from each other. In Linux we use three domains: kernel, user + and IO. The domains are used to protect userspace from kernelspace + and to handle IO-space as a special type of memory by assigning + manager or client roles to running code (such as a process). config CPU_V7M_NUM_IRQ int "Number of external interrupts connected to the NVIC" diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f499559d193..f8dd0b3cc8e0 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5e2be37a198e..cd17e324aa51 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -296,6 +296,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -489,6 +496,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -568,6 +576,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -587,6 +596,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -647,6 +658,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1360,7 +1373,7 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); map.virtual = FDT_FIXED_BASE; map.length = FDT_FIXED_SIZE; - map.type = MT_ROM; + map.type = MT_MEMORY_RO; create_mapping(&map); } diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index fb9f3eb6bf48..8bc7a2d6d6c7 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -108,8 +108,7 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", - smp_processor_id()); + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); return SPECTRE_VULNERABLE; } @@ -209,10 +208,10 @@ static int spectre_bhb_install_workaround(int method) return SPECTRE_VULNERABLE; spectre_bhb_method = method; - } - pr_info("CPU%u: Spectre BHB: using %s workaround\n", - smp_processor_id(), spectre_bhb_method_name(method)); + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } return SPECTRE_MITIGATED; } diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 973173598992..facc889d05ee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <asm/probes.h> +#include <asm/ptrace.h> #include <asm/kprobes.h> void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1920d52653b4..53a912befb62 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -54,7 +54,6 @@ config LOONGARCH select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE @@ -77,7 +76,6 @@ config LOONGARCH select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD @@ -86,8 +84,6 @@ config LOONGARCH select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index adb16e4b43b0..b6be527831dd 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -48,6 +48,5 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 -#define vcsr16 $r16 #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 3dba4986f6c9..dc47fc724fa1 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -6,6 +6,7 @@ #define _ASM_PAGE_H #include <linux/const.h> +#include <asm/addrspace.h> /* * PAGE_SHIFT determines the page size diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 1d63c934b289..57ec45aa078e 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,7 +80,6 @@ BUILD_FPR_ACCESS(64) struct loongarch_fpu { unsigned int fcsr; - unsigned int vcsr; uint64_t fcc; /* 8x8 */ union fpureg fpr[NUM_FPU_REGS]; }; @@ -161,7 +160,6 @@ struct thread_struct { */ \ .fpu = { \ .fcsr = 0, \ - .vcsr = 0, \ .fcc = 0, \ .fpr = {{{0,},},}, \ }, \ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bfb65eb2844f..20cd9e16a95a 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -166,7 +166,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); - OFFSET(THREAD_VCSR, loongarch_fpu, vcsr); BLANK(); } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 75c6ce0682a2..a631a7137667 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -146,16 +146,6 @@ movgr2fcsr fcsr0, \tmp0 .endm - .macro sc_save_vcsr base, tmp0 - movfcsr2gr \tmp0, vcsr16 - EX st.w \tmp0, \base, 0 - .endm - - .macro sc_restore_vcsr base, tmp0 - EX ld.w \tmp0, \base, 0 - movgr2fcsr vcsr16, \tmp0 - .endm - /* * Save a thread's fp context. */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index a76f547a5aa3..a13f92593cfd 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -429,7 +429,6 @@ int __init init_numa_memory(void) return 0; } -EXPORT_SYMBOL(init_numa_memory); #endif void __init paging_init(void) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 6b6e16732c60..92e404032257 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) endif cflags-vdso := $(ccflags-vdso) \ + -isystem $(shell $(CC) -print-file-name=include) \ $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 463c78c52cc5..3805ad13b8f3 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -176,12 +176,8 @@ static int __init pnv_get_random_long_early(unsigned long *v) NULL) != pnv_get_random_long_early) return 0; - for_each_compatible_node(dn, NULL, "ibm,power-rng") { - if (rng_create(dn)) - continue; - /* Create devices for hwrng driver */ - of_platform_device_create(dn, NULL, NULL); - } + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); if (!ppc_md.get_random_seed) return 0; @@ -205,10 +201,18 @@ void __init pnv_rng_init(void) static int __init pnv_rng_late_init(void) { + struct device_node *dn; unsigned long v; + /* In case it wasn't called during init for some other reason. */ if (ppc_md.get_random_seed == pnv_get_random_long_early) pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == powernv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); + } + return 0; } machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 32ffef9f6e5b..fcbb81feb7ad 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -38,7 +38,7 @@ config RISCV select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU - select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index cf9a3ec32406..fba90e670ed4 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -271,8 +271,12 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, #endif /* CONFIG_HAVE_IOREMAP_PROT */ #else /* CONFIG_MMU */ -#define iounmap(addr) do { } while (0) -#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset)) +static inline void __iomem *ioremap(phys_addr_t offset, size_t size) +{ + return (void __iomem *)(unsigned long)offset; +} + +static inline void iounmap(volatile void __iomem *addr) { } #endif /* CONFIG_MMU */ #define ioremap_uc ioremap diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 95af12e82a32..cdbd9653aa14 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -102,8 +102,8 @@ extern unsigned long uml_physmem; * casting is the right thing, but 32-bit UML can't have 64-bit virtual * addresses */ -#define __pa(virt) to_phys((void *) (unsigned long) (virt)) -#define __va(phys) to_virt((unsigned long) (phys)) +#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt)) +#define __va(phys) uml_to_virt((unsigned long) (phys)) #define phys_to_pfn(p) ((p) >> PAGE_SHIFT) #define pfn_to_phys(pfn) PFN_PHYS(pfn) diff --git a/arch/um/include/shared/mem.h b/arch/um/include/shared/mem.h index 4862c91d4213..98aacd544108 100644 --- a/arch/um/include/shared/mem.h +++ b/arch/um/include/shared/mem.h @@ -9,12 +9,12 @@ extern int phys_mapping(unsigned long phys, unsigned long long *offset_out); extern unsigned long uml_physmem; -static inline unsigned long to_phys(void *virt) +static inline unsigned long uml_to_phys(void *virt) { return(((unsigned long) virt) - uml_physmem); } -static inline void *to_virt(unsigned long phys) +static inline void *uml_to_virt(unsigned long phys) { return((void *) uml_physmem + phys); } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 87d3129e7362..c316c993a949 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -251,7 +251,7 @@ static int userspace_tramp(void *stack) signal(SIGTERM, SIG_DFL); signal(SIGWINCH, SIG_IGN); - fd = phys_mapping(to_phys(__syscall_stub_start), &offset); + fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); if (addr == MAP_FAILED) { @@ -261,7 +261,7 @@ static int userspace_tramp(void *stack) } if (stack != NULL) { - fd = phys_mapping(to_phys(stack), &offset); + fd = phys_mapping(uml_to_phys(stack), &offset); addr = mmap((void *) STUB_DATA, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, offset); @@ -534,7 +534,7 @@ int copy_context_skas0(unsigned long new_stack, int pid) struct stub_data *data = (struct stub_data *) current_stack; struct stub_data *child_data = (struct stub_data *) new_stack; unsigned long long new_offset; - int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset); /* * prepare offset and fd of child's stack as argument for parent's diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index be0b95e51df6..e58798f636d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -462,29 +462,6 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH -config RETPOLINE - bool "Avoid speculative indirect branches in kernel" - select OBJTOOL if HAVE_OBJTOOL - default y - help - Compile kernel with the retpoline compiler options to guard against - kernel-to-user data leaks by avoiding speculative indirect - branches. Requires a compiler with -mindirect-branch=thunk-extern - support for full protection. The kernel may run slower. - -config CC_HAS_SLS - def_bool $(cc-option,-mharden-sls=all) - -config SLS - bool "Mitigate Straight-Line-Speculation" - depends on CC_HAS_SLS && X86_64 - select OBJTOOL if HAVE_OBJTOOL - default n - help - Compile the kernel with straight-line-speculation options to guard - against straight line speculation. The kernel image might be slightly - larger. - config X86_CPU_RESCTRL bool "x86 CPU resource control support" depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) @@ -2453,6 +2430,91 @@ source "kernel/livepatch/Kconfig" endmenu +config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + +config CC_HAS_RETURN_THUNK + def_bool $(cc-option,-mfunction-return=thunk-extern) + +menuconfig SPECULATION_MITIGATIONS + bool "Mitigations for speculative execution vulnerabilities" + default y + help + Say Y here to enable options which enable mitigations for + speculative execution hardware vulnerabilities. + + If you say N, all mitigations will be disabled. You really + should know what you are doing to say so. + +if SPECULATION_MITIGATIONS + +config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + default y + depends on (X86_64 || X86_PAE) + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pti.rst for more details. + +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + +config RETHUNK + bool "Enable return-thunks" + depends on RETPOLINE && CC_HAS_RETURN_THUNK + select OBJTOOL if HAVE_OBJTOOL + default y + help + Compile the kernel with the return-thunks compiler option to guard + against kernel-to-user data leaks by avoiding return speculation. + Requires a compiler with -mfunction-return=thunk-extern + support for full protection. The kernel may run slower. + +config CPU_UNRET_ENTRY + bool "Enable UNRET on kernel entry" + depends on CPU_SUP_AMD && RETHUNK + default y + help + Compile the kernel with support for the retbleed=unret mitigation. + +config CPU_IBPB_ENTRY + bool "Enable IBPB on kernel entry" + depends on CPU_SUP_AMD + default y + help + Compile the kernel with support for the retbleed=ibpb mitigation. + +config CPU_IBRS_ENTRY + bool "Enable IBRS on kernel entry" + depends on CPU_SUP_INTEL + default y + help + Compile the kernel with support for the spectre_v2=ibrs mitigation. + This mitigates both spectre_v2 and retbleed at great cost to + performance. + +config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 + select OBJTOOL if HAVE_OBJTOOL + default n + help + Compile the kernel with straight-line-speculation options to guard + against straight line speculation. The kernel image might be slightly + larger. + +endif + config ARCH_HAS_ADD_PAGES def_bool y depends on ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a74886aed349..1f40dad30d50 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -21,6 +21,12 @@ ifdef CONFIG_CC_IS_CLANG RETPOLINE_CFLAGS := -mretpoline-external-thunk RETPOLINE_VDSO_CFLAGS := -mretpoline endif + +ifdef CONFIG_RETHUNK +RETHUNK_CFLAGS := -mfunction-return=thunk-extern +RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) +endif + export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 44c350d627c7..d4a314cc50d6 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -110,6 +110,7 @@ void kernel_add_identity_map(unsigned long start, unsigned long end) void initialize_identity_maps(void *rmode) { unsigned long cmdline; + struct setup_data *sd; /* Exclude the encryption mask from __PHYSICAL_MASK */ physical_mask &= ~sme_me_mask; @@ -163,6 +164,18 @@ void initialize_identity_maps(void *rmode) cmdline = get_cmd_line_ptr(); kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); + /* + * Also map the setup_data entries passed via boot_params in case they + * need to be accessed by uncompressed kernel via the identity mapping. + */ + sd = (struct setup_data *)boot_params->hdr.setup_data; + while (sd) { + unsigned long sd_addr = (unsigned long)sd; + + kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len); + sd = (struct setup_data *)sd->next; + } + sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. */ diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 7fec5dcf6438..eeadbd7d92cc 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 29b36e9e4e74..f6907627172b 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -7,6 +7,8 @@ #include <asm/asm-offsets.h> #include <asm/processor-flags.h> #include <asm/ptrace-abi.h> +#include <asm/msr.h> +#include <asm/nospec-branch.h> /* @@ -283,6 +285,66 @@ For 32-bit we have the following conventions - kernel is built with #endif /* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg +#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +#endif +.endm + +/* * Mitigate Spectre v1 for conditional swapgs code paths. * * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S new file mode 100644 index 000000000000..bfb7bcb362bc --- /dev/null +++ b/arch/x86/entry/entry.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common place for both 32- and 64-bit entry routines. + */ + +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/msr-index.h> + +.pushsection .noinstr.text, "ax" + +SYM_FUNC_START(entry_ibpb) + movl $MSR_IA32_PRED_CMD, %ecx + movl $PRED_CMD_IBPB, %eax + xorl %edx, %edx + wrmsr + RET +SYM_FUNC_END(entry_ibpb) +/* For KVM */ +EXPORT_SYMBOL_GPL(entry_ibpb); + +.popsection diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 887420844066..e309e7156038 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) movl %ebx, PER_CPU_VAR(__stack_chk_guard) #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* Restore flags or the incoming task to restore AC state. */ popfl diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4300ba49b5ee..285e043a3e40 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -85,7 +85,7 @@ */ SYM_CODE_START(entry_SYSCALL_64) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR swapgs @@ -112,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi /* Sign extend the lower 32bit as syscall numbers are treated as int */ movslq %eax, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + UNTRAIN_RET + call do_syscall_64 /* returns with IRQs disabled */ /* @@ -191,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: + IBRS_EXIT POP_REGS pop_rdi=0 /* @@ -249,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -258,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -322,13 +326,13 @@ SYM_CODE_END(ret_from_fork) #endif .endm -/* Save all registers in pt_regs */ -SYM_CODE_START_LOCAL(push_and_clear_regs) +SYM_CODE_START_LOCAL(xen_error_entry) UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 + UNTRAIN_RET RET -SYM_CODE_END(push_and_clear_regs) +SYM_CODE_END(xen_error_entry) /** * idtentry_body - Macro to emit code calling the C function @@ -337,9 +341,6 @@ SYM_CODE_END(push_and_clear_regs) */ .macro idtentry_body cfunc has_error_code:req - call push_and_clear_regs - UNWIND_HINT_REGS - /* * Call error_entry() and switch to the task stack if from userspace. * @@ -349,7 +350,7 @@ SYM_CODE_END(push_and_clear_regs) * switch the CR3. So it can skip invoking error_entry(). */ ALTERNATIVE "call error_entry; movq %rax, %rsp", \ - "", X86_FEATURE_XENPV + "call xen_error_entry", X86_FEATURE_XENPV ENCODE_FRAME_POINTER UNWIND_HINT_REGS @@ -612,6 +613,7 @@ __irqentry_text_end: SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -725,6 +727,7 @@ native_irq_return_ldt: pushq %rdi /* Stash user RDI */ swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + UNTRAIN_RET movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ @@ -897,6 +900,9 @@ SYM_CODE_END(xen_failsafe_callback) * 1 -> no SWAPGS on exit * * Y GSBASE value at entry, must be restored in paranoid_exit + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_entry) UNWIND_HINT_FUNC @@ -940,7 +946,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * is needed here. */ SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx - RET + jmp .Lparanoid_gsbase_done .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ @@ -959,8 +965,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) xorl %ebx, %ebx swapgs .Lparanoid_kernel_gsbase: - FENCE_SWAPGS_KERNEL_ENTRY +.Lparanoid_gsbase_done: + + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + UNTRAIN_RET + RET SYM_CODE_END(paranoid_entry) @@ -982,9 +996,19 @@ SYM_CODE_END(paranoid_entry) * 1 -> no SWAPGS on exit * * Y User space GSBASE, must be restored unconditionally + * + * R14 - old CR3 + * R15 - old SPEC_CTRL */ SYM_CODE_START_LOCAL(paranoid_exit) UNWIND_HINT_REGS + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* * The order of operations is important. RESTORE_CR3 requires * kernel GSBASE. @@ -1017,6 +1041,10 @@ SYM_CODE_END(paranoid_exit) */ SYM_CODE_START_LOCAL(error_entry) UNWIND_HINT_FUNC + + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 + testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1028,9 +1056,12 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ call sync_regs RET @@ -1065,6 +1096,7 @@ SYM_CODE_START_LOCAL(error_entry) .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY leaq 8(%rsp), %rax /* return pt_regs pointer */ + ANNOTATE_UNRET_END RET .Lbstep_iret: @@ -1080,6 +1112,8 @@ SYM_CODE_START_LOCAL(error_entry) swapgs FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER + UNTRAIN_RET /* * Pretend that the exception came from user mode: set up pt_regs @@ -1185,6 +1219,9 @@ SYM_CODE_START(asm_exc_nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + UNTRAIN_RET + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1409,6 +1446,9 @@ end_repeat_nmi: movq $-1, %rsi call exc_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index d1052742ad0c..682338e7e2a3 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include <asm/asm-offsets.h> #include <asm/current.h> #include <asm/errno.h> @@ -14,9 +13,12 @@ #include <asm/irqflags.h> #include <asm/asm.h> #include <asm/smap.h> +#include <asm/nospec-branch.h> #include <linux/linkage.h> #include <linux/err.h> +#include "calling.h" + .section .entry.text, "ax" /* @@ -47,7 +49,7 @@ * 0(%ebp) arg6 */ SYM_CODE_START(entry_SYSENTER_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -88,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) cld + IBRS_ENTER + UNTRAIN_RET + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -174,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat) * 0(%esp) arg6 */ SYM_CODE_START(entry_SYSCALL_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* Interrupts are off on entry. */ swapgs @@ -203,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS UNWIND_HINT_REGS + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -217,6 +225,8 @@ sysret32_from_system_call: */ STACKLEAK_ERASE + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -295,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat) * ebp arg6 */ SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR /* * Interrupts are off on entry. @@ -337,6 +347,9 @@ SYM_CODE_START(entry_INT80_compat) cld + IBRS_ENTER + UNTRAIN_RET + movq %rsp, %rdi call do_int80_syscall_32 jmp swapgs_restore_regs_and_return_to_usermode diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c2a8b76ae0bc..76cd790ed0bd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -92,6 +92,7 @@ endif endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S index 15e35159ebb6..ef2dd1827243 100644 --- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S +++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S @@ -19,17 +19,20 @@ __vsyscall_page: mov $__NR_gettimeofday, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_time, %rax syscall - RET + ret + int3 .balign 1024, 0xcc mov $__NR_getcpu, %rax syscall - RET + ret + int3 .balign 4096, 0xcc diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9b10c8c76087..9542c582d546 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 03acc823838a..00f5227c8459 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -296,6 +296,12 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -316,6 +322,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -447,5 +454,6 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 36369e76cc63..33d2cd04d254 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,25 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_RETPOLINE +# define DISABLE_RETPOLINE 0 +#else +# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) +#endif + +#ifdef CONFIG_RETHUNK +# define DISABLE_RETHUNK 0 +#else +# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) +#endif + +#ifdef CONFIG_CPU_UNRET_ENTRY +# define DISABLE_UNRET 0 +#else +# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -82,7 +101,7 @@ #define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 0 +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 85865f1645bd..73ca20049835 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -19,19 +19,27 @@ #define __ALIGN_STR __stringify(__ALIGN) #endif +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define RET jmp __x86_return_thunk +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define RET ret; int3 #else #define RET ret #endif +#endif /* CONFIG_RETPOLINE */ #else /* __ASSEMBLY__ */ +#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define ASM_RET "jmp __x86_return_thunk\n\t" +#else /* CONFIG_RETPOLINE */ #ifdef CONFIG_SLS #define ASM_RET "ret; int3\n\t" #else #define ASM_RET "ret\n\t" #endif +#endif /* CONFIG_RETPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d27e0581b777..cc615be27a54 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -93,6 +95,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -140,6 +143,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -567,6 +577,9 @@ /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index da251a5645b0..10a3bfc1eb23 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,6 +11,7 @@ #include <asm/cpufeatures.h> #include <asm/msr-index.h> #include <asm/unwind_hints.h> +#include <asm/percpu.h> #define RETPOLINE_THUNK_SIZE 32 @@ -76,6 +77,23 @@ .endm /* + * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions + * vs RETBleed validation. + */ +#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + +/* + * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should + * eventually turn into it's own annotation. + */ +.macro ANNOTATE_UNRET_END +#ifdef CONFIG_DEBUG_ENTRY + ANNOTATE_RETPOLINE_SAFE + nop +#endif +.endm + +/* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. @@ -105,10 +123,34 @@ * monstrosity above, manually. */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: +.endm + +#ifdef CONFIG_CPU_UNRET_ENTRY +#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#else +#define CALL_ZEN_UNTRAIN_RET "" +#endif + +/* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * + * While zen_untrain_ret() doesn't clobber anything but requires stack, + * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ +.macro UNTRAIN_RET +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB #endif .endm @@ -120,17 +162,20 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#ifdef CONFIG_RETPOLINE - typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +extern retpoline_thunk_t __x86_indirect_thunk_array[]; + +extern void __x86_return_thunk(void); +extern void zen_untrain_ret(void); +extern void entry_ibpb(void); + +#ifdef CONFIG_RETPOLINE #define GEN(reg) \ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; #include <asm/GEN-for-each-reg.h> #undef GEN -extern retpoline_thunk_t __x86_indirect_thunk_array[]; - #ifdef CONFIG_X86_64 /* @@ -193,6 +238,7 @@ enum spectre_v2_mitigation { SPECTRE_V2_EIBRS, SPECTRE_V2_EIBRS_RETPOLINE, SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -235,6 +281,9 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -244,18 +293,16 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f8b9ee97a891..f37cbff7354c 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -120,6 +120,9 @@ void *extend_brk(size_t size, size_t align); static char __brk_##name[size] extern void probe_roms(void); + +void clear_bss(void); + #ifdef __i386__ asmlinkage void __init i386_start_kernel(void); diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index 2d8dacd02643..343b722ccaf2 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -21,6 +21,16 @@ * relative displacement across sections. */ +/* + * The trampoline is 8 bytes and of the general form: + * + * jmp.d32 \func + * ud1 %esp, %ecx + * + * That trailing #UD provides both a speculation stop and serves as a unique + * 3 byte signature identifying static call trampolines. Also see tramp_ud[] + * and __static_call_fixup(). + */ #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ asm(".pushsection .static_call.text, \"ax\" \n" \ ".align 4 \n" \ @@ -28,7 +38,7 @@ STATIC_CALL_TRAMP_STR(name) ": \n" \ ANNOTATE_NOENDBR \ insns " \n" \ - ".byte 0x53, 0x43, 0x54 \n" \ + ".byte 0x0f, 0xb9, 0xcc \n" \ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ ".popsection \n") @@ -36,8 +46,13 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") +#ifdef CONFIG_RETHUNK +#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") +#else #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#endif #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) @@ -48,4 +63,6 @@ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ".popsection \n") +extern bool __static_call_fixup(void *tramp, u8 op, void *dest); + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8b33674288ea..f66fbe6537dd 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -8,7 +8,11 @@ #ifdef __ASSEMBLY__ .macro UNWIND_HINT_EMPTY - UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_ENTRY + UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 @@ -52,6 +56,14 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +.macro UNWIND_HINT_SAVE + UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE +.endm + +.macro UNWIND_HINT_RESTORE + UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +.endm + #else #define UNWIND_HINT_FUNC \ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index bea5cdcdf532..e02a8a8ef23c 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -15,7 +15,7 @@ #define SETUP_INDIRECT (1<<31) /* SETUP_INDIRECT | max(SETUP_*) */ -#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE) +#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_CC_BLOB) /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 8b8cbf22461a..734b96454896 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -11,6 +11,16 @@ /* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ +bool cpc_supported_by_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_HYGON: + return boot_cpu_has(X86_FEATURE_CPPC); + } + return false; +} + bool cpc_ffh_supported(void) { return true; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e257f6c80372..d6858533e6e5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __return_sites[], __return_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -507,9 +508,76 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } } +#ifdef CONFIG_RETHUNK +/* + * Rewrite the compiler generated return thunk tail-calls. + * + * For example, convert: + * + * JMP __x86_return_thunk + * + * into: + * + * RET + */ +static int patch_return(void *addr, struct insn *insn, u8 *bytes) +{ + int i = 0; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return -1; + + bytes[i++] = RET_INSN_OPCODE; + + for (; i < insn->length;) + bytes[i++] = INT3_INSN_OPCODE; + + return i; +} + +void __init_or_module noinline apply_returns(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; + u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + + op = insn.opcode.bytes[0]; + if (op == JMP32_INSN_OPCODE) + dest = addr + insn.length + insn.immediate.value; + + if (__static_call_fixup(addr, op, dest) || + WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + addr, addr, insn.length, + addr + insn.length + insn.immediate.value); + + len = patch_return(addr, &insn, bytes); + if (len == insn.length) { + DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + text_poke_early(addr, bytes, len); + } + } +} +#else +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } +#endif /* CONFIG_RETHUNK */ + #else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ @@ -860,6 +928,7 @@ void __init alternative_instructions(void) * those can rewrite the retpoline thunks. */ apply_retpolines(__retpoline_sites, __retpoline_sites_end); + apply_returns(__return_sites, __return_sites_end); /* * Then patch alternatives, such that those paravirt calls that are in diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 437308004ef2..cb50589a7102 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -19,6 +19,7 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> #include <asm/tdx.h> +#include "../kvm/vmx/vmx.h" #ifdef CONFIG_XEN #include <xen/interface/xen.h> @@ -107,4 +108,9 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + + if (IS_ENABLED(CONFIG_KVM_INTEL)) { + BLANK(); + OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); + } } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0c0b09796ced..35d5288394cb 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -862,6 +862,28 @@ static void init_amd_bd(struct cpuinfo_x86 *c) clear_rdrand_cpuid_bit(c); } +void init_spectral_chicken(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* + * On Zen2 we offer this chicken (bit) on the altar of Speculation. + * + * This suppresses speculation from the middle of a basic block, i.e. it + * suppresses non-branch predictions. + * + * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { + if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { + value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } +#endif +} + static void init_amd_zn(struct cpuinfo_x86 *c) { set_cpu_cap(c, X86_FEATURE_ZEN); @@ -870,12 +892,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -907,7 +938,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; case 0x16: init_amd_jg(c); break; - case 0x17: fallthrough; + case 0x17: init_spectral_chicken(c); + fallthrough; case 0x19: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 74c62cc47a5f..0dd04713434b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -38,6 +38,8 @@ static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); @@ -48,16 +50,40 @@ static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +void write_spec_ctrl_current(u64 val, bool force) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); /* * AMD specific MSR info for Speculative Store Bypass control. @@ -114,13 +140,21 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_select_mitigation(); + /* + * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); md_clear_select_mitigation(); @@ -161,31 +195,17 @@ void __init check_bugs(void) #endif } +/* + * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is + * done in vmenter.S. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -752,12 +772,180 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, + RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, + RETBLEED_CMD_IBPB, +}; + +const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", + [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __ro_after_init retbleed_nosmt = false; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "off")) { + retbleed_cmd = RETBLEED_CMD_OFF; + } else if (!strcmp(str, "auto")) { + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; + } else if (!strcmp(str, "ibpb")) { + retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { + pr_err("Ignoring unknown retbleed option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + +static void __init retbleed_select_mitigation(void) +{ + bool mitigate_smt = false; + + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_UNRET: + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + } else { + pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); + goto do_cmd_auto; + } + break; + + case RETBLEED_CMD_IBPB: + if (!boot_cpu_has(X86_FEATURE_IBPB)) { + pr_err("WARNING: CPU does not support IBPB.\n"); + goto do_cmd_auto; + } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto do_cmd_auto; + } + break; + +do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in + * spectre_v2_select_mitigation(). 'retbleed_mitigation' will + * be set accordingly below. + */ + + break; + } + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); + + mitigate_smt = true; + break; + + case RETBLEED_MITIGATION_IBPB: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; + + default: + break; + } + + if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && + (retbleed_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = SPECTRE_V2_USER_NONE; static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = @@ -828,6 +1016,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_EIBRS, SPECTRE_V2_CMD_EIBRS_RETPOLINE, SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -868,13 +1057,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -900,15 +1091,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { - return (mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE); + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; } static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -921,7 +1113,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -969,12 +1161,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not - * required. + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* @@ -986,6 +1178,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (mode != SPECTRE_V2_USER_STRICT && + mode != SPECTRE_V2_USER_STRICT_PREFERRED) + pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + } + spectre_v2_user_stibp = mode; set_mode: @@ -999,6 +1198,7 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -1016,6 +1216,7 @@ static const struct { { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -1078,6 +1279,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } + if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -1093,6 +1318,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1117,6 +1358,15 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && + boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; + } + mode = spectre_v2_select_retpoline(); break; @@ -1133,6 +1383,10 @@ static void __init spectre_v2_select_mitigation(void) mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + case SPECTRE_V2_CMD_EIBRS: mode = SPECTRE_V2_EIBRS; break; @@ -1149,10 +1403,9 @@ static void __init spectre_v2_select_mitigation(void) if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); - if (spectre_v2_in_eibrs_mode(mode)) { - /* Force it so VMEXIT will restore correctly */ + if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } switch (mode) { @@ -1160,6 +1413,10 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_EIBRS: break; + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + break; + case SPECTRE_V2_LFENCE: case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); @@ -1171,43 +1428,107 @@ static void __init spectre_v2_select_mitigation(void) break; } + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. + * + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. + * + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Similar to context switches, there are two types of RSB attacks + * after vmexit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS, on the other hand, has RSB-poisoning protections, so it + * doesn't need RSB clearing after vmexit. + */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) || + boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + + /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1424,16 +1745,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -1450,7 +1761,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1701,7 +2012,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); @@ -1938,7 +2249,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1994,6 +2305,24 @@ static ssize_t srbds_show_state(char *buf) return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); } +static ssize_t retbleed_show_state(char *buf) +{ + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + + return sprintf(buf, "%s; SMT %s\n", + retbleed_strings[retbleed_mitigation], + !sched_smt_active() ? "disabled" : + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? + "enabled with STIBP protection" : "vulnerable"); + } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2039,6 +2368,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MMIO_STALE_DATA: return mmio_stale_data_show_state(buf); + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -2095,4 +2427,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at { return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4730b0a58f24..736262a76a12 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1205,48 +1205,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { {} }; +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ INTEL_FAM6_##model, steppings, \ X86_FEATURE_ANY, issues) +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + #define SRBDS BIT(0) /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ #define MMIO BIT(1) /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ #define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | - BIT(7) | BIT(0xB), MMIO), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; @@ -1348,6 +1360,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !arch_cap_mmio_immune(ia32_cap)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a8e584fc991..7c9b5893c30a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,6 +61,8 @@ static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } #endif /* CONFIG_CPU_SUP_INTEL */ +extern void init_spectral_chicken(struct cpuinfo_x86 *c); + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 3fcdda4c1e11..21fd425088fe 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) /* get apicid instead of initial apic id from cpuid */ c->apicid = hard_smp_processor_id(); + /* + * XXX someone from Hygon needs to confirm this DTRT + * + init_spectral_chicken(c); + */ + set_cpu_cap(c, X86_FEATURE_ZEN); set_cpu_cap(c, X86_FEATURE_CPB); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbaa8326d6f2..fd44b54c90d5 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5b4efc927d80..24b9fa89aa27 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -301,7 +301,7 @@ union ftrace_op_code_union { } __attribute__((packed)); }; -#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) +#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS)) static unsigned long create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) @@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - memcpy(ip, retq, RET_SIZE); + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + else + memcpy(ip, retq, sizeof(retq)); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bd4a34100ed0..6a3cfaf6b72a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -426,10 +426,12 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr) /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ -static void __init clear_bss(void) +void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 92c4afa2b729..d860d437631b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array) SYM_CODE_START_LOCAL(early_idt_handler_common) UNWIND_HINT_IRET_REGS offset=16 + ANNOTATE_UNRET_END /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 ENDBR + ANNOTATE_UNRET_END + /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b98ffcf4d250..67828d973389 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".return_sites", secstrings + s->sh_name)) + returns = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + if (returns) { + void *rseg = (void *)returns->sh_addr; + apply_returns(rseg, rseg + returns->sh_size); + } if (alt) { /* patch .altinstructions */ void *aseg = (void *)alt->sh_addr; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9b2772b7e1f3..d456ce21c255 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index fcc8a7699103..c7c4b1917336 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,10 +7,12 @@ #include <linux/linkage.h> #include <asm/page_types.h> #include <asm/kexec.h> +#include <asm/nospec-branch.h> #include <asm/processor-flags.h> /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 2) @@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) movl %edi, %eax addl $(identity_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %edx, %edx xorl %esi, %esi xorl %ebp, %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popl %edx movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ @@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) movl %edi, %eax addl $(virtual_mapped - relocate_kernel), %eax pushl %eax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popl %edi popl %esi popl %ebx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) popl %edi popl %ebx popl %ebp - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c1d8626c53b6..4809c0dc4eb0 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -13,7 +13,8 @@ #include <asm/unwind_hints.h> /* - * Must be relocatable PIC code callable as a C function + * Must be relocatable PIC code callable as a C function, in particular + * there must be a plain RET and not jump to return thunk. */ #define PTR(x) (x << 3) @@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) /* jump to identity mapped page */ addq $(identity_mapped - relocate_kernel), %r8 pushq %r8 - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(relocate_kernel) SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) @@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) xorl %r14d, %r14d xorl %r15d, %r15d - RET + ANNOTATE_UNRET_SAFE + ret + int3 1: popq %rdx @@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) call swap_pages movq $virtual_mapped, %rax pushq %rax - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) @@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) popq %r12 popq %rbp popq %rbx - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(virtual_mapped) /* Do the copies */ @@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) lea PAGE_SIZE(%rax), %rsi jmp 0b 3: - RET + ANNOTATE_UNRET_SAFE + ret + int3 SYM_CODE_END(swap_pages) .globl kexec_control_code_size diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aa72cefdd5be..aaaba85d6d7f 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,13 +12,21 @@ enum insn_type { }; /* + * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such + * that there is no false-positive trampoline identification while also being a + * speculation stop. + */ +static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; + +/* * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; -static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) +static void __ref __static_call_transform(void *insn, enum insn_type type, + void *func, bool modinit) { const void *emulate = NULL; int size = CALL_INSN_SIZE; @@ -43,14 +51,17 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void break; case RET: - code = &retinsn; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + else + code = &retinsn; break; } if (memcmp(insn, code, size) == 0) return; - if (unlikely(system_state == SYSTEM_BOOTING)) + if (system_state == SYSTEM_BOOTING || modinit) return text_poke_early(insn, code, size); text_poke_bp(insn, code, size, emulate); @@ -60,7 +71,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) { u8 opcode = *(u8 *)insn; - if (tramp && memcmp(insn+5, "SCT", 3)) { + if (tramp && memcmp(insn+5, tramp_ud, 3)) { pr_err("trampoline signature fail"); BUG(); } @@ -104,14 +115,42 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) if (tramp) { __static_call_validate(tramp, true, true); - __static_call_transform(tramp, __sc_insn(!func, true), func); + __static_call_transform(tramp, __sc_insn(!func, true), func, false); } if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { __static_call_validate(site, tail, false); - __static_call_transform(site, __sc_insn(!func, tail), func); + __static_call_transform(site, __sc_insn(!func, tail), func, false); } mutex_unlock(&text_mutex); } EXPORT_SYMBOL_GPL(arch_static_call_transform); + +#ifdef CONFIG_RETHUNK +/* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as + * having a return trampoline. + * + * The problem is that static_call() is available before determining + * X86_FEATURE_RETHUNK and, by implication, running alternatives. + * + * This means that __static_call_transform() above can have overwritten the + * return trampoline and we now need to fix things up to be consistent. + */ +bool __static_call_fixup(void *tramp, u8 op, void *dest) +{ + if (memcmp(tramp+5, tramp_ud, 3)) { + /* Not a trampoline site, not our problem. */ + return false; + } + + mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) + __static_call_transform(tramp, RET, NULL, true); + mutex_unlock(&text_mutex); + + return true; +} +#endif diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 81aba718ecd5..15f29053cec4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,7 +141,7 @@ SECTIONS #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; - *(.text.__x86.indirect_thunk) + *(.text.__x86.*) __indirect_thunk_end = .; #endif } :text =0xcccc @@ -283,6 +283,13 @@ SECTIONS *(.retpoline_sites) __retpoline_sites_end = .; } + + . = ALIGN(8); + .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { + __return_sites = .; + *(.return_sites) + __return_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT @@ -385,7 +392,7 @@ SECTIONS __end_of_kernel_reserve = .; . = ALIGN(PAGE_SIZE); - .brk (NOLOAD) : AT(ADDR(.brk) - LOAD_OFFSET) { + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { __brk_base = .; . += 64 * 1024; /* 64k alignment slop space */ *(.bss..brk) /* areas brk users have reserved */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 89b11e7dca8a..db96bf7d1122 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define FOP_RET(name) \ __FOP_RET(#name) -#define FOP_START(op) \ +#define __FOP_START(op, align) \ extern void em_##op(struct fastop *fake); \ asm(".pushsection .text, \"ax\" \n\t" \ ".global em_" #op " \n\t" \ - ".align " __stringify(FASTOP_SIZE) " \n\t" \ + ".align " __stringify(align) " \n\t" \ "em_" #op ":\n\t" +#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) + #define FOP_END \ ".popsection") @@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * - * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] - * SETcc %al [3 bytes] - * RET [1 byte] - * INT3 [1 byte; CONFIG_SLS] - * - * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the - * next power-of-two alignment they become 4, 8 or 16 resp. + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] + * SETcc %al [3 bytes] + * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] */ -#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) +#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) +#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) +#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) static_assert(SETCC_LENGTH <= SETCC_ALIGN); #define FOP_SETCC(op) \ @@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); #op ": \n\t" \ ASM_ENDBR \ #op " %al \n\t" \ - __FOP_RET(#op) + __FOP_RET(#op) \ + ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" -FOP_START(setcc) +__FOP_START(setcc, SETCC_ALIGN) FOP_SETCC(seto) FOP_SETCC(setno) FOP_SETCC(setc) diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index dfaeb47fcf2a..723f8534986c 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -111,6 +111,15 @@ SYM_FUNC_START(__svm_vcpu_run) #endif /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize 'ret' if the return is + * from the kernel. + */ + UNTRAIN_RET + + /* * Clear all general purpose registers except RSP and RAX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers @@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be + * untrained as soon as we exit the VM and are back to the + * kernel. This should be done before re-enabling interrupts + * because interrupt handlers won't sanitize RET if the return is + * from the kernel. + */ + UNTRAIN_RET + pop %_ASM_BX #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 3f430e218375..c0e24826a86f 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -4,8 +4,8 @@ #include <asm/vmx.h> -#include "lapic.h" -#include "x86.h" +#include "../lapic.h" +#include "../x86.h" extern bool __read_mostly enable_vpid; extern bool __read_mostly flexpriority_enabled; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f5cb18e00e78..3a4e895269d7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3087,7 +3087,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) } vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + __vmx_vcpu_run_flags(vmx)); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h new file mode 100644 index 000000000000..edc3f16cc189 --- /dev/null +++ b/arch/x86/kvm/vmx/run_flags.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_RUN_FLAGS_H +#define __KVM_X86_VMX_RUN_FLAGS_H + +#define VMX_RUN_VMRESUME (1 << 0) +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 435c187927c4..4182c7ffc909 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,10 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> +#include <asm/asm-offsets.h> #include <asm/bitsperlong.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> +#include <asm/percpu.h> #include <asm/segment.h> +#include "run_flags.h" #define WORD_SIZE (BITS_PER_LONG / 8) @@ -31,72 +34,11 @@ .section .noinstr.text, "ax" /** - * vmx_vmenter - VM-Enter the current loaded VMCS - * - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME - * - * Returns: - * %RFLAGS.CF is set on VM-Fail Invalid - * %RFLAGS.ZF is set on VM-Fail Valid - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * Note that VMRESUME/VMLAUNCH fall-through and return directly if - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump - * to vmx_vmexit. - */ -SYM_FUNC_START_LOCAL(vmx_vmenter) - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ - je 2f - -1: vmresume - RET - -2: vmlaunch - RET - -3: cmpb $0, kvm_rebooting - je 4f - RET -4: ud2 - - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) - -SYM_FUNC_END(vmx_vmenter) - -/** - * vmx_vmexit - Handle a VMX VM-Exit - * - * Returns: - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit - * - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump - * here after hardware loads the host's state, i.e. this is the destination - * referred to by VMCS.HOST_RIP. - */ -SYM_FUNC_START(vmx_vmexit) -#ifdef CONFIG_RETPOLINE - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE - /* Preserve guest's RAX, it's used to stuff the RSB. */ - push %_ASM_AX - - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE - - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ - or $1, %_ASM_AX - - pop %_ASM_AX -.Lvmexit_skip_rsb: -#endif - RET -SYM_FUNC_END(vmx_vmexit) - -/** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @vmx: struct vcpu_vmx * * @regs: unsigned long * (to guest registers) - * @launched: %true if the VMCS has been launched + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl * * Returns: * 0 on VM-Exit, 1 on VM-Fail @@ -115,24 +57,56 @@ SYM_FUNC_START(__vmx_vcpu_run) #endif push %_ASM_BX + /* Save @vmx for SPEC_CTRL handling */ + push %_ASM_ARG1 + + /* Save @flags for SPEC_CTRL handling */ + push %_ASM_ARG3 + /* * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and * @regs is needed after VM-Exit to save the guest's register values. */ push %_ASM_ARG2 - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ mov %_ASM_ARG3B, %bl - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + lea (%_ASM_SP), %_ASM_ARG2 call vmx_update_host_rsp + ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL + + /* + * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the + * host's, write the MSR. + * + * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, + * there must not be any returns or indirect branches between this code + * and vmentry. + */ + mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI + movl VMX_spec_ctrl(%_ASM_DI), %edi + movl PER_CPU_VAR(x86_spec_ctrl_current), %esi + cmp %edi, %esi + je .Lspec_ctrl_done + mov $MSR_IA32_SPEC_CTRL, %ecx + xor %edx, %edx + mov %edi, %eax + wrmsr + +.Lspec_ctrl_done: + + /* + * Since vmentry is serializing on affected CPUs, there's no need for + * an LFENCE to stop speculation from skipping the wrmsr. + */ + /* Load @regs to RAX. */ mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - testb %bl, %bl + testb $VMX_RUN_VMRESUME, %bl /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -154,11 +128,37 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Enter guest mode */ - call vmx_vmenter + /* Check EFLAGS.ZF from 'testb' above */ + jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" + * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. + * So this isn't a typical function and objtool needs to be told to + * save the unwind state here and restore it below. + */ + UNWIND_HINT_SAVE + +/* + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at + * the 'vmx_vmexit' label below. + */ +.Lvmresume: + vmresume + jmp .Lvmfail + +.Lvmlaunch: + vmlaunch + jmp .Lvmfail - /* Jump on VM-Fail. */ - jbe 2f + _ASM_EXTABLE(.Lvmresume, .Lfixup) + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) + +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + + /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ + UNWIND_HINT_RESTORE + ENDBR /* Temporarily save guest's RAX. */ push %_ASM_AX @@ -185,21 +185,23 @@ SYM_FUNC_START(__vmx_vcpu_run) mov %r15, VCPU_R15(%_ASM_AX) #endif - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ - xor %eax, %eax + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ + xor %ebx, %ebx +.Lclear_regs: /* - * Clear all general purpose registers except RSP and RAX to prevent + * Clear all general purpose registers except RSP and RBX to prevent * speculative use of the guest's values, even those that are reloaded * via the stack. In theory, an L1 cache miss when restoring registers * could lead to speculative execution with the guest's values. * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially * free. RSP and RAX are exempt as RSP is restored by hardware during - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return + * value. */ -1: xor %ecx, %ecx + xor %eax, %eax + xor %ecx, %ecx xor %edx, %edx - xor %ebx, %ebx xor %ebp, %ebp xor %esi, %esi xor %edi, %edi @@ -216,8 +218,30 @@ SYM_FUNC_START(__vmx_vcpu_run) /* "POP" @regs. */ add $WORD_SIZE, %_ASM_SP - pop %_ASM_BX + /* + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB + * entries and (in some cases) RSB underflow. + * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ + + call vmx_spec_ctrl_restore_host + + /* Put return value in AX */ + mov %_ASM_BX, %_ASM_AX + + pop %_ASM_BX #ifdef CONFIG_X86_64 pop %r12 pop %r13 @@ -230,9 +254,15 @@ SYM_FUNC_START(__vmx_vcpu_run) pop %_ASM_BP RET - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ -2: mov $1, %eax - jmp 1b +.Lfixup: + cmpb $0, kvm_rebooting + jne .Lvmfail + ud2 +.Lvmfail: + /* VM-Fail: set return value to 1 */ + mov $1, %_ASM_BX + jmp .Lclear_regs + SYM_FUNC_END(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3a919e49129b..be7c19374fdd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) if (!vmx->disable_fb_clear) return; - rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); msr |= FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); /* Cache the MSR value to avoid reading it later */ vmx->msr_ia32_mcu_opt_ctrl = msr; } @@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) return; vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; - wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); } static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) @@ -839,6 +839,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) MSR_IA32_SPEC_CTRL); } +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) +{ + unsigned int flags = 0; + + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + + /* + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free + * to change it directly without causing a vmexit. In that case read + * it after vmexit and store it in vmx->spec_ctrl. + */ + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + flags |= VMX_RUN_SAVE_SPEC_CTRL; + + return flags; +} + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, unsigned long entry, unsigned long exit) { @@ -6813,6 +6831,31 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + unsigned int flags) +{ + u64 hostval = this_cpu_read(x86_spec_ctrl_current); + + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) + return; + + if (flags & VMX_RUN_SAVE_SPEC_CTRL) + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. + * + * For legacy IBRS, the IBRS bit always needs to be written after + * transitioning from a less privileged predictor mode, regardless of + * whether the guest/host values differ. + */ + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || + vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); +} + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { switch (to_vmx(vcpu)->exit_reason.basic) { @@ -6826,7 +6869,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, - struct vcpu_vmx *vmx) + struct vcpu_vmx *vmx, + unsigned long flags) { guest_state_enter_irqoff(); @@ -6845,7 +6889,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, native_write_cr2(vcpu->arch.cr2); vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, - vmx->loaded_vmcs->launched); + flags); vcpu->arch.cr2 = native_read_cr2(); @@ -6944,36 +6988,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_wait_lapic_expire(vcpu); - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - /* The actual VMENTER/EXIT is in the .noinstr.text section. */ - vmx_vcpu_enter_exit(vcpu, vmx); - - /* - * We do not use IBRS in the kernel. If this vCPU has used the - * SPEC_CTRL MSR it may have left it on; save the value and - * turn it off. This is much more efficient than blindly adding - * it to the atomic save/restore list. Especially as the former - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. - * - * For non-nested case: - * If the L01 MSR bitmap does not intercept the MSR, then we need to - * save it. - * - * For nested case: - * If the L02 MSR bitmap does not intercept the MSR, then we need to - * save it. - */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); /* All fields are clean at this point */ if (static_branch_unlikely(&enable_evmcs)) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8d2342ede0c5..1e7f9453894b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,11 +8,12 @@ #include <asm/intel_pt.h> #include "capabilities.h" -#include "kvm_cache_regs.h" +#include "../kvm_cache_regs.h" #include "posted_intr.h" #include "vmcs.h" #include "vmx_ops.h" -#include "cpuid.h" +#include "../cpuid.h" +#include "run_flags.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -404,7 +405,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 5e7f41225780..5cfc49ddb1b4 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -8,7 +8,7 @@ #include "evmcs.h" #include "vmcs.h" -#include "x86.h" +#include "../x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1910e1e78b15..26d0cac32f73 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12631,9 +12631,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); -bool kvm_arch_has_assigned_device(struct kvm *kvm) +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return atomic_read(&kvm->arch.assigned_device_count); + return arch_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index d83cba364e31..724bbf83eb5b 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) /* FSRM implies ERMS => no length checks, do the copy directly */ .Lmemmove_begin_forward: ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS + ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS /* * movsq instruction have many startup latency @@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET + +.Lmemmove_erms: + movq %rdx, %rcx + rep movsb + RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b2b2366885a2..073289a55f84 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE + ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ + __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) .endm @@ -67,3 +67,76 @@ SYM_CODE_END(__x86_indirect_thunk_array) #define GEN(reg) EXPORT_THUNK(reg) #include <asm/GEN-for-each-reg.h> #undef GEN + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +#ifdef CONFIG_RETHUNK + + .section .text.__x86.return_thunk + +/* + * Safety details here pertain to the AMD Zen{1,2} microarchitecture: + * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * alignment within the BTB. + * 2) The instruction at zen_untrain_ret must contain, and not + * end with, the 0xc3 byte of the RET. + * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread + * from re-poisioning the BTB prediction. + */ + .align 64 + .skip 63, 0xcc +SYM_FUNC_START_NOALIGN(zen_untrain_ret); + + /* + * As executed from zen_untrain_ret, this is: + * + * TEST $0xcc, %bl + * LFENCE + * JMP __x86_return_thunk + * + * Executing the TEST instruction has a side effect of evicting any BTB + * prediction (potentially attacker controlled) attached to the RET, as + * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + */ + .byte 0xf6 + + /* + * As executed from __x86_return_thunk, this is a plain RET. + * + * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. + * + * We subsequently jump backwards and architecturally execute the RET. + * This creates a correct BTB prediction (type=ret), but in the + * meantime we suffer Straight Line Speculation (because the type was + * no branch) which is halted by the INT3. + * + * With SMT enabled and STIBP active, a sibling thread cannot poison + * RET's prediction to a type of its choice, but can evict the + * prediction due to competitive sharing. If the prediction is + * evicted, __x86_return_thunk will suffer Straight Line Speculation + * which will be contained safely by the INT3. + */ +SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 +SYM_CODE_END(__x86_return_thunk) + + /* + * Ensure the TEST decoding / BTB invalidation is complete. + */ + lfence + + /* + * Jump back and execute the RET in the middle of the TEST instruction. + * INT3 is for SLS protection. + */ + jmp __x86_return_thunk + int3 +SYM_FUNC_END(zen_untrain_ret) +__EXPORT_THUNK(zen_untrain_ret) + +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 3d1dba05fce4..9de3d900bc92 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -65,7 +65,10 @@ SYM_FUNC_START(sme_encrypt_execute) movq %rbp, %rsp /* Restore original stack pointer */ pop %rbp - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 SYM_FUNC_END(sme_encrypt_execute) SYM_FUNC_START(__enc_copy) @@ -151,6 +154,9 @@ SYM_FUNC_START(__enc_copy) pop %r12 pop %r15 - RET + /* Offset to __x86_return_thunk would be wrong here */ + ANNOTATE_UNRET_SAFE + ret + int3 .L__enc_copy_end: SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b88f43c9f050..7e95697a6459 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,16 +412,30 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; -#ifdef CONFIG_RETPOLINE if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); - } else -#endif - EMIT2(0xFF, 0xE0 + reg); + } else { + EMIT2(0xFF, 0xE0 + reg); + } + + *pprog = prog; +} + +static void emit_return(u8 **pprog, u8 *ip) +{ + u8 *prog = *pprog; + + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + emit_jump(&prog, &__x86_return_thunk, ip); + } else { + EMIT1(0xC3); /* ret */ + if (IS_ENABLED(CONFIG_SLS)) + EMIT1(0xCC); /* int3 */ + } *pprog = prog; } @@ -1686,7 +1700,7 @@ emit_jmp: ctx->cleanup_addr = proglen; pop_callee_regs(&prog, callee_regs_used); EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: @@ -2197,7 +2211,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ - EMIT1(0xC3); /* ret */ + emit_return(&prog, prog); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index e3297b15701c..70fb2ea85e90 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1183,15 +1183,19 @@ static void __init xen_domu_set_legacy_features(void) extern void early_xen_iret_patch(void); /* First C function to be called on Xen boot */ -asmlinkage __visible void __init xen_start_kernel(void) +asmlinkage __visible void __init xen_start_kernel(struct start_info *si) { struct physdev_set_iopl set_iopl; unsigned long initrd_start = 0; int rc; - if (!xen_start_info) + if (!si) return; + clear_bss(); + + xen_start_info = si; + __text_gen_insn(&early_xen_iret_patch, JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, JMP32_INSN_SIZE); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 81aa46f770c5..cfa99e8f054b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -918,7 +918,7 @@ void xen_enable_sysenter(void) if (!boot_cpu_has(sysenter_feature)) return; - ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); if(ret != 0) setup_clear_cpu_cap(sysenter_feature); } @@ -927,7 +927,7 @@ void xen_enable_syscall(void) { int ret; - ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); if (ret != 0) { printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other @@ -936,7 +936,7 @@ void xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, - xen_syscall32_target); + xen_entry_SYSCALL_compat); if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index caa9bc2fa100..6b4fdf6b9542 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) - UNWIND_HINT_EMPTY + UNWIND_HINT_ENTRY ENDBR pop %rcx pop %r11 @@ -234,8 +234,8 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) */ /* Normal 64-bit system call target */ -SYM_CODE_START(xen_syscall_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -249,13 +249,13 @@ SYM_CODE_START(xen_syscall_target) movq $__USER_CS, 1*8(%rsp) jmp entry_SYSCALL_64_after_hwframe -SYM_CODE_END(xen_syscall_target) +SYM_CODE_END(xen_entry_SYSCALL_64) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ -SYM_CODE_START(xen_syscall32_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_ENTRY ENDBR popq %rcx popq %r11 @@ -269,11 +269,11 @@ SYM_CODE_START(xen_syscall32_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSCALL_compat_after_hwframe -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSCALL_compat) /* 32-bit compat sysenter target */ -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means @@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) movq $__USER32_CS, 1*8(%rsp) jmp entry_SYSENTER_compat_after_hwframe -SYM_CODE_END(xen_sysenter_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) #else /* !CONFIG_IA32_EMULATION */ -SYM_CODE_START(xen_syscall32_target) -SYM_CODE_START(xen_sysenter_target) - UNWIND_HINT_EMPTY +SYM_CODE_START(xen_entry_SYSCALL_compat) +SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_ENTRY ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 jmp hypercall_iret -SYM_CODE_END(xen_sysenter_target) -SYM_CODE_END(xen_syscall32_target) +SYM_CODE_END(xen_entry_SYSENTER_compat) +SYM_CODE_END(xen_entry_SYSCALL_compat) #endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 3a2cd93bf059..ffaa62167f6e 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE ret /* * Xen will write the hypercall page, and sort out ENDBR. @@ -48,15 +49,6 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - /* Clear .bss */ - xor %eax,%eax - mov $__bss_start, %rdi - mov $__bss_stop, %rcx - sub %rdi, %rcx - shr $3, %rcx - rep stosq - - mov %rsi, xen_start_info mov initial_stack(%rip), %rsp /* Set up %gs. @@ -71,6 +63,7 @@ SYM_CODE_START(startup_xen) cdq wrmsr + mov %rsi, %rdi call xen_start_kernel SYM_CODE_END(startup_xen) __FINIT diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index fd0fec6e92f4..9a8bb972193d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,10 +10,10 @@ /* These are code, but not functions. Defined in entry.S */ extern const char xen_failsafe_callback[]; -void xen_sysenter_target(void); +void xen_entry_SYSENTER_compat(void); #ifdef CONFIG_X86_64 -void xen_syscall_target(void); -void xen_syscall32_target(void); +void xen_entry_SYSCALL_64(void); +void xen_entry_SYSCALL_compat(void); #endif extern void *xen_initial_gdt; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 86fa61a21826..e2db1bdd9dd2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -298,7 +298,7 @@ EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed); bool osc_sb_native_usb4_support_confirmed; EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed); -bool osc_sb_cppc_not_supported; +bool osc_sb_cppc2_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_negotiate_platform_control(void) @@ -358,11 +358,6 @@ static void acpi_bus_osc_negotiate_platform_control(void) return; } -#ifdef CONFIG_ACPI_CPPC_LIB - osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] & - (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT)); -#endif - /* * Now run _OSC again with query flag clear and with the caps * supported by both the OS and the platform. @@ -376,6 +371,10 @@ static void acpi_bus_osc_negotiate_platform_control(void) capbuf_ret = context.ret.pointer; if (context.ret.length > OSC_SUPPORT_DWORD) { +#ifdef CONFIG_ACPI_CPPC_LIB + osc_sb_cppc2_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPCV2_SUPPORT; +#endif + osc_sb_apei_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT; osc_pc_lpi_support_confirmed = diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 903528f7e187..6ff1901d7d43 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -578,6 +578,19 @@ bool __weak cpc_ffh_supported(void) } /** + * cpc_supported_by_cpu() - check if CPPC is supported by CPU + * + * Check if the architectural support for CPPC is present even + * if the _OSC hasn't prescribed it + * + * Return: true for supported, false for not supported + */ +bool __weak cpc_supported_by_cpu(void) +{ + return false; +} + +/** * pcc_data_alloc() - Allocate the pcc_data memory for pcc subspace * * Check and allocate the cppc_pcc_data memory. @@ -684,8 +697,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) acpi_status status; int ret = -ENODATA; - if (osc_sb_cppc_not_supported) - return -ENODEV; + if (!osc_sb_cppc2_support_acked) { + pr_debug("CPPC v2 _OSC not acked\n"); + if (!cpc_supported_by_cpu()) + return -ENODEV; + } /* Parse the ACPI _CPC table for this CPU. */ status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output, diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 0e3ed5eb367b..0cb20324da16 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -493,13 +493,8 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) goto skip_probe; ret = amba_read_periphid(dev); - if (ret) { - if (ret != -EPROBE_DEFER) { - amba_device_put(dev); - goto err_out; - } + if (ret) goto err_release; - } skip_probe: ret = device_add(&dev->dev); @@ -546,6 +541,7 @@ static int amba_deferred_retry(void) continue; list_del_init(&ddev->node); + amba_device_put(ddev->dev); kfree(ddev); } diff --git a/drivers/base/core.c b/drivers/base/core.c index 7cd789c4985d..460d6f163e41 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -486,7 +486,18 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + /* + * If supplier_preactivated is set, the link has been dropped between + * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls + * in __driver_probe_device(). In that case, drop the supplier's + * PM-runtime usage counter to remove the reference taken by + * pm_runtime_get_suppliers(). + */ + if (link->supplier_preactivated) + pm_runtime_put_noidle(link->supplier); + + pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index a97776ea9d99..4c98849577d4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, &dev_attr_mmio_stale_data.attr, + &dev_attr_retbleed.attr, NULL }; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 676dc72d912d..949907e2e242 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -308,13 +308,10 @@ static int rpm_get_suppliers(struct device *dev) /** * pm_runtime_release_supplier - Drop references to device link's supplier. * @link: Target device link. - * @check_idle: Whether or not to check if the supplier device is idle. * - * Drop all runtime PM references associated with @link to its supplier device - * and if @check_idle is set, check if that device is idle (and so it can be - * suspended). + * Drop all runtime PM references associated with @link to its supplier device. */ -void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +void pm_runtime_release_supplier(struct device_link *link) { struct device *supplier = link->supplier; @@ -327,9 +324,6 @@ void pm_runtime_release_supplier(struct device_link *link, bool check_idle) while (refcount_dec_not_one(&link->rpm_active) && atomic_read(&supplier->power.usage_count) > 0) pm_runtime_put_noidle(supplier); - - if (check_idle) - pm_request_idle(supplier); } static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) @@ -337,8 +331,11 @@ static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) - pm_runtime_release_supplier(link, try_to_suspend); + device_links_read_lock_held()) { + pm_runtime_release_supplier(link); + if (try_to_suspend) + pm_request_idle(link->supplier); + } } static void rpm_put_suppliers(struct device *dev) @@ -1771,7 +1768,6 @@ void pm_runtime_get_suppliers(struct device *dev) if (link->flags & DL_FLAG_PM_RUNTIME) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); - refcount_inc(&link->rpm_active); } device_links_read_unlock(idx); @@ -1791,19 +1787,8 @@ void pm_runtime_put_suppliers(struct device *dev) list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { - bool put; - link->supplier_preactivated = false; - - spin_lock_irq(&dev->power.lock); - - put = pm_runtime_status_suspended(dev) && - refcount_dec_not_one(&link->rpm_active); - - spin_unlock_irq(&dev->power.lock); - - if (put) - pm_runtime_put(link->supplier); + pm_runtime_put(link->supplier); } device_links_read_unlock(idx); @@ -1838,7 +1823,8 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - pm_runtime_release_supplier(link, true); + pm_runtime_release_supplier(link); + pm_request_idle(link->supplier); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 0e89a7a932d4..bfc8ee876278 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -197,7 +197,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, else cxld->target_type = CXL_DECODER_ACCELERATOR; - if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) + if (is_endpoint_decoder(&cxld->dev)) return 0; target_list.value = diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 54f434733b56..cbf23beebebe 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -355,11 +355,13 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in != send_cmd->in.size) + if ((info->size_in != CXL_VARIABLE_PAYLOAD) && + (info->size_in != send_cmd->in.size)) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (send_cmd->out.size < info->size_out) + if ((info->size_out != CXL_VARIABLE_PAYLOAD) && + (send_cmd->out.size < info->size_out)) return -ENOMEM; *mem_cmd = (struct cxl_mem_command) { diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index ea60abda6500..dbce99bdffab 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -272,7 +272,7 @@ static const struct device_type cxl_decoder_root_type = { .groups = cxl_decoder_root_attribute_groups, }; -static bool is_endpoint_decoder(struct device *dev) +bool is_endpoint_decoder(struct device *dev) { return dev->type == &cxl_decoder_endpoint_type; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 140dc3278cde..6799b27c7db2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -340,6 +340,7 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); +bool is_endpoint_decoder(struct device *dev); bool is_cxl_decoder(struct device *dev); struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 60d10ee1e7fc..7df0b053373a 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -300,13 +300,13 @@ struct cxl_mbox_identify { } __packed; struct cxl_mbox_get_lsa { - u32 offset; - u32 length; + __le32 offset; + __le32 length; } __packed; struct cxl_mbox_set_lsa { - u32 offset; - u32 reserved; + __le32 offset; + __le32 reserved; u8 data[]; } __packed; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c310f1fd3db0..a979d0b484d5 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -29,6 +29,7 @@ static int create_endpoint(struct cxl_memdev *cxlmd, { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *endpoint; + int rc; endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, cxlds->component_reg_phys, parent_port); @@ -37,13 +38,17 @@ static int create_endpoint(struct cxl_memdev *cxlmd, dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + if (!endpoint->dev.driver) { dev_err(&cxlmd->dev, "%s failed probe\n", dev_name(&endpoint->dev)); return -ENXIO; } - return cxl_endpoint_autoremove(cxlmd, endpoint); + return 0; } static void enable_suspend(void *data) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index bbeef91e637e..0aaa70b4e0f7 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,8 +108,8 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, return -EINVAL; get_lsa = (struct cxl_mbox_get_lsa) { - .offset = cmd->in_offset, - .length = cmd->in_length, + .offset = cpu_to_le32(cmd->in_offset), + .length = cpu_to_le32(cmd->in_length), }; rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa, @@ -139,7 +139,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, return -ENOMEM; *set_lsa = (struct cxl_mbox_set_lsa) { - .offset = cmd->in_offset, + .offset = cpu_to_le32(cmd->in_offset), }; memcpy(set_lsa->data, cmd->in_buf, cmd->in_length); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index e689101abc93..f7dcc44f9414 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev) } } - max_state = bus->devfreq->profile->max_state; - min_freq = (bus->devfreq->profile->freq_table[0] / 1000); - max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000); + max_state = bus->devfreq->max_state; + min_freq = (bus->devfreq->freq_table[0] / 1000); + max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000); pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n", dev_name(dev), min_freq, max_freq); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 0cce6e4ec946..205acb2c744d 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -343,7 +343,7 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, if (old->context != context) continue; - dma_resv_list_set(list, i, replacement, usage); + dma_resv_list_set(list, i, dma_fence_get(replacement), usage); dma_fence_put(old); } } diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 3e9d726504e2..7b3e6030f7b4 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1900,6 +1900,11 @@ static int at_xdmac_alloc_chan_resources(struct dma_chan *chan) for (i = 0; i < init_nr_desc_per_channel; i++) { desc = at_xdmac_alloc_desc(chan, GFP_KERNEL); if (!desc) { + if (i == 0) { + dev_warn(chan2dev(chan), + "can't allocate any descriptors\n"); + return -EIO; + } dev_warn(chan2dev(chan), "only %d descriptors have been allocated\n", i); break; diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 0a2168a4ccb0..f696246f57fd 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -675,16 +675,10 @@ static int dmatest_func(void *data) /* * src and dst buffers are freed by ourselves below */ - if (params->polled) { + if (params->polled) flags = DMA_CTRL_ACK; - } else { - if (dma_has_cap(DMA_INTERRUPT, dev->cap_mask)) { - flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; - } else { - pr_err("Channel does not support interrupt!\n"); - goto err_pq_array; - } - } + else + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; ktime = ktime_get(); while (!(kthread_should_stop() || @@ -912,7 +906,6 @@ error_unmap_continue: runtime = ktime_to_us(ktime); ret = 0; -err_pq_array: kfree(dma_pq); err_srcs_array: kfree(srcs); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index e9c9bcb1f5c2..c741da02b67e 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -1164,8 +1164,9 @@ static int dma_chan_pause(struct dma_chan *dchan) BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT; axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); } else { - val = BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT | - BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT; + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); + val |= BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT | + BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT; axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); } @@ -1190,12 +1191,13 @@ static inline void axi_chan_resume(struct axi_dma_chan *chan) { u32 val; - val = axi_dma_ioread32(chan->chip, DMAC_CHEN); if (chan->chip->dw->hdata->reg_map_8_channels) { + val = axi_dma_ioread32(chan->chip, DMAC_CHEN); val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT); val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT); axi_dma_iowrite32(chan->chip, DMAC_CHEN, val); } else { + val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG); val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT); val |= (BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT); axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val); diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index ff0ea60051f0..5a8cc52c1abf 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -716,10 +716,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd) struct idxd_wq *wq = idxd->wqs[i]; mutex_lock(&wq->wq_lock); - if (wq->state == IDXD_WQ_ENABLED) { - idxd_wq_disable_cleanup(wq); - wq->state = IDXD_WQ_DISABLED; - } + idxd_wq_disable_cleanup(wq); idxd_wq_device_reset_cleanup(wq); mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 355fb3ef4cbf..aa3478257ddb 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -512,15 +512,16 @@ static int idxd_probe(struct idxd_device *idxd) dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { - if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) + if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { dev_warn(dev, "Unable to turn on user SVA feature.\n"); - else + } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); - if (idxd_enable_system_pasid(idxd)) - dev_warn(dev, "No in-kernel DMA with PASID.\n"); - else - set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + if (idxd_enable_system_pasid(idxd)) + dev_warn(dev, "No in-kernel DMA with PASID.\n"); + else + set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } } else if (!sva) { dev_warn(dev, "User forced SVA off via module param.\n"); } diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 8535018ee7a2..f37a276f519e 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -891,7 +891,7 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac) * SDMA stops cyclic channel when DMA request triggers a channel and no SDMA * owned buffer is available (i.e. BD_DONE was set too late). */ - if (!is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) { + if (sdmac->desc && !is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) { dev_warn(sdmac->sdma->dev, "restart cyclic channel %d\n", sdmac->channel); sdma_enable_channel(sdmac->sdma, sdmac->channel); } @@ -2346,7 +2346,7 @@ MODULE_DESCRIPTION("i.MX SDMA driver"); #if IS_ENABLED(CONFIG_SOC_IMX6Q) MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin"); #endif -#if IS_ENABLED(CONFIG_SOC_IMX7D) +#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M) MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin"); #endif MODULE_LICENSE("GPL"); diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c index efe8bd3a0e2a..9b9184f964be 100644 --- a/drivers/dma/lgm/lgm-dma.c +++ b/drivers/dma/lgm/lgm-dma.c @@ -1593,11 +1593,12 @@ static int intel_ldma_probe(struct platform_device *pdev) d->core_clk = devm_clk_get_optional(dev, NULL); if (IS_ERR(d->core_clk)) return PTR_ERR(d->core_clk); - clk_prepare_enable(d->core_clk); d->rst = devm_reset_control_get_optional(dev, NULL); if (IS_ERR(d->rst)) return PTR_ERR(d->rst); + + clk_prepare_enable(d->core_clk); reset_control_deassert(d->rst); ret = devm_add_action_or_reset(dev, ldma_clk_disable, d); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 858400e42ec0..09915a5cba3e 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2589,7 +2589,7 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) /* If the DMAC pool is empty, alloc new */ if (!desc) { - DEFINE_SPINLOCK(lock); + static DEFINE_SPINLOCK(lock); LIST_HEAD(pool); if (!add_desc(&pool, &lock, GFP_ATOMIC, 1)) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 87f6ca1541cf..2ff787df513e 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -558,14 +558,6 @@ static int bam_alloc_chan(struct dma_chan *chan) return 0; } -static int bam_pm_runtime_get_sync(struct device *dev) -{ - if (pm_runtime_enabled(dev)) - return pm_runtime_get_sync(dev); - - return 0; -} - /** * bam_free_chan - Frees dma resources associated with specific channel * @chan: specified channel @@ -581,7 +573,7 @@ static void bam_free_chan(struct dma_chan *chan) unsigned long flags; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -784,7 +776,7 @@ static int bam_pause(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -810,7 +802,7 @@ static int bam_resume(struct dma_chan *chan) unsigned long flag; int ret; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -919,7 +911,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return IRQ_NONE; @@ -1037,7 +1029,7 @@ static void bam_start_dma(struct bam_chan *bchan) if (!vd) return; - ret = bam_pm_runtime_get_sync(bdev->dev); + ret = pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -1374,11 +1366,6 @@ static int bam_dma_probe(struct platform_device *pdev) if (ret) goto err_unregister_dma; - if (!bdev->bamclk) { - pm_runtime_disable(&pdev->dev); - return 0; - } - pm_runtime_irq_safe(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(&pdev->dev); @@ -1462,10 +1449,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev) { struct bam_device *bdev = dev_get_drvdata(dev); - if (bdev->bamclk) { - pm_runtime_force_suspend(dev); - clk_unprepare(bdev->bamclk); - } + pm_runtime_force_suspend(dev); + clk_unprepare(bdev->bamclk); return 0; } @@ -1475,13 +1460,11 @@ static int __maybe_unused bam_dma_resume(struct device *dev) struct bam_device *bdev = dev_get_drvdata(dev); int ret; - if (bdev->bamclk) { - ret = clk_prepare(bdev->bamclk); - if (ret) - return ret; + ret = clk_prepare(bdev->bamclk); + if (ret) + return ret; - pm_runtime_force_resume(dev); - } + pm_runtime_force_resume(dev); return 0; } diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index 71d24fc07c00..f744ddbbbad7 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -245,6 +245,7 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, if (dma_spec->args[0] >= xbar->xbar_requests) { dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", dma_spec->args[0]); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } @@ -252,12 +253,14 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); if (!dma_spec->np) { dev_err(&pdev->dev, "Can't get DMA master\n"); + put_device(&pdev->dev); return ERR_PTR(-EINVAL); } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } @@ -268,6 +271,8 @@ static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, mutex_unlock(&xbar->mutex); dev_err(&pdev->dev, "Run out of free DMA requests\n"); kfree(map); + of_node_put(dma_spec->np); + put_device(&pdev->dev); return ERR_PTR(-ENOMEM); } set_bit(map->xbar_out, xbar->dma_inuse); diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 23cddb265a0d..9db42f6a2043 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -19,6 +19,7 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_irq.h> +#include <linux/pinctrl/consumer.h> #define VF610_GPIO_PER_PORT 32 diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index f5aa5f93342a..0c9a63becfef 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1460,11 +1460,12 @@ static ssize_t linereq_read(struct file *file, static void linereq_free(struct linereq *lr) { unsigned int i; - bool hte; + bool hte = false; for (i = 0; i < lr->num_lines; i++) { - hte = !!test_bit(FLAG_EVENT_CLOCK_HTE, - &lr->lines[i].desc->flags); + if (lr->lines[i].desc) + hte = !!test_bit(FLAG_EVENT_CLOCK_HTE, + &lr->lines[i].desc->flags); edge_detector_stop(&lr->lines[i], hte); if (lr->lines[i].desc) gpiod_free(lr->lines[i].desc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 17c9bbe0cbc5..4dfd6724b3ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1528,6 +1528,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, stime, etime, mode); } +static bool +amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) +{ + struct drm_device *dev = adev_to_drm(adev); + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper || !fb_helper->buffer) + return false; + + if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj) + return false; + + return true; +} + int amdgpu_display_suspend_helper(struct amdgpu_device *adev) { struct drm_device *dev = adev_to_drm(adev); @@ -1563,10 +1578,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); + if (!amdgpu_display_robj_is_fb(adev, robj)) { + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 576849e95296..108e8e8a1a36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -496,7 +496,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..9c964cd3b5d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..e0ad9f27dc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 982855e6cf52..3caf6f386042 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 84440741c60b..7c75df5bffed 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index b4029c0d5d8c..0ba0598eba20 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9dd2e0601ea8..1c2984bbda51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3822,7 +3822,8 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; /* indicates support for immediate flip */ adev_to_drm(adev)->mode_config.async_page_flip = true; diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index b2675c769a55..4b503c544256 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -74,22 +74,6 @@ static int fsl_ldb_attach(struct drm_bridge *bridge, bridge, flags); } -static int fsl_ldb_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - /* Invert DE signal polarity. */ - bridge_state->input_bus_cfg.flags &= ~(DRM_BUS_FLAG_DE_LOW | - DRM_BUS_FLAG_DE_HIGH); - if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_LOW) - bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_HIGH; - else if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_HIGH) - bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_LOW; - - return 0; -} - static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state) { @@ -153,7 +137,7 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, reg = LDB_CTRL_CH0_ENABLE; if (fsl_ldb->lvds_dual_link) - reg |= LDB_CTRL_CH1_ENABLE; + reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE; if (lvds_format_24bpp) { reg |= LDB_CTRL_CH0_DATA_WIDTH; @@ -233,7 +217,7 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge, { struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge); - if (mode->clock > (fsl_ldb->lvds_dual_link ? 80000 : 160000)) + if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000)) return MODE_CLOCK_HIGH; return MODE_OK; @@ -241,7 +225,6 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge, static const struct drm_bridge_funcs funcs = { .attach = fsl_ldb_attach, - .atomic_check = fsl_ldb_atomic_check, .atomic_enable = fsl_ldb_atomic_enable, .atomic_disable = fsl_ldb_atomic_disable, .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 74bd4a76b253..059fd71424f6 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -329,7 +329,20 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const struct drm_driver *req_driver) { resource_size_t base, size; - int bar, ret = 0; + int bar, ret; + + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ +#if IS_REACHABLE(CONFIG_FB) + ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); + if (ret) + return ret; +#endif + ret = vga_remove_vgacon(pdev); + if (ret) + return ret; for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) { if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) @@ -339,15 +352,6 @@ int drm_aperture_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, drm_aperture_detach_drivers(base, size); } - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ -#if IS_REACHABLE(CONFIG_FB) - ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name); -#endif - if (ret == 0) - ret = vga_remove_vgacon(pdev); - return ret; + return 0; } EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index df87ba99a87c..d4e0f2e85548 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -286,6 +286,21 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Tablet 2 830F / 830L */ + .matches = { + /* + * Note this also matches the Lenovo Yoga Tablet 2 1050F/L + * since that uses the same mainboard. The resolution match + * will limit this to only matching on the 830F/L. Neither has + * any external video outputs so those are not a concern. + */ + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."), + DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"), + DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"), + /* Partial match on beginning of BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 061b277e5ce7..14d2a64193b2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -839,6 +839,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f0d7b57b741e..2ff55b9994bc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0bffb70b3c5f..04d12f278f57 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1637,10 +1637,10 @@ static void force_unbind(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); } -static void release_references(struct i915_vma *vma, bool vm_ddestroy) +static void release_references(struct i915_vma *vma, struct intel_gt *gt, + bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; - struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1695,11 +1695,12 @@ void i915_vma_destroy_locked(struct i915_vma *vma) force_unbind(vma); list_del_init(&vma->vm_link); - release_references(vma, false); + release_references(vma, vma->vm->gt, false); } void i915_vma_destroy(struct i915_vma *vma) { + struct intel_gt *gt; bool vm_ddestroy; mutex_lock(&vma->vm->mutex); @@ -1707,8 +1708,11 @@ void i915_vma_destroy(struct i915_vma *vma) list_del_init(&vma->vm_link); vm_ddestroy = vma->vm_ddestroy; vma->vm_ddestroy = false; + + /* vma->vm may be freed when releasing vma->vm->mutex. */ + gt = vma->vm->gt; mutex_unlock(&vma->vm->mutex); - release_references(vma, vm_ddestroy); + release_references(vma, gt, vm_ddestroy); } void i915_vma_parked(struct intel_gt *gt) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 087e69b98d06..b1e6d238674f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -433,8 +433,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, if (args->retained) { if (args->madv == PANFROST_MADV_DONTNEED) - list_add_tail(&bo->base.madv_list, - &pfdev->shrinker_list); + list_move_tail(&bo->base.madv_list, + &pfdev->shrinker_list); else if (args->madv == PANFROST_MADV_WILLNEED) list_del_init(&bo->base.madv_list); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index d3f82b26a631..b285a8001b1d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -518,7 +518,7 @@ err_map: err_pages: drm_gem_shmem_put_pages(&bo->base); err_bo: - drm_gem_object_put(&bo->base.base); + panfrost_gem_mapping_put(bomapping); return ret; } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 67d38f53d3e5..13ed33e74457 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -23,6 +23,14 @@ #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> +#if defined(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#else +#define arm_iommu_detach_device(...) ({ }) +#define arm_iommu_release_mapping(...) ({ }) +#define to_dma_iommu_mapping(dev) NULL +#endif + #include "rockchip_drm_drv.h" #include "rockchip_drm_fb.h" #include "rockchip_drm_gem.h" @@ -49,6 +57,15 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev, if (!private->domain) return 0; + if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) { + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + if (mapping) { + arm_iommu_detach_device(dev); + arm_iommu_release_mapping(mapping); + } + } + ret = iommu_attach_device(private->domain, dev); if (ret) { DRM_DEV_ERROR(dev, "Failed to attach iommu device\n"); diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 08394444dd6e..f4886e66ff34 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -350,7 +350,7 @@ static int ssd130x_init(struct ssd130x_device *ssd130x) /* Set precharge period in number of ticks from the internal clock */ precharge = (SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep1) | - SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep2)); + SSD130X_SET_PRECHARGE_PERIOD2_SET(ssd130x->prechargep2)); ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_PRECHARGE_PERIOD, precharge); if (ret < 0) return ret; diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index b4c1ad19cdae..3d6f8ee355bf 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1338,6 +1338,7 @@ static int cdns_i2c_probe(struct platform_device *pdev) return 0; err_clk_dis: + clk_notifier_unregister(id->clk, &id->clk_rate_change_nb); clk_disable_unprepare(id->clk); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index ac8e7d60672a..39cb1b7bb865 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -161,7 +161,6 @@ static const char *piix4_aux_port_name_sb800 = " port 1"; struct sb800_mmio_cfg { void __iomem *addr; - struct resource *res; bool use_mmio; }; @@ -179,13 +178,11 @@ static int piix4_sb800_region_request(struct device *dev, struct sb800_mmio_cfg *mmio_cfg) { if (mmio_cfg->use_mmio) { - struct resource *res; void __iomem *addr; - res = request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR, - SB800_PIIX4_FCH_PM_SIZE, - "sb800_piix4_smb"); - if (!res) { + if (!request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE, + "sb800_piix4_smb")) { dev_err(dev, "SMBus base address memory region 0x%x already in use.\n", SB800_PIIX4_FCH_PM_ADDR); @@ -195,12 +192,12 @@ static int piix4_sb800_region_request(struct device *dev, addr = ioremap(SB800_PIIX4_FCH_PM_ADDR, SB800_PIIX4_FCH_PM_SIZE); if (!addr) { - release_resource(res); + release_mem_region(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE); dev_err(dev, "SMBus base address mapping failed.\n"); return -ENOMEM; } - mmio_cfg->res = res; mmio_cfg->addr = addr; return 0; @@ -222,7 +219,8 @@ static void piix4_sb800_region_release(struct device *dev, { if (mmio_cfg->use_mmio) { iounmap(mmio_cfg->addr); - release_resource(mmio_cfg->res); + release_mem_region(SB800_PIIX4_FCH_PM_ADDR, + SB800_PIIX4_FCH_PM_SIZE); return; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 424ef470223d..f5c6802aa6c3 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -47,11 +47,13 @@ #include <linux/tick.h> #include <trace/events/power.h> #include <linux/sched.h> +#include <linux/sched/smt.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/moduleparam.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/nospec-branch.h> #include <asm/mwait.h> #include <asm/msr.h> @@ -106,6 +108,12 @@ static unsigned int mwait_substates __initdata; #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) /* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS BIT(16) + +/* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. @@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, return ret; } +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + bool smt_active = sched_smt_active(); + u64 spec_ctrl = spec_ctrl_current(); + int ret; + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + ret = __intel_idle(dev, drv, index); + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + + return ret; +} + /** * intel_idle_s2idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, @@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C7s", .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, .enter = &intel_idle, @@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C8", .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, @@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C9", .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, .enter = &intel_idle, @@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C10", .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, .enter = &intel_idle, @@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, .enter = &intel_idle, @@ -1819,6 +1845,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) drv->states[drv->state_count].enter = intel_idle_irq; + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { + WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); + drv->states[drv->state_count].enter = intel_idle_ibrs; + } + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 592c1e1a5d4b..9699ca101c62 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -382,7 +382,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, static struct notifier_block dmar_pci_bus_nb = { .notifier_call = dmar_pci_bus_notifier, - .priority = INT_MIN, + .priority = 1, }; static struct dmar_drhd_unit * diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 44016594831d..5c0dce78586a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -320,30 +320,6 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -/* - * Iterate over elements in device_domain_list and call the specified - * callback @fn against each element. - */ -int for_each_device_domain(int (*fn)(struct device_domain_info *info, - void *data), void *data) -{ - int ret = 0; - unsigned long flags; - struct device_domain_info *info; - - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &device_domain_list, global) { - ret = fn(info, data); - if (ret) { - spin_unlock_irqrestore(&device_domain_lock, flags); - return ret; - } - } - spin_unlock_irqrestore(&device_domain_lock, flags); - - return 0; -} - const struct iommu_ops intel_iommu_ops; static bool translation_pre_enabled(struct intel_iommu *iommu) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index cb4c1d0cf25c..17cad7c1f62d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -86,54 +86,6 @@ void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid) /* * Per device pasid table management: */ -static inline void -device_attach_pasid_table(struct device_domain_info *info, - struct pasid_table *pasid_table) -{ - info->pasid_table = pasid_table; - list_add(&info->table, &pasid_table->dev); -} - -static inline void -device_detach_pasid_table(struct device_domain_info *info, - struct pasid_table *pasid_table) -{ - info->pasid_table = NULL; - list_del(&info->table); -} - -struct pasid_table_opaque { - struct pasid_table **pasid_table; - int segment; - int bus; - int devfn; -}; - -static int search_pasid_table(struct device_domain_info *info, void *opaque) -{ - struct pasid_table_opaque *data = opaque; - - if (info->iommu->segment == data->segment && - info->bus == data->bus && - info->devfn == data->devfn && - info->pasid_table) { - *data->pasid_table = info->pasid_table; - return 1; - } - - return 0; -} - -static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque) -{ - struct pasid_table_opaque *data = opaque; - - data->segment = pci_domain_nr(pdev->bus); - data->bus = PCI_BUS_NUM(alias); - data->devfn = alias & 0xff; - - return for_each_device_domain(&search_pasid_table, data); -} /* * Allocate a pasid table for @dev. It should be called in a @@ -143,28 +95,18 @@ int intel_pasid_alloc_table(struct device *dev) { struct device_domain_info *info; struct pasid_table *pasid_table; - struct pasid_table_opaque data; struct page *pages; u32 max_pasid = 0; - int ret, order; - int size; + int order, size; might_sleep(); info = dev_iommu_priv_get(dev); if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) return -EINVAL; - /* DMA alias device already has a pasid table, use it: */ - data.pasid_table = &pasid_table; - ret = pci_for_each_dma_alias(to_pci_dev(dev), - &get_alias_pasid_table, &data); - if (ret) - goto attach_out; - pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) return -ENOMEM; - INIT_LIST_HEAD(&pasid_table->dev); if (info->pasid_supported) max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)), @@ -182,9 +124,7 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->table = page_address(pages); pasid_table->order = order; pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); - -attach_out: - device_attach_pasid_table(info, pasid_table); + info->pasid_table = pasid_table; return 0; } @@ -202,10 +142,7 @@ void intel_pasid_free_table(struct device *dev) return; pasid_table = info->pasid_table; - device_detach_pasid_table(info, pasid_table); - - if (!list_empty(&pasid_table->dev)) - return; + info->pasid_table = NULL; /* Free scalable mode PASID directory tables: */ dir = pasid_table->table; diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 583ea67fc783..bf5b937848b4 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -74,7 +74,6 @@ struct pasid_table { void *table; /* pasid table pointer */ int order; /* page order of pasid table */ u32 max_pasid; /* max pasid */ - struct list_head dev; /* device list */ }; /* Get PRESENT bit of a PASID directory entry. */ diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 1f23a6be7d88..bbb11cb8b0f7 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -298,7 +298,7 @@ config XTENSA_MX config XILINX_INTC bool "Xilinx Interrupt Controller IP" - depends on OF + depends on OF_ADDRESS select IRQ_DOMAIN help Support for the Xilinx Interrupt Controller IP core. diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 5ac83185ff47..1c2813ad8bbe 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -228,7 +228,7 @@ #define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS #define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT -DEFINE_STATIC_KEY_TRUE(use_fast_ipi); +static DEFINE_STATIC_KEY_TRUE(use_fast_ipi); struct aic_info { int version; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5c1cf907ee68..2d25bca63d2a 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2042,15 +2042,40 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) vgic_set_kvm_info(&gic_v3_kvm_info); } +static void gic_request_region(resource_size_t base, resource_size_t size, + const char *name) +{ + if (!request_mem_region(base, size, name)) + pr_warn_once(FW_BUG "%s region %pa has overlapping address\n", + name, &base); +} + +static void __iomem *gic_of_iomap(struct device_node *node, int idx, + const char *name, struct resource *res) +{ + void __iomem *base; + int ret; + + ret = of_address_to_resource(node, idx, res); + if (ret) + return IOMEM_ERR_PTR(ret); + + gic_request_region(res->start, resource_size(res), name); + base = of_iomap(node, idx); + + return base ?: IOMEM_ERR_PTR(-ENOMEM); +} + static int __init gic_of_init(struct device_node *node, struct device_node *parent) { void __iomem *dist_base; struct redist_region *rdist_regs; + struct resource res; u64 redist_stride; u32 nr_redist_regions; int err, i; - dist_base = of_io_request_and_map(node, 0, "GICD"); + dist_base = gic_of_iomap(node, 0, "GICD", &res); if (IS_ERR(dist_base)) { pr_err("%pOF: unable to map gic dist registers\n", node); return PTR_ERR(dist_base); @@ -2073,12 +2098,8 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare } for (i = 0; i < nr_redist_regions; i++) { - struct resource res; - int ret; - - ret = of_address_to_resource(node, 1 + i, &res); - rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR"); - if (ret || IS_ERR(rdist_regs[i].redist_base)) { + rdist_regs[i].redist_base = gic_of_iomap(node, 1 + i, "GICR", &res); + if (IS_ERR(rdist_regs[i].redist_base)) { pr_err("%pOF: couldn't map region %d\n", node, i); err = -ENODEV; goto out_unmap_rdist; @@ -2151,7 +2172,7 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header, pr_err("Couldn't map GICR region @%llx\n", redist->base_address); return -ENOMEM; } - request_mem_region(redist->base_address, redist->length, "GICR"); + gic_request_region(redist->base_address, redist->length, "GICR"); gic_acpi_register_redist(redist->base_address, redist_base); return 0; @@ -2174,7 +2195,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) return -ENOMEM; - request_mem_region(gicc->gicr_base_address, size, "GICR"); + gic_request_region(gicc->gicr_base_address, size, "GICR"); gic_acpi_register_redist(gicc->gicr_base_address, redist_base); return 0; @@ -2376,7 +2397,7 @@ gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end) pr_err("Unable to map GICD registers\n"); return -ENOMEM; } - request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD"); + gic_request_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD"); err = gic_validate_dist_version(acpi_data.dist_base); if (err) { diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c index 1ef9b61077c4..f150d8769f19 100644 --- a/drivers/misc/cardreader/rtsx_usb.c +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -631,16 +631,20 @@ static int rtsx_usb_probe(struct usb_interface *intf, ucr->pusb_dev = usb_dev; - ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, - GFP_KERNEL, &ucr->iobuf_dma); - if (!ucr->iobuf) + ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->cmd_buf) return -ENOMEM; + ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL); + if (!ucr->rsp_buf) { + ret = -ENOMEM; + goto out_free_cmd_buf; + } + usb_set_intfdata(intf, ucr); ucr->vendor_id = id->idVendor; ucr->product_id = id->idProduct; - ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; mutex_init(&ucr->dev_mutex); @@ -668,8 +672,11 @@ static int rtsx_usb_probe(struct usb_interface *intf, out_init_fail: usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; +out_free_cmd_buf: + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; return ret; } @@ -682,8 +689,12 @@ static void rtsx_usb_disconnect(struct usb_interface *intf) mfd_remove_devices(&intf->dev); usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); + + kfree(ucr->cmd_buf); + ucr->cmd_buf = NULL; + + kfree(ucr->rsp_buf); + ucr->rsp_buf = NULL; } #ifdef CONFIG_PM diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index c9c56fd194c1..bdffc6543f6f 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -80,10 +80,9 @@ static int at25_ee_read(void *priv, unsigned int offset, struct at25_data *at25 = priv; char *buf = val; size_t max_chunk = spi_max_transfer_size(at25->spi); - size_t num_msgs = DIV_ROUND_UP(count, max_chunk); - size_t nr_bytes = 0; - unsigned int msg_offset; - size_t msg_count; + unsigned int msg_offset = offset; + size_t bytes_left = count; + size_t segment; u8 *cp; ssize_t status; struct spi_transfer t[2]; @@ -97,9 +96,8 @@ static int at25_ee_read(void *priv, unsigned int offset, if (unlikely(!count)) return -EINVAL; - msg_offset = (unsigned int)offset; - msg_count = min(count, max_chunk); - while (num_msgs) { + do { + segment = min(bytes_left, max_chunk); cp = at25->command; instr = AT25_READ; @@ -131,8 +129,8 @@ static int at25_ee_read(void *priv, unsigned int offset, t[0].len = at25->addrlen + 1; spi_message_add_tail(&t[0], &m); - t[1].rx_buf = buf + nr_bytes; - t[1].len = msg_count; + t[1].rx_buf = buf; + t[1].len = segment; spi_message_add_tail(&t[1], &m); status = spi_sync(at25->spi, &m); @@ -142,10 +140,10 @@ static int at25_ee_read(void *priv, unsigned int offset, if (status) return status; - --num_msgs; - msg_offset += msg_count; - nr_bytes += msg_count; - } + msg_offset += segment; + buf += segment; + bytes_left -= segment; + } while (bytes_left > 0); dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n", count, offset); @@ -229,7 +227,7 @@ static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count) do { unsigned long timeout, retries; unsigned segment; - unsigned offset = (unsigned) off; + unsigned offset = off; u8 *cp = bounce; int sr; u8 instr; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 831833911a52..8647125d60ae 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -379,7 +379,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev) } } -static int aq_suspend_common(struct device *dev, bool deep) +static int aq_suspend_common(struct device *dev) { struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev)); @@ -392,17 +392,15 @@ static int aq_suspend_common(struct device *dev, bool deep) if (netif_running(nic->ndev)) aq_nic_stop(nic); - if (deep) { - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - aq_nic_set_power(nic); - } + aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); + aq_nic_set_power(nic); rtnl_unlock(); return 0; } -static int atl_resume_common(struct device *dev, bool deep) +static int atl_resume_common(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; @@ -415,11 +413,6 @@ static int atl_resume_common(struct device *dev, bool deep) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (deep) { - /* Reinitialize Nic/Vecs objects */ - aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); - } - if (netif_running(nic->ndev)) { ret = aq_nic_init(nic); if (ret) @@ -444,22 +437,22 @@ err_exit: static int aq_pm_freeze(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_suspend_poweroff(struct device *dev) { - return aq_suspend_common(dev, true); + return aq_suspend_common(dev); } static int aq_pm_thaw(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static int aq_pm_resume_restore(struct device *dev) { - return atl_resume_common(dev, true); + return atl_resume_common(dev); } static const struct dev_pm_ops aq_pm_ops = { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c74b2e4250ad..ba0f1ffac507 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7785,7 +7785,7 @@ hwrm_dbg_qcaps_exit: static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp); -static int bnxt_hwrm_func_qcaps(struct bnxt *bp) +int bnxt_hwrm_func_qcaps(struct bnxt *bp) { int rc; @@ -10060,7 +10060,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; - if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) + if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE || + test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) fw_reset = true; else bnxt_remap_fw_health_regs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a1dca8c58f54..075c6206325c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2314,6 +2314,7 @@ int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); +int bnxt_hwrm_func_qcaps(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3528ce9849e6..6b3d4f4c2a75 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -979,9 +979,11 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); - if (rc) - return rc; + if (BNXT_CHIP_P5(bp)) { + rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); + if (rc) + return rc; + } return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 562f8f68a47d..7f3c0875b6f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -76,14 +76,23 @@ static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts, u64 *ns) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 high_before, high_now, low; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return -EIO; + high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); ptp_read_system_prets(sts); - *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); ptp_read_system_postts(sts); - *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32; + high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]); + if (high_now != high_before) { + ptp_read_system_prets(sts); + low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]); + ptp_read_system_postts(sts); + } + *ns = ((u64)high_now << 32) | low; + return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index c4ed43604ddc..730febd19330 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -823,8 +823,10 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs) goto err_out2; rc = pci_enable_sriov(bp->pdev, *num_vfs); - if (rc) + if (rc) { + bnxt_ulp_sriov_cfg(bp, 0); goto err_out2; + } return 0; @@ -832,6 +834,9 @@ err_out2: /* Free the resources reserved for various VF's */ bnxt_hwrm_func_vf_resource_free(bp, *num_vfs); + /* Restore the max resources */ + bnxt_hwrm_func_qcaps(bp); + err_out1: bnxt_free_vf_resources(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f02fe906dedb..f53387ed0167 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -28,7 +28,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct xdp_buff *xdp) { struct skb_shared_info *sinfo; - struct bnxt_sw_tx_bd *tx_buf, *first_buf; + struct bnxt_sw_tx_bd *tx_buf; struct tx_bd *txbd; int num_frags = 0; u32 flags; @@ -43,13 +43,14 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, /* fill up the first buffer */ prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - first_buf = tx_buf; tx_buf->nr_frags = num_frags; if (xdp) tx_buf->page = virt_to_head_page(xdp->data); txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; - flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT); + flags = (len << TX_BD_LEN_SHIFT) | + ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) | + bnxt_lhint_arr[len >> 9]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); txbd->tx_bd_opaque = prod; txbd->tx_bd_haddr = cpu_to_le64(mapping); @@ -82,7 +83,6 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, flags = frag_len << TX_BD_LEN_SHIFT; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); - txbd->tx_bd_opaque = prod; txbd->tx_bd_haddr = cpu_to_le64(frag_mapping); len = frag_len; @@ -96,7 +96,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, prod = NEXT_TX(prod); txr->tx_prod = prod; - return first_buf; + return tx_buf; } static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc5..7c760aa65540 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk, th_ecn = tcph->ece && tcph->cwr; if (th_ecn) { ect = !INET_ECN_is_not_ect(ip_dsfield); - ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; + ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn); if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk)) inet_rsk(oreq)->ecn_ok = 1; } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 5231818943c6..c03663785a8d 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1764,6 +1764,19 @@ cleanup_clk: return rc; } +static bool ftgmac100_has_child_node(struct device_node *np, const char *name) +{ + struct device_node *child_np = of_get_child_by_name(np, name); + bool ret = false; + + if (child_np) { + ret = true; + of_node_put(child_np); + } + + return ret; +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1883,7 +1896,7 @@ static int ftgmac100_probe(struct platform_device *pdev) /* Display what we found */ phy_attached_info(phy); - } else if (np && !of_get_child_by_name(np, "mdio")) { + } else if (np && !ftgmac100_has_child_node(np, "mdio")) { /* Support legacy ASPEED devicetree descriptions that decribe a * MAC with an embedded MDIO controller but have no "mdio" * child node. Automatically scan the MDIO bus for available diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 61dd2f18dee8..b41bc3dc1745 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -5,6 +5,7 @@ #define _ICE_DEVIDS_H_ /* Device IDs */ +#define ICE_DEV_ID_E822_SI_DFLT 0x1888 /* Intel(R) Ethernet Connection E823-L for backplane */ #define ICE_DEV_ID_E823L_BACKPLANE 0x124C /* Intel(R) Ethernet Connection E823-L for SFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 3991d62473bf..3337314a7b35 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -814,6 +814,8 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) devlink_port_unregister(devlink_port); } +#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance @@ -840,8 +842,9 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - void *nvm_data; - u32 nvm_size; + u8 *nvm_data, *tmp, i; + u32 nvm_size, left; + s8 num_blks; int status; nvm_size = hw->flash.flash_size; @@ -849,26 +852,44 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, if (!nvm_data) return -ENOMEM; - status = ice_acquire_nvm(hw, ICE_RES_READ); - if (status) { - dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(nvm_data); - return status; - } - status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false); - if (status) { - dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - nvm_size, status, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); + tmp = nvm_data; + left = nvm_size; + + /* Some systems take longer to read the NVM than others which causes the + * FW to reclaim the NVM lock before the entire NVM has been read. Fix + * this by breaking the reads of the NVM into smaller chunks that will + * probably not take as long. This has some overhead since we are + * increasing the number of AQ commands, but it should always work + */ + for (i = 0; i < num_blks; i++) { + u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left); + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { + dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", + status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); + vfree(nvm_data); + return -EIO; + } + + status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, + &read_sz, tmp, false); + if (status) { + dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", + read_sz, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); + ice_release_nvm(hw); + vfree(nvm_data); + return -EIO; + } ice_release_nvm(hw); - vfree(nvm_data); - return status; - } - ice_release_nvm(hw); + tmp += read_sz; + left -= read_sz; + } *data = nvm_data; diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 665a344fb9c0..3dc5662d62a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -736,7 +736,87 @@ static int ice_finalize_update(struct pldmfw *context) return 0; } -static const struct pldmfw_ops ice_fwu_ops = { +struct ice_pldm_pci_record_id { + u32 vendor; + u32 device; + u32 subsystem_vendor; + u32 subsystem_device; +}; + +/** + * ice_op_pci_match_record - Check if a PCI device matches the record + * @context: PLDM fw update structure + * @record: list of records extracted from the PLDM image + * + * Determine if the PCI device associated with this device matches the record + * data provided. + * + * Searches the descriptor TLVs and extracts the relevant descriptor data into + * a pldm_pci_record_id. This is then compared against the PCI device ID + * information. + * + * Returns: true if the device matches the record, false otherwise. + */ +static bool +ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pci_dev *pdev = to_pci_dev(context->dev); + struct ice_pldm_pci_record_id id = { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .subsystem_vendor = PCI_ANY_ID, + .subsystem_device = PCI_ANY_ID, + }; + struct pldmfw_desc_tlv *desc; + + list_for_each_entry(desc, &record->descs, entry) { + u16 value; + int *ptr; + + switch (desc->type) { + case PLDM_DESC_ID_PCI_VENDOR_ID: + ptr = &id.vendor; + break; + case PLDM_DESC_ID_PCI_DEVICE_ID: + ptr = &id.device; + break; + case PLDM_DESC_ID_PCI_SUBVENDOR_ID: + ptr = &id.subsystem_vendor; + break; + case PLDM_DESC_ID_PCI_SUBDEV_ID: + ptr = &id.subsystem_device; + break; + default: + /* Skip unrelated TLVs */ + continue; + } + + value = get_unaligned_le16(desc->data); + /* A value of zero for one of the descriptors is sometimes + * used when the record should ignore this field when matching + * device. For example if the record applies to any subsystem + * device or vendor. + */ + if (value) + *ptr = value; + else + *ptr = PCI_ANY_ID; + } + + /* the E822 device can have a generic device ID so check for that */ + if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) && + (id.device == PCI_ANY_ID || id.device == pdev->device || + id.device == ICE_DEV_ID_E822_SI_DFLT) && + (id.subsystem_vendor == PCI_ANY_ID || + id.subsystem_vendor == pdev->subsystem_vendor) && + (id.subsystem_device == PCI_ANY_ID || + id.subsystem_device == pdev->subsystem_device)) + return true; + + return false; +} + +static const struct pldmfw_ops ice_fwu_ops_e810 = { .match_record = &pldmfw_op_pci_match_record, .send_package_data = &ice_send_package_data, .send_component_table = &ice_send_component_table, @@ -744,6 +824,14 @@ static const struct pldmfw_ops ice_fwu_ops = { .finalize_update = &ice_finalize_update, }; +static const struct pldmfw_ops ice_fwu_ops_e822 = { + .match_record = &ice_op_pci_match_record, + .send_package_data = &ice_send_package_data, + .send_component_table = &ice_send_component_table, + .flash_component = &ice_flash_component, + .finalize_update = &ice_finalize_update, +}; + /** * ice_get_pending_updates - Check if the component has a pending update * @pf: the PF driver structure @@ -921,7 +1009,11 @@ int ice_devlink_flash_update(struct devlink *devlink, memset(&priv, 0, sizeof(priv)); - priv.context.ops = &ice_fwu_ops; + /* the E822 device needs a slightly different ops */ + if (hw->mac_type == ICE_MAC_GENERIC) + priv.context.ops = &ice_fwu_ops_e822; + else + priv.context.ops = &ice_fwu_ops_e810; priv.context.dev = dev; priv.extack = extack; priv.pf = pf; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c1ac2f746714..ff2eac2f8c64 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5413,6 +5413,7 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 }, /* required last entry */ { 0, } }; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 3754d8aec76d..3c8116f16b4d 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -588,6 +588,7 @@ err_router_lib_init: void prestera_router_fini(struct prestera_switch *sw) { + unregister_fib_notifier(&init_net, &sw->router->fib_nb); unregister_inetaddr_notifier(&sw->router->inetaddr_nb); unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb); rhashtable_destroy(&sw->router->kern_fib_cache_ht); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index af959fadbecf..a6d84ff3a0e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -76,6 +76,7 @@ struct mlx5_tc_ct_priv { struct mlx5_ct_fs *fs; struct mlx5_ct_fs_ops *fs_ops; spinlock_t ht_lock; /* protects ft entries */ + struct workqueue_struct *wq; struct mlx5_tc_ct_debugfs debugfs; }; @@ -941,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work) static void __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&entry->refcnt)) return; - priv = netdev_priv(entry->ct_priv->netdev); INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); - queue_work(priv->wq, &entry->work); + queue_work(entry->ct_priv->wq, &entry->work); } static struct mlx5_ct_counter * @@ -1759,19 +1757,16 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { - struct mlx5e_priv *priv; - if (!refcount_dec_and_test(&ft->refcount)) return; + flush_workqueue(ct_priv->wq); nf_flow_table_offload_del_cb(ft->nf_ft, mlx5_tc_ct_block_flow_offload, ft); rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params); rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); - priv = netdev_priv(ct_priv->netdev); - flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); @@ -2176,6 +2171,12 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params)) goto err_ct_tuples_nat_ht; + ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0); + if (!ct_priv->wq) { + err = -ENOMEM; + goto err_wq; + } + err = mlx5_tc_ct_fs_init(ct_priv); if (err) goto err_init_fs; @@ -2184,6 +2185,8 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, return ct_priv; err_init_fs: + destroy_workqueue(ct_priv->wq); +err_wq: rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); err_ct_tuples_nat_ht: rhashtable_destroy(&ct_priv->ct_tuples_ht); @@ -2213,6 +2216,7 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) if (!ct_priv) return; + destroy_workqueue(ct_priv->wq); mlx5_ct_tc_remove_dbgfs(ct_priv); chains = ct_priv->chains; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 0bb0633b7542..27483aa7be8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -231,8 +231,7 @@ mlx5e_set_ktls_rx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_rx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) > - TLS_OFFLOAD_CONTEXT_SIZE_RX); + BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX); *ctx = priv_rx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index cc5cb3010e64..fba21edf88d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -68,8 +68,7 @@ mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx, struct mlx5e_ktls_offload_context_tx **ctx = __tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX); - BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) > - TLS_OFFLOAD_CONTEXT_SIZE_TX); + BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX); *ctx = priv_tx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 57fa0489eeb8..1e87bb2b7541 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -688,7 +688,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env) u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; struct mlx5_core_dev *mdev = priv->mdev; - if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard)) + if (!mlx5e_stats_grp_vnic_env_num_stats(priv)) return; MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5e70e99aa1f4..2e12280a936f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3855,7 +3855,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv, static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - if (mlx5e_eswitch_uplink_rep(out_dev) && + if (same_hw_reps(priv, out_dev) && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 64d78fd99c6e..699d3a9886bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -341,6 +341,26 @@ static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) } } +static void mlx5e_tx_flush(struct mlx5e_txqsq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct mlx5e_tx_wqe *wqe; + u16 pi; + + /* Must not be called when a MPWQE session is active but empty. */ + mlx5e_tx_mpwqe_ensure_complete(sq); + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + wi = &sq->db.wqe_info[pi]; + + *wi = (struct mlx5e_tx_wqe_info) { + .num_wqebbs = 1, + }; + + wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); +} + static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, const struct mlx5e_tx_attr *attr, @@ -459,6 +479,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr) @@ -560,6 +581,13 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_ctrl_seg *cseg; struct mlx5e_xmit_data txd; + txd.data = skb->data; + txd.len = skb->len; + + txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) + goto err_unmap; + if (!mlx5e_tx_mpwqe_session_is_active(sq)) { mlx5e_tx_mpwqe_session_start(sq, eseg); } else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) { @@ -569,18 +597,9 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - txd.data = skb->data; - txd.len = skb->len; - - txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr))) - goto err_unmap; mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); - mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); - mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { @@ -602,6 +621,7 @@ err_unmap: mlx5e_dma_unmap_wqe_err(sq, 1); sq->stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) @@ -1006,5 +1026,6 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, err_drop: stats->dropped++; dev_kfree_skb_any(skb); + mlx5e_tx_flush(sq); } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 9d17206d1625..fabe49a35a5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "fs_ft_pool.h" #include "esw/qos.h" enum { @@ -95,8 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.max_fte = POOL_NEXT_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { @@ -105,6 +105,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) goto out; } esw->fdb_table.legacy.fdb = fdb; + table_size = fdb->max_fte; /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index 15e41dc84d53..b8feaf0f5c4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -44,7 +44,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv) ldev = dev->priv.lag; mutex_lock(&ldev->lock); if (__mlx5_lag_is_active(ldev)) - mode = mlx5_get_str_port_sel_mode(ldev); + mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); else ret = -EINVAL; mutex_unlock(&ldev->lock); @@ -72,6 +72,7 @@ static int state_show(struct seq_file *file, void *priv) static int flags_show(struct seq_file *file, void *priv) { struct mlx5_core_dev *dev = file->private; + bool fdb_sel_mode_native; struct mlx5_lag *ldev; bool shared_fdb; bool lag_active; @@ -79,14 +80,21 @@ static int flags_show(struct seq_file *file, void *priv) ldev = dev->priv.lag; mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); - if (lag_active) - shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + if (!lag_active) + goto unlock; + + shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &ldev->mode_flags); +unlock: mutex_unlock(&ldev->lock); if (!lag_active) return -EINVAL; seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off"); + seq_printf(file, "%s:%s\n", "fdb_selection_mode", + fdb_sel_mode_native ? "native" : "affinity"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 641505d2c0c2..0f34e3c80d1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -68,14 +68,15 @@ static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, unsigned long flags) { - bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); + bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, + &flags); int port_sel_mode = get_port_sel_mode(mode, flags); u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx; lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); - MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) { MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); @@ -471,8 +472,13 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool roce_lag = mode == MLX5_LAG_MODE_ROCE; *flags = 0; - if (shared_fdb) + if (shared_fdb) { set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); + } + + if (mode == MLX5_LAG_MODE_MPESW) + set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); if (roce_lag) return mlx5_lag_set_port_sel_mode_roce(ldev, flags); @@ -481,9 +487,9 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, return 0; } -char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev) +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) { - int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags); + int port_sel_mode = get_port_sel_mode(mode, flags); switch (port_sel_mode) { case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; @@ -507,7 +513,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", - shared_fdb, mlx5_get_str_port_sel_mode(ldev)); + shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index c81b173156d2..ce2ce8ccbd70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -24,6 +24,7 @@ enum { enum { MLX5_LAG_MODE_FLAG_HASH_BASED, MLX5_LAG_MODE_FLAG_SHARED_FDB, + MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, }; enum mlx5_lag_mode { @@ -114,7 +115,7 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev); int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); -char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev); +char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, u8 *ports, int *num_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index ee4b25a50315..f643202b29c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -41,7 +41,6 @@ void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = dev->priv.lag; - bool shared_fdb; int err = 0; if (!ldev) @@ -55,8 +54,8 @@ int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) err = -EINVAL; goto out; } - shared_fdb = mlx5_shared_fdb_supported(ldev); - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, shared_fdb); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); if (err) mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 083fddd263ec..8e3894cf5f7c 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -94,19 +94,18 @@ static void ocelot_fdma_activate_chan(struct ocelot *ocelot, dma_addr_t dma, ocelot_fdma_writel(ocelot, MSCC_FDMA_CH_ACTIVATE, BIT(chan)); } +static u32 ocelot_fdma_read_ch_safe(struct ocelot *ocelot) +{ + return ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); +} + static int ocelot_fdma_wait_chan_safe(struct ocelot *ocelot, int chan) { - unsigned long timeout; u32 safe; - timeout = jiffies + usecs_to_jiffies(OCELOT_FDMA_CH_SAFE_TIMEOUT_US); - do { - safe = ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE); - if (safe & BIT(chan)) - return 0; - } while (time_after(jiffies, timeout)); - - return -ETIMEDOUT; + return readx_poll_timeout_atomic(ocelot_fdma_read_ch_safe, ocelot, safe, + safe & BIT(chan), 0, + OCELOT_FDMA_CH_SAFE_TIMEOUT_US); } static void ocelot_fdma_dcb_set_data(struct ocelot_fdma_dcb *dcb, diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 0af5541c6eaf..52f67157bd0f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -447,7 +447,8 @@ void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, static void nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, - void *flow, struct neighbour *neigh, bool is_ipv6) + void *flow, struct neighbour *neigh, bool is_ipv6, + bool override) { bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) : @@ -546,6 +547,13 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, if (nn_entry->flow) list_del(&nn_entry->list_head); kfree(nn_entry); + } else if (nn_entry && !neigh_invalid && override) { + mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); } spin_unlock_bh(&priv->predt_lock); @@ -610,7 +618,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, dst_release(dst); } - nfp_tun_write_neigh(n->dev, app, &flow6, n, true); + nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); #else return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ @@ -633,7 +641,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, ip_rt_put(rt); } - nfp_tun_write_neigh(n->dev, app, &flow4, n, false); + nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } #else return NOTIFY_DONE; @@ -676,7 +684,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, false); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); rcu_read_unlock(); return; @@ -718,7 +726,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh(n->dev, app, &flow, n, true); + nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); rcu_read_unlock(); return; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index fa1361de86e1..2b427d8ccb2f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -113,17 +113,18 @@ nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, static int nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, - unsigned int nr_frags, struct sk_buff *skb) + struct sk_buff *skb) { unsigned int n_descs, wr_p, nop_slots; const skb_frag_t *frag, *fend; struct nfp_nfdk_tx_desc *txd; + unsigned int nr_frags; unsigned int wr_idx; int err; recount_descs: n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); - + nr_frags = skb_shinfo(skb)->nr_frags; frag = skb_shinfo(skb)->frags; fend = frag + nr_frags; for (; frag < fend; frag++) @@ -266,10 +267,13 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) if (unlikely((int)metadata < 0)) goto err_flush; - nr_frags = skb_shinfo(skb)->nr_frags; - if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_flush; + /* nr_frags will change after skb_linearize so we get nr_frags after + * nfp_nfdk_tx_maybe_close_block function + */ + nr_frags = skb_shinfo(skb)->nr_frags; /* DMA map all */ wr_idx = D_IDX(tx_ring, tx_ring->wr_p); txd = &tx_ring->ktxds[wr_idx]; @@ -295,7 +299,16 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + + /* We will do our best to pass as much data as we can in descriptor + * and we need to make sure the first descriptor includes whole head + * since there is limitation in firmware side. Sometimes the value of + * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than + * headlen. + */ + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -921,7 +934,9 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, /* FIELD_PREP() implicitly truncates to chunk */ dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); @@ -1301,7 +1316,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, skb_push(skb, 4)); } - if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb)) + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) goto err_free; /* DMA map all */ @@ -1326,7 +1341,9 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf++; dma_len -= 1; - dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) | + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); txd->dma_len_type = cpu_to_le16(dlen_type); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index a99c3a6b912c..ab979fd11133 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1932,7 +1932,10 @@ static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx) efx_update_sw_stats(efx, stats); out: + /* releasing a DMA coherent buffer with BH disabled can panic */ + spin_unlock_bh(&efx->stats_lock); efx_nic_free_buffer(efx, &stats_buf); + spin_lock_bh(&efx->stats_lock); return rc; } diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 7f5aa4a8c451..92550c7e85ce 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -408,8 +408,9 @@ fail1: static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) { struct pci_dev *dev = efx->pci_dev; + struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int vfs_assigned = pci_vfs_assigned(dev); - int rc = 0; + int i, rc = 0; if (vfs_assigned && !force) { netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; " @@ -417,10 +418,13 @@ static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force) return -EBUSY; } - if (!vfs_assigned) + if (!vfs_assigned) { + for (i = 0; i < efx->vf_count; i++) + nic_data->vf[i].pci_dev = NULL; pci_disable_sriov(dev); - else + } else { rc = -EBUSY; + } efx_ef10_sriov_free_vf_vswitching(efx); efx->vf_count = 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index bc91fd867dcd..358fc26f8d1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -361,6 +361,7 @@ bypass_clk_reset_gpio: data->fix_mac_speed = tegra_eqos_fix_speed; data->init = tegra_eqos_init; data->bsp_priv = eqos; + data->sph_disable = 1; err = tegra_eqos_init(pdev, eqos); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 9a6d819b84ae..378b4dd826bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -273,7 +273,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->tx_delay = tx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } @@ -283,7 +284,8 @@ static int ingenic_mac_probe(struct platform_device *pdev) mac->rx_delay = rx_delay_ps * 1000; } else { dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps); - return -EINVAL; + ret = -EINVAL; + goto err_remove_config_dt; } } diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 77e5dffb558f..8594ee839628 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -545,43 +545,24 @@ static int try_next_permutation(struct happy_meal *hp, void __iomem *tregs) static void display_link_mode(struct happy_meal *hp, void __iomem *tregs) { - printk(KERN_INFO "%s: Link is up using ", hp->dev->name); - if (hp->tcvr_type == external) - printk("external "); - else - printk("internal "); - printk("transceiver at "); hp->sw_lpa = happy_meal_tcvr_read(hp, tregs, MII_LPA); - if (hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) { - if (hp->sw_lpa & LPA_100FULL) - printk("100Mb/s, Full Duplex.\n"); - else - printk("100Mb/s, Half Duplex.\n"); - } else { - if (hp->sw_lpa & LPA_10FULL) - printk("10Mb/s, Full Duplex.\n"); - else - printk("10Mb/s, Half Duplex.\n"); - } + + netdev_info(hp->dev, + "Link is up using %s transceiver at %dMb/s, %s Duplex.\n", + hp->tcvr_type == external ? "external" : "internal", + hp->sw_lpa & (LPA_100HALF | LPA_100FULL) ? 100 : 10, + hp->sw_lpa & (LPA_100FULL | LPA_10FULL) ? "Full" : "Half"); } static void display_forced_link_mode(struct happy_meal *hp, void __iomem *tregs) { - printk(KERN_INFO "%s: Link has been forced up using ", hp->dev->name); - if (hp->tcvr_type == external) - printk("external "); - else - printk("internal "); - printk("transceiver at "); hp->sw_bmcr = happy_meal_tcvr_read(hp, tregs, MII_BMCR); - if (hp->sw_bmcr & BMCR_SPEED100) - printk("100Mb/s, "); - else - printk("10Mb/s, "); - if (hp->sw_bmcr & BMCR_FULLDPLX) - printk("Full Duplex.\n"); - else - printk("Half Duplex.\n"); + + netdev_info(hp->dev, + "Link has been forced up using %s transceiver at %dMb/s, %s Duplex.\n", + hp->tcvr_type == external ? "external" : "internal", + hp->sw_bmcr & BMCR_SPEED100 ? 100 : 10, + hp->sw_bmcr & BMCR_FULLDPLX ? "Full" : "Half"); } static int set_happy_link_modes(struct happy_meal *hp, void __iomem *tregs) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index fb92d4c1547d..f4a6b590a1e3 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2467,7 +2467,6 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port->port_id, ret); goto dl_port_unreg; } - devlink_port_type_eth_set(dl_port, port->ndev); } devlink_register(common->devlink); return ret; @@ -2511,6 +2510,7 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; + struct devlink_port *dl_port; struct am65_cpsw_port *port; int ret = 0, i; @@ -2527,6 +2527,10 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) return ret; } + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + return ret; + for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; @@ -2539,25 +2543,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) i, ret); goto err_cleanup_ndev; } + + dl_port = &port->devlink_port; + devlink_port_type_eth_set(dl_port, port->ndev); } ret = am65_cpsw_register_notifiers(common); if (ret) goto err_cleanup_ndev; - ret = am65_cpsw_nuss_register_devlink(common); - if (ret) - goto clean_unregister_notifiers; - /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ return 0; -clean_unregister_notifiers: - am65_cpsw_unregister_notifiers(common); + err_cleanup_ndev: am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_unregister_devlink(common); return ret; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7389d6ef8569..b082819509e1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2156,7 +2156,7 @@ static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb) } static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, - struct sk_buff *skb, u32 len, u32 transport_offset) + struct sk_buff *skb, u32 len) { u32 mss = skb_shinfo(skb)->gso_size; u32 opts1, opts2 = 0; @@ -2167,6 +2167,8 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, opts1 = len | TX_FS | TX_LS; if (mss) { + u32 transport_offset = (u32)skb_transport_offset(skb); + if (transport_offset > GTTCPHO_MAX) { netif_warn(tp, tx_err, tp->netdev, "Invalid transport offset 0x%x for TSO\n", @@ -2197,6 +2199,7 @@ static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, opts1 |= transport_offset << GTTCPHO_SHIFT; opts2 |= min(mss, MSS_MAX) << MSS_SHIFT; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + u32 transport_offset = (u32)skb_transport_offset(skb); u8 ip_protocol; if (transport_offset > TCPHO_MAX) { @@ -2260,7 +2263,6 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) struct tx_desc *tx_desc; struct sk_buff *skb; unsigned int len; - u32 offset; skb = __skb_dequeue(&skb_head); if (!skb) @@ -2276,9 +2278,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) tx_data = tx_agg_align(tx_data); tx_desc = (struct tx_desc *)tx_data; - offset = (u32)skb_transport_offset(skb); - - if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) { + if (r8152_tx_csum(tp, tx_desc, skb, skb->len)) { r8152_csum_workaround(tp, skb, &skb_head); continue; } @@ -2759,9 +2759,9 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, { u32 mss = skb_shinfo(skb)->gso_size; int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX; - int offset = skb_transport_offset(skb); - if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) + if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && + skb_transport_offset(skb) > max_offset) features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index f2d5e07dc148..5d9437ea92cf 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -3822,7 +3822,8 @@ ath11k_wmi_obss_color_collision_event(struct ath11k_base *ab, struct sk_buff *sk switch (ev->evt_type) { case WMI_BSS_COLOR_COLLISION_DETECTION: - ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap); + ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap, + GFP_KERNEL); ath11k_dbg(ab, ATH11K_DBG_WMI, "OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n", ev->vdev_id, ev->evt_type, ev->obss_color_bitmap); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 91440b0dc0cc..d51a30f7e1c2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -5017,6 +5017,8 @@ static int hwsim_virtio_probe(struct virtio_device *vdev) if (err) return err; + virtio_device_ready(vdev); + err = fill_vq(hwsim_vqs[HWSIM_VQ_RX]); if (err) goto out_remove; diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index c02be4ac159e..7db627fc26be 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1233,9 +1233,6 @@ struct rtw_chip_info { const struct wiphy_wowlan_support *wowlan_stub; const u8 max_sched_scan_ssids; - /* for 8821c set channel */ - u32 ch_param[3]; - /* coex paras */ u32 coex_para_ver; u8 bt_desired_ver; @@ -1937,6 +1934,9 @@ struct rtw_hal { enum rtw_sar_bands sar_band; struct rtw_sar sar; + + /* for 8821c set channel */ + u32 ch_param[3]; }; struct rtw_path_div { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 42841f5d502c..025262a8970e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -125,6 +125,7 @@ static void rtw8821c_phy_bf_init(struct rtw_dev *rtwdev) static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev) { + struct rtw_hal *hal = &rtwdev->hal; u8 crystal_cap, val; /* power on BB/RF domain */ @@ -159,9 +160,9 @@ static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev) /* post init after header files config */ rtw_write32_set(rtwdev, REG_RXPSEL, BIT_RX_PSEL_RST); - rtwdev->chip->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD); - rtwdev->chip->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD); - rtwdev->chip->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD); + hal->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD); + hal->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD); + hal->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD); rtw_phy_init(rtwdev); rtwdev->dm_info.cck_pd_default = rtw_read8(rtwdev, REG_CSRATIO) & 0x1f; @@ -351,6 +352,7 @@ static void rtw8821c_set_channel_rxdfir(struct rtw_dev *rtwdev, u8 bw) static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw, u8 primary_ch_idx) { + struct rtw_hal *hal = &rtwdev->hal; u32 val32; if (channel <= 14) { @@ -367,11 +369,11 @@ static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw, rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, 0x00003667); } else { rtw_write32_mask(rtwdev, REG_TXSF2, MASKDWORD, - rtwdev->chip->ch_param[0]); + hal->ch_param[0]); rtw_write32_mask(rtwdev, REG_TXSF6, MASKLWORD, - rtwdev->chip->ch_param[1] & MASKLWORD); + hal->ch_param[1] & MASKLWORD); rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, - rtwdev->chip->ch_param[2]); + hal->ch_param[2]); } } else if (channel > 35) { rtw_write32_mask(rtwdev, REG_ENTXCCK, BIT(18), 0x1); diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 8df2c736fd23..932762177110 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -495,6 +495,7 @@ static void xenvif_rx_action(struct xenvif_queue *queue) queue->rx_copy.completed = &completed_skbs; while (xenvif_rx_ring_slots_available(queue) && + !skb_queue_empty(&queue->rx_queue) && work_done < RX_BATCH_SIZE) { xenvif_rx_skb(queue); work_done++; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e7af2234e53b..193b44755662 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3465,7 +3465,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index b5f85259461a..37c7f4c89f92 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -69,7 +69,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->metadata = !!blk_integrity_rq(req); __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->q->disk); - memcpy(__entry->cdw10, &cmd->common.cdw10, + memcpy(__entry->cdw10, &cmd->common.cdws, sizeof(__entry->cdw10)); ), TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)", diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index 33844526c797..02fdef7a16c8 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -2632,7 +2632,7 @@ static void hfa384x_usbctlx_reaper_task(struct work_struct *work) */ static void hfa384x_usbctlx_completion_task(struct work_struct *work) { - struct hfa384x *hw = container_of(work, struct hfa384x, reaper_bh); + struct hfa384x *hw = container_of(work, struct hfa384x, completion_bh); struct hfa384x_usbctlx *ctlx, *temp; unsigned long flags; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..e60b06f2ac22 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -549,6 +549,16 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) if (!iommu_group) return ERR_PTR(-EINVAL); + /* + * VFIO always sets IOMMU_CACHE because we offer no way for userspace to + * restore cache coherency. It has to be checked here because it is only + * valid for cases where we are using iommu groups. + */ + if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) { + iommu_group_put(iommu_group); + return ERR_PTR(-EINVAL); + } + group = vfio_group_get_from_iommu(iommu_group); if (!group) group = vfio_create_group(iommu_group, VFIO_IOMMU); @@ -601,13 +611,6 @@ static int __vfio_register_dev(struct vfio_device *device, int vfio_register_group_dev(struct vfio_device *device) { - /* - * VFIO always sets IOMMU_CACHE because we offer no way for userspace to - * restore cache coherency. - */ - if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY)) - return -EINVAL; - return __vfio_register_dev(device, vfio_group_find_or_alloc(device->dev)); } diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index c4e91715ef00..1a9aa12cf886 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2469,6 +2469,11 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, if (charcount != 256 && charcount != 512) return -EINVAL; + /* font bigger than screen resolution ? */ + if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) || + h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) + return -EINVAL; + /* Make sure drawing engine can handle the font */ if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || !(info->pixmap.blit_y & (1 << (font->height - 1)))) @@ -2731,6 +2736,34 @@ void fbcon_update_vcs(struct fb_info *info, bool all) } EXPORT_SYMBOL(fbcon_update_vcs); +/* let fbcon check if it supports a new screen resolution */ +int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + unsigned int i; + + WARN_CONSOLE_UNLOCKED(); + + if (!ops) + return 0; + + /* prevent setting a screen size which is smaller than font size */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + fbcon_info_from_console(i) != info) + continue; + + if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) || + vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres)) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fbcon_modechange_possible); + int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 8afc4538558c..7ee6eb2fa715 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -511,7 +511,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, while (n && (n * (logo->width + 8) - 8 > xres)) --n; - image.dx = (xres - n * (logo->width + 8) - 8) / 2; + image.dx = (xres - (n * (logo->width + 8) - 8)) / 2; image.dy = y ?: (yres - logo->height) / 2; } else { image.dx = 0; @@ -1017,6 +1017,16 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (ret) return ret; + /* verify that virtual resolution >= physical resolution */ + if (var->xres_virtual < var->xres || + var->yres_virtual < var->yres) { + pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n", + info->fix.id, + var->xres_virtual, var->yres_virtual, + var->xres, var->yres); + return -EINVAL; + } + if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW) return 0; @@ -1107,7 +1117,9 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EFAULT; console_lock(); lock_fb_info(info); - ret = fb_set_var(info, &var); + ret = fbcon_modechange_possible(info, &var); + if (!ret) + ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 05e0c4a5affd..0172f75e051a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7681,7 +7681,19 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || em->block_start == EXTENT_MAP_INLINE) { free_extent_map(em); - ret = -ENOTBLK; + /* + * If we are in a NOWAIT context, return -EAGAIN in order to + * fallback to buffered IO. This is not only because we can + * block with buffered IO (no support for NOWAIT semantics at + * the moment) but also to avoid returning short reads to user + * space - this happens if we were able to read some data from + * previous non-compressed extents and then when we fallback to + * buffered IO, at btrfs_file_read_iter() by calling + * filemap_read(), we fail to fault in pages for the read buffer, + * in which case filemap_read() returns a short read (the number + * of bytes previously read is > 0, so it does not return -EFAULT). + */ + ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK; goto unlock_err; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 79e8c8cd75ed..d99026df6f67 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1735,12 +1735,14 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &mapped_length, &bioc); if (ret || !bioc || mapped_length < PAGE_SIZE) { - btrfs_put_bioc(bioc); - return -EIO; + ret = -EIO; + goto out_put_bioc; } - if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) - return -EINVAL; + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + ret = -EINVAL; + goto out_put_bioc; + } nofs_flag = memalloc_nofs_save(); nmirrors = (int)bioc->num_stripes; @@ -1759,7 +1761,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, break; } memalloc_nofs_restore(nofs_flag); - +out_put_bioc: + btrfs_put_bioc(bioc); return ret; } @@ -1885,7 +1888,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ { struct btrfs_fs_info *fs_info = block_group->fs_info; struct map_lookup *map; - bool need_zone_finish; int ret = 0; int i; @@ -1942,12 +1944,6 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ } } - /* - * The block group is not fully allocated, so not fully written yet. We - * need to send ZONE_FINISH command to free up an active zone. - */ - need_zone_finish = !btrfs_zoned_bg_is_full(block_group); - block_group->zone_is_active = 0; block_group->alloc_offset = block_group->zone_capacity; block_group->free_space_ctl->free_space = 0; @@ -1963,15 +1959,13 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ if (device->zone_info->max_active_zones == 0) continue; - if (need_zone_finish) { - ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, - physical >> SECTOR_SHIFT, - device->zone_info->zone_size >> SECTOR_SHIFT, - GFP_NOFS); + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + device->zone_info->zone_size >> SECTOR_SHIFT, + GFP_NOFS); - if (ret) - return ret; - } + if (ret) + return ret; btrfs_dev_clear_active_zone(device, physical); } diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index a41ae6efc545..1fee702d5529 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -21,7 +21,8 @@ static int cachefiles_ondemand_fd_release(struct inode *inode, * anon_fd. */ xas_for_each(&xas, req, ULONG_MAX) { - if (req->msg.opcode == CACHEFILES_OP_READ) { + if (req->msg.object_id == object_id && + req->msg.opcode == CACHEFILES_OP_READ) { req->error = -EIO; complete(&req->done); xas_store(&xas, NULL); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fa29c9aae24b..386bb523c69e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1918,7 +1918,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses) list_del_init(&ses->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - spin_lock(&ses->chan_lock); chan_count = ses->chan_count; /* close any extra channels */ @@ -1934,7 +1933,6 @@ void cifs_put_smb_ses(struct cifs_ses *ses) ses->chans[i].server = NULL; } } - spin_unlock(&ses->chan_lock); sesInfoFree(ses); cifs_put_tcp_session(server, 0); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index b85718f32b53..02c8b2906196 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -474,6 +474,14 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, out: if (rc && chan->server) { + /* + * we should avoid race with these delayed works before we + * remove this channel + */ + cancel_delayed_work_sync(&chan->server->echo); + cancel_delayed_work_sync(&chan->server->resolve); + cancel_delayed_work_sync(&chan->server->reconnect); + spin_lock(&ses->chan_lock); /* we rely on all bits beyond chan_count to be clear */ cifs_chan_clear_need_reconnect(ses, chan->server); @@ -484,10 +492,9 @@ out: */ WARN_ON(ses->chan_count < 1); spin_unlock(&ses->chan_lock); - } - if (rc && chan->server) cifs_put_tcp_session(chan->server, 0); + } return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 12b4dddaedb0..c705de32e225 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -571,10 +571,6 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, *total_len += ctxt_len; pneg_ctxt += ctxt_len; - build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); - *total_len += sizeof(struct smb2_posix_neg_context); - pneg_ctxt += sizeof(struct smb2_posix_neg_context); - /* * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead @@ -586,9 +582,14 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, hostname); *total_len += ctxt_len; pneg_ctxt += ctxt_len; - neg_context_count = 4; - } else /* second channels do not have a hostname */ neg_context_count = 3; + } else + neg_context_count = 2; + + build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); + *total_len += sizeof(struct smb2_posix_neg_context); + pneg_ctxt += sizeof(struct smb2_posix_neg_context); + neg_context_count++; if (server->compress_algorithm) { build_compression_ctxt((struct smb2_compression_capabilities_context *) diff --git a/fs/exec.c b/fs/exec.c index 0989fb8472a1..778123259e42 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1301,7 +1301,7 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS - exit_itimers(me->signal); + exit_itimers(me); flush_itimer_signals(); #endif diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 9d3cf0111709..74920826d8f6 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -372,17 +372,22 @@ nomem: return NULL; } +static inline bool fscache_cookie_is_dropped(struct fscache_cookie *cookie) +{ + return READ_ONCE(cookie->state) == FSCACHE_COOKIE_STATE_DROPPED; +} + static void fscache_wait_on_collision(struct fscache_cookie *candidate, struct fscache_cookie *wait_for) { enum fscache_cookie_state *statep = &wait_for->state; - wait_var_event_timeout(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED, + wait_var_event_timeout(statep, fscache_cookie_is_dropped(wait_for), 20 * HZ); - if (READ_ONCE(*statep) != FSCACHE_COOKIE_STATE_DROPPED) { + if (!fscache_cookie_is_dropped(wait_for)) { pr_notice("Potential collision c=%08x old: c=%08x", candidate->debug_id, wait_for->debug_id); - wait_var_event(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED); + wait_var_event(statep, fscache_cookie_is_dropped(wait_for)); } } @@ -517,7 +522,14 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie) } fscache_see_cookie(cookie, fscache_cookie_see_active); - fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE); + spin_lock(&cookie->lock); + if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) + __fscache_set_cookie_state(cookie, + FSCACHE_COOKIE_STATE_INVALIDATING); + else + __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE); + spin_unlock(&cookie->lock); + wake_up_cookie_state(cookie); trace = fscache_access_lookup_cookie_end; out: @@ -752,6 +764,9 @@ again_locked: spin_lock(&cookie->lock); } + if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) + fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end); + switch (state) { case FSCACHE_COOKIE_STATE_RELINQUISHING: fscache_see_cookie(cookie, fscache_cookie_see_relinquish); @@ -1048,6 +1063,9 @@ void __fscache_invalidate(struct fscache_cookie *cookie, return; case FSCACHE_COOKIE_STATE_LOOKING_UP: + __fscache_begin_cookie_access(cookie, fscache_access_invalidate_cookie); + set_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags); + fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); _leave(" [look %x]", cookie->inval_counter); diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index f2aa7dbad766..a058e0136bfe 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -143,7 +143,7 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate, { wait_var_event_timeout(&candidate->flags, !fscache_is_acquire_pending(candidate), 20 * HZ); - if (!fscache_is_acquire_pending(candidate)) { + if (fscache_is_acquire_pending(candidate)) { pr_notice("Potential volume collision new=%08x old=%08x", candidate->debug_id, collidee_debug_id); fscache_stat(&fscache_n_volumes_collision); @@ -182,7 +182,7 @@ static bool fscache_hash_volume(struct fscache_volume *candidate) hlist_bl_add_head(&candidate->hash_link, h); hlist_bl_unlock(h); - if (test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags)) + if (fscache_is_acquire_pending(candidate)) fscache_wait_on_volume_collision(candidate, collidee_debug_id); return true; diff --git a/fs/io_uring.c b/fs/io_uring.c index 0d491ad15b66..a01ea49f3017 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5066,7 +5066,7 @@ static int io_uring_cmd_prep(struct io_kiocb *req, { struct io_uring_cmd *ioucmd = &req->uring_cmd; - if (sqe->rw_flags) + if (sqe->rw_flags || sqe->__pad1) return -EINVAL; ioucmd->cmd = sqe->cmd; ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); @@ -7973,6 +7973,9 @@ static int io_files_update_with_index_alloc(struct io_kiocb *req, struct file *file; int ret, fd; + if (!req->ctx->file_data) + return -ENXIO; + for (done = 0; done < req->rsrc_update.nr_args; done++) { if (copy_from_user(&fd, &fds[done], sizeof(fd))) { ret = -EFAULT; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 0a22a2faf552..e1c4617de771 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -176,7 +176,7 @@ nlm_delete_file(struct nlm_file *file) } } -static int nlm_unlock_files(struct nlm_file *file) +static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner) { struct file_lock lock; @@ -184,6 +184,7 @@ static int nlm_unlock_files(struct nlm_file *file) lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; + lock.fl_owner = owner; if (file->f_file[O_RDONLY] && vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) goto out_err; @@ -225,7 +226,7 @@ again: if (match(lockhost, host)) { spin_unlock(&flctx->flc_lock); - if (nlm_unlock_files(file)) + if (nlm_unlock_files(file, fl->fl_owner)) return 1; goto again; } @@ -282,11 +283,10 @@ nlm_file_inuse(struct nlm_file *file) static void nlm_close_files(struct nlm_file *file) { - struct file *f; - - for (f = file->f_file[0]; f <= file->f_file[1]; f++) - if (f) - nlmsvc_ops->fclose(f); + if (file->f_file[O_RDONLY]) + nlmsvc_ops->fclose(file->f_file[O_RDONLY]); + if (file->f_file[O_WRONLY]) + nlmsvc_ops->fclose(file->f_file[O_WRONLY]); } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 61b2aae81abb..2acea7792bb2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -470,6 +470,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, return nfserr_bad_xdr; } } + if (bmval[1] & FATTR4_WORD1_TIME_CREATE) { + struct timespec64 ts; + + /* No Linux filesystem supports setting this attribute. */ + bmval[1] &= ~FATTR4_WORD1_TIME_CREATE; + status = nfsd4_decode_nfstime4(argp, &ts); + if (status) + return status; + } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { u32 set_it; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847b482155ae..9a8b09afc173 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -465,7 +465,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \ + | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ FATTR4_WORD2_SECURITY_LABEL diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 1344f7d475d3..aecda4fc95f5 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -198,6 +198,9 @@ static inline int nilfs_acl_chmod(struct inode *inode) static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) { + if (S_ISLNK(inode->i_mode)) + return 0; + inode->i_mode &= ~current_umask(); return 0; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e0a2e0468ee7..1ce5c9698393 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1003,6 +1003,9 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { + if (!IS_POSIXACL(inode)) + return -EOPNOTSUPP; + return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } @@ -1018,6 +1021,9 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, struct posix_acl *acl = NULL; int err; + if (!IS_POSIXACL(inode)) + return -EOPNOTSUPP; + /* Check that everything is OK before copy-up */ if (value) { acl = posix_acl_from_xattr(&init_user_ns, value, size); @@ -1960,6 +1966,20 @@ static struct dentry *ovl_get_root(struct super_block *sb, return root; } +static bool ovl_has_idmapped_layers(struct ovl_fs *ofs) +{ + + unsigned int i; + const struct vfsmount *mnt; + + for (i = 0; i < ofs->numlayer; i++) { + mnt = ofs->layers[i].mnt; + if (mnt && is_idmapped_mnt(mnt)) + return true; + } + return false; +} + static int ovl_fill_super(struct super_block *sb, void *data, int silent) { struct path upperpath = { }; @@ -2129,7 +2149,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : ovl_trusted_xattr_handlers; sb->s_fs_info = ofs; - sb->s_flags |= SB_POSIXACL; + if (ovl_has_idmapped_layers(ofs)) + pr_warn("POSIX ACLs are not yet supported with idmapped layers, mounting without ACL support.\n"); + else + sb->s_flags |= SB_POSIXACL; sb->s_iflags |= SB_I_SKIP_SYNC; err = -ENOMEM; diff --git a/fs/remap_range.c b/fs/remap_range.c index e112b5424cdb..5e0d97e02f96 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -71,7 +71,8 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in, * Otherwise, make sure the count is also block-aligned, having * already confirmed the starting offsets' block alignment. */ - if (pos_in + count == size_in) { + if (pos_in + count == size_in && + (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) { bcount = ALIGN(size_in, bs) - pos_in; } else { if (!IS_ALIGNED(count, bs)) @@ -546,7 +547,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) else if (deduped < 0) info->status = deduped; else - info->bytes_deduped = len; + info->bytes_deduped = deduped; next_fdput: fdput(dst_fd); diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index c6108581d97d..d389bab54241 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -145,6 +145,7 @@ extern bool cppc_allow_fast_switch(void); extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data); extern unsigned int cppc_get_transition_latency(int cpu); extern bool cpc_ffh_supported(void); +extern bool cpc_supported_by_cpu(void); extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val); extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val); #else /* !CONFIG_ACPI_CPPC_LIB */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f82a5bc6d98..44975c1bbe12 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -584,7 +584,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; extern bool osc_sb_native_usb4_support_confirmed; -extern bool osc_sb_cppc_not_supported; +extern bool osc_sb_cppc2_support_acked; extern bool osc_cpc_flexible_adr_space_confirmed; /* USB4 Capabilities */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..d4427d0a0e18 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -264,7 +264,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2c7477354744..314802f98b9d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, extern ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index ff5596dd30f8..2382dec6d6ab 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -15,6 +15,8 @@ void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); void fbcon_fb_blanked(struct fb_info *info, int blank); +int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var); void fbcon_update_vcs(struct fb_info *info, bool all); void fbcon_remap_all(struct fb_info *info); int fbcon_set_con2fb_map_ioctl(void __user *argp); @@ -33,6 +35,8 @@ static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {} +static inline int fbcon_modechange_possible(struct fb_info *info, + struct fb_var_screeninfo *var) { return 0; } static inline void fbcon_update_vcs(struct fb_info *info, bool all) {} static inline void fbcon_remap_all(struct fb_info *info) {} static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 72585c9729a2..b86265664879 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -130,6 +130,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_DO_PREP_TO_WRITE 12 /* T if cookie needs write preparation */ #define FSCACHE_COOKIE_HAVE_DATA 13 /* T if this cookie has data stored */ #define FSCACHE_COOKIE_IS_HASHED 14 /* T if this cookie is hashed */ +#define FSCACHE_COOKIE_DO_INVALIDATE 15 /* T if cookie needs invalidation */ enum fscache_cookie_state state; u8 advice; /* FSCACHE_ADV_* */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3af34de54330..56d6a0196534 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -149,19 +149,19 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they - * can be used in a manner similar to the following: + * can be used in a manner similar to the following:: * - * -- Find the page of interest. -- - * struct page *page = find_get_page(mapping, offset); + * // Find the page of interest. + * struct page *page = find_get_page(mapping, offset); * - * -- Gain access to the contents of that page. -- - * void *vaddr = kmap_atomic(page); + * // Gain access to the contents of that page. + * void *vaddr = kmap_atomic(page); * - * -- Do something to the contents of that page. -- - * memset(vaddr, 0, PAGE_SIZE); + * // Do something to the contents of that page. + * memset(vaddr, 0, PAGE_SIZE); * - * -- Unmap that page. -- - * kunmap_atomic(vaddr); + * // Unmap that page. + * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4f29139bbfc3..5fcf89faa31a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -612,7 +612,6 @@ struct intel_iommu { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ - struct list_head table; /* link to pasid table */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ @@ -729,8 +728,6 @@ extern int dmar_ir_support(void); void *alloc_pgtable_page(int node); void free_pgtable_page(void *vaddr); struct intel_iommu *domain_get_iommu(struct dmar_domain *domain); -int for_each_device_domain(int (*fn)(struct device_domain_info *info, - void *data), void *data); void iommu_flush_write_buffer(struct intel_iommu *iommu); int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ce6536f1d269..475683cd67f1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c20f2d55840c..83cf7fd842e0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1513,7 +1513,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } diff --git a/include/linux/memregion.h b/include/linux/memregion.h index e11595256cac..c04c4fd2e209 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -16,7 +16,7 @@ static inline int memregion_alloc(gfp_t gfp) { return -ENOMEM; } -void memregion_free(int id) +static inline void memregion_free(int id) { } #endif diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e3934003f239..07cfc922f8e4 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -906,12 +906,14 @@ struct nvme_common_command { __le32 cdw2[2]; __le64 metadata; union nvme_data_ptr dptr; + struct_group(cdws, __le32 cdw10; __le32 cdw11; __le32 cdw12; __le32 cdw13; __le32 cdw14; __le32 cdw15; + ); }; struct nvme_rw_command { diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 15b940ec1eac..10bc88cc3bf6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -32,11 +32,16 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_OBJTOOL @@ -124,7 +129,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -177,7 +182,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 9e4d056967c6..0a41b2dcccad 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -88,7 +88,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); -extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); +extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); @@ -314,8 +314,7 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} -static inline void pm_runtime_release_supplier(struct device_link *link, - bool check_idle) {} +static inline void pm_runtime_release_supplier(struct device_link *link) {} #endif /* !CONFIG_PM */ diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 159729cffd8e..3247ed8e9ff0 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -54,8 +54,6 @@ struct rtsx_ucr { struct usb_device *pusb_dev; struct usb_interface *pusb_intf; struct usb_sg_request current_sg; - unsigned char *iobuf; - dma_addr_t iobuf_dma; struct timer_list sg_timer; struct mutex dev_mutex; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 505aaf9fe477..81cab4b01edc 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -85,7 +85,7 @@ static inline void exit_thread(struct task_struct *tsk) extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 140354f5f15b..3bcdd20ace66 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8639,11 +8639,12 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of + * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, + return cfg80211_bss_color_notify(dev, gfp, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 256b9215e17b..1c005a30313f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7016,10 +7016,11 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. + * @gfp: allocation flags */ void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap); + u64 color_bitmap, gfp_t gfp); /** * ieee80211_is_tx_data - check if frame is a data frame diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5c4e5a96a984..64cf655c818c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -657,18 +657,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; tmpl->len += nft_set_ext_types[id].len + len; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, diff --git a/include/net/raw.h b/include/net/raw.h index d224376360e1..5e665934ebc7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -83,7 +83,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/sock.h b/include/net/sock.h index 0dd43c3df49b..f7ad1a7705e9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1544,7 +1544,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - return sk->sk_prot->sysctl_mem[index]; + return READ_ONCE(sk->sk_prot->sysctl_mem[index]); } static inline int sk_mem_pages(int amt) diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index e282ce02fa2d..6d1626e7a4ce 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -160,7 +160,7 @@ TRACE_EVENT(iocost_ioc_vrate_adj, TP_fast_assign( __assign_str(devname, ioc_name(ioc)); - __entry->old_vrate = atomic64_read(&ioc->vtime_rate);; + __entry->old_vrate = atomic64_read(&ioc->vtime_rate); __entry->new_vrate = new_vrate; __entry->busy_level = ioc->busy_level; __entry->read_missed_ppm = missed_ppm[READ]; diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 12c315782766..777ee6cbe933 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 379e68fb866f..3dd13fe738b9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5226,22 +5226,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. * * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index f10b59d6693e..0ad3da28d2fc 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -22,7 +22,10 @@ struct io_uring_sqe { union { __u64 off; /* offset into file */ __u64 addr2; - __u32 cmd_op; + struct { + __u32 cmd_op; + __u32 __pad1; + }; }; union { __u64 addr; /* pointer to buffer or iovecs */ diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h index e1126a74882a..eff166fdd81b 100644 --- a/include/video/of_display_timing.h +++ b/include/video/of_display_timing.h @@ -8,6 +8,8 @@ #ifndef __LINUX_OF_DISPLAY_TIMING_H #define __LINUX_OF_DISPLAY_TIMING_H +#include <linux/errno.h> + struct device_node; struct display_timing; struct display_timings; diff --git a/ipc/namespace.c b/ipc/namespace.c index 754f3237194a..e1fcaedba4fa 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -64,7 +64,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, goto fail_put; if (!setup_ipc_sysctls(ns)) - goto fail_put; + goto fail_mq; sem_init_ns(ns); msg_init_ns(ns); @@ -72,6 +72,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, return ns; +fail_mq: + retire_mq_sysctls(ns); + fail_put: put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 805c2ad5c793..bfeb9b937315 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -68,11 +68,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns { u8 *ptr = NULL; - if (k >= SKF_NET_OFF) + if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; - else if (k >= SKF_LL_OFF) + } else if (k >= SKF_LL_OFF) { + if (unlikely(!skb_mac_header_was_set(skb))) + return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; - + } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a1c84d256f83..1f961f9982d2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1497,11 +1497,12 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, }; -BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, + u32, offset, u64, flags) { int err; - if (!src->data) + if (!src->data || flags) return -EINVAL; err = bpf_dynptr_check_off_len(src, offset, len); @@ -1521,13 +1522,15 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = { .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_PTR_TO_DYNPTR, .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, + u32, len, u64, flags) { int err; - if (!dst->data || bpf_dynptr_is_rdonly(dst)) + if (!dst->data || flags || bpf_dynptr_is_rdonly(dst)) return -EINVAL; err = bpf_dynptr_check_off_len(dst, offset, len); @@ -1547,6 +1550,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = { .arg2_type = ARG_ANYTHING, .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg4_type = ARG_CONST_SIZE_OR_ZERO, + .arg5_type = ARG_ANYTHING, }; BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 1779ccddb734..13c8e91d7862 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -765,7 +765,8 @@ struct css_set init_css_set = { .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), - .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node), + .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), + .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* @@ -1240,7 +1241,8 @@ static struct css_set *find_css_set(struct css_set *old_cset, INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); - INIT_LIST_HEAD(&cset->mg_preload_node); + INIT_LIST_HEAD(&cset->mg_src_preload_node); + INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in @@ -2597,21 +2599,27 @@ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { - LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); - list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_src_preload_node) { + cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; + cset->mg_dst_cset = NULL; + list_del_init(&cset->mg_src_preload_node); + put_css_set_locked(cset); + } - list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { + list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, + mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; - list_del_init(&cset->mg_preload_node); + list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } @@ -2651,7 +2659,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; - if (!list_empty(&src_cset->mg_preload_node)) + if (!list_empty(&src_cset->mg_src_preload_node)) return; src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); @@ -2664,7 +2672,7 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); + list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** @@ -2689,7 +2697,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, - mg_preload_node) { + mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; @@ -2708,7 +2716,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; - list_del_init(&src_cset->mg_preload_node); + list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; @@ -2716,8 +2724,8 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) src_cset->mg_dst_cset = dst_cset; - if (list_empty(&dst_cset->mg_preload_node)) - list_add_tail(&dst_cset->mg_preload_node, + if (list_empty(&dst_cset->mg_dst_preload_node)) + list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); @@ -2963,7 +2971,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, + mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ diff --git a/kernel/exit.c b/kernel/exit.c index f072959fcab7..64c938ce36fe 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -766,7 +766,7 @@ void __noreturn do_exit(long code) #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); - exit_itimers(tsk->signal); + exit_itimers(tsk); #endif if (tsk->mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 145321a5e798..f9261c07b048 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -29,6 +29,15 @@ #include <linux/vmalloc.h> #include "kexec_internal.h" +#ifdef CONFIG_KEXEC_SIG +static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE); + +void set_kexec_sig_enforced(void) +{ + sig_enforce = true; +} +#endif + static int kexec_calculate_store_digests(struct kimage *image); /* @@ -159,7 +168,7 @@ kimage_validate_signature(struct kimage *image) image->kernel_buf_len); if (ret) { - if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { + if (sig_enforce) { pr_notice("Enforced kernel signature verification failed (%d).\n", ret); return ret; } diff --git a/kernel/module/internal.h b/kernel/module/internal.h index bc5507ab8450..ec104c2950c3 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -11,6 +11,7 @@ #include <linux/mutex.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/mm.h> #ifndef ARCH_SHF_SMALL #define ARCH_SHF_SMALL 0 @@ -30,11 +31,13 @@ * to ensure complete separation of code and data, but * only when CONFIG_STRICT_MODULE_RWX=y */ -#ifdef CONFIG_STRICT_MODULE_RWX -# define strict_align(X) PAGE_ALIGN(X) -#else -# define strict_align(X) (X) -#endif +static inline unsigned int strict_align(unsigned int size) +{ + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + return PAGE_ALIGN(size); + else + return size; +} extern struct mutex module_mutex; extern struct list_head modules; diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index 3e11523bc6f6..77e75bead569 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -137,6 +137,7 @@ void layout_symtab(struct module *mod, struct load_info *info) info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1); info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym); mod->data_layout.size += strtab_size; + /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */ info->core_typeoffs = mod->data_layout.size; mod->data_layout.size += ndst * sizeof(char); mod->data_layout.size = strict_align(mod->data_layout.size); @@ -169,19 +170,20 @@ void add_kallsyms(struct module *mod, const struct load_info *info) Elf_Sym *dst; char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; + unsigned long strtab_size; /* Set up to point into init section. */ mod->kallsyms = (void __rcu *)mod->init_layout.base + info->mod_kallsyms_init_off; - preempt_disable(); + rcu_read_lock(); /* The following is safe since this pointer cannot change */ - rcu_dereference_sched(mod->kallsyms)->symtab = (void *)symsec->sh_addr; - rcu_dereference_sched(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr; + rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - rcu_dereference_sched(mod->kallsyms)->strtab = + rcu_dereference(mod->kallsyms)->strtab = (void *)info->sechdrs[info->index.str].sh_addr; - rcu_dereference_sched(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs; + rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs; /* * Now populate the cut down core kallsyms for after init @@ -190,22 +192,29 @@ void add_kallsyms(struct module *mod, const struct load_info *info) mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs; mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs; mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs; - src = rcu_dereference_sched(mod->kallsyms)->symtab; - for (ndst = i = 0; i < rcu_dereference_sched(mod->kallsyms)->num_symtab; i++) { - rcu_dereference_sched(mod->kallsyms)->typetab[i] = elf_type(src + i, info); + strtab_size = info->core_typeoffs - info->stroffs; + src = rcu_dereference(mod->kallsyms)->symtab; + for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) { + rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info); if (i == 0 || is_livepatch_module(mod) || is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum, info->index.pcpu)) { + ssize_t ret; + mod->core_kallsyms.typetab[ndst] = - rcu_dereference_sched(mod->kallsyms)->typetab[i]; + rcu_dereference(mod->kallsyms)->typetab[i]; dst[ndst] = src[i]; dst[ndst++].st_name = s - mod->core_kallsyms.strtab; - s += strscpy(s, - &rcu_dereference_sched(mod->kallsyms)->strtab[src[i].st_name], - KSYM_NAME_LEN) + 1; + ret = strscpy(s, + &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name], + strtab_size); + if (ret < 0) + break; + s += ret + 1; + strtab_size -= ret + 1; } } - preempt_enable(); + rcu_read_unlock(); mod->core_kallsyms.num_symtab = ndst; } diff --git a/kernel/module/main.c b/kernel/module/main.c index fed58d30725d..0548151dd933 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2939,24 +2939,25 @@ static void cfi_init(struct module *mod) { #ifdef CONFIG_CFI_CLANG initcall_t *init; +#ifdef CONFIG_MODULE_UNLOAD exitcall_t *exit; +#endif rcu_read_lock_sched(); mod->cfi_check = (cfi_check_fn) find_kallsyms_symbol_value(mod, "__cfi_check"); init = (initcall_t *) find_kallsyms_symbol_value(mod, "__cfi_jt_init_module"); - exit = (exitcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); - rcu_read_unlock_sched(); - /* Fix init/exit functions to point to the CFI jump table */ if (init) mod->init = *init; #ifdef CONFIG_MODULE_UNLOAD + exit = (exitcall_t *) + find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); if (exit) mod->exit = *exit; #endif + rcu_read_unlock_sched(); cfi_module_add(mod, mod_tree.addr_min); #endif diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 156a99283b11..1893d909e45c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -222,7 +222,7 @@ static void ptrace_unfreeze_traced(struct task_struct *task) if (lock_task_sighand(task, &flags)) { task->jobctl &= ~JOBCTL_PTRACE_FROZEN; if (__fatal_signal_pending(task)) { - task->jobctl &= ~TASK_TRACED; + task->jobctl &= ~JOBCTL_TRACED; wake_up_state(task, __TASK_TRACED); } unlock_task_sighand(task, &flags); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 85c92e2c2570..619b02443522 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -446,14 +446,14 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, if (*negp) { if (*lvalp > (unsigned long) INT_MAX + 1) return -EINVAL; - *valp = -*lvalp; + WRITE_ONCE(*valp, -*lvalp); } else { if (*lvalp > (unsigned long) INT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } } else { - int val = *valp; + int val = READ_ONCE(*valp); if (val < 0) { *negp = true; *lvalp = -(unsigned long)val; @@ -472,9 +472,9 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, if (write) { if (*lvalp > UINT_MAX) return -EINVAL; - *valp = *lvalp; + WRITE_ONCE(*valp, *lvalp); } else { - unsigned int val = *valp; + unsigned int val = READ_ONCE(*valp); *lvalp = (unsigned long)val; } return 0; @@ -857,7 +857,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, if ((param->min && *param->min > tmp) || (param->max && *param->max < tmp)) return -EINVAL; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -923,7 +923,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, (param->max && *param->max < tmp)) return -ERANGE; - *valp = tmp; + WRITE_ONCE(*valp, tmp); } return 0; @@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, tmp.maxlen = sizeof(val); tmp.data = &val; - val = *data; + val = READ_ONCE(*data); res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, do_proc_douintvec_minmax_conv, ¶m); if (res) return res; if (write) - *data = val; + WRITE_ONCE(*data, val); return 0; } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); @@ -1090,9 +1090,9 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = -EINVAL; break; } - *i = val; + WRITE_ONCE(*i, val); } else { - val = convdiv * (*i) / convmul; + val = convdiv * READ_ONCE(*i) / convmul; if (!first) proc_put_char(&buffer, &left, '\t'); proc_put_long(&buffer, &left, val, false); @@ -1173,9 +1173,12 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, if (write) { if (*lvalp > INT_MAX / HZ) return 1; - *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); + if (*negp) + WRITE_ONCE(*valp, -*lvalp * HZ); + else + WRITE_ONCE(*valp, *lvalp * HZ); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1221,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, if (jif > INT_MAX) return 1; - *valp = (int)jif; + WRITE_ONCE(*valp, (int)jif); } else { - int val = *valp; + int val = READ_ONCE(*valp); unsigned long lval; if (val < 0) { *negp = true; @@ -1326,8 +1329,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * The values read are assumed to be in 1/1000 seconds, and + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and * are converted into jiffies. * * Returns 0 on success. diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1cd10b102c51..5dead89308b7 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1051,15 +1051,24 @@ retry_delete: } /* - * This is called by do_exit or de_thread, only when there are no more - * references to the shared signal_struct. + * This is called by do_exit or de_thread, only when nobody else can + * modify the signal->posix_timers list. Yet we need sighand->siglock + * to prevent the race with /proc/pid/timers. */ -void exit_itimers(struct signal_struct *sig) +void exit_itimers(struct task_struct *tsk) { + struct list_head timers; struct k_itimer *tmr; - while (!list_empty(&sig->posix_timers)) { - tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); + if (list_empty(&tsk->signal->posix_timers)) + return; + + spin_lock_irq(&tsk->sighand->siglock); + list_replace_init(&tsk->signal->posix_timers, &timers); + spin_unlock_irq(&tsk->sighand->siglock); + + while (!list_empty(&timers)) { + tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); } } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index debbbb083286..ccd6a5ade3e9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -194,7 +194,8 @@ config FUNCTION_TRACER sequence is then dynamically patched into a tracer call when tracing is enabled by the administrator. If it's runtime disabled (the bootup default), then the overhead of the instructions is very - small and not measurable even in micro-benchmarks. + small and not measurable even in micro-benchmarks (at least on + x86, but may have impact on other architectures). config FUNCTION_GRAPH_TRACER bool "Kernel Function Graph Tracer" diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a8cfac0611bc..b8dd54627075 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9864,6 +9864,12 @@ void trace_init_global_iter(struct trace_iterator *iter) /* Output in nanoseconds only if we are using a clock in nanoseconds. */ if (trace_clocks[iter->tr->clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; + + /* Can not use kmalloc for iter.temp and iter.fmt */ + iter->temp = static_temp_buf; + iter->temp_size = STATIC_TEMP_BUF_SIZE; + iter->fmt = static_fmt_buf; + iter->fmt_size = STATIC_FMT_BUF_SIZE; } void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) @@ -9896,11 +9902,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) /* Simulate the iterator */ trace_init_global_iter(&iter); - /* Can not use kmalloc for iter.temp and iter.fmt */ - iter.temp = static_temp_buf; - iter.temp_size = STATIC_TEMP_BUF_SIZE; - iter.fmt = static_fmt_buf; - iter.fmt_size = STATIC_FMT_BUF_SIZE; for_each_tracing_cpu(cpu) { atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48e82e141d54..e87a46794079 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4430,6 +4430,8 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { + kfree(hist_data->attrs->var_defs.name[n_vars]); + hist_data->attrs->var_defs.name[n_vars] = NULL; ret = -ENOMEM; goto free; } diff --git a/lib/idr.c b/lib/idr.c index f4ab4f4aa3c7..7ecdfdb5309e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -491,7 +491,8 @@ void ida_free(struct ida *ida, unsigned int id) struct ida_bitmap *bitmap; unsigned long flags; - BUG_ON((int)id < 0); + if ((int)id < 0) + return; xas_lock_irqsave(&xas, flags); bitmap = xas_load(&xas); diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 59e1653799f8..3c7b9d6dca95 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -336,8 +336,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, if (pte_young(entry)) { referenced = true; entry = pte_mkold(entry); - huge_ptep_set_access_flags(vma, addr, pte, entry, - vma->vm_flags & VM_WRITE); + set_huge_pte_at(mm, addr, pte, entry); } #ifdef CONFIG_MMU_NOTIFIER diff --git a/mm/memory.c b/mm/memory.c index 7a089145cad4..4cf7d4b6c950 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4802,6 +4802,19 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) goto split; if (vmf->vma->vm_ops->huge_fault) { vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); @@ -4812,19 +4825,7 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf) split: /* COW or write-notify not handled on PUD level: split pud.*/ __split_huge_pud(vmf->vma, vmf->pud, vmf->address); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - return VM_FAULT_FALLBACK; -} - -static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* No support for anonymous transparent PUD pages yet */ - if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ return VM_FAULT_FALLBACK; } diff --git a/mm/rmap.c b/mm/rmap.c index 5bcb334cd6f2..746c05acad27 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1899,8 +1899,23 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); - subpage = folio_page(folio, - pte_pfn(*pvmw.pte) - folio_pfn(folio)); + if (folio_is_zone_device(folio)) { + /* + * Our PTE is a non-present device exclusive entry and + * calculating the subpage as for the common case would + * result in an invalid pointer. + * + * Since only PAGE_SIZE pages can currently be + * migrated, just set it to page. This will need to be + * changed when hugepage migrations to device private + * memory are supported. + */ + VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio); + subpage = &folio->page; + } else { + subpage = folio_page(folio, + pte_pfn(*pvmw.pte) - folio_pfn(folio)); + } address = pvmw.address; anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(subpage); @@ -1993,15 +2008,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* * No need to invalidate here it will synchronize on * against the special swap migration pte. - * - * The assignment to subpage above was computed from a - * swap PTE which results in an invalid pointer. - * Since only PAGE_SIZE pages can currently be - * migrated, just set it to page. This will need to be - * changed when hugepage migrations to device private - * memory are supported. */ - subpage = &folio->page; } else if (PageHWPoison(subpage)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index f4fa61dbbee3..dbbd1a7e65f3 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -78,6 +78,14 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) spin_lock(&init_mm.page_table_lock); if (likely(pmd_leaf(*pmd))) { + /* + * Higher order allocations from buddy allocator must be able to + * be treated as indepdenent small pages (as they can be freed + * individually). + */ + if (!PageReserved(page)) + split_page(page, get_order(PMD_SIZE)); + /* Make pte visible before pmd. See comment in pmd_install(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 4f4892a5f767..07d3befc80e4 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -246,7 +246,10 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, struct page *page; int ret; - ret = shmem_getpage(inode, pgoff, &page, SGP_READ); + ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC); + /* Our caller expects us to return -EFAULT if we failed to find page. */ + if (ret == -ENOENT) + ret = -EFAULT; if (ret) goto out; if (!page) { diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 53b1955b027f..214532173536 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -182,10 +182,14 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, else if (dev->mtu > max_mtu) return -EINVAL; + /* Note: If this initial vlan_changelink() fails, we need + * to call vlan_dev_free_egress_priority() to free memory. + */ err = vlan_changelink(dev, tb, data, extack); - if (err) - return err; - err = register_vlan_dev(dev, extack); + + if (!err) + err = register_vlan_dev(dev, extack); + if (err) vlan_dev_free_egress_priority(dev); return err; diff --git a/net/core/dev.c b/net/core/dev.c index 978ed0622d8f..d588fd0a54ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4863,7 +4863,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, } /* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. + * network taps in order to match in-driver-XDP behavior. This also means + * that XDP packets are able to starve other packets going through a qdisc, + * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX + * queues, so they do not have this starvation issue. */ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) { @@ -4875,7 +4878,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { + if (!netif_xmit_frozen_or_drv_stopped(txq)) { rc = netdev_start_xmit(skb, dev, txq, 0); if (dev_xmit_complete(rc)) free_skb = false; @@ -4883,6 +4886,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) HARD_TX_UNLOCK(dev, txq); if (free_skb) { trace_xdp_exception(dev, xdp_prog, XDP_TX); + dev_core_stats_tx_dropped_inc(dev); kfree_skb(skb); } } diff --git a/net/core/filter.c b/net/core/filter.c index 4ef77ec5255e..59aada9c4e19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6204,7 +6204,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len if (err) return err; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); return seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7abd652a558f..fac552e98eb9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1247,7 +1247,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1302,7 +1302,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 62d5f99760aa..6cd3b6c559f0 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -239,7 +239,7 @@ static int cipso_v4_cache_check(const unsigned char *key, struct cipso_v4_map_cache_entry *prev_entry = NULL; u32 hash; - if (!cipso_v4_cache_enabled) + if (!READ_ONCE(cipso_v4_cache_enabled)) return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); @@ -296,13 +296,14 @@ static int cipso_v4_cache_check(const unsigned char *key, int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { + int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize); int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; u32 cipso_ptr_len; - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0) return 0; cipso_ptr_len = cipso_ptr[1]; @@ -322,7 +323,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr, bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + if (cipso_v4_cache[bkt].size < bkt_size) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; } else { @@ -1199,7 +1200,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, /* This will send packets using the "optimized" format when * possible as specified in section 3.4.2.6 of the * CIPSO draft. */ - if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10) + if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 && + ret_val <= 10) tag_len = 14; else tag_len = 4 + ret_val; @@ -1603,7 +1605,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) * all the CIPSO validations here but it doesn't * really specify _exactly_ what we need to validate * ... so, just make it a sysctl tunable. */ - if (cipso_v4_rbm_strictvalid) { + if (READ_ONCE(cipso_v4_rbm_strictvalid)) { if (cipso_v4_map_lvl_valid(doi_def, tag[3]) < 0) { err_offset = opt_iter + 3; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a5439a8414d4..14f037e8ac55 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1231,7 +1231,7 @@ static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh, nh->fib_nh_dev = in_dev->dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC); - nh->fib_nh_scope = RT_SCOPE_HOST; + nh->fib_nh_scope = RT_SCOPE_LINK; if (!netif_carrier_ok(nh->fib_nh_dev)) nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; @@ -1812,7 +1812,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nexthop_is_blackhole(fi->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) + if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2734c3af7e24..46e8a5125853 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -498,7 +498,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= sysctl_fib_sync_mem) { + if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index efea0e796f06..57c4f0d87a7a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,11 +253,12 @@ bool icmp_global_allow(void) spin_lock(&icmp_global.lock); delta = min_t(u32, now - icmp_global.stamp, HZ); if (delta >= HZ / 50) { - incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; if (incr) WRITE_ONCE(icmp_global.stamp, now); } - credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + credit = min_t(u32, icmp_global.credit + incr, + READ_ONCE(sysctl_icmp_msgs_burst)); if (credit) { /* We want to use a credit of one in average, but need to randomize * it for security reasons. @@ -281,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return true; /* Limit if icmp type is enabled in ratemask. */ - if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask))) return true; return false; @@ -319,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, vif = l3mdev_master_ifindex(dst->dev); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); - rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); + rc = inet_peer_xrlim_allow(peer, + READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) inet_putpeer(peer); out: @@ -692,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) @@ -932,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) * get the other vendor to fix their kit. */ - if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && + if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) && inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, @@ -992,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ - if (net->ipv4.sysctl_icmp_echo_ignore_all) + if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; icmp_param.data.icmph = *icmp_hdr(skb); @@ -1027,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) u16 ident_len; u8 status; - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) return false; /* We currently only support probing interfaces on the proxy node @@ -1248,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb) */ if ((icmph->type == ICMP_ECHO || icmph->type == ICMP_TIMESTAMP) && - net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { + READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) { reason = SKB_DROP_REASON_INVALID_PROTO; goto error; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3..47ccc343c9fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, { struct inet_timewait_sock *tw; - if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) + if (refcount_read(&dr->tw_refcount) - 1 >= + READ_ONCE(dr->sysctl_max_tw_buckets)) return NULL; tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da21dfce24d7..e9fed83e9b3c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -141,16 +141,20 @@ static void inet_peer_gc(struct inet_peer_base *base, struct inet_peer *gc_stack[], unsigned int gc_cnt) { + int peer_threshold, peer_maxttl, peer_minttl; struct inet_peer *p; __u32 delta, ttl; int i; - if (base->total >= inet_peer_threshold) + peer_threshold = READ_ONCE(inet_peer_threshold); + peer_maxttl = READ_ONCE(inet_peer_maxttl); + peer_minttl = READ_ONCE(inet_peer_minttl); + + if (base->total >= peer_threshold) ttl = 0; /* be aggressive */ else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - base->total / inet_peer_threshold * HZ; + ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ * + base->total / peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607..853a75a8fbaf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); ipv6_stub->ip6_del_rt(net, f6i, - !net->ipv4.sysctl_nexthop_compat_mode); + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); } } @@ -2361,7 +2361,8 @@ out: if (!rc) { nh_base_seq_inc(net); nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); - if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode) + if (replace_notify && + READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)) nexthop_replace_notify(net, new_nh, &cfg->nlinfo); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366..b387c4835155 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, if (!ecn_ok) return false; - if (net->ipv4.sysctl_tcp_ecn) + if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) return true; return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cd448cdd3b38..108fd86f2718 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -599,6 +599,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_echo_enable_probe", @@ -615,6 +617,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ignore_bogus_error_responses", @@ -622,6 +626,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_errors_use_inbound_ifaddr", @@ -629,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", @@ -668,6 +676,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, }, { .procname = "tcp_ecn_fallback", @@ -675,6 +685,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 21bdee88383b..96b6e9c22068 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2784,7 +2784,8 @@ static void tcp_orphan_update(struct timer_list *unused) static bool tcp_too_many_orphans(int shift) { - return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; + return READ_ONCE(tcp_orphan_cache) << shift > + READ_ONCE(sysctl_tcp_max_orphans); } bool tcp_check_oom(struct sock *sk, int shift) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 80cb112ef142..acee998c14b2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6726,7 +6726,7 @@ static void tcp_ecn_create_request(struct request_sock *req, ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield); ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); - ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; + ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst; if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) || (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d1ca3df275df..858a15cc2cc9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); - bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || + bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; if (!use_ecn) { @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 61770220774e..9d92d51c4757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && - net->ipv4.sysctl_icmp_echo_enable_probe) + READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) icmpv6_echo_reply(skb); break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 70cd50c1fa6f..69252eb462b2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (nexthop_is_blackhole(rt->nh)) rtm->rtm_type = RTN_BLACKHOLE; - if (net->ipv4.sysctl_nexthop_compat_mode && + if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) && rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) goto nla_put_failure; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d64855010948..e756ba705fd9 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -189,6 +189,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, tot_len); return 0; @@ -241,6 +243,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) } #endif + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); return 0; @@ -302,7 +306,6 @@ static int seg6_do_srh(struct sk_buff *skb) break; } - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 98a34287439c..2cd4a8d3b30a 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -826,7 +826,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); @@ -858,7 +857,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb, if (err) goto drop; - ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); seg6_lookup_nexthop(skb, NULL, 0); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd6c4291c971..64801ab545c1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4463,14 +4463,14 @@ EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) return; - cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap); + cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp); } EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 56dd831fe45f..d5e904bff624 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -380,7 +380,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do bool cancel_scan; struct cfg80211_nan_func *func; + spin_lock_bh(&local->fq.lock); clear_bit(SDATA_STATE_RUNNING, &sdata->state); + spin_unlock_bh(&local->fq.lock); cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; if (cancel_scan) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 834d2171f344..304b9909f025 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3199,7 +3199,8 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) IEEE80211_HE_OPERATION_BSS_COLOR_MASK); if (color == bss_conf->he_bss_color.color) ieeee80211_obss_color_collision_notify(&rx->sdata->vif, - BIT_ULL(color)); + BIT_ULL(color), + GFP_ATOMIC); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b2430cf8332b..b58c85abcb1b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2802,19 +2802,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, /* * If the skb is shared we need to obtain our own copy. */ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - /* can't happen -- skb is a clone if info_id != 0 */ - WARN_ON(info_id); - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) { - ret = -ENOMEM; - goto free; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) { + ret = -ENOMEM; + goto free; } hdr.frame_control = fc; @@ -3522,15 +3513,9 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, /* after this point (skb is modified) we cannot return false */ - if (skb_shared(skb)) { - struct sk_buff *tmp_skb = skb; - - skb = skb_clone(skb, GFP_ATOMIC); - kfree_skb(tmp_skb); - - if (!skb) - return true; - } + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return true; if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) @@ -4341,7 +4326,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, struct net_device *dev, struct sta_info *sta, struct ieee80211_key *key, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_info *info; struct ieee80211_local *local = sdata->local; struct tid_ampdu_tx *tid_tx; u8 tid; @@ -4356,6 +4341,11 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return; + + info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); ieee80211_aggr_check(sdata, sta, skb); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bcb4aa7d7599..645f75b0f89f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -301,6 +301,9 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); + if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) + goto out; + if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index fc36c8e9d1d2..ecc1de2e68a5 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -148,8 +148,8 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, bool qos; /* all mesh/ocb stations are required to support WME */ - if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || - sdata->vif.type == NL80211_IFTYPE_OCB) + if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT || + sdata->vif.type == NL80211_IFTYPE_OCB)) qos = true; else if (sta) qos = sta->sta.wme; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6cf5fa191b12..57f23f4e3a7c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2884,12 +2884,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - struct mptcp_subflow_context *subflow; + struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_for_each_subflow(msk, subflow) { + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 082a2fd8d85b..369aeabb94fe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -729,6 +729,9 @@ static void nf_ct_gc_expired(struct nf_conn *ct) if (!refcount_inc_not_zero(&ct->ct_general.use)) return; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) nf_ct_kill(ct); @@ -795,6 +798,9 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, */ ct = nf_ct_tuplehash_to_ctrack(h); if (likely(refcount_inc_not_zero(&ct->ct_general.use))) { + /* re-check key after refcount */ + smp_acquire__after_ctrl_dep(); + if (likely(nf_ct_key_equal(h, tuple, zone, net))) goto found; @@ -1387,6 +1393,9 @@ static unsigned int early_drop_list(struct net *net, if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->ct_net and ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + /* kill only if still in same netns -- might have moved due to * SLAB_TYPESAFE_BY_RCU rules. * @@ -1536,6 +1545,9 @@ static void gc_worker(struct work_struct *work) if (!refcount_inc_not_zero(&tmp->ct_general.use)) continue; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (gc_worker_skip_ct(tmp)) { nf_ct_put(tmp); continue; @@ -1775,6 +1787,16 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); + /* Other CPU might have obtained a pointer to this object before it was + * released. Because refcount is 0, refcount_inc_not_zero() will fail. + * + * After refcount_set(1) it will succeed; ensure that zeroing of + * ct->status and the correct ct->net pointer are visible; else other + * core might observe CONFIRMED bit which means the entry is valid and + * in the hash table, but its not (anymore). + */ + smp_wmb(); + /* Now it is going to be associated with an sk_buff, set refcount to 1. */ refcount_set(&ct->ct_general.use, 1); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 722af5e309ba..f5905b5201a7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1203,6 +1203,7 @@ restart: hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { + /* need to defer nf_ct_kill() until lock is released */ if (i < ARRAY_SIZE(nf_ct_evict) && refcount_inc_not_zero(&ct->ct_general.use)) nf_ct_evict[i++] = ct; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6ad7bbc90d38..05895878610c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -306,6 +306,9 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use))) return 0; + /* load ->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + if (nf_ct_should_gc(ct)) { nf_ct_kill(ct); goto release; diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 77bcb10fc586..cb894f0d63e9 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -67,7 +67,7 @@ dump_arp_packet(struct nf_log_buf *m, unsigned int logflags; struct arphdr _arph; - ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); + ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph); if (!ah) { nf_log_buf_add(m, "TRUNCATED"); return; @@ -96,7 +96,7 @@ dump_arp_packet(struct nf_log_buf *m, ah->ar_pln != sizeof(__be32)) return; - ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); + ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp); if (!ap) { nf_log_buf_add(m, " INCOMPLETE [%zu bytes]", skb->len - sizeof(_arph)); @@ -149,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf, nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); - dump_arp_packet(m, loginfo, skb, 0); + dump_arp_packet(m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } @@ -850,7 +850,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf, if (in) dump_mac_header(m, loginfo, skb); - dump_ipv4_packet(net, m, loginfo, skb, 0); + dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb)); nf_log_buf_close(m); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d6b59beab3a9..646d5fd53604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5833,8 +5833,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) return -EINVAL; - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (set->flags & NFT_SET_MAP) { if (nla[NFTA_SET_ELEM_DATA] == NULL && @@ -5943,7 +5946,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_set_elem_expr; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto err_parse_key; } if (nla[NFTA_SET_ELEM_KEY_END]) { @@ -5952,22 +5957,31 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto err_parse_key_end; } if (timeout > 0) { - nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); - if (timeout != set->timeout) - nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (err < 0) + goto err_parse_key_end; + + if (timeout != set->timeout) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + if (err < 0) + goto err_parse_key_end; + } } if (num_exprs) { for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, - sizeof(struct nft_set_elem_expr) + - size); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, + sizeof(struct nft_set_elem_expr) + size); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { @@ -5982,7 +5996,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = PTR_ERR(obj); goto err_parse_key_end; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + if (err < 0) + goto err_parse_key_end; } if (nla[NFTA_SET_ELEM_DATA] != NULL) { @@ -6016,7 +6032,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, NFT_VALIDATE_NEED); } - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len); + if (err < 0) + goto err_parse_data; } /* The full maximum length of userdata can exceed the maximum @@ -6026,9 +6044,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ulen = 0; if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); - if (ulen > 0) - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, - ulen); + if (ulen > 0) { + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); + if (err < 0) + goto err_parse_data; + } } err = -ENOMEM; @@ -6256,8 +6277,11 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_prepare(&tmpl); - if (flags != 0) - nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (flags != 0) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS); + if (err < 0) + return err; + } if (nla[NFTA_SET_ELEM_KEY]) { err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -6265,16 +6289,20 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) return err; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); + if (err < 0) + goto fail_elem; } if (nla[NFTA_SET_ELEM_KEY_END]) { err = nft_setelem_parse_key(ctx, set, &elem.key_end.val, nla[NFTA_SET_ELEM_KEY_END]); if (err < 0) - return err; + goto fail_elem; - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen); + if (err < 0) + goto fail_elem_key_end; } err = -ENOMEM; @@ -6282,7 +6310,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, elem.key_end.val.data, NULL, 0, 0, GFP_KERNEL_ACCOUNT); if (elem.priv == NULL) - goto fail_elem; + goto fail_elem_key_end; ext = nft_set_elem_ext(set, elem.priv); if (flags) @@ -6306,6 +6334,8 @@ fail_ops: kfree(trans); fail_trans: kfree(elem.priv); +fail_elem_key_end: + nft_data_release(&elem.key_end.val, NFT_DATA_VALUE); fail_elem: nft_data_release(&elem.key.val, NFT_DATA_VALUE); return err; diff --git a/net/tls/tls.h b/net/tls/tls.h index 8005ee25157d..e0ccc96a0850 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -133,7 +133,7 @@ static inline struct tls_msg *tls_msg(struct sk_buff *skb) } #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); void tls_device_free_resources_tx(struct sock *sk); @@ -143,7 +143,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, struct sk_buff *skb, struct strp_msg *rxm); #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 227b92a3064a..6f9dff1c11f6 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1420,9 +1420,9 @@ static struct notifier_block tls_dev_notifier = { .notifier_call = tls_dev_event, }; -void __init tls_device_init(void) +int __init tls_device_init(void) { - register_netdevice_notifier(&tls_dev_notifier); + return register_netdevice_notifier(&tls_dev_notifier); } void __exit tls_device_cleanup(void) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index f71b46568112..9703636cfc60 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1141,7 +1141,12 @@ static int __init tls_register(void) if (err) return err; - tls_device_init(); + err = tls_device_init(); + if (err) { + unregister_pernet_subsys(&tls_proc_ops); + return err; + } + tcp_register_ulp(&tcp_tls_ulp_ops); return 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 00be498aab2e..22996d63c15f 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1236,7 +1236,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) { ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; if (WARN_ON(!wdev->connected) || diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c index 18b1e5c4b431..e22da8573116 100644 --- a/samples/fprobe/fprobe_example.c +++ b/samples/fprobe/fprobe_example.c @@ -20,7 +20,7 @@ #define BACKTRACE_DEPTH 16 #define MAX_SYMBOL_LEN 4096 -struct fprobe sample_probe; +static struct fprobe sample_probe; static unsigned long nhit; static char symbol[MAX_SYMBOL_LEN] = "kernel_clone"; diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index f991a66b5b02..fd346f58ddba 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -16,9 +16,8 @@ #include <linux/module.h> #include <linux/kprobes.h> -#define MAX_SYMBOL_LEN 64 -static char symbol[MAX_SYMBOL_LEN] = "kernel_clone"; -module_param_string(symbol, symbol, sizeof(symbol), 0644); +static char symbol[KSYM_NAME_LEN] = "kernel_clone"; +module_param_string(symbol, symbol, KSYM_NAME_LEN, 0644); /* For each probe you need to allocate a kprobe structure */ static struct kprobe kp = { diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 228321ecb161..cbf16542d84e 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -23,11 +23,10 @@ #include <linux/module.h> #include <linux/kprobes.h> #include <linux/ktime.h> -#include <linux/limits.h> #include <linux/sched.h> -static char func_name[NAME_MAX] = "kernel_clone"; -module_param_string(func, func_name, NAME_MAX, S_IRUGO); +static char func_name[KSYM_NAME_LEN] = "kernel_clone"; +module_param_string(func, func_name, KSYM_NAME_LEN, 0644); MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" " function's execution time"); diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index d1425778664b..3fb6a99e78c4 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -236,6 +236,7 @@ objtool_args = \ $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ $(if $(CONFIG_UNWINDER_ORC), --orc) \ $(if $(CONFIG_RETPOLINE), --retpoline) \ + $(if $(CONFIG_RETHUNK), --rethunk) \ $(if $(CONFIG_SLS), --sls) \ $(if $(CONFIG_STACK_VALIDATION), --stackval) \ $(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call) \ diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index c2c43a0ecfe0..16a02e9237d3 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -28,9 +28,6 @@ modules := $(patsubst $(extmod_prefix)%, $(dst)/%$(suffix-y), $(modules)) __modinst: $(modules) @: -quiet_cmd_none = - cmd_none = : - # # Installation # diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 3c97a1564947..84019814f33f 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -44,7 +44,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) objtool_args := \ $(if $(delay-objtool),$(objtool_args)) \ - $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr) \ + $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_CPU_UNRET_ENTRY), --unret)) \ $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ --link diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 1d1bde1fd45e..47da25b3ba7d 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -157,10 +157,10 @@ def cmdfiles_for_modorder(modorder): if ext != '.ko': sys.exit('{}: module path must end with .ko'.format(ko)) mod = base + '.mod' - # The first line of *.mod lists the objects that compose the module. + # Read from *.mod, to get a list of objects that compose the module. with open(mod) as m: - for obj in m.readline().split(): - yield to_cmdfile(obj) + for mod_line in m: + yield to_cmdfile(mod_line.rstrip()) def process_line(root_directory, command_prefix, file_path): diff --git a/security/Kconfig b/security/Kconfig index f29e4c656983..e6db09a779b7 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,17 +54,6 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. -config PAGE_TABLE_ISOLATION - bool "Remove the kernel mapping in user mode" - default y - depends on (X86_64 || X86_PAE) && !UML - help - This feature reduces the number of hardware side channels by - ensuring that the majority of kernel addresses are not mapped - into userspace. - - See Documentation/x86/pti.rst for more details. - config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index a733aff02006..708de9656bbd 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -75,7 +75,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm = NULL; + struct crypto_shash **tfm, *tmp_tfm; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -120,16 +120,13 @@ unlock: alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) { - crypto_free_shash(tmp_tfm); + if (!desc) return ERR_PTR(-ENOMEM); - } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { - crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index cdb84dccd24e..bde74fcecee3 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -514,7 +514,8 @@ int ima_appraise_measurement(enum ima_hooks func, goto out; } - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, + rc < 0 ? 0 : rc, iint); switch (status) { case INTEGRITY_PASS: case INTEGRITY_PASS_IMMUTABLE: diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a7206cc1d7d1..64499056648a 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -205,6 +205,7 @@ out_array: crypto_free_shash(ima_algo_array[i].tfm); } + kfree(ima_algo_array); out: crypto_free_shash(ima_shash_tfm); return rc; diff --git a/security/integrity/ima/ima_efi.c b/security/integrity/ima/ima_efi.c index 71786d01946f..9db66fe310d4 100644 --- a/security/integrity/ima/ima_efi.c +++ b/security/integrity/ima/ima_efi.c @@ -67,6 +67,8 @@ const char * const *arch_get_ima_policy(void) if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { if (IS_ENABLED(CONFIG_MODULE_SIG)) set_module_sig_enforced(); + if (IS_ENABLED(CONFIG_KEXEC_SIG)) + set_kexec_sig_enforced(); return sb_arch_rules; } return NULL; diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index c877f01a5471..7bf9b1507220 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -323,10 +323,10 @@ static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize, else /* * If digest is NULL, the event being recorded is a violation. - * Make room for the digest by increasing the offset of - * IMA_DIGEST_SIZE. + * Make room for the digest by increasing the offset by the + * hash algorithm digest size. */ - offset += IMA_DIGEST_SIZE; + offset += hash_digest_size[hash_algo]; return ima_write_template_field_data(buffer, offset + digestsize, fmt, field_data); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 3e541a4c0423..83ae21a01bbf 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -944,6 +944,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 007dd8b5e1f2..2f55bc43bfa9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6901,6 +6901,7 @@ enum { ALC298_FIXUP_LENOVO_SPK_VOLUME, ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER, ALC269_FIXUP_ATIV_BOOK_8, + ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE, ALC221_FIXUP_HP_MIC_NO_PRESENCE, ALC256_FIXUP_ASUS_HEADSET_MODE, ALC256_FIXUP_ASUS_MIC, @@ -7837,6 +7838,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_NO_SHUTUP }, + [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC221_FIXUP_HP_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -8886,6 +8897,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), @@ -8895,6 +8907,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), + SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X), SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X), @@ -9010,6 +9023,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC), @@ -9096,6 +9110,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -9355,6 +9373,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), @@ -11217,6 +11236,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50), diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index e32871b3f68a..7434aeeda292 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1760,8 +1760,8 @@ static bool arizona_aif_cfg_changed(struct snd_soc_component *component, if (bclk != (val & ARIZONA_AIF1_BCLK_FREQ_MASK)) return true; - val = snd_soc_component_read(component, base + ARIZONA_AIF_TX_BCLK_RATE); - if (lrclk != (val & ARIZONA_AIF1TX_BCPF_MASK)) + val = snd_soc_component_read(component, base + ARIZONA_AIF_RX_BCLK_RATE); + if (lrclk != (val & ARIZONA_AIF1RX_BCPF_MASK)) return true; val = snd_soc_component_read(component, base + ARIZONA_AIF_FRAME_CTRL_1); diff --git a/sound/soc/codecs/cs47l92.c b/sound/soc/codecs/cs47l92.c index a1b8dcdb9f7b..444026b7d54b 100644 --- a/sound/soc/codecs/cs47l92.c +++ b/sound/soc/codecs/cs47l92.c @@ -119,7 +119,13 @@ static int cs47l92_put_demux(struct snd_kcontrol *kcontrol, end: snd_soc_dapm_mutex_unlock(dapm); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL); + if (ret < 0) { + dev_err(madera->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } static SOC_ENUM_SINGLE_DECL(cs47l92_outdemux_enum, diff --git a/sound/soc/codecs/max98396.c b/sound/soc/codecs/max98396.c index 56eb62bb041f..34db38812807 100644 --- a/sound/soc/codecs/max98396.c +++ b/sound/soc/codecs/max98396.c @@ -342,12 +342,15 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_component *component = codec_dai->component; struct max98396_priv *max98396 = snd_soc_component_get_drvdata(component); - unsigned int format = 0; + unsigned int format_mask, format = 0; unsigned int bclk_pol = 0; int ret, status; int reg; bool update = false; + format_mask = MAX98396_PCM_MODE_CFG_FORMAT_MASK | + MAX98396_PCM_MODE_CFG_LRCLKEDGE; + dev_dbg(component->dev, "%s: fmt 0x%08X\n", __func__, fmt); switch (fmt & SND_SOC_DAIFMT_INV_MASK) { @@ -395,7 +398,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) ret = regmap_read(max98396->regmap, MAX98396_R2041_PCM_MODE_CFG, ®); if (ret < 0) return -EINVAL; - if (format != (reg & MAX98396_PCM_BCLKEDGE_BSEL_MASK)) { + if (format != (reg & format_mask)) { update = true; } else { ret = regmap_read(max98396->regmap, @@ -412,8 +415,7 @@ static int max98396_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) regmap_update_bits(max98396->regmap, MAX98396_R2041_PCM_MODE_CFG, - MAX98396_PCM_BCLKEDGE_BSEL_MASK, - format); + format_mask, format); regmap_update_bits(max98396->regmap, MAX98396_R2042_PCM_CLK_SETUP, diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 69c80d80ed9d..18b3da9211e3 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -1984,7 +1984,12 @@ static int rt5640_set_bias_level(struct snd_soc_component *component, snd_soc_component_write(component, RT5640_PWR_DIG2, 0x0000); snd_soc_component_write(component, RT5640_PWR_VOL, 0x0000); snd_soc_component_write(component, RT5640_PWR_MIXER, 0x0000); - snd_soc_component_write(component, RT5640_PWR_ANLG1, 0x0000); + if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) + snd_soc_component_write(component, RT5640_PWR_ANLG1, + 0x0018); + else + snd_soc_component_write(component, RT5640_PWR_ANLG1, + 0x0000); snd_soc_component_write(component, RT5640_PWR_ANLG2, 0x0000); break; @@ -2393,9 +2398,15 @@ static void rt5640_jack_work(struct work_struct *work) static irqreturn_t rt5640_irq(int irq, void *data) { struct rt5640_priv *rt5640 = data; + int delay = 0; + + if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) { + cancel_delayed_work_sync(&rt5640->jack_work); + delay = 100; + } if (rt5640->jack) - queue_delayed_work(system_long_wq, &rt5640->jack_work, 0); + queue_delayed_work(system_long_wq, &rt5640->jack_work, delay); return IRQ_HANDLED; } @@ -2580,6 +2591,12 @@ static void rt5640_enable_hda_jack_detect( snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0); + snd_soc_component_update_bits(component, RT5640_PWR_ANLG1, + RT5640_PWR_VREF2, RT5640_PWR_VREF2); + usleep_range(10000, 15000); + snd_soc_component_update_bits(component, RT5640_PWR_ANLG1, + RT5640_PWR_FV2, RT5640_PWR_FV2); + rt5640->jack = jack; ret = request_irq(rt5640->irq, rt5640_irq, @@ -2696,16 +2713,13 @@ static int rt5640_probe(struct snd_soc_component *component) if (device_property_read_u32(component->dev, "realtek,jack-detect-source", &val) == 0) { - if (val <= RT5640_JD_SRC_GPIO4) { + if (val <= RT5640_JD_SRC_GPIO4) rt5640->jd_src = val << RT5640_JD_SFT; - } else if (val == RT5640_JD_SRC_HDA_HEADER) { + else if (val == RT5640_JD_SRC_HDA_HEADER) rt5640->jd_src = RT5640_JD_SRC_HDA_HEADER; - snd_soc_component_update_bits(component, RT5640_DUMMY1, - 0x0300, 0x0); - } else { + else dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n", val); - } } if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted")) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 2aa48aef6a97..3363d1696ad7 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1795,6 +1795,9 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); + clk_disable_unprepare(sgtl5000->mclk); regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies); regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies); @@ -1802,6 +1805,11 @@ static int sgtl5000_i2c_remove(struct i2c_client *client) return 0; } +static void sgtl5000_i2c_shutdown(struct i2c_client *client) +{ + sgtl5000_i2c_remove(client); +} + static const struct i2c_device_id sgtl5000_id[] = { {"sgtl5000", 0}, {}, @@ -1822,6 +1830,7 @@ static struct i2c_driver sgtl5000_i2c_driver = { }, .probe_new = sgtl5000_i2c_probe, .remove = sgtl5000_i2c_remove, + .shutdown = sgtl5000_i2c_shutdown, .id_table = sgtl5000_id, }; diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 56ec5863f250..3a808c762299 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -80,6 +80,7 @@ /* * SGTL5000_CHIP_DIG_POWER */ +#define SGTL5000_DIG_POWER_DEFAULT 0x0000 #define SGTL5000_ADC_EN 0x0040 #define SGTL5000_DAC_EN 0x0020 #define SGTL5000_DAP_POWERUP 0x0010 diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index d395feffb30b..4cb788f3e5f7 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -42,10 +42,12 @@ static void tas2764_reset(struct tas2764_priv *tas2764) gpiod_set_value_cansleep(tas2764->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2764->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2764->component, TAS2764_SW_RST, TAS2764_RST); + usleep_range(1000, 2000); } static int tas2764_set_bias_level(struct snd_soc_component *component, @@ -107,8 +109,10 @@ static int tas2764_codec_resume(struct snd_soc_component *component) struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); int ret; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL, TAS2764_PWR_CTRL_MASK, @@ -131,7 +135,8 @@ static const char * const tas2764_ASI1_src[] = { }; static SOC_ENUM_SINGLE_DECL( - tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src); + tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT, + tas2764_ASI1_src); static const struct snd_kcontrol_new tas2764_asi1_mux = SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum); @@ -329,20 +334,22 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0; - int iface; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; break; + case SND_SOC_DAIFMT_IB_IF: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; break; - default: - dev_err(tas2764->dev, "ASI format Inverse is not found\n"); - return -EINVAL; } ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, @@ -353,13 +360,13 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; + fallthrough; case SND_SOC_DAIFMT_DSP_A: - iface = TAS2764_TDM_CFG2_SCFG_I2S; tdm_rx_start_slot = 1; break; case SND_SOC_DAIFMT_DSP_B: case SND_SOC_DAIFMT_LEFT_J: - iface = TAS2764_TDM_CFG2_SCFG_LEFT_J; tdm_rx_start_slot = 0; break; default: @@ -368,14 +375,15 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, - TAS2764_TDM_CFG1_MASK, - (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0, + TAS2764_TDM_CFG0_FRAME_START, + asi_cfg_0); if (ret < 0) return ret; - ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2, - TAS2764_TDM_CFG2_SCFG_MASK, iface); + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1, + TAS2764_TDM_CFG1_MASK, + (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT)); if (ret < 0) return ret; @@ -501,8 +509,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) tas2764->component = component; - if (tas2764->sdz_gpio) + if (tas2764->sdz_gpio) { gpiod_set_value_cansleep(tas2764->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2764_reset(tas2764); @@ -526,12 +536,12 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1); static const struct snd_kcontrol_new tas2764_snd_controls[] = { SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0, TAS2764_DVC_MAX, 1, tas2764_playback_volume), - SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0, + SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0, tas2764_digital_tlv), }; @@ -556,7 +566,7 @@ static const struct reg_default tas2764_reg_defaults[] = { { TAS2764_SW_RST, 0x00 }, { TAS2764_PWR_CTRL, 0x1a }, { TAS2764_DVC, 0x00 }, - { TAS2764_CHNL_0, 0x00 }, + { TAS2764_CHNL_0, 0x28 }, { TAS2764_TDM_CFG0, 0x09 }, { TAS2764_TDM_CFG1, 0x02 }, { TAS2764_TDM_CFG2, 0x0a }, diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 67d6fd903c42..f015f22a083b 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -47,6 +47,7 @@ #define TAS2764_TDM_CFG0_MASK GENMASK(3, 1) #define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3) #define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1)) +#define TAS2764_TDM_CFG0_FRAME_START BIT(0) /* TDM Configuration Reg1 */ #define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09) @@ -66,10 +67,7 @@ #define TAS2764_TDM_CFG2_RXS_16BITS 0x0 #define TAS2764_TDM_CFG2_RXS_24BITS BIT(0) #define TAS2764_TDM_CFG2_RXS_32BITS BIT(1) -#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4) -#define TAS2764_TDM_CFG2_SCFG_I2S 0x0 -#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4) -#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5) +#define TAS2764_TDM_CFG2_SCFG_SHIFT 4 /* TDM Configuration Reg3 */ #define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c) diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c index b55f0b836932..0b729658fde8 100644 --- a/sound/soc/codecs/tlv320adcx140.c +++ b/sound/soc/codecs/tlv320adcx140.c @@ -33,7 +33,6 @@ struct adcx140_priv { bool micbias_vg; unsigned int dai_fmt; - unsigned int tdm_delay; unsigned int slot_width; }; @@ -792,12 +791,13 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai, { struct snd_soc_component *component = codec_dai->component; struct adcx140_priv *adcx140 = snd_soc_component_get_drvdata(component); - unsigned int lsb; - /* TDM based on DSP mode requires slots to be adjacent */ - lsb = __ffs(tx_mask); - if ((lsb + 1) != __fls(tx_mask)) { - dev_err(component->dev, "Invalid mask, slots must be adjacent\n"); + /* + * The chip itself supports arbitrary masks, but the driver currently + * only supports adjacent slots beginning at the first slot. + */ + if (tx_mask != GENMASK(__fls(tx_mask), 0)) { + dev_err(component->dev, "Only lower adjacent slots are supported\n"); return -EINVAL; } @@ -812,7 +812,6 @@ static int adcx140_set_dai_tdm_slot(struct snd_soc_dai *codec_dai, return -EINVAL; } - adcx140->tdm_delay = lsb; adcx140->slot_width = slot_width; return 0; diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index d9f135200688..3cb7a3eab8c7 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -342,7 +342,7 @@ struct wcd9335_codec { struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY]; unsigned int rx_port_value[WCD9335_RX_MAX]; - unsigned int tx_port_value; + unsigned int tx_port_value[WCD9335_TX_MAX]; int hph_l_gain; int hph_r_gain; u32 rx_bias_count; @@ -1334,8 +1334,13 @@ static int slim_tx_mixer_get(struct snd_kcontrol *kc, struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kc); struct wcd9335_codec *wcd = dev_get_drvdata(dapm->dev); + struct snd_soc_dapm_widget *widget = snd_soc_dapm_kcontrol_widget(kc); + struct soc_mixer_control *mixer = + (struct soc_mixer_control *)kc->private_value; + int dai_id = widget->shift; + int port_id = mixer->shift; - ucontrol->value.integer.value[0] = wcd->tx_port_value; + ucontrol->value.integer.value[0] = wcd->tx_port_value[port_id] == dai_id; return 0; } @@ -1358,12 +1363,12 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc, case AIF2_CAP: case AIF3_CAP: /* only add to the list if value not set */ - if (enable && !(wcd->tx_port_value & BIT(port_id))) { - wcd->tx_port_value |= BIT(port_id); + if (enable && wcd->tx_port_value[port_id] != dai_id) { + wcd->tx_port_value[port_id] = dai_id; list_add_tail(&wcd->tx_chs[port_id].list, &wcd->dai[dai_id].slim_ch_list); - } else if (!enable && (wcd->tx_port_value & BIT(port_id))) { - wcd->tx_port_value &= ~BIT(port_id); + } else if (!enable && wcd->tx_port_value[port_id] == dai_id) { + wcd->tx_port_value[port_id] = -1; list_del_init(&wcd->tx_chs[port_id].list); } break; diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index da2f8998df87..b034df47a5ef 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -680,12 +680,17 @@ static int wm5102_out_comp_coeff_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct arizona *arizona = dev_get_drvdata(component->dev->parent); + uint16_t dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data); + int ret = 0; mutex_lock(&arizona->dac_comp_lock); - arizona->dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data); + if (arizona->dac_comp_coeff != dac_comp_coeff) { + arizona->dac_comp_coeff = dac_comp_coeff; + ret = 1; + } mutex_unlock(&arizona->dac_comp_lock); - return 0; + return ret; } static int wm5102_out_comp_switch_get(struct snd_kcontrol *kcontrol, @@ -706,12 +711,20 @@ static int wm5102_out_comp_switch_put(struct snd_kcontrol *kcontrol, { struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol); struct arizona *arizona = dev_get_drvdata(component->dev->parent); + struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; + int ret = 0; + + if (ucontrol->value.integer.value[0] > mc->max) + return -EINVAL; mutex_lock(&arizona->dac_comp_lock); - arizona->dac_comp_enabled = ucontrol->value.integer.value[0]; + if (arizona->dac_comp_enabled != ucontrol->value.integer.value[0]) { + arizona->dac_comp_enabled = ucontrol->value.integer.value[0]; + ret = 1; + } mutex_unlock(&arizona->dac_comp_lock); - return 0; + return ret; } static const char * const wm5102_osr_text[] = { diff --git a/sound/soc/codecs/wm8998.c b/sound/soc/codecs/wm8998.c index 00b59fc9b1fe..ab5481187c71 100644 --- a/sound/soc/codecs/wm8998.c +++ b/sound/soc/codecs/wm8998.c @@ -108,6 +108,7 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol, struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; unsigned int mode_reg, mode_index; unsigned int mux, inmode, src_val, mode_val; + int change, ret; mux = ucontrol->value.enumerated.item[0]; if (mux > 1) @@ -137,14 +138,20 @@ static int wm8998_inmux_put(struct snd_kcontrol *kcontrol, snd_soc_component_update_bits(component, mode_reg, ARIZONA_IN1_MODE_MASK, mode_val); - snd_soc_component_update_bits(component, e->reg, - ARIZONA_IN1L_SRC_MASK | - ARIZONA_IN1L_SRC_SE_MASK, - src_val); + change = snd_soc_component_update_bits(component, e->reg, + ARIZONA_IN1L_SRC_MASK | + ARIZONA_IN1L_SRC_SE_MASK, + src_val); - return snd_soc_dapm_mux_update_power(dapm, kcontrol, - ucontrol->value.enumerated.item[0], - e, NULL); + ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, + ucontrol->value.enumerated.item[0], + e, NULL); + if (ret < 0) { + dev_err(arizona->dev, "Failed to update demux power state: %d\n", ret); + return ret; + } + + return change; } static const char * const wm8998_inmux_texts[] = { diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index 77ac4051b827..d34b29a49268 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -90,12 +90,12 @@ links indicates connection part of CPU side (= A). ports@0 { (X) (A) mcpu: port@0 { mcpu0_ep: endpoint { remote-endpoint = <&mcodec0_ep>; }; }; (y) port@1 { mcpu1_ep: endpoint { remote-endpoint = <&cpu1_ep>; }; }; -(y) port@1 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; }; +(y) port@2 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; }; }; ports@1 { (X) port@0 { mcodec0_ep: endpoint { remote-endpoint = <&mcpu0_ep>; }; }; -(y) port@0 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; }; -(y) port@1 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; }; +(y) port@1 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; }; +(y) port@2 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; }; }; }; }; diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 5d67a2c87a1d..4a90a0a5d831 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -69,11 +69,10 @@ static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN | static int is_legacy_cpu; -static struct snd_soc_jack sof_hdmi[3]; - struct sof_hdmi_pcm { struct list_head head; struct snd_soc_dai *codec_dai; + struct snd_soc_jack hdmi_jack; int device; }; @@ -434,7 +433,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) char jack_name[NAME_SIZE]; struct sof_hdmi_pcm *pcm; int err; - int i = 0; /* HDMI is not supported by SOF on Baytrail/CherryTrail */ if (is_legacy_cpu || !ctx->idisp_codec) @@ -455,17 +453,15 @@ static int sof_card_late_probe(struct snd_soc_card *card) snprintf(jack_name, sizeof(jack_name), "HDMI/DP, pcm=%d Jack", pcm->device); err = snd_soc_card_jack_new(card, jack_name, - SND_JACK_AVOUT, &sof_hdmi[i]); + SND_JACK_AVOUT, &pcm->hdmi_jack); if (err) return err; err = hdac_hdmi_jack_init(pcm->codec_dai, pcm->device, - &sof_hdmi[i]); + &pcm->hdmi_jack); if (err < 0) return err; - - i++; } if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) { diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 2439a574ac2f..deb7b820325e 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -99,7 +99,6 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, struct nhlt_fmt_cfg *fmt_cfg; struct wav_fmt_ext *wav_fmt; unsigned long rate; - bool present = false; int rate_index = 0; u16 channels, bps; u8 clk_src; @@ -112,9 +111,12 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, if (fmt->fmt_count == 0) return; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; for (i = 0; i < fmt->fmt_count; i++) { - fmt_cfg = &fmt->fmt_config[i]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg; + bool present = false; + + wav_fmt = &saved_fmt_cfg->fmt_ext; channels = wav_fmt->fmt.channels; bps = wav_fmt->fmt.bits_per_sample; @@ -132,12 +134,18 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, * derive the rate. */ for (j = i; j < fmt->fmt_count; j++) { - fmt_cfg = &fmt->fmt_config[j]; - wav_fmt = &fmt_cfg->fmt_ext; + struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg; + + wav_fmt = &tmp_fmt_cfg->fmt_ext; if ((fs == wav_fmt->fmt.samples_per_sec) && - (bps == wav_fmt->fmt.bits_per_sample)) + (bps == wav_fmt->fmt.bits_per_sample)) { channels = max_t(u16, channels, wav_fmt->fmt.channels); + saved_fmt_cfg = tmp_fmt_cfg; + } + /* Move to the next nhlt_fmt_cfg */ + tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps + + tmp_fmt_cfg->config.size); } rate = channels * bps * fs; @@ -153,8 +161,11 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, /* Fill rate and parent for sclk/sclkfs */ if (!present) { + struct nhlt_fmt_cfg *first_fmt_cfg; + + first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; i2s_config_ext = (struct skl_i2s_config_blob_ext *) - fmt->fmt_config[0].config.caps; + first_fmt_cfg->config.caps; /* MCLK Divider Source Select */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -168,6 +179,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, parent = skl_get_parent_clk(clk_src); + /* Move to the next nhlt_fmt_cfg */ + fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps + + fmt_cfg->config.size); /* * Do not copy the config data if there is no parent * clock available for this clock source select @@ -176,9 +190,9 @@ static void skl_get_ssp_clks(struct skl_dev *skl, struct skl_ssp_clk *ssp_clks, continue; sclk[id].rate_cfg[rate_index].rate = rate; - sclk[id].rate_cfg[rate_index].config = fmt_cfg; + sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclkfs[id].rate_cfg[rate_index].rate = rate; - sclkfs[id].rate_cfg[rate_index].config = fmt_cfg; + sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg; sclk[id].parent_name = parent->name; sclkfs[id].parent_name = parent->name; @@ -192,13 +206,13 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, { struct skl_i2s_config_blob_ext *i2s_config_ext; struct skl_i2s_config_blob_legacy *i2s_config; - struct nhlt_specific_cfg *fmt_cfg; + struct nhlt_fmt_cfg *fmt_cfg; struct skl_clk_parent_src *parent; u32 clkdiv, div_ratio; u8 clk_src; - fmt_cfg = &fmt->fmt_config[0].config; - i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps; + fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config; + i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps; /* MCLK Divider Source Select and divider */ if (is_legacy_blob(i2s_config_ext->hdr.sig)) { @@ -227,7 +241,7 @@ static void skl_get_mclk(struct skl_dev *skl, struct skl_ssp_clk *mclk, return; mclk[id].rate_cfg[0].rate = parent->rate/div_ratio; - mclk[id].rate_cfg[0].config = &fmt->fmt_config[0]; + mclk[id].rate_cfg[0].config = fmt_cfg; mclk[id].parent_name = parent->name; } diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c index f424d7aa389a..794019286c70 100644 --- a/sound/soc/qcom/qdsp6/q6apm.c +++ b/sound/soc/qcom/qdsp6/q6apm.c @@ -75,6 +75,7 @@ static struct audioreach_graph *q6apm_get_audioreach_graph(struct q6apm *apm, ui id = idr_alloc(&apm->graph_idr, graph, graph_id, graph_id + 1, GFP_KERNEL); if (id < 0) { dev_err(apm->dev, "Unable to allocate graph id (%d)\n", graph_id); + kfree(graph->graph); kfree(graph); mutex_unlock(&apm->lock); return ERR_PTR(id); diff --git a/sound/soc/ti/omap-mcbsp-priv.h b/sound/soc/ti/omap-mcbsp-priv.h index 7865cda4bf0a..da519ea1f303 100644 --- a/sound/soc/ti/omap-mcbsp-priv.h +++ b/sound/soc/ti/omap-mcbsp-priv.h @@ -316,8 +316,6 @@ static inline int omap_mcbsp_read(struct omap_mcbsp *mcbsp, u16 reg, /* Sidetone specific API */ int omap_mcbsp_st_init(struct platform_device *pdev); -void omap_mcbsp_st_cleanup(struct platform_device *pdev); - int omap_mcbsp_st_start(struct omap_mcbsp *mcbsp); int omap_mcbsp_st_stop(struct omap_mcbsp *mcbsp); diff --git a/sound/soc/ti/omap-mcbsp-st.c b/sound/soc/ti/omap-mcbsp-st.c index 0bc7d26c660a..7e8179cae92e 100644 --- a/sound/soc/ti/omap-mcbsp-st.c +++ b/sound/soc/ti/omap-mcbsp-st.c @@ -347,7 +347,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev) if (!st_data) return -ENOMEM; - st_data->mcbsp_iclk = clk_get(mcbsp->dev, "ick"); + st_data->mcbsp_iclk = devm_clk_get(mcbsp->dev, "ick"); if (IS_ERR(st_data->mcbsp_iclk)) { dev_warn(mcbsp->dev, "Failed to get ick, sidetone might be broken\n"); @@ -359,7 +359,7 @@ int omap_mcbsp_st_init(struct platform_device *pdev) if (!st_data->io_base_st) return -ENOMEM; - ret = sysfs_create_group(&mcbsp->dev->kobj, &sidetone_attr_group); + ret = devm_device_add_group(mcbsp->dev, &sidetone_attr_group); if (ret) return ret; @@ -368,16 +368,6 @@ int omap_mcbsp_st_init(struct platform_device *pdev) return 0; } -void omap_mcbsp_st_cleanup(struct platform_device *pdev) -{ - struct omap_mcbsp *mcbsp = platform_get_drvdata(pdev); - - if (mcbsp->st_data) { - sysfs_remove_group(&mcbsp->dev->kobj, &sidetone_attr_group); - clk_put(mcbsp->st_data->mcbsp_iclk); - } -} - static int omap_mcbsp_st_info_volsw(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c index 4479d74f0a45..9933b33c80ca 100644 --- a/sound/soc/ti/omap-mcbsp.c +++ b/sound/soc/ti/omap-mcbsp.c @@ -702,8 +702,7 @@ static int omap_mcbsp_init(struct platform_device *pdev) mcbsp->max_tx_thres = max_thres(mcbsp) - 0x10; mcbsp->max_rx_thres = max_thres(mcbsp) - 0x10; - ret = sysfs_create_group(&mcbsp->dev->kobj, - &additional_attr_group); + ret = devm_device_add_group(mcbsp->dev, &additional_attr_group); if (ret) { dev_err(mcbsp->dev, "Unable to create additional controls\n"); @@ -711,16 +710,7 @@ static int omap_mcbsp_init(struct platform_device *pdev) } } - ret = omap_mcbsp_st_init(pdev); - if (ret) - goto err_st; - - return 0; - -err_st: - if (mcbsp->pdata->buffer_size) - sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group); - return ret; + return omap_mcbsp_st_init(pdev); } /* @@ -1431,11 +1421,6 @@ static int asoc_mcbsp_remove(struct platform_device *pdev) if (cpu_latency_qos_request_active(&mcbsp->pm_qos_req)) cpu_latency_qos_remove_request(&mcbsp->pm_qos_req); - if (mcbsp->pdata->buffer_size) - sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group); - - omap_mcbsp_st_cleanup(pdev); - return 0; } diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index d27e0581b777..2eab6a3a8a8c 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -51,6 +51,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -140,6 +142,13 @@ * bit available to control VERW * behavior. */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 15b940ec1eac..10bc88cc3bf6 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -32,11 +32,16 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_OBJTOOL @@ -124,7 +129,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -177,7 +182,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 379e68fb866f..3dd13fe738b9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5226,22 +5226,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. * * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8b990a52aada..c260006106be 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym) { return !strncmp(sym->name, "__x86_indirect_", 15); } + +bool arch_is_rethunk(struct symbol *sym) +{ + return !strcmp(sym->name, "__x86_return_thunk"); +} diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index f4c3a5091737..24fbe803a0d3 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -68,6 +68,8 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"), OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), + OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), + OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), @@ -123,6 +125,7 @@ static bool opts_valid(void) opts.noinstr || opts.orc || opts.retpoline || + opts.rethunk || opts.sls || opts.stackval || opts.static_call || @@ -135,6 +138,11 @@ static bool opts_valid(void) return true; } + if (opts.unret && !opts.rethunk) { + ERROR("--unret requires --rethunk"); + return false; + } + if (opts.dump_orc) return true; @@ -163,6 +171,11 @@ static bool link_opts_valid(struct objtool_file *file) return false; } + if (opts.unret) { + ERROR("--unret requires --link"); + return false; + } + return true; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 864bb9dd3584..b341f8a8c7c5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -376,7 +376,8 @@ static int decode_instructions(struct objtool_file *file) sec->text = true; if (!strcmp(sec->name, ".noinstr.text") || - !strcmp(sec->name, ".entry.text")) + !strcmp(sec->name, ".entry.text") || + !strncmp(sec->name, ".text.__x86.", 12)) sec->noinstr = true; for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { @@ -749,6 +750,52 @@ static int create_retpoline_sites_sections(struct objtool_file *file) return 0; } +static int create_return_sites_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".return_sites"); + if (sec) { + WARN("file already has .return_sites, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) + idx++; + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".return_sites", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .return_sites"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->return_thunk_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .return_sites"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_ibt_endbr_seal_sections(struct objtool_file *file) { struct instruction *insn; @@ -1083,6 +1130,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } +__weak bool arch_is_rethunk(struct symbol *sym) +{ + return false; +} + #define NEGATIVE_RELOC ((void *)-1L) static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) @@ -1250,6 +1302,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } +static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) +{ + /* + * Return thunk tail calls are really just returns in disguise, + * so convert them accordingly. + */ + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + + if (add) + list_add_tail(&insn->call_node, &file->return_thunk_list); +} + static bool same_function(struct instruction *insn1, struct instruction *insn2) { return insn1->func->pfunc == insn2->func->pfunc; @@ -1302,6 +1367,9 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->retpoline_thunk) { add_retpoline_call(file, insn); continue; + } else if (reloc->sym->return_thunk) { + add_return_call(file, insn, true); + continue; } else if (insn->func) { /* * External sibling call or internal sibling call with @@ -1320,6 +1388,21 @@ static int add_jump_destinations(struct objtool_file *file) jump_dest = find_insn(file, dest_sec, dest_off); if (!jump_dest) { + struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); + + /* + * This is a special case for zen_untrain_ret(). + * It jumps to __x86_return_thunk(), but objtool + * can't find the thunk's starting RET + * instruction, because the RET is also in the + * middle of another instruction. Objtool only + * knows about the outer instruction. + */ + if (sym && sym->return_thunk) { + add_return_call(file, insn, false); + continue; + } + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", insn->sec, insn->offset, dest_sec->name, dest_off); @@ -1949,16 +2032,35 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; - if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + if (hint->type == UNWIND_HINT_TYPE_SAVE) { + insn->hint = false; + insn->save = true; + continue; + } + + if (hint->type == UNWIND_HINT_TYPE_RESTORE) { + insn->restore = true; + continue; + } + + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); - if (sym && sym->bind == STB_GLOBAL && - insn->type != INSN_ENDBR && !insn->noendbr) { - WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", - insn->sec, insn->offset); + if (sym && sym->bind == STB_GLOBAL) { + if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) { + WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", + insn->sec, insn->offset); + } + + insn->entry = 1; } } + if (hint->type == UNWIND_HINT_TYPE_ENTRY) { + hint->type = UNWIND_HINT_TYPE_CALL; + insn->entry = 1; + } + if (hint->type == UNWIND_HINT_TYPE_FUNC) { insn->cfi = &func_cfi; continue; @@ -2032,8 +2134,10 @@ static int read_retpoline_hints(struct objtool_file *file) } if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) { - WARN_FUNC("retpoline_safe hint not an indirect jump/call", + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", insn->sec, insn->offset); return -1; } @@ -2184,6 +2288,9 @@ static int classify_symbols(struct objtool_file *file) if (arch_is_retpoline(func)) func->retpoline_thunk = true; + if (arch_is_rethunk(func)) + func->return_thunk = true; + if (!strcmp(func->name, "__fentry__")) func->fentry = true; @@ -3218,8 +3325,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - visited = 1 << state.uaccess; - if (insn->visited) { + visited = VISITED_BRANCH << state.uaccess; + if (insn->visited & VISITED_BRANCH_MASK) { if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) return 1; @@ -3233,6 +3340,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, state.instr += insn->instr; if (insn->hint) { + if (insn->restore) { + struct instruction *save_insn, *i; + + i = insn; + save_insn = NULL; + + sym_for_each_insn_continue_reverse(file, func, i) { + if (i->save) { + save_insn = i; + break; + } + } + + if (!save_insn) { + WARN_FUNC("no corresponding CFI save for CFI restore", + sec, insn->offset); + return 1; + } + + if (!save_insn->visited) { + WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", + sec, insn->offset); + return 1; + } + + insn->cfi = save_insn->cfi; + nr_cfi_reused++; + } + state.cfi = *insn->cfi; } else { /* XXX track if we actually changed state.cfi */ @@ -3433,6 +3569,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) return warnings; } +/* + * Validate rethunk entry constraint: must untrain RET before the first RET. + * + * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes + * before an actual RET instruction. + */ +static int validate_entry(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *next, *dest; + int ret, warnings = 0; + + for (;;) { + next = next_insn_to_validate(file, insn); + + if (insn->visited & VISITED_ENTRY) + return 0; + + insn->visited |= VISITED_ENTRY; + + if (!insn->ignore_alts && !list_empty(&insn->alts)) { + struct alternative *alt; + bool skip_orig = false; + + list_for_each_entry(alt, &insn->alts, list) { + if (alt->skip_orig) + skip_orig = true; + + ret = validate_entry(file, alt->insn); + if (ret) { + if (opts.backtrace) + BT_FUNC("(alt)", insn); + return ret; + } + } + + if (skip_orig) + return 0; + } + + switch (insn->type) { + + case INSN_CALL_DYNAMIC: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + WARN_FUNC("early indirect call", insn->sec, insn->offset); + return 1; + + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_CONDITIONAL: + if (!is_sibling_call(insn)) { + if (!insn->jump_dest) { + WARN_FUNC("unresolved jump target after linking?!?", + insn->sec, insn->offset); + return -1; + } + ret = validate_entry(file, insn->jump_dest); + if (ret) { + if (opts.backtrace) { + BT_FUNC("(branch%s)", insn, + insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); + } + return ret; + } + + if (insn->type == INSN_JUMP_UNCONDITIONAL) + return 0; + + break; + } + + /* fallthrough */ + case INSN_CALL: + dest = find_insn(file, insn->call_dest->sec, + insn->call_dest->offset); + if (!dest) { + WARN("Unresolved function after linking!?: %s", + insn->call_dest->name); + return -1; + } + + ret = validate_entry(file, dest); + if (ret) { + if (opts.backtrace) + BT_FUNC("(call)", insn); + return ret; + } + /* + * If a call returns without error, it must have seen UNTRAIN_RET. + * Therefore any non-error return is a success. + */ + return 0; + + case INSN_RETURN: + WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); + return 1; + + case INSN_NOP: + if (insn->retpoline_safe) + return 0; + break; + + default: + break; + } + + if (!next) { + WARN_FUNC("teh end!", insn->sec, insn->offset); + return -1; + } + insn = next; + } + + return warnings; +} + +/* + * Validate that all branches starting at 'insn->entry' encounter UNRET_END + * before RET. + */ +static int validate_unret(struct objtool_file *file) +{ + struct instruction *insn; + int ret, warnings = 0; + + for_each_insn(file, insn) { + if (!insn->entry) + continue; + + ret = validate_entry(file, insn); + if (ret < 0) { + WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); + return ret; + } + warnings += ret; + } + + return warnings; +} + static int validate_retpoline(struct objtool_file *file) { struct instruction *insn; @@ -3440,7 +3715,8 @@ static int validate_retpoline(struct objtool_file *file) for_each_insn(file, insn) { if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC) + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN) continue; if (insn->retpoline_safe) @@ -3455,9 +3731,17 @@ static int validate_retpoline(struct objtool_file *file) if (!strcmp(insn->sec->name, ".init.text") && !opts.module) continue; - WARN_FUNC("indirect %s found in RETPOLINE build", - insn->sec, insn->offset, - insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + if (insn->type == INSN_RETURN) { + if (opts.rethunk) { + WARN_FUNC("'naked' return found in RETHUNK build", + insn->sec, insn->offset); + } else + continue; + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, + insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); + } warnings++; } @@ -3826,8 +4110,7 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__bug_table") || !strcmp(sec->name, "__ex_table") || !strcmp(sec->name, "__jump_table") || - !strcmp(sec->name, "__mcount_loc") || - !strcmp(sec->name, "__tracepoints")) + !strcmp(sec->name, "__mcount_loc")) continue; list_for_each_entry(reloc, &sec->reloc->reloc_list, list) @@ -3946,6 +4229,17 @@ int check(struct objtool_file *file) warnings += ret; } + if (opts.unret) { + /* + * Must be after validate_branch() and friends, it plays + * further games with insn->visited. + */ + ret = validate_unret(file); + if (ret < 0) + return ret; + warnings += ret; + } + if (opts.ibt) { ret = validate_ibt(file); if (ret < 0) @@ -3974,6 +4268,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (opts.rethunk) { + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (opts.mcount) { ret = create_mcount_loc_sections(file); if (ret < 0) diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 9b19cc304195..beb2f3aa94ff 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -89,6 +89,7 @@ const char *arch_ret_insn(int len); int arch_decode_hint_reg(u8 sp_reg, int *base); bool arch_is_retpoline(struct symbol *sym); +bool arch_is_rethunk(struct symbol *sym); int arch_rewrite_retpolines(struct objtool_file *file); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 280ea18b7f2b..42a52f1a0add 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -19,6 +19,8 @@ struct opts { bool noinstr; bool orc; bool retpoline; + bool rethunk; + bool unret; bool sls; bool stackval; bool static_call; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index f10d7374f388..036129cebeee 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -46,16 +46,19 @@ struct instruction { enum insn_type type; unsigned long immediate; - u8 dead_end : 1, - ignore : 1, - ignore_alts : 1, - hint : 1, - retpoline_safe : 1, - noendbr : 1; - /* 2 bit hole */ + u16 dead_end : 1, + ignore : 1, + ignore_alts : 1, + hint : 1, + save : 1, + restore : 1, + retpoline_safe : 1, + noendbr : 1, + entry : 1; + /* 7 bit hole */ + s8 instr; u8 visited; - /* u8 hole */ struct alt_group *alt_group; struct symbol *call_dest; @@ -69,6 +72,11 @@ struct instruction { struct cfi_state *cfi; }; +#define VISITED_BRANCH 0x01 +#define VISITED_BRANCH_UACCESS 0x02 +#define VISITED_BRANCH_MASK 0x03 +#define VISITED_ENTRY 0x04 + static inline bool is_static_jump(struct instruction *insn) { return insn->type == INSN_JUMP_CONDITIONAL || diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index adebfbc2b518..16f4067b82ae 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -57,6 +57,7 @@ struct symbol { u8 uaccess_safe : 1; u8 static_call_tramp : 1; u8 retpoline_thunk : 1; + u8 return_thunk : 1; u8 fentry : 1; u8 profiling_func : 1; struct list_head pv_target; diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index a6e72d916807..7f2d1b095333 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -24,6 +24,7 @@ struct objtool_file { struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); struct list_head retpoline_call_list; + struct list_head return_thunk_list; struct list_head static_call_list; struct list_head mcount_loc_list; struct list_head endbr_list; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 512669ce064c..a7ecc32e3512 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -102,6 +102,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); INIT_LIST_HEAD(&file.retpoline_call_list); + INIT_LIST_HEAD(&file.return_thunk_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); INIT_LIST_HEAD(&file.endbr_list); diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index d811cff73597..0a26c243e6e9 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -140,12 +140,12 @@ int use_after_invalid(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); return 0; } @@ -338,7 +338,7 @@ int invalid_helper2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); return 0; } @@ -377,7 +377,7 @@ int invalid_write2(void *ctx) memcpy((void *)&ptr + 8, &x, sizeof(x)); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); @@ -473,7 +473,7 @@ int invalid_read2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index d67be48df4b2..a3a6103c8569 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -43,10 +43,10 @@ int test_read_write(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); /* Write data into the dynptr */ - err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); /* Read the data that was written into the dynptr */ - err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); /* Ensure the data we read matches the data we wrote */ for (i = 0; i < sizeof(read_data); i++) { diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 1257baa79286..892306bdb47d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -36,5 +36,6 @@ test_unix_oob gro ioam6_parser toeplitz +tun cmsg_sender unix_connect
\ No newline at end of file diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ddad703ace34..db05b3764b77 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh new file mode 100755 index 000000000000..b7b928b38ce4 --- /dev/null +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# ns: h1 | ns: h2 +# 192.168.0.1/24 | +# eth0 | +# | 192.168.1.1/32 +# veth0 <---|---> veth1 +# Validate source address selection for route without gateway + +PAUSE_ON_FAIL=no +VERBOSE=0 +ret=0 + +################################################################################ +# helpers + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# config +setup() +{ + ip netns add h1 + ip -n h1 link set lo up + ip netns add h2 + ip -n h2 link set lo up + + # Add a fake eth0 to support an ip address + ip -n h1 link add name eth0 type dummy + ip -n h1 link set eth0 up + ip -n h1 address add 192.168.0.1/24 dev eth0 + + # Configure veths (same @mac, arp off) + ip -n h1 link add name veth0 type veth peer name veth1 netns h2 + ip -n h1 link set veth0 up + + ip -n h2 link set veth1 up + + # Configure @IP in the peer netns + ip -n h2 address add 192.168.1.1/32 dev veth1 + ip -n h2 route add default dev veth1 + + # Add a nexthop without @gw and use it in a route + ip -n h1 nexthop add id 1 dev veth0 + ip -n h1 route add 192.168.1.1 nhid 1 +} + +cleanup() +{ + ip netns del h1 2>/dev/null + ip netns del h2 2>/dev/null +} + +trap cleanup EXIT + +################################################################################ +# main + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + esac +done + +cleanup +setup + +run_cmd ip -netns h1 route get 192.168.1.1 +log_test $? 0 "nexthop: get route with nexthop without gw" +run_cmd ip netns exec h1 ping -c1 192.168.1.1 +log_test $? 0 "nexthop: ping through nexthop without gw" + +exit $ret diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 669ffd6f2a68..a9c5c1be5088 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -38,6 +38,7 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + local_termination.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -53,6 +54,7 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f905d5358e68..48a99e1453e1 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh + simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq |