diff options
520 files changed, 8169 insertions, 5634 deletions
@@ -422,6 +422,7 @@ Tony Luck <tony.luck@intel.com> TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org> TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn> Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com> +Tudor Ambarus <tudor.ambarus@linaro.org> <tudor.ambarus@microchip.com> Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws> Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com> Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de> diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 96fe10ec6c24..f8d0a7288c73 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -369,6 +369,16 @@ Before jumping into the kernel, the following conditions must be met: - HCR_EL2.ATA (bit 56) must be initialised to 0b1. + For CPUs with the Scalable Matrix Extension version 2 (FEAT_SME2): + + - If EL3 is present: + + - SMCR_EL3.EZT0 (bit 30) must be initialised to 0b1. + + - If the kernel is entered at EL1 and EL2 is present: + + - SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. Where the values documented diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 6fed84f935df..8a9d4bf7daf4 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -284,6 +284,24 @@ HWCAP2_RPRFM HWCAP2_SVE2P1 Functionality implied by ID_AA64ZFR0_EL1.SVEver == 0b0010. +HWCAP2_SME2 + Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0001. + +HWCAP2_SME2P1 + Functionality implied by ID_AA64SMFR0_EL1.SMEver == 0b0010. + +HWCAP2_SMEI16I32 + Functionality implied by ID_AA64SMFR0_EL1.I16I32 == 0b0101 + +HWCAP2_SMEBI32I32 + Functionality implied by ID_AA64SMFR0_EL1.BI32I32 == 0b1 + +HWCAP2_SMEB16B16 + Functionality implied by ID_AA64SMFR0_EL1.B16B16 == 0b1 + +HWCAP2_SMEF16F16 + Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1 + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 808ade4cc008..ec5f889d7681 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -120,6 +120,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A715 | #2645198 | ARM64_ERRATUM_2645198 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | diff --git a/Documentation/arm64/sme.rst b/Documentation/arm64/sme.rst index 16d2db4c2e2e..68d1efb7d171 100644 --- a/Documentation/arm64/sme.rst +++ b/Documentation/arm64/sme.rst @@ -18,14 +18,19 @@ model features for SME is included in Appendix A. 1. General ----------- -* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA - register state and TPIDR2_EL0 are tracked per thread. +* PSTATE.SM, PSTATE.ZA, the streaming mode vector length, the ZA and (when + present) ZTn register state and TPIDR2_EL0 are tracked per thread. * The presence of SME is reported to userspace via HWCAP2_SME in the aux vector AT_HWCAP2 entry. Presence of this flag implies the presence of the SME instructions and registers, and the Linux-specific system interfaces described in this document. SME is reported in /proc/cpuinfo as "sme". +* The presence of SME2 is reported to userspace via HWCAP2_SME2 in the + aux vector AT_HWCAP2 entry. Presence of this flag implies the presence of + the SME2 instructions and ZT0, and the Linux-specific system interfaces + described in this document. SME2 is reported in /proc/cpuinfo as "sme2". + * Support for the execution of SME instructions in userspace can also be detected by reading the CPU ID register ID_AA64PFR1_EL1 using an MRS instruction, and checking that the value of the SME field is nonzero. [3] @@ -44,6 +49,7 @@ model features for SME is included in Appendix A. HWCAP2_SME_B16F32 HWCAP2_SME_F32F32 HWCAP2_SME_FA64 + HWCAP2_SME2 This list may be extended over time as the SME architecture evolves. @@ -52,8 +58,8 @@ model features for SME is included in Appendix A. cpu-feature-registers.txt for details. * Debuggers should restrict themselves to interacting with the target via the - NT_ARM_SVE, NT_ARM_SSVE and NT_ARM_ZA regsets. The recommended way - of detecting support for these regsets is to connect to a target process + NT_ARM_SVE, NT_ARM_SSVE, NT_ARM_ZA and NT_ARM_ZT regsets. The recommended + way of detecting support for these regsets is to connect to a target process first and then attempt a ptrace(PTRACE_GETREGSET, pid, NT_ARM_<regset>, &iov). @@ -89,13 +95,13 @@ be zeroed. ------------------------- * On syscall PSTATE.ZA is preserved, if PSTATE.ZA==1 then the contents of the - ZA matrix are preserved. + ZA matrix and ZTn (if present) are preserved. * On syscall PSTATE.SM will be cleared and the SVE registers will be handled as per the standard SVE ABI. -* Neither the SVE registers nor ZA are used to pass arguments to or receive - results from any syscall. +* None of the SVE registers, ZA or ZTn are used to pass arguments to + or receive results from any syscall. * On process creation (eg, clone()) the newly created process will have PSTATE.SM cleared. @@ -134,6 +140,14 @@ be zeroed. __reserved[] referencing this space. za_context is then written in the extra space. Refer to [1] for further details about this mechanism. +* If ZTn is supported and PSTATE.ZA==1 then a signal frame record for ZTn will + be generated. + +* The signal record for ZTn has magic ZT_MAGIC (0x5a544e01) and consists of a + standard signal frame header followed by a struct zt_context specifying + the number of ZTn registers supported by the system, then zt_context.nregs + blocks of 64 bytes of data per register. + 5. Signal return ----------------- @@ -151,6 +165,9 @@ When returning from a signal handler: the signal frame does not match the current vector length, the signal return attempt is treated as illegal, resulting in a forced SIGSEGV. +* If ZTn is not supported or PSTATE.ZA==0 then it is illegal to have a + signal frame record for ZTn, resulting in a forced SIGSEGV. + 6. prctl extensions -------------------- @@ -214,8 +231,8 @@ prctl(PR_SME_SET_VL, unsigned long arg) vector length that will be applied at the next execve() by the calling thread. - * Changing the vector length causes all of ZA, P0..P15, FFR and all bits of - Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become + * Changing the vector length causes all of ZA, ZTn, P0..P15, FFR and all + bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become unspecified, including both streaming and non-streaming SVE state. Calling PR_SME_SET_VL with vl equal to the thread's current vector length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag, @@ -317,6 +334,15 @@ The regset data starts with struct user_za_header, containing: * The effect of writing a partial, incomplete payload is unspecified. +* A new regset NT_ARM_ZT is defined for access to ZTn state via + PTRACE_GETREGSET and PTRACE_SETREGSET. + +* The NT_ARM_ZT regset consists of a single 512 bit register. + +* When PSTATE.ZA==0 reads of NT_ARM_ZT will report all bits of ZTn as 0. + +* Writes to NT_ARM_ZT will set PSTATE.ZA to 1. + 8. ELF coredump extensions --------------------------- @@ -331,6 +357,11 @@ The regset data starts with struct user_za_header, containing: been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread when the coredump was generated. +* A NT_ARM_ZT note will be added to each coredump for each thread of the + dumped process. The contents will be equivalent to the data that would have + been read if a PTRACE_GETREGSET of NT_ARM_ZT were executed for each thread + when the coredump was generated. + * The NT_ARM_TLS note will be extended to two registers, the second register will contain TPIDR2_EL0 on systems that support SME and will be read as zero with writes ignored otherwise. @@ -406,6 +437,9 @@ In A64 state, SME adds the following: For best system performance it is strongly encouraged for software to enable ZA only when it is actively being used. +* A new ZT0 register is introduced when SME2 is present. This is a 512 bit + register which is accessible when PSTATE.ZA is set, as ZA itself is. + * Two new 1 bit fields in PSTATE which may be controlled via the SMSTART and SMSTOP instructions or by access to the SVCR system register: diff --git a/Documentation/conf.py b/Documentation/conf.py index a5c45df0bd83..d927737e3c10 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -31,6 +31,12 @@ def have_command(cmd): # Get Sphinx version major, minor, patch = sphinx.version_info[:3] +# +# Warn about older versions that we don't want to support for much +# longer. +# +if (major < 2) or (major == 2 and minor < 4): + print('WARNING: support for Sphinx < 2.4 will be removed soon.') # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -339,7 +345,11 @@ html_use_smartypants = False # Custom sidebar templates, maps document names to template names. # Note that the RTD theme ignores this -html_sidebars = { '**': ["about.html", 'searchbox.html', 'localtoc.html', 'sourcelink.html']} +html_sidebars = { '**': ['searchbox.html', 'localtoc.html', 'sourcelink.html']} + +# about.html is available for alabaster theme. Add it at the front. +if html_theme == 'alabaster': + html_sidebars['**'].insert(0, 'about.html') # Output file base name for HTML help builder. htmlhelp_basename = 'TheLinuxKerneldoc' diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml index 903b31129f01..99e159bc5fb1 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml @@ -54,6 +54,17 @@ properties: - const: xo - const: alternate + interrupts: + minItems: 1 + maxItems: 3 + + interrupt-names: + minItems: 1 + items: + - const: dcvsh-irq-0 + - const: dcvsh-irq-1 + - const: dcvsh-irq-2 + '#freq-domain-cells': const: 1 diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml index 0ccaab16dc61..0b7383b3106b 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml index 5163c51b4547..ee2ffb034325 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml index fcc5adf03cad..3d6ed24b1b00 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> properties: compatible: diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index f2c143730a55..6e2fd6e9fa7f 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -32,7 +32,7 @@ properties: - description: Display byte clock - description: Display byte interface clock - description: Display pixel clock - - description: Display escape clock + - description: Display core clock - description: Display AHB clock - description: Display AXI clock @@ -137,8 +137,6 @@ required: - phys - assigned-clocks - assigned-clock-parents - - power-domains - - operating-points-v2 - ports additionalProperties: false diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml index d9ad8b659f58..3ec466c3ab38 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml @@ -69,7 +69,6 @@ required: - compatible - reg - reg-names - - vdds-supply unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml index 819de5ce0bc9..a43e11d3b00d 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml @@ -39,7 +39,6 @@ required: - compatible - reg - reg-names - - vcca-supply unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml index 3d8540a06fe2..2f1fd140c87d 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml @@ -34,6 +34,10 @@ properties: vddio-supply: description: Phandle to vdd-io regulator device node. + qcom,dsi-phy-regulator-ldo-mode: + type: boolean + description: Indicates if the LDO mode PHY regulator is wanted. + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml index d6f043a4b08d..4795e13c7b59 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,qcm2290-mdss.yaml @@ -72,7 +72,7 @@ examples: #include <dt-bindings/interconnect/qcom,qcm2290.h> #include <dt-bindings/power/qcom-rpmpd.h> - mdss@5e00000 { + display-subsystem@5e00000 { #address-cells = <1>; #size-cells = <1>; compatible = "qcom,qcm2290-mdss"; diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml index a86d7f53fa84..886858ef6700 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6115-mdss.yaml @@ -62,7 +62,7 @@ examples: #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/power/qcom-rpmpd.h> - mdss@5e00000 { + display-subsystem@5e00000 { #address-cells = <1>; #size-cells = <1>; compatible = "qcom,sm6115-mdss"; diff --git a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml index 9d3139990237..aa23b0024c46 100644 --- a/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml +++ b/Documentation/devicetree/bindings/sound/mt8186-mt6366-rt1019-rt5682s.yaml @@ -16,6 +16,7 @@ properties: compatible: enum: - mediatek,mt8186-mt6366-rt1019-rt5682s-sound + - mediatek,mt8186-mt6366-rt5682s-max98360-sound mediatek,platform: $ref: "/schemas/types.yaml#/definitions/phandle" diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml index 66431aade3b7..da5f70910da5 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml @@ -30,7 +30,9 @@ properties: const: 0 clocks: - maxItems: 5 + oneOf: + - maxItems: 3 + - maxItems: 5 clock-names: oneOf: diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml index 2bf8d082f8f1..66cbb1f5e31a 100644 --- a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml @@ -9,9 +9,6 @@ title: LPASS(Low Power Audio Subsystem) VA Macro audio codec maintainers: - Srinivas Kandagatla <srinivas.kandagatla@linaro.org> -allOf: - - $ref: dai-common.yaml# - properties: compatible: enum: @@ -30,15 +27,12 @@ properties: const: 0 clocks: - maxItems: 5 + minItems: 5 + maxItems: 6 clock-names: - items: - - const: mclk - - const: npl - - const: macro - - const: dcodec - - const: fsgen + minItems: 5 + maxItems: 6 clock-output-names: maxItems: 1 @@ -55,10 +49,51 @@ required: - reg - "#sound-dai-cells" +allOf: + - $ref: dai-common.yaml# + + - if: + properties: + compatible: + enum: + - qcom,sc7280-lpass-wsa-macro + - qcom,sm8450-lpass-wsa-macro + - qcom,sc8280xp-lpass-wsa-macro + then: + properties: + clocks: + maxItems: 5 + clock-names: + items: + - const: mclk + - const: npl + - const: macro + - const: dcodec + - const: fsgen + + - if: + properties: + compatible: + enum: + - qcom,sm8250-lpass-wsa-macro + then: + properties: + clocks: + minItems: 6 + clock-names: + items: + - const: mclk + - const: npl + - const: macro + - const: dcodec + - const: va + - const: fsgen + unevaluatedProperties: false examples: - | + #include <dt-bindings/clock/qcom,sm8250-lpass-aoncc.h> #include <dt-bindings/sound/qcom,q6afe.h> codec@3240000 { compatible = "qcom,sm8250-lpass-wsa-macro"; @@ -69,7 +104,8 @@ examples: <&audiocc 0>, <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, <&q6afecc LPASS_HW_DCODEC_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>, + <&aoncc LPASS_CDC_VA_MCLK>, <&vamacro>; - clock-names = "mclk", "npl", "macro", "dcodec", "fsgen"; + clock-names = "mclk", "npl", "macro", "dcodec", "va", "fsgen"; clock-output-names = "mclk"; }; diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml index 4dd973e341e6..6c57dd6c3a36 100644 --- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel SPI device maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml index 1d493add4053..b0d99bc10535 100644 --- a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Quad Serial Peripheral Interface (QSPI) maintainers: - - Tudor Ambarus <tudor.ambarus@microchip.com> + - Tudor Ambarus <tudor.ambarus@linaro.org> allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index ead2cccf658f..9a60c0664bbe 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -44,9 +44,9 @@ properties: description: Maximum SPI clocking speed of the device in Hz. - spi-cs-setup-ns: + spi-cs-setup-delay-ns: description: - Delay in nanosecods to be introduced by the controller after CS is + Delay in nanoseconds to be introduced by the controller after CS is asserted. spi-rx-bus-width: diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst index 39494a6ea739..e1af54424192 100644 --- a/Documentation/networking/rxrpc.rst +++ b/Documentation/networking/rxrpc.rst @@ -880,8 +880,8 @@ The kernel interface functions are as follows: notify_end_rx can be NULL or it can be used to specify a function to be called when the call changes state to end the Tx phase. This function is - called with the call-state spinlock held to prevent any reply or final ACK - from being delivered first. + called with a spinlock held to prevent the last DATA packet from being + transmitted until the function returns. (#) Receive data from a call:: diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py index eeb394b39e2c..8b416bfd75ac 100644 --- a/Documentation/sphinx/load_config.py +++ b/Documentation/sphinx/load_config.py @@ -3,7 +3,7 @@ import os import sys -from sphinx.util.pycompat import execfile_ +from sphinx.util.osutil import fs_encoding # ------------------------------------------------------------------------------ def loadConfig(namespace): @@ -48,7 +48,9 @@ def loadConfig(namespace): sys.stdout.write("load additional sphinx-config: %s\n" % config_file) config = namespace.copy() config['__file__'] = config_file - execfile_(config_file, config) + with open(config_file, 'rb') as f: + code = compile(f.read(), fs_encoding, 'exec') + exec(code, config) del config['__file__'] namespace.update(config) else: diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index deb494f759ed..9807b05a1b57 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an mmap() that affects the region will be made visible immediately. Another example is madvise(MADV_DROP). +Note: On arm64, a write generated by the page-table walker (to update +the Access and Dirty flags, for example) never results in a +KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This +is because KVM cannot provide the data that would be written by the +page-table walker, making it impossible to emulate the access. +Instead, an abort (data abort if the cause of the page-table update +was a load or a store, instruction abort if it was an instruction +fetch) is injected in the guest. 4.36 KVM_SET_TSS_ADDR --------------------- @@ -8310,6 +8318,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID`` It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel has enabled in-kernel emulation of the local APIC. +CPU topology +~~~~~~~~~~~~ + +Several CPUID values include topology information for the host CPU: +0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different +versions of KVM return different values for this information and userspace +should not rely on it. Currently they return all zeroes. + +If userspace wishes to set up a guest topology, it should be careful that +the values of these three leaves differ for each CPU. In particular, +the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX +for 0x8000001e; the latter also encodes the core id and node id in bits +7:0 of EBX and ECX respectively. + Obsolete ioctls and capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index a3ca76f9be75..14c4e9fa501d 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -9,6 +9,8 @@ KVM Lock Overview The acquisition orders for mutexes are as follows: +- cpus_read_lock() is taken outside kvm_lock + - kvm->lock is taken outside vcpu->mutex - kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock @@ -24,21 +26,22 @@ The acquisition orders for mutexes are as follows: For SRCU: -- ``synchronize_srcu(&kvm->srcu)`` is called _inside_ - the kvm->slots_lock critical section, therefore kvm->slots_lock - cannot be taken inside a kvm->srcu read-side critical section. - Instead, kvm->slots_arch_lock is released before the call - to ``synchronize_srcu()`` and _can_ be taken inside a - kvm->srcu read-side critical section. +- ``synchronize_srcu(&kvm->srcu)`` is called inside critical sections + for kvm->lock, vcpu->mutex and kvm->slots_lock. These locks _cannot_ + be taken inside a kvm->srcu read-side critical section; that is, the + following is broken:: + + srcu_read_lock(&kvm->srcu); + mutex_lock(&kvm->slots_lock); -- kvm->lock is taken inside kvm->srcu, therefore - ``synchronize_srcu(&kvm->srcu)`` cannot be called inside - a kvm->lock critical section. If you cannot delay the - call until after kvm->lock is released, use ``call_srcu``. +- kvm->slots_arch_lock instead is released before the call to + ``synchronize_srcu()``. It _can_ therefore be taken inside a + kvm->srcu read-side critical section, for example while processing + a vmexit. On x86: -- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock +- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock and kvm->arch.xen.xen_lock - kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and @@ -225,15 +228,10 @@ time it will be set using the Dirty tracking mechanism described above. :Type: mutex :Arch: any :Protects: - vm_list - -``kvm_count_lock`` -^^^^^^^^^^^^^^^^^^ - -:Type: raw_spinlock_t -:Arch: any -:Protects: - hardware virtualization enable/disable -:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt - migration. + - kvm_usage_count + - hardware virtualization enable/disable +:Comment: KVM also disables CPU hotplug via cpus_read_lock() during + enable/disable. ``kvm->mn_invalidate_lock`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -291,3 +289,13 @@ time it will be set using the Dirty tracking mechanism described above. wakeup notification event since external interrupts from the assigned devices happens, we will find the vCPU on the list to wakeup. + +``vendor_module_lock`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:Type: mutex +:Arch: x86 +:Protects: loading a vendor module (kvm_amd or kvm_intel) +:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is + taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and + many operations need to take cpu_hotplug_lock when loading a vendor module, + e.g. updating static calls. diff --git a/MAINTAINERS b/MAINTAINERS index a36df9ed283d..9c2f78f8c383 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11355,13 +11355,12 @@ F: virt/kvm/* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Marc Zyngier <maz@kernel.org> +M: Oliver Upton <oliver.upton@linux.dev> R: James Morse <james.morse@arm.com> -R: Alexandru Elisei <alexandru.elisei@arm.com> R: Suzuki K Poulose <suzuki.poulose@arm.com> -R: Oliver Upton <oliver.upton@linux.dev> +R: Zenghui Yu <yuzenghui@huawei.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.linux.dev -L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git F: arch/arm64/include/asm/kvm* @@ -13620,7 +13619,7 @@ F: arch/microblaze/ MICROCHIP AT91 DMA DRIVERS M: Ludovic Desroches <ludovic.desroches@microchip.com> -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: dmaengine@vger.kernel.org S: Supported @@ -13665,7 +13664,7 @@ F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml F: drivers/media/platform/microchip/microchip-csi2dc.c MICROCHIP ECC DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/atmel-ecc.* @@ -13762,7 +13761,7 @@ S: Maintained F: drivers/mmc/host/atmel-mci.c MICROCHIP NAND DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> L: linux-mtd@lists.infradead.org S: Supported F: Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -13814,7 +13813,7 @@ S: Supported F: drivers/power/reset/at91-sama5d2_shdwc.c MICROCHIP SPI DRIVER -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> S: Supported F: drivers/spi/spi-atmel.* @@ -14919,7 +14918,8 @@ T: git://git.infradead.org/nvme.git F: Documentation/nvme/ F: drivers/nvme/host/ F: drivers/nvme/common/ -F: include/linux/nvme* +F: include/linux/nvme.h +F: include/linux/nvme-*.h F: include/uapi/linux/nvme_ioctl.h NVM EXPRESS FABRICS AUTHENTICATION @@ -19672,7 +19672,7 @@ F: drivers/clk/spear/ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus <tudor.ambarus@microchip.com> +M: Tudor Ambarus <tudor.ambarus@linaro.org> M: Pratyush Yadav <pratyush@kernel.org> R: Michael Walle <michael@walle.cc> L: linux-mtd@lists.infradead.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 03934808b2ed..c5ccca26a408 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -184,8 +184,6 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_ARGS \ - if $(cc-option,-fpatchable-function-entry=2) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \ if DYNAMIC_FTRACE_WITH_ARGS select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -972,6 +970,22 @@ config ARM64_ERRATUM_2457168 If unsure, say Y. +config ARM64_ERRATUM_2645198 + bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption" + default y + help + This option adds the workaround for ARM Cortex-A715 erratum 2645198. + + If a Cortex-A715 cpu sees a page mapping permissions change from executable + to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the + next instruction abort caused by permission fault. + + Only user-space does executable to non-executable permission transition via + mprotect() system call. Workaround the problem by doing a break-before-make + TLB invalidation, for all changes to executable user space mappings. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 0890e4f568fb..cbb3d961123b 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \ \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 52075e93de6c..a94d6dacc029 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(unsigned long *)ptr) \ + [v] "+Q" (*(__uint128_t *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ : cl); \ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index c0b178d1bb4f..a51e6e8f3171 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -16,6 +16,15 @@ #define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7) #define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7) +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */ +#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT) + /* * Memory returned by kmalloc() may be used for DMA, so we must make * sure that all such allocations are cache aligned. Otherwise, diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 03d1c9d7af82..fc2c739f48f1 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -769,6 +769,12 @@ static __always_inline bool system_supports_sme(void) cpus_have_const_cap(ARM64_SME); } +static __always_inline bool system_supports_sme2(void) +{ + return IS_ENABLED(CONFIG_ARM64_SME) && + cpus_have_const_cap(ARM64_SME2); +} + static __always_inline bool system_supports_fa64(void) { return IS_ENABLED(CONFIG_ARM64_SME) && diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 4e8b66c74ea2..683ca3af4084 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -124,6 +124,8 @@ #define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 #define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 #define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 +#define APPLE_CPU_PART_M2_BLIZZARD 0x032 +#define APPLE_CPU_PART_M2_AVALANCHE 0x033 #define AMPERE_CPU_PART_AMPERE1 0xAC3 @@ -177,6 +179,8 @@ #define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) #define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) +#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD) +#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 668569adf4d3..ea78c095a9c7 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -196,4 +196,103 @@ __init_el2_nvhe_prepare_eret .endm +#ifndef __KVM_NVHE_HYPERVISOR__ +// This will clobber tmp1 and tmp2, and expect tmp1 to contain +// the id register value as read from the HW +.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2 + ubfx \tmp1, \tmp1, #\fld, #\width + cbz \tmp1, \fail + + adr_l \tmp1, \idreg\()_override + ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET] + ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET] + ubfx \tmp2, \tmp2, #\fld, #\width + ubfx \tmp1, \tmp1, #\fld, #\width + cmp \tmp1, xzr + and \tmp2, \tmp2, \tmp1 + csinv \tmp2, \tmp2, xzr, ne + cbnz \tmp2, \pass + b \fail +.endm + +// This will clobber tmp1 and tmp2 +.macro check_override idreg, fld, pass, fail, tmp1, tmp2 + mrs \tmp1, \idreg\()_el1 + __check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2 +.endm +#else +// This will clobber tmp +.macro __check_override idreg, fld, width, pass, fail, tmp, ignore + ldr_l \tmp, \idreg\()_el1_sys_val + ubfx \tmp, \tmp, #\fld, #\width + cbnz \tmp, \pass + b \fail +.endm + +.macro check_override idreg, fld, pass, fail, tmp, ignore + __check_override \idreg \fld 4 \pass \fail \tmp \ignore +.endm +#endif + +.macro finalise_el2_state + check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 + +.Linit_sve_\@: /* SVE register access */ + mrs x0, cptr_el2 // Disable SVE traps + bic x0, x0, #CPTR_EL2_TZ + msr cptr_el2, x0 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. + +.Lskip_sve_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2 + +.Linit_sme_\@: /* SME register access and priority mapping */ + mrs x0, cptr_el2 // Disable SME traps + bic x0, x0, #CPTR_EL2_TSM + msr cptr_el2, x0 + isb + + mrs x1, sctlr_el2 + orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps + msr sctlr_el2, x1 + isb + + mov x0, #0 // SMCR controls + + // Full FP in SM? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2 + +.Linit_sme_fa64_\@: + orr x0, x0, SMCR_ELx_FA64_MASK +.Lskip_sme_fa64_\@: + + // ZT0 available? + mrs_s x1, SYS_ID_AA64SMFR0_EL1 + __check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2 +.Linit_sme_zt0_\@: + orr x0, x0, SMCR_ELx_EZT0_MASK +.Lskip_sme_zt0_\@: + + orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector + msr_s SYS_SMCR_EL2, x0 // length for EL1. + + mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? + ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 + cbz x1, .Lskip_sme_\@ + + msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal + + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x1, .Lskip_sme_\@ + + mrs_s x1, SYS_HCRX_EL2 + orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping + msr_s SYS_HCRX_EL2, x1 +.Lskip_sme_\@: +.endm + #endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 15b34fbfca66..c9f15b9e3c71 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -114,6 +114,15 @@ #define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW0 (0x14) +#define ESR_ELx_FSC_SEA_TTW1 (0x15) +#define ESR_ELx_FSC_SEA_TTW2 (0x16) +#define ESR_ELx_FSC_SEA_TTW3 (0x17) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW0 (0x1c) +#define ESR_ELx_FSC_SECC_TTW1 (0x1d) +#define ESR_ELx_FSC_SECC_TTW2 (0x1e) +#define ESR_ELx_FSC_SECC_TTW3 (0x1f) /* ISS field definitions for Data Aborts */ #define ESR_ELx_ISV_SHIFT (24) @@ -341,6 +350,7 @@ #define ESR_ELx_SME_ISS_ILL 1 #define ESR_ELx_SME_ISS_SM_DISABLED 2 #define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_ZT_DISABLED 4 #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index e6fa1e2982c8..67f2fb781f59 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void); struct cpu_fp_state { struct user_fpsimd_state *st; void *sve_state; - void *za_state; + void *sme_state; u64 *svcr; unsigned int sve_vl; unsigned int sme_vl; @@ -105,6 +105,13 @@ static inline void *sve_pffr(struct thread_struct *thread) return (char *)thread->sve_state + sve_ffr_offset(vl); } +static inline void *thread_zt_state(struct thread_struct *thread) +{ + /* The ZT register state is stored immediately after the ZA state */ + unsigned int sme_vq = sve_vq_from_vl(thread_get_sme_vl(thread)); + return thread->sme_state + ZA_SIG_REGS_SIZE(sme_vq); +} + extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, int restore_ffr); @@ -112,12 +119,13 @@ extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); extern void sme_set_vq(unsigned long vq_minus_1); -extern void za_save_state(void *state); -extern void za_load_state(void const *state); +extern void sme_save_state(void *state, int zt); +extern void sme_load_state(void const *state, int zt); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); @@ -355,14 +363,20 @@ extern int sme_get_current_vl(void); /* * Return how many bytes of memory are required to store the full SME - * specific state (currently just ZA) for task, given task's currently - * configured vector length. + * specific state for task, given task's currently configured vector + * length. */ -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { unsigned int vl = task_get_sme_vl(task); + size_t size; + + size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); + + if (system_supports_sme2()) + size += ZT_SIG_REG_SIZE; - return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl)); + return size; } #else @@ -382,7 +396,7 @@ static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } -static inline size_t za_state_size(struct task_struct const *task) +static inline size_t sme_state_size(struct task_struct const *task) { return 0; } diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 5e0910cf4832..cd03819a3b68 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -221,6 +221,28 @@ .endm /* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + _check_general_reg \nx + .inst 0xe11f8000 \ + | (\nx << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + _check_general_reg \nx + .inst 0xe13f8000 \ + | (\nx << 5) +.endm + +/* * Zero the entire ZA array * ZERO ZA */ diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index d20f5da2d76f..6a4a1ab8eb23 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -49,6 +49,15 @@ extern pte_t huge_ptep_get(pte_t *ptep); void __init arm64_hugetlb_cma_reserve(void); +#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start +extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit +extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); + #include <asm-generic/hugetlb.h> #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 06dd12c514e6..475c803ecf42 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -123,6 +123,12 @@ #define KERNEL_HWCAP_CSSC __khwcap2_feature(CSSC) #define KERNEL_HWCAP_RPRFM __khwcap2_feature(RPRFM) #define KERNEL_HWCAP_SVE2P1 __khwcap2_feature(SVE2P1) +#define KERNEL_HWCAP_SME2 __khwcap2_feature(SME2) +#define KERNEL_HWCAP_SME2P1 __khwcap2_feature(SME2P1) +#define KERNEL_HWCAP_SME_I16I32 __khwcap2_feature(SME_I16I32) +#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) +#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) +#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index fb6e32e23450..5a4b2342d571 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -81,11 +81,12 @@ * SWIO: Turn set/way invalidates into set/way clean+invalidate * PTW: Take a stage2 fault if a stage1 walk steps in device memory * TID3: Trap EL1 reads of group 3 ID registers + * TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1 */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 ) + HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) @@ -319,21 +320,6 @@ BIT(18) | \ GENMASK(16, 15)) -/* For compatibility with fault code shared with 32-bit */ -#define FSC_FAULT ESR_ELx_FSC_FAULT -#define FSC_ACCESS ESR_ELx_FSC_ACCESS -#define FSC_PERM ESR_ELx_FSC_PERM -#define FSC_SEA ESR_ELx_FSC_EXTABT -#define FSC_SEA_TTW0 (0x14) -#define FSC_SEA_TTW1 (0x15) -#define FSC_SEA_TTW2 (0x16) -#define FSC_SEA_TTW3 (0x17) -#define FSC_SECC (0x18) -#define FSC_SECC_TTW0 (0x1c) -#define FSC_SECC_TTW1 (0x1d) -#define FSC_SECC_TTW2 (0x1e) -#define FSC_SECC_TTW3 (0x1f) - /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) /* diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 9bdba47f7e14..b1134af843bd 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -88,10 +88,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_RW; - if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || - vcpu_el1_is_32bit(vcpu)) - vcpu->arch.hcr_el2 |= HCR_TID2; - if (kvm_has_mte(vcpu->kvm)) vcpu->arch.hcr_el2 |= HCR_ATA; } @@ -349,16 +345,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { - case FSC_SEA: - case FSC_SEA_TTW0: - case FSC_SEA_TTW1: - case FSC_SEA_TTW2: - case FSC_SEA_TTW3: - case FSC_SECC: - case FSC_SECC_TTW0: - case FSC_SECC_TTW1: - case FSC_SECC_TTW2: - case FSC_SECC_TTW3: + case ESR_ELx_FSC_EXTABT: + case ESR_ELx_FSC_SEA_TTW0: + case ESR_ELx_FSC_SEA_TTW1: + case ESR_ELx_FSC_SEA_TTW2: + case ESR_ELx_FSC_SEA_TTW3: + case ESR_ELx_FSC_SECC: + case ESR_ELx_FSC_SECC_TTW0: + case ESR_ELx_FSC_SECC_TTW1: + case ESR_ELx_FSC_SECC_TTW2: + case ESR_ELx_FSC_SECC_TTW3: return true; default: return false; @@ -373,8 +369,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_abt_iss1tw(vcpu)) - return true; + if (kvm_vcpu_abt_iss1tw(vcpu)) { + /* + * Only a permission fault on a S1PTW should be + * considered as a write. Otherwise, page tables baked + * in a read-only memslot will result in an exception + * being delivered in the guest. + * + * The drawback is that we end-up faulting twice if the + * guest is using any of HW AF/DB: a translation fault + * to map the page containing the PT (read only at + * first), then a permission fault to allow the flags + * to be set. + */ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case ESR_ELx_FSC_PERM: + return true; + default: + return false; + } + } if (kvm_vcpu_trap_is_iabt(vcpu)) return false; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 37766e57c8f2..b14a0199eba4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,8 +66,8 @@ enum kvm_mode kvm_get_mode(void); DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -extern unsigned int kvm_sve_max_vl; -int kvm_arm_init_sve(void); +extern unsigned int __ro_after_init kvm_sve_max_vl; +int __init kvm_arm_init_sve(void); u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -252,6 +252,7 @@ struct kvm_vcpu_fault_info { enum vcpu_sysreg { __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ + CLIDR_EL1, /* Cache Level ID Register */ CSSELR_EL1, /* Cache Size Selection Register */ SCTLR_EL1, /* System Control Register */ ACTLR_EL1, /* Auxiliary Control Register */ @@ -501,6 +502,9 @@ struct kvm_vcpu_arch { u64 last_steal; gpa_t base; } steal; + + /* Per-vcpu CCSIDR override or NULL */ + u32 *ccsidr; }; /* @@ -705,7 +709,6 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) return false; switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; @@ -750,7 +753,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) return false; switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; @@ -877,7 +879,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu); void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); -int kvm_sys_reg_table_init(void); +int __init kvm_sys_reg_table_init(void); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); @@ -908,9 +910,9 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -extern unsigned int kvm_arm_vmid_bits; -int kvm_arm_vmid_alloc_init(void); -void kvm_arm_vmid_alloc_free(void); +extern unsigned int __ro_after_init kvm_arm_vmid_bits; +int __init kvm_arm_vmid_alloc_init(void); +void __init kvm_arm_vmid_alloc_free(void); void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); @@ -943,7 +945,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void) void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); -static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} @@ -994,7 +995,7 @@ static inline void kvm_clr_pmu_events(u32 clr) {} void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); -int kvm_set_ipa_limit(void); +int __init kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 6797eafe7890..bdd9cf546d95 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -122,6 +122,7 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val); extern unsigned long kvm_nvhe_sym(__icache_flags); extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e4a7e6369499..7f7c1231679e 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -163,7 +163,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **haddr); int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void **haddr); -void free_hyp_pgds(void); +void __init free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); @@ -175,7 +175,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -int kvm_mmu_init(u32 *hyp_va_bits); +int __init kvm_mmu_init(u32 *hyp_va_bits); static inline void *__kvm_vector_slot2addr(void *base, enum arm64_hyp_spectre_vector slot) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 63f81b27a4e3..4cd6762bda80 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -71,6 +71,11 @@ static inline kvm_pte_t kvm_phys_to_pte(u64 pa) return pte; } +static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte) +{ + return __phys_to_pfn(kvm_pte_to_phys(pte)); +} + static inline u64 kvm_granule_shift(u32 level) { /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ @@ -188,12 +193,15 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, * children. * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared * with other software walkers. + * @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was + * invoked from a fault handler. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), KVM_PGTABLE_WALK_SHARED = BIT(3), + KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4), }; struct kvm_pgtable_visit_ctx { diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b4bbeed80fb6..65e78999c75d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -681,7 +681,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) - +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -730,6 +730,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #else #define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) +#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */ /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */ #define pmd_set_fixmap(addr) NULL @@ -862,12 +863,12 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && pud_user(pud); + return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif @@ -1093,6 +1094,15 @@ static inline bool pud_sect_supported(void) } +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +#define ptep_modify_prot_start ptep_modify_prot_start +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define ptep_modify_prot_commit ptep_modify_prot_commit +extern void ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index d51b32a69309..3918f2a67970 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -161,7 +161,7 @@ struct thread_struct { enum fp_type fp_type; /* registers FPSIMD or SVE? */ unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - void *za_state; /* ZA register, if any */ + void *sme_state; /* ZA and ZT state, if any */ unsigned int vl[ARM64_VEC_MAX]; /* vector length */ unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1312fb48f18b..a2a93b3fc557 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -404,7 +404,6 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) -#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) #define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index ba4bff5ca674..2b09495499c6 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -16,7 +16,7 @@ #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE #define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES -typedef u32 uprobe_opcode_t; +typedef __le32 uprobe_opcode_t; struct arch_uprobe_task { }; diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b713d30544f1..69a4fb749c65 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -96,5 +96,11 @@ #define HWCAP2_CSSC (1UL << 34) #define HWCAP2_RPRFM (1UL << 35) #define HWCAP2_SVE2P1 (1UL << 36) +#define HWCAP2_SME2 (1UL << 37) +#define HWCAP2_SME2P1 (1UL << 38) +#define HWCAP2_SME_I16I32 (1UL << 39) +#define HWCAP2_SME_BI32I32 (1UL << 40) +#define HWCAP2_SME_B16B16 (1UL << 41) +#define HWCAP2_SME_F16F16 (1UL << 42) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 9525041e4a14..46e9072985a5 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -152,6 +152,14 @@ struct za_context { __u16 __reserved[3]; }; +#define ZT_MAGIC 0x5a544e01 + +struct zt_context { + struct _aarch64_ctx head; + __u16 nregs; + __u16 __reserved[3]; +}; + #endif /* !__ASSEMBLY__ */ #include <asm/sve_context.h> @@ -304,4 +312,15 @@ struct za_context { #define ZA_SIG_CONTEXT_SIZE(vq) \ (ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq)) +#define ZT_SIG_REG_SIZE 512 + +#define ZT_SIG_REG_BYTES (ZT_SIG_REG_SIZE / 8) + +#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context) + +#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n) + +#define ZT_SIG_CONTEXT_SIZE(n) \ + (sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n)) + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 97c42be71338..daa7b3f55997 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -11,11 +11,6 @@ #include <linux/of.h> #define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ -/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ -#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) -#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) -#define CLIDR_CTYPE(clidr, level) \ - (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) int cache_line_size(void) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 89ac00084f38..307faa2b4395 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -661,6 +661,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_2645198 + { + .desc = "ARM erratum 2645198", + .capability = ARM64_WORKAROUND_2645198, + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715) + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_2077057 { .desc = "ARM erratum 2077057", diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a77315b338e6..5bd959bd9a1f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -283,16 +283,26 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -2649,6 +2659,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = fa64_kernel_enable, }, + { + .desc = "SME2", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_SME2, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_EL1_SME_SHIFT, + .field_width = ID_AA64PFR1_EL1_SME_WIDTH, + .min_field_value = ID_AA64PFR1_EL1_SME_SME2, + .matches = has_cpuid_feature, + .cpu_enable = sme2_kernel_enable, + }, #endif /* CONFIG_ARM64_SME */ { .desc = "WFx with timeout", @@ -2827,11 +2849,17 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SME HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_SMEver_SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16B16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F16_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I8I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_B16F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_BI32I32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F32F32_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), #endif /* CONFIG_ARM64_SME */ {}, diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 379695262b77..85e54417d141 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -119,6 +119,12 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_CSSC] = "cssc", [KERNEL_HWCAP_RPRFM] = "rprfm", [KERNEL_HWCAP_SVE2P1] = "sve2p1", + [KERNEL_HWCAP_SME2] = "sme2", + [KERNEL_HWCAP_SME2P1] = "sme2p1", + [KERNEL_HWCAP_SME_I16I32] = "smei16i32", + [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", + [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", + [KERNEL_HWCAP_SME_F16F16] = "smef16f16", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index a00886410537..d872d18101d8 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -4,6 +4,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-112]! diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 353009d7f307..2e94d20c4ac7 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,28 +8,27 @@ #include <asm/cpufeature.h> #include <asm/mte.h> -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(cprm, i, m) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ - if (vma->vm_flags & VM_MTE) + for (i = 0, m = cprm->vma_meta; \ + i < cprm->vma_count; \ + i++, m = cprm->vma_meta + i) \ + if (m->flags & VM_MTE) -static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m) { - if (vma->vm_flags & VM_DONTDUMP) - return 0; - - return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; + return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE; } /* Derived from dump_user_range(); start/end must be page-aligned */ static int mte_dump_tag_range(struct coredump_params *cprm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long len) { int ret = 1; unsigned long addr; void *tags = NULL; - for (addr = start; addr < end; addr += PAGE_SIZE) { + for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page = get_dump_page(addr); /* @@ -65,7 +64,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm, mte_save_page_tags(page_address(page), tags); put_page(page); if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { - mte_free_tag_storage(tags); ret = 0; break; } @@ -77,13 +75,13 @@ static int mte_dump_tag_range(struct coredump_params *cprm, return ret; } -Elf_Half elf_core_extra_phdrs(void) +Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(cprm, i, m) vma_count++; return vma_count; @@ -91,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(cprm, i, m) { struct elf_phdr phdr; phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; + phdr.p_vaddr = m->start; phdr.p_paddr = 0; - phdr.p_filesz = mte_vma_tag_dump_size(vma); - phdr.p_memsz = vma->vm_end - vma->vm_start; + phdr.p_filesz = mte_vma_tag_dump_size(m); + phdr.p_memsz = m->end - m->start; offset += phdr.p_filesz; phdr.p_flags = 0; phdr.p_align = 0; @@ -114,28 +112,25 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) - data_size += mte_vma_tag_dump_size(vma); + for_each_mte_vma(cprm, i, m) + data_size += mte_vma_tag_dump_size(m); return data_size; } int elf_core_write_extra_data(struct coredump_params *cprm) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - - for_each_mte_vma(vmi, vma) { - if (vma->vm_flags & VM_DONTDUMP) - continue; + int i; + struct core_vma_metadata *m; - if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + for_each_mte_vma(cprm, i, m) { + if (!mte_dump_tag_range(cprm, m->start, m->dump_size)) return 0; } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 229436f33df5..6325db1a2179 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -100,25 +100,35 @@ SYM_FUNC_START(sme_set_vq) SYM_FUNC_END(sme_set_vq) /* - * Save the SME state + * Save the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_save_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_save_za 0, x1, 12 +SYM_FUNC_START(sme_save_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_save_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _str_zt 0 +1: ret -SYM_FUNC_END(za_save_state) +SYM_FUNC_END(sme_save_state) /* - * Load the SME state + * Load the ZA and ZT state * * x0 - pointer to buffer for state + * x1 - number of ZT registers to save */ -SYM_FUNC_START(za_load_state) - _sme_rdsvl 1, 1 // x1 = VL/8 - sme_load_za 0, x1, 12 +SYM_FUNC_START(sme_load_state) + _sme_rdsvl 2, 1 // x2 = VL/8 + sme_load_za 0, x2, 12 // Leaves x0 pointing to the end of ZA + + cbz x1, 1f + _ldr_zt 0 +1: ret -SYM_FUNC_END(za_load_state) +SYM_FUNC_END(sme_load_state) #endif /* CONFIG_ARM64_SME */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index dcc81e7200d4..7c67190c44e4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -299,7 +299,7 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type, /* * TIF_SME controls whether a task can use SME without trapping while * in userspace, when TIF_SME is set then we must have storage - * alocated in sve_state and za_state to store the contents of both ZA + * alocated in sve_state and sme_state to store the contents of both ZA * and the SVE registers for both streaming and non-streaming modes. * * If both SVCR.ZA and SVCR.SM are disabled then at any point we @@ -385,7 +385,7 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: /* Stop tracking SVE for this task until next use. */ @@ -429,7 +429,8 @@ static void task_fpsimd_load(void) write_sysreg_s(current->thread.svcr, SYS_SVCR); if (thread_za_enabled(¤t->thread)) - za_load_state(current->thread.za_state); + sme_load_state(current->thread.sme_state, + system_supports_sme2()); if (thread_sm_enabled(¤t->thread)) restore_ffr = system_supports_fa64(); @@ -490,7 +491,8 @@ static void fpsimd_save(void) *svcr = read_sysreg_s(SYS_SVCR); if (*svcr & SVCR_ZA_MASK) - za_save_state(last->za_state); + sme_save_state(last->sme_state, + system_supports_sme2()); /* If we are in streaming mode override regular SVE. */ if (*svcr & SVCR_SM_MASK) { @@ -1257,30 +1259,30 @@ void fpsimd_release_task(struct task_struct *dead_task) #ifdef CONFIG_ARM64_SME /* - * Ensure that task->thread.za_state is allocated and sufficiently large. + * Ensure that task->thread.sme_state is allocated and sufficiently large. * * This function should be used only in preparation for replacing - * task->thread.za_state with new data. The memory is always zeroed + * task->thread.sme_state with new data. The memory is always zeroed * here to prevent stale data from showing through: this is done in * the interest of testability and predictability, the architecture * guarantees that when ZA is enabled it will be zeroed. */ void sme_alloc(struct task_struct *task) { - if (task->thread.za_state) { - memset(task->thread.za_state, 0, za_state_size(task)); + if (task->thread.sme_state) { + memset(task->thread.sme_state, 0, sme_state_size(task)); return; } /* This could potentially be up to 64K. */ - task->thread.za_state = - kzalloc(za_state_size(task), GFP_KERNEL); + task->thread.sme_state = + kzalloc(sme_state_size(task), GFP_KERNEL); } static void sme_free(struct task_struct *task) { - kfree(task->thread.za_state); - task->thread.za_state = NULL; + kfree(task->thread.sme_state); + task->thread.sme_state = NULL; } void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) @@ -1302,6 +1304,17 @@ void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) * This must be called after sme_kernel_enable(), we rely on the * feature table being sorted to ensure this. */ +void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) +{ + /* Allow use of ZT0 */ + write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK, + SYS_SMCR_EL1); +} + +/* + * This must be called after sme_kernel_enable(), we rely on the + * feature table being sorted to ensure this. + */ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { /* Allow use of FA64 */ @@ -1488,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sve_alloc(current, false); sme_alloc(current); - if (!current->thread.sve_state || !current->thread.za_state) { + if (!current->thread.sve_state || !current->thread.sme_state) { force_sig(SIGKILL); return; } @@ -1609,7 +1622,7 @@ static void fpsimd_flush_thread_vl(enum vec_type type) void fpsimd_flush_thread(void) { void *sve_state = NULL; - void *za_state = NULL; + void *sme_state = NULL; if (!system_supports_fpsimd()) return; @@ -1634,8 +1647,8 @@ void fpsimd_flush_thread(void) clear_thread_flag(TIF_SME); /* Defer kfree() while in atomic context */ - za_state = current->thread.za_state; - current->thread.za_state = NULL; + sme_state = current->thread.sme_state; + current->thread.sme_state = NULL; fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; @@ -1645,7 +1658,7 @@ void fpsimd_flush_thread(void) put_cpu_fpsimd_context(); kfree(sve_state); - kfree(za_state); + kfree(sme_state); } /* @@ -1711,7 +1724,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->za_state = current->thread.za_state; + last->sme_state = current->thread.sme_state; last->sve_vl = task_get_sve_vl(current); last->sme_vl = task_get_sme_vl(current); last->svcr = ¤t->thread.svcr; diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 2ee18c860f2a..9439240c3fcf 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -16,30 +16,6 @@ #include <asm/ptrace.h> #include <asm/virt.h> -// Warning, hardcoded register allocation -// This will clobber x1 and x2, and expect x1 to contain -// the id register value as read from the HW -.macro __check_override idreg, fld, width, pass, fail - ubfx x1, x1, #\fld, #\width - cbz x1, \fail - - adr_l x1, \idreg\()_override - ldr x2, [x1, FTR_OVR_VAL_OFFSET] - ldr x1, [x1, FTR_OVR_MASK_OFFSET] - ubfx x2, x2, #\fld, #\width - ubfx x1, x1, #\fld, #\width - cmp x1, xzr - and x2, x2, x1 - csinv x2, x2, xzr, ne - cbnz x2, \pass - b \fail -.endm - -.macro check_override idreg, fld, pass, fail - mrs x1, \idreg\()_el1 - __check_override \idreg \fld 4 \pass \fail -.endm - .text .pushsection .hyp.text, "ax" @@ -98,58 +74,7 @@ SYM_CODE_START_LOCAL(elx_sync) SYM_CODE_END(elx_sync) SYM_CODE_START_LOCAL(__finalise_el2) - check_override id_aa64pfr0 ID_AA64PFR0_EL1_SVE_SHIFT .Linit_sve .Lskip_sve - -.Linit_sve: /* SVE register access */ - mrs x0, cptr_el2 // Disable SVE traps - bic x0, x0, #CPTR_EL2_TZ - msr cptr_el2, x0 - isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. - -.Lskip_sve: - check_override id_aa64pfr1 ID_AA64PFR1_EL1_SME_SHIFT .Linit_sme .Lskip_sme - -.Linit_sme: /* SME register access and priority mapping */ - mrs x0, cptr_el2 // Disable SME traps - bic x0, x0, #CPTR_EL2_TSM - msr cptr_el2, x0 - isb - - mrs x1, sctlr_el2 - orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps - msr sctlr_el2, x1 - isb - - mov x0, #0 // SMCR controls - - // Full FP in SM? - mrs_s x1, SYS_ID_AA64SMFR0_EL1 - __check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64 - -.Linit_sme_fa64: - orr x0, x0, SMCR_ELx_FA64_MASK -.Lskip_sme_fa64: - - orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector - msr_s SYS_SMCR_EL2, x0 // length for EL1. - - mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported? - ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1 - cbz x1, .Lskip_sme - - msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 - -.Lskip_sme: + finalise_el2_state // nVHE? No way! Give me the real thing! // Sanity check: MMU *must* be off @@ -157,7 +82,7 @@ SYM_CODE_START_LOCAL(__finalise_el2) tbnz x1, #0, 1f // Needs to be VHE capable, obviously - check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f + check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f x1 x2 1: mov_q x0, HVC_STUB_ERR eret diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 95133765ed29..d833d78a7f31 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -131,6 +131,7 @@ static const struct ftr_set_desc smfr0 __initconst = { .name = "id_aa64smfr0", .override = &id_aa64smfr0_override, .fields = { + FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL), /* FA64 is a one bit field... :-/ */ { "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, }, {} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 269ac1c25ae2..71d59b5abede 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -307,27 +307,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * In the unlikely event that we create a new thread with ZA - * enabled we should retain the ZA state so duplicate it here. - * This may be shortly freed if we exec() or if CLONE_SETTLS - * but it's simpler to do it here. To avoid confusing the rest - * of the code ensure that we have a sve_state allocated - * whenever za_state is allocated. + * enabled we should retain the ZA and ZT state so duplicate + * it here. This may be shortly freed if we exec() or if + * CLONE_SETTLS but it's simpler to do it here. To avoid + * confusing the rest of the code ensure that we have a + * sve_state allocated whenever sme_state is allocated. */ if (thread_za_enabled(&src->thread)) { dst->thread.sve_state = kzalloc(sve_state_size(src), GFP_KERNEL); if (!dst->thread.sve_state) return -ENOMEM; - dst->thread.za_state = kmemdup(src->thread.za_state, - za_state_size(src), - GFP_KERNEL); - if (!dst->thread.za_state) { + + dst->thread.sme_state = kmemdup(src->thread.sme_state, + sme_state_size(src), + GFP_KERNEL); + if (!dst->thread.sme_state) { kfree(dst->thread.sve_state); dst->thread.sve_state = NULL; return -ENOMEM; } } else { - dst->thread.za_state = NULL; + dst->thread.sme_state = NULL; clear_tsk_thread_flag(dst, TIF_SME); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2686ab157601..89b87f1021ed 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1045,7 +1045,7 @@ static int za_get(struct task_struct *target, if (thread_za_enabled(&target->thread)) { start = end; end = ZA_PT_SIZE(vq); - membuf_write(&to, target->thread.za_state, end - start); + membuf_write(&to, target->thread.sme_state, end - start); } /* Zero any trailing padding */ @@ -1099,7 +1099,7 @@ static int za_set(struct task_struct *target, /* Allocate/reinit ZA storage */ sme_alloc(target); - if (!target->thread.za_state) { + if (!target->thread.sme_state) { ret = -ENOMEM; goto out; } @@ -1124,7 +1124,7 @@ static int za_set(struct task_struct *target, start = ZA_PT_ZA_OFFSET; end = ZA_PT_SIZE(vq); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.za_state, + target->thread.sme_state, start, end); if (ret) goto out; @@ -1138,6 +1138,51 @@ out: return ret; } +static int zt_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!system_supports_sme2()) + return -EINVAL; + + /* + * If PSTATE.ZA is not set then ZT will be zeroed when it is + * enabled so report the current register value as zero. + */ + if (thread_za_enabled(&target->thread)) + membuf_write(&to, thread_zt_state(&target->thread), + ZT_SIG_REG_BYTES); + else + membuf_zero(&to, ZT_SIG_REG_BYTES); + + return 0; +} + +static int zt_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + + if (!system_supports_sme2()) + return -EINVAL; + + if (!thread_za_enabled(&target->thread)) { + sme_alloc(target); + if (!target->thread.sme_state) + return -ENOMEM; + } + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + thread_zt_state(&target->thread), + 0, ZT_SIG_REG_BYTES); + if (ret == 0) + target->thread.svcr |= SVCR_ZA_MASK; + + return ret; +} + #endif /* CONFIG_ARM64_SME */ #ifdef CONFIG_ARM64_PTR_AUTH @@ -1357,9 +1402,10 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif -#ifdef CONFIG_ARM64_SVE +#ifdef CONFIG_ARM64_SME REGSET_SSVE, REGSET_ZA, + REGSET_ZT, #endif #ifdef CONFIG_ARM64_PTR_AUTH REGSET_PAC_MASK, @@ -1467,6 +1513,14 @@ static const struct user_regset aarch64_regsets[] = { .regset_get = za_get, .set = za_set, }, + [REGSET_ZT] = { /* SME ZT */ + .core_note_type = NT_ARM_ZT, + .n = 1, + .size = ZT_SIG_REG_BYTES, + .align = sizeof(u64), + .regset_get = zt_get, + .set = zt_set, + }, #endif #ifdef CONFIG_ARM64_PTR_AUTH [REGSET_PAC_MASK] = { diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e0d09bf5b01b..14779619375b 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -57,6 +57,7 @@ struct rt_sigframe_user_layout { unsigned long esr_offset; unsigned long sve_offset; unsigned long za_offset; + unsigned long zt_offset; unsigned long extra_offset; unsigned long end_offset; }; @@ -221,6 +222,7 @@ struct user_ctxs { struct fpsimd_context __user *fpsimd; struct sve_context __user *sve; struct za_context __user *za; + struct zt_context __user *zt; }; #ifdef CONFIG_ARM64_SVE @@ -281,7 +283,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { - if (!system_supports_sve()) + /* + * A SME only system use SVE for streaming mode so can + * have a SVE formatted context with a zero VL and no + * payload data. + */ + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; vl = task_get_sve_vl(current); @@ -389,7 +396,7 @@ static int preserve_za_context(struct za_context __user *ctx) * fpsimd_signal_preserve_current_state(). */ err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET, - current->thread.za_state, + current->thread.sme_state, ZA_SIG_REGS_SIZE(vq)); } @@ -420,7 +427,7 @@ static int restore_za_context(struct user_ctxs *user) /* * Careful: we are about __copy_from_user() directly into - * thread.za_state with preemption enabled, so protection is + * thread.sme_state with preemption enabled, so protection is * needed to prevent a racing context switch from writing stale * registers back over the new data. */ @@ -429,13 +436,13 @@ static int restore_za_context(struct user_ctxs *user) /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ sme_alloc(current); - if (!current->thread.za_state) { + if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; clear_thread_flag(TIF_SME); return -ENOMEM; } - err = __copy_from_user(current->thread.za_state, + err = __copy_from_user(current->thread.sme_state, (char __user const *)user->za + ZA_SIG_REGS_OFFSET, ZA_SIG_REGS_SIZE(vq)); @@ -447,11 +454,81 @@ static int restore_za_context(struct user_ctxs *user) return 0; } + +static int preserve_zt_context(struct zt_context __user *ctx) +{ + int err = 0; + u16 reserved[ARRAY_SIZE(ctx->__reserved)]; + + if (WARN_ON(!thread_za_enabled(¤t->thread))) + return -EINVAL; + + memset(reserved, 0, sizeof(reserved)); + + __put_user_error(ZT_MAGIC, &ctx->head.magic, err); + __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16), + &ctx->head.size, err); + __put_user_error(1, &ctx->nregs, err); + BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved)); + err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved)); + + /* + * This assumes that the ZT state has already been saved to + * the task struct by calling the function + * fpsimd_signal_preserve_current_state(). + */ + err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET, + thread_zt_state(¤t->thread), + ZT_SIG_REGS_SIZE(1)); + + return err ? -EFAULT : 0; +} + +static int restore_zt_context(struct user_ctxs *user) +{ + int err; + struct zt_context zt; + + /* ZA must be restored first for this check to be valid */ + if (!thread_za_enabled(¤t->thread)) + return -EINVAL; + + if (__copy_from_user(&zt, user->zt, sizeof(zt))) + return -EFAULT; + + if (zt.nregs != 1) + return -EINVAL; + + if (zt.head.size != ZT_SIG_CONTEXT_SIZE(zt.nregs)) + return -EINVAL; + + /* + * Careful: we are about __copy_from_user() directly into + * thread.zt_state with preemption enabled, so protection is + * needed to prevent a racing context switch from writing stale + * registers back over the new data. + */ + + fpsimd_flush_task_state(current); + /* From now, fpsimd_thread_switch() won't touch ZT in thread state */ + + err = __copy_from_user(thread_zt_state(¤t->thread), + (char __user const *)user->zt + + ZT_SIG_REGS_OFFSET, + ZT_SIG_REGS_SIZE(1)); + if (err) + return -EFAULT; + + return 0; +} + #else /* ! CONFIG_ARM64_SME */ /* Turn any non-optimised out attempts to use these into a link error: */ extern int preserve_za_context(void __user *ctx); extern int restore_za_context(struct user_ctxs *user); +extern int preserve_zt_context(void __user *ctx); +extern int restore_zt_context(struct user_ctxs *user); #endif /* ! CONFIG_ARM64_SME */ @@ -469,6 +546,7 @@ static int parse_user_sigframe(struct user_ctxs *user, user->fpsimd = NULL; user->sve = NULL; user->za = NULL; + user->zt = NULL; if (!IS_ALIGNED((unsigned long)base, 16)) goto invalid; @@ -547,6 +625,19 @@ static int parse_user_sigframe(struct user_ctxs *user, user->za = (struct za_context __user *)head; break; + case ZT_MAGIC: + if (!system_supports_sme2()) + goto invalid; + + if (user->zt) + goto invalid; + + if (size < sizeof(*user->zt)) + goto invalid; + + user->zt = (struct zt_context __user *)head; + break; + case EXTRA_MAGIC: if (have_extra_context) goto invalid; @@ -669,6 +760,9 @@ static int restore_sigframe(struct pt_regs *regs, if (err == 0 && system_supports_sme() && user.za) err = restore_za_context(&user); + if (err == 0 && system_supports_sme2() && user.zt) + err = restore_zt_context(&user); + return err; } @@ -732,7 +826,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE) || @@ -769,6 +863,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } + if (system_supports_sme2()) { + if (add_all || thread_za_enabled(¤t->thread)) { + err = sigframe_alloc(user, &user->zt_offset, + ZT_SIG_CONTEXT_SIZE(1)); + if (err) + return err; + } + } + return sigframe_alloc_end(user); } @@ -824,6 +927,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= preserve_za_context(za_ctx); } + /* ZT state if present */ + if (system_supports_sme2() && err == 0 && user->zt_offset) { + struct zt_context __user *zt_ctx = + apply_user_offset(user, user->zt_offset); + err |= preserve_zt_context(zt_ctx); + } + if (err == 0 && user->extra_offset) { char __user *sfp = (char __user *)user->sigframe; char __user *userp = diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 05da3c8f7e88..ca6eadeb7d1a 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -21,6 +21,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + select KVM_GENERIC_HARDWARE_ENABLING select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index bb24a76b4224..00610477ec7b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -428,14 +428,17 @@ static void timer_emulate(struct arch_timer_context *ctx) * scheduled for the future. If the timer cannot fire at all, * then we also don't need a soft timer. */ - if (!kvm_timer_irq_can_fire(ctx)) { - soft_timer_cancel(&ctx->hrtimer); + if (should_fire || !kvm_timer_irq_can_fire(ctx)) return; - } soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); } +static void set_cntvoff(u64 cntvoff) +{ + kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); +} + static void timer_save_state(struct arch_timer_context *ctx) { struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); @@ -459,6 +462,22 @@ static void timer_save_state(struct arch_timer_context *ctx) write_sysreg_el0(0, SYS_CNTV_CTL); isb(); + /* + * The kernel may decide to run userspace after + * calling vcpu_put, so we reset cntvoff to 0 to + * ensure a consistent read between user accesses to + * the virtual counter and kernel access to the + * physical counter of non-VHE case. + * + * For VHE, the virtual counter uses a fixed virtual + * offset of zero, so no need to zero CNTVOFF_EL2 + * register, but this is actually useful when switching + * between EL1/vEL2 with NV. + * + * Do it unconditionally, as this is either unavoidable + * or dirt cheap. + */ + set_cntvoff(0); break; case TIMER_PTIMER: timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); @@ -532,6 +551,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) switch (index) { case TIMER_VTIMER: + set_cntvoff(timer_get_offset(ctx)); write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL); @@ -552,11 +572,6 @@ out: local_irq_restore(flags); } -static void set_cntvoff(u64 cntvoff) -{ - kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); -} - static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) { int r; @@ -631,8 +646,6 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) kvm_timer_vcpu_load_nogic(vcpu); } - set_cntvoff(timer_get_offset(map.direct_vtimer)); - kvm_timer_unblocking(vcpu); timer_restore_state(map.direct_vtimer); @@ -688,15 +701,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) if (kvm_vcpu_is_blocking(vcpu)) kvm_timer_blocking(vcpu); - - /* - * The kernel may decide to run userspace after calling vcpu_put, so - * we reset cntvoff to 0 to ensure a consistent read between user - * accesses to the virtual counter and kernel access to the physical - * counter of non-VHE case. For VHE, the virtual counter uses a fixed - * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. - */ - set_cntvoff(0); } /* @@ -811,10 +815,18 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) ptimer->host_timer_irq_flags = host_ptimer_irq_flags; } -static void kvm_timer_init_interrupt(void *info) +void kvm_timer_cpu_up(void) { enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); - enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); + if (host_ptimer_irq) + enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); +} + +void kvm_timer_cpu_down(void) +{ + disable_percpu_irq(host_vtimer_irq); + if (host_ptimer_irq) + disable_percpu_irq(host_ptimer_irq); } int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) @@ -926,14 +938,22 @@ u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timers tmr, enum kvm_arch_timer_regs treg) { + struct arch_timer_context *timer; + struct timer_map map; u64 val; + get_timer_map(vcpu, &map); + timer = vcpu_get_timer(vcpu, tmr); + + if (timer == map.emul_ptimer) + return kvm_arm_timer_read(vcpu, timer, treg); + preempt_disable(); - kvm_timer_vcpu_put(vcpu); + timer_save_state(timer); - val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); + val = kvm_arm_timer_read(vcpu, timer, treg); - kvm_timer_vcpu_load(vcpu); + timer_restore_state(timer); preempt_enable(); return val; @@ -967,25 +987,22 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, enum kvm_arch_timer_regs treg, u64 val) { - preempt_disable(); - kvm_timer_vcpu_put(vcpu); - - kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); - - kvm_timer_vcpu_load(vcpu); - preempt_enable(); -} - -static int kvm_timer_starting_cpu(unsigned int cpu) -{ - kvm_timer_init_interrupt(NULL); - return 0; -} + struct arch_timer_context *timer; + struct timer_map map; -static int kvm_timer_dying_cpu(unsigned int cpu) -{ - disable_percpu_irq(host_vtimer_irq); - return 0; + get_timer_map(vcpu, &map); + timer = vcpu_get_timer(vcpu, tmr); + if (timer == map.emul_ptimer) { + soft_timer_cancel(&timer->hrtimer); + kvm_arm_timer_write(vcpu, timer, treg, val); + timer_emulate(timer); + } else { + preempt_disable(); + timer_save_state(timer); + kvm_arm_timer_write(vcpu, timer, treg, val); + timer_restore_state(timer); + preempt_enable(); + } } static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) @@ -1117,7 +1134,7 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info) return 0; } -int kvm_timer_hyp_init(bool has_gic) +int __init kvm_timer_hyp_init(bool has_gic) { struct arch_timer_kvm_info *info; int err; @@ -1185,9 +1202,6 @@ int kvm_timer_hyp_init(bool has_gic) goto out_free_irq; } - cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, - "kvm/arm/timer:starting", kvm_timer_starting_cpu, - kvm_timer_dying_cpu); return 0; out_free_irq: free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6b59d89811be..64da2c25422d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -63,16 +63,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -int kvm_arch_hardware_setup(void *opaque) -{ - return 0; -} - -int kvm_arch_check_processor_compat(void *opaque) -{ - return 0; -} - int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -146,7 +136,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto err_unshare_kvm; - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) { ret = -ENOMEM; goto err_unshare_kvm; } @@ -1539,7 +1529,7 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) +static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; @@ -1682,7 +1672,15 @@ static void _kvm_arch_hardware_enable(void *discard) int kvm_arch_hardware_enable(void) { + int was_enabled = __this_cpu_read(kvm_arm_hardware_enabled); + _kvm_arch_hardware_enable(NULL); + + if (!was_enabled) { + kvm_vgic_cpu_up(); + kvm_timer_cpu_up(); + } + return 0; } @@ -1696,6 +1694,11 @@ static void _kvm_arch_hardware_disable(void *discard) void kvm_arch_hardware_disable(void) { + if (__this_cpu_read(kvm_arm_hardware_enabled)) { + kvm_timer_cpu_down(); + kvm_vgic_cpu_down(); + } + if (!is_protected_kvm_enabled()) _kvm_arch_hardware_disable(NULL); } @@ -1738,26 +1741,26 @@ static struct notifier_block hyp_init_cpu_pm_nb = { .notifier_call = hyp_init_cpu_pm_notifier, }; -static void hyp_cpu_pm_init(void) +static void __init hyp_cpu_pm_init(void) { if (!is_protected_kvm_enabled()) cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } -static void hyp_cpu_pm_exit(void) +static void __init hyp_cpu_pm_exit(void) { if (!is_protected_kvm_enabled()) cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); } #else -static inline void hyp_cpu_pm_init(void) +static inline void __init hyp_cpu_pm_init(void) { } -static inline void hyp_cpu_pm_exit(void) +static inline void __init hyp_cpu_pm_exit(void) { } #endif -static void init_cpu_logical_map(void) +static void __init init_cpu_logical_map(void) { unsigned int cpu; @@ -1774,7 +1777,7 @@ static void init_cpu_logical_map(void) #define init_psci_0_1_impl_state(config, what) \ config.psci_0_1_ ## what ## _implemented = psci_ops.what -static bool init_psci_relay(void) +static bool __init init_psci_relay(void) { /* * If PSCI has not been initialized, protected KVM cannot install @@ -1797,7 +1800,7 @@ static bool init_psci_relay(void) return true; } -static int init_subsystems(void) +static int __init init_subsystems(void) { int err = 0; @@ -1838,13 +1841,22 @@ static int init_subsystems(void) kvm_register_perf_callbacks(NULL); out: + if (err) + hyp_cpu_pm_exit(); + if (err || !is_protected_kvm_enabled()) on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); return err; } -static void teardown_hyp_mode(void) +static void __init teardown_subsystems(void) +{ + kvm_unregister_perf_callbacks(); + hyp_cpu_pm_exit(); +} + +static void __init teardown_hyp_mode(void) { int cpu; @@ -1855,7 +1867,7 @@ static void teardown_hyp_mode(void) } } -static int do_pkvm_init(u32 hyp_va_bits) +static int __init do_pkvm_init(u32 hyp_va_bits) { void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); int ret; @@ -1887,11 +1899,12 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; } -static int kvm_hyp_init_protection(u32 hyp_va_bits) +static int __init kvm_hyp_init_protection(u32 hyp_va_bits) { void *addr = phys_to_virt(hyp_mem_base); int ret; @@ -1910,7 +1923,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) } /* Inits Hyp-mode on all online CPUs */ -static int init_hyp_mode(void) +static int __init init_hyp_mode(void) { u32 hyp_va_bits; int cpu; @@ -2092,7 +2105,7 @@ out_err: return err; } -static void _kvm_host_prot_finalize(void *arg) +static void __init _kvm_host_prot_finalize(void *arg) { int *err = arg; @@ -2100,7 +2113,7 @@ static void _kvm_host_prot_finalize(void *arg) WRITE_ONCE(*err, -EINVAL); } -static int pkvm_drop_host_privileges(void) +static int __init pkvm_drop_host_privileges(void) { int ret = 0; @@ -2113,7 +2126,7 @@ static int pkvm_drop_host_privileges(void) return ret; } -static int finalize_hyp_mode(void) +static int __init finalize_hyp_mode(void) { if (!is_protected_kvm_enabled()) return 0; @@ -2186,7 +2199,7 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) } /* Initialize Hyp-mode and memory mappings on all CPUs */ -int kvm_arch_init(void *opaque) +static __init int kvm_arm_init(void) { int err; bool in_hyp_mode; @@ -2237,7 +2250,7 @@ int kvm_arch_init(void *opaque) err = kvm_init_vector_slots(); if (err) { kvm_err("Cannot initialise vector slots\n"); - goto out_err; + goto out_hyp; } err = init_subsystems(); @@ -2248,7 +2261,7 @@ int kvm_arch_init(void *opaque) err = finalize_hyp_mode(); if (err) { kvm_err("Failed to finalize Hyp protection\n"); - goto out_hyp; + goto out_subs; } } @@ -2260,10 +2273,19 @@ int kvm_arch_init(void *opaque) kvm_info("Hyp mode initialized successfully\n"); } + /* + * FIXME: Do something reasonable if kvm_init() fails after pKVM + * hypervisor protection is finalized. + */ + err = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (err) + goto out_subs; + return 0; +out_subs: + teardown_subsystems(); out_hyp: - hyp_cpu_pm_exit(); if (!in_hyp_mode) teardown_hyp_mode(); out_err: @@ -2271,12 +2293,6 @@ out_err: return err; } -/* NOP: Compiling as a module not supported */ -void kvm_arch_exit(void) -{ - kvm_unregister_perf_callbacks(); -} - static int __init early_kvm_mode_cfg(char *arg) { if (!arg) @@ -2315,10 +2331,4 @@ enum kvm_mode kvm_get_mode(void) return kvm_mode; } -static int arm_init(void) -{ - int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - return rc; -} - -module_init(arm_init); +module_init(kvm_arm_init); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index f5799f571317..1279949599b5 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -143,7 +143,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.st = &vcpu->arch.ctxt.fp_regs; fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; - fp_state.za_state = NULL; + fp_state.sme_state = NULL; fp_state.svcr = &vcpu->arch.svcr; fp_state.fp_type = &vcpu->arch.fp_type; diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 1b8a2dcd712f..9ddcfe2c3e57 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) */ if (!(esr & ESR_ELx_S1PTW) && (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) { if (!__translate_far_to_hpfar(far, &hpfar)) return false; } else { diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 3330d1b76bdd..07d37ff88a3f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; - valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_abt_issea(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index baa5b9b3dde5..147cb4c846c6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -39,7 +39,6 @@ static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { - ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); @@ -95,7 +94,6 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); - write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); if (has_vhe() || !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index c953fb4b9a13..a6d67c2bb5ae 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -183,6 +183,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) /* Initialize EL2 CPU state to sane values. */ init_el2_state // Clobbers x0..x2 + finalise_el2_state /* Enable MMU, set vectors and stack. */ mov x0, x28 diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 0f9ac25afdf4..08d2b004f4b7 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -26,6 +26,7 @@ u64 id_aa64isar2_el1_sys_val; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; +u64 id_aa64smfr0_el1_sys_val; /* * Inject an unknown/undefined exception to an AArch64 guest while most of its diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index b11cf2c618a6..3d61bd3e591d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -168,6 +168,25 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, return walker->cb(ctx, visit); } +static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, + int r) +{ + /* + * Visitor callbacks return EAGAIN when the conditions that led to a + * fault are no longer reflected in the page tables due to a race to + * update a PTE. In the context of a fault handler this is interpreted + * as a signal to retry guest execution. + * + * Ignore the return code altogether for walkers outside a fault handler + * (e.g. write protecting a range of memory) and chug along with the + * page table walk. + */ + if (r == -EAGAIN) + return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT); + + return !r; +} + static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); @@ -200,7 +219,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, table = kvm_pte_table(ctx.old, level); } - if (ret) + if (!kvm_pgtable_walk_continue(data->walker, ret)) goto out; if (!table) { @@ -211,13 +230,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); - if (ret) + if (!kvm_pgtable_walk_continue(data->walker, ret)) goto out; if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); out: + if (kvm_pgtable_walk_continue(data->walker, ret)) + return 0; + return ret; } @@ -584,12 +606,14 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) lvls = 2; vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); +#ifdef CONFIG_ARM64_HW_AFDBM /* * Enable the Hardware Access Flag management, unconditionally * on all CPUs. The features is RES0 on CPUs without the support * and must be ignored by the CPUs. */ vtcr |= VTCR_EL2_HA; +#endif /* CONFIG_ARM64_HW_AFDBM */ /* Set the vmid bits */ vtcr |= (get_vmid_bits(mmfr1) == 16) ? @@ -1026,7 +1050,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; if (!kvm_pte_valid(ctx->old)) - return 0; + return -EAGAIN; data->level = ctx->level; data->pte = pte; @@ -1094,9 +1118,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; - stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, - &pte, NULL, 0); - dsb(ishst); + int ret; + + ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, + &pte, NULL, + KVM_PGTABLE_WALK_HANDLE_FAULT | + KVM_PGTABLE_WALK_SHARED); + if (!ret) + dsb(ishst); + return pte; } @@ -1141,6 +1171,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, + KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED); if (!ret) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 31d7fa4c7c14..cfc7777fa490 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -25,11 +25,11 @@ static struct kvm_pgtable *hyp_pgtable; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); -static unsigned long hyp_idmap_start; -static unsigned long hyp_idmap_end; -static phys_addr_t hyp_idmap_vector; +static unsigned long __ro_after_init hyp_idmap_start; +static unsigned long __ro_after_init hyp_idmap_end; +static phys_addr_t __ro_after_init hyp_idmap_vector; -static unsigned long io_map_base; +static unsigned long __ro_after_init io_map_base; static phys_addr_t stage2_range_addr_end(phys_addr_t addr, phys_addr_t end) { @@ -280,7 +280,7 @@ static void stage2_flush_vm(struct kvm *kvm) /** * free_hyp_pgds - free Hyp-mode page tables */ -void free_hyp_pgds(void) +void __init free_hyp_pgds(void) { mutex_lock(&kvm_hyp_pgd_mutex); if (hyp_pgtable) { @@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); - if (fault_status == FSC_PERM && !write_fault && !exec_fault) { + if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } @@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * only exception to this is when dirty logging is enabled at runtime * and a write fault needs to collapse a block entry into a table. */ - if (fault_status != FSC_PERM || (logging_active && write_fault)) { + if (fault_status != ESR_ELx_FSC_PERM || + (logging_active && write_fault)) { ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm)); if (ret) @@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. */ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { - if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE) + if (fault_status == ESR_ELx_FSC_PERM && + fault_granule > PAGE_SIZE) vma_pagesize = fault_granule; else vma_pagesize = transparent_hugepage_adjust(kvm, memslot, @@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, &fault_ipa); } - if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { + if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new disallowed VMA */ if (kvm_vma_mte_allowed(vma)) { sanitise_mte_tags(kvm, pfn, vma_pagesize); @@ -1376,12 +1378,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == FSC_PERM && vma_pagesize == fault_granule) + if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, - memcache, KVM_PGTABLE_WALK_SHARED); + memcache, + KVM_PGTABLE_WALK_HANDLE_FAULT | + KVM_PGTABLE_WALK_SHARED); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { @@ -1399,20 +1403,18 @@ out_unlock: /* Resolve the access fault by making the page young again. */ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { - pte_t pte; - kvm_pte_t kpte; + kvm_pte_t pte; struct kvm_s2_mmu *mmu; trace_kvm_access_fault(fault_ipa); - write_lock(&vcpu->kvm->mmu_lock); + read_lock(&vcpu->kvm->mmu_lock); mmu = vcpu->arch.hw_mmu; - kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); - write_unlock(&vcpu->kvm->mmu_lock); + pte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa); + read_unlock(&vcpu->kvm->mmu_lock); - pte = __pte(kpte); - if (pte_valid(pte)) - kvm_set_pfn_accessed(pte_pfn(pte)); + if (kvm_pte_valid(pte)) + kvm_set_pfn_accessed(kvm_pte_to_pfn(pte)); } /** @@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (fault_status == FSC_FAULT) { + if (fault_status == ESR_ELx_FSC_FAULT) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); @@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - if (fault_status != FSC_FAULT && fault_status != FSC_PERM && - fault_status != FSC_ACCESS) { + if (fault_status != ESR_ELx_FSC_FAULT && + fault_status != ESR_ELx_FSC_PERM && + fault_status != ESR_ELx_FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); - if (fault_status == FSC_ACCESS) { + if (fault_status == ESR_ELx_FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); ret = 1; goto out_unlock; @@ -1665,7 +1668,7 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { .virt_to_phys = kvm_host_pa, }; -int kvm_mmu_init(u32 *hyp_va_bits) +int __init kvm_mmu_init(u32 *hyp_va_bits) { int err; u32 idmap_bits; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e0267f672b8a..4a39da302b88 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -30,7 +30,7 @@ #include <asm/virt.h> /* Maximum phys_shift supported for any VM on this host */ -static u32 kvm_ipa_limit; +static u32 __ro_after_init kvm_ipa_limit; /* * ARMv8 Reset Values @@ -41,9 +41,9 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) -unsigned int kvm_sve_max_vl; +unsigned int __ro_after_init kvm_sve_max_vl; -int kvm_arm_init_sve(void) +int __init kvm_arm_init_sve(void) { if (system_supports_sve()) { kvm_sve_max_vl = sve_max_virtualisable_vl(); @@ -157,6 +157,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) if (sve_state) kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); kfree(sve_state); + kfree(vcpu->arch.ccsidr); } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) @@ -352,7 +353,7 @@ u32 get_kvm_ipa_limit(void) return kvm_ipa_limit; } -int kvm_set_ipa_limit(void) +int __init kvm_set_ipa_limit(void) { unsigned int parange; u64 mmfr0; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d852e4c26177..0bc7df55916e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -11,6 +11,7 @@ #include <linux/bitfield.h> #include <linux/bsearch.h> +#include <linux/cacheinfo.h> #include <linux/kvm_host.h> #include <linux/mm.h> #include <linux/printk.h> @@ -81,25 +82,97 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) __vcpu_sys_reg(vcpu, reg) = val; } -/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ -static u32 cache_levels; - /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ #define CSSELR_MAX 14 +/* + * Returns the minimum line size for the selected cache, expressed as + * Log2(bytes). + */ +static u8 get_min_cache_line_size(bool icache) +{ + u64 ctr = read_sanitised_ftr_reg(SYS_CTR_EL0); + u8 field; + + if (icache) + field = SYS_FIELD_GET(CTR_EL0, IminLine, ctr); + else + field = SYS_FIELD_GET(CTR_EL0, DminLine, ctr); + + /* + * Cache line size is represented as Log2(words) in CTR_EL0. + * Log2(bytes) can be derived with the following: + * + * Log2(words) + 2 = Log2(bytes / 4) + 2 + * = Log2(bytes) - 2 + 2 + * = Log2(bytes) + */ + return field + 2; +} + /* Which cache CCSIDR represents depends on CSSELR value. */ -static u32 get_ccsidr(u32 csselr) +static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr) +{ + u8 line_size; + + if (vcpu->arch.ccsidr) + return vcpu->arch.ccsidr[csselr]; + + line_size = get_min_cache_line_size(csselr & CSSELR_EL1_InD); + + /* + * Fabricate a CCSIDR value as the overriding value does not exist. + * The real CCSIDR value will not be used as it can vary by the + * physical CPU which the vcpu currently resides in. + * + * The line size is determined with get_min_cache_line_size(), which + * should be valid for all CPUs even if they have different cache + * configuration. + * + * The associativity bits are cleared, meaning the geometry of all data + * and unified caches (which are guaranteed to be PIPT and thus + * non-aliasing) are 1 set and 1 way. + * Guests should not be doing cache operations by set/way at all, and + * for this reason, we trap them and attempt to infer the intent, so + * that we can flush the entire guest's address space at the appropriate + * time. The exposed geometry minimizes the number of the traps. + * [If guests should attempt to infer aliasing properties from the + * geometry (which is not permitted by the architecture), they would + * only do so for virtually indexed caches.] + * + * We don't check if the cache level exists as it is allowed to return + * an UNKNOWN value if not. + */ + return SYS_FIELD_PREP(CCSIDR_EL1, LineSize, line_size - 4); +} + +static int set_ccsidr(struct kvm_vcpu *vcpu, u32 csselr, u32 val) { - u32 ccsidr; + u8 line_size = FIELD_GET(CCSIDR_EL1_LineSize, val) + 4; + u32 *ccsidr = vcpu->arch.ccsidr; + u32 i; + + if ((val & CCSIDR_EL1_RES0) || + line_size < get_min_cache_line_size(csselr & CSSELR_EL1_InD)) + return -EINVAL; + + if (!ccsidr) { + if (val == get_ccsidr(vcpu, csselr)) + return 0; + + ccsidr = kmalloc_array(CSSELR_MAX, sizeof(u32), GFP_KERNEL_ACCOUNT); + if (!ccsidr) + return -ENOMEM; - /* Make sure noone else changes CSSELR during this! */ - local_irq_disable(); - write_sysreg(csselr, csselr_el1); - isb(); - ccsidr = read_sysreg(ccsidr_el1); - local_irq_enable(); + for (i = 0; i < CSSELR_MAX; i++) + ccsidr[i] = get_ccsidr(vcpu, i); - return ccsidr; + vcpu->arch.ccsidr = ccsidr; + } + + ccsidr[csselr] = val; + + return 0; } /* @@ -646,7 +719,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return; /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ - pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; + pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); if (!kvm_supports_32bit_el0()) pmcr |= ARMV8_PMU_PMCR_LC; @@ -1049,7 +1122,9 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, treg = TIMER_REG_CVAL; break; default: - BUG(); + print_sys_reg_msg(p, "%s", "Unhandled trapped timer register"); + kvm_inject_undefined(vcpu); + return false; } if (p->is_write) @@ -1155,6 +1230,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), pmuver_to_perfmon(vcpu_pmuver(vcpu))); break; + case SYS_ID_AA64MMFR2_EL1: + val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK; + break; + case SYS_ID_MMFR4_EL1: + val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX); + break; } return val; @@ -1385,10 +1466,78 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) return write_to_read_only(vcpu, p, r); - p->regval = read_sysreg(clidr_el1); + p->regval = __vcpu_sys_reg(vcpu, r->reg); return true; } +/* + * Fabricate a CLIDR_EL1 value instead of using the real value, which can vary + * by the physical CPU which the vcpu currently resides in. + */ +static void reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + u64 clidr; + u8 loc; + + if ((ctr_el0 & CTR_EL0_IDC)) { + /* + * Data cache clean to the PoU is not required so LoUU and LoUIS + * will not be set and a unified cache, which will be marked as + * LoC, will be added. + * + * If not DIC, let the unified cache L2 so that an instruction + * cache can be added as L1 later. + */ + loc = (ctr_el0 & CTR_EL0_DIC) ? 1 : 2; + clidr = CACHE_TYPE_UNIFIED << CLIDR_CTYPE_SHIFT(loc); + } else { + /* + * Data cache clean to the PoU is required so let L1 have a data + * cache and mark it as LoUU and LoUIS. As L1 has a data cache, + * it can be marked as LoC too. + */ + loc = 1; + clidr = 1 << CLIDR_LOUU_SHIFT; + clidr |= 1 << CLIDR_LOUIS_SHIFT; + clidr |= CACHE_TYPE_DATA << CLIDR_CTYPE_SHIFT(1); + } + + /* + * Instruction cache invalidation to the PoU is required so let L1 have + * an instruction cache. If L1 already has a data cache, it will be + * CACHE_TYPE_SEPARATE. + */ + if (!(ctr_el0 & CTR_EL0_DIC)) + clidr |= CACHE_TYPE_INST << CLIDR_CTYPE_SHIFT(1); + + clidr |= loc << CLIDR_LOC_SHIFT; + + /* + * Add tag cache unified to data cache. Allocation tags and data are + * unified in a cache line so that it looks valid even if there is only + * one cache line. + */ + if (kvm_has_mte(vcpu->kvm)) + clidr |= 2 << CLIDR_TTYPE_SHIFT(loc); + + __vcpu_sys_reg(vcpu, r->reg) = clidr; +} + +static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + u64 val) +{ + u64 ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val)); + + if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) + return -EINVAL; + + __vcpu_sys_reg(vcpu, rd->reg) = val; + + return 0; +} + static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { @@ -1410,22 +1559,10 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return write_to_read_only(vcpu, p, r); csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1); - p->regval = get_ccsidr(csselr); + csselr &= CSSELR_EL1_Level | CSSELR_EL1_InD; + if (csselr < CSSELR_MAX) + p->regval = get_ccsidr(vcpu, csselr); - /* - * Guests should not be doing cache operations by set/way at all, and - * for this reason, we trap them and attempt to infer the intent, so - * that we can flush the entire guest's address space at the appropriate - * time. - * To prevent this trapping from causing performance problems, let's - * expose the geometry of all data and unified caches (which are - * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way. - * [If guests should attempt to infer aliasing properties from the - * geometry (which is not permitted by the architecture), they would - * only do so for virtually indexed caches.] - */ - if (!(csselr & 1)) // data or unified cache - p->regval &= ~GENMASK(27, 3); return true; } @@ -1717,7 +1854,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, - { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, + { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1, + .set_user = set_clidr }, + { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, @@ -2219,6 +2358,10 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, + + /* CCSIDR2 */ + { Op1(1), CRn( 0), CRm( 0), Op2(2), undef_access }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 }, }; @@ -2724,7 +2867,6 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id, FUNCTION_INVARIANT(midr_el1) FUNCTION_INVARIANT(revidr_el1) -FUNCTION_INVARIANT(clidr_el1) FUNCTION_INVARIANT(aidr_el1) static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) @@ -2733,10 +2875,9 @@ static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) } /* ->val is filled in by kvm_sys_reg_table_init() */ -static struct sys_reg_desc invariant_sys_regs[] = { +static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = { { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, - { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 }, { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 }, }; @@ -2773,33 +2914,7 @@ static int set_invariant_sys_reg(u64 id, u64 __user *uaddr) return 0; } -static bool is_valid_cache(u32 val) -{ - u32 level, ctype; - - if (val >= CSSELR_MAX) - return false; - - /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ - level = (val >> 1); - ctype = (cache_levels >> (level * 3)) & 7; - - switch (ctype) { - case 0: /* No cache */ - return false; - case 1: /* Instruction cache only */ - return (val & 1); - case 2: /* Data cache only */ - case 4: /* Unified cache */ - return !(val & 1); - case 3: /* Separate instruction and data caches */ - return true; - default: /* Reserved: we can't know instruction or data. */ - return false; - } -} - -static int demux_c15_get(u64 id, void __user *uaddr) +static int demux_c15_get(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) { u32 val; u32 __user *uval = uaddr; @@ -2815,16 +2930,16 @@ static int demux_c15_get(u64 id, void __user *uaddr) return -ENOENT; val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) >> KVM_REG_ARM_DEMUX_VAL_SHIFT; - if (!is_valid_cache(val)) + if (val >= CSSELR_MAX) return -ENOENT; - return put_user(get_ccsidr(val), uval); + return put_user(get_ccsidr(vcpu, val), uval); default: return -ENOENT; } } -static int demux_c15_set(u64 id, void __user *uaddr) +static int demux_c15_set(struct kvm_vcpu *vcpu, u64 id, void __user *uaddr) { u32 val, newval; u32 __user *uval = uaddr; @@ -2840,16 +2955,13 @@ static int demux_c15_set(u64 id, void __user *uaddr) return -ENOENT; val = (id & KVM_REG_ARM_DEMUX_VAL_MASK) >> KVM_REG_ARM_DEMUX_VAL_SHIFT; - if (!is_valid_cache(val)) + if (val >= CSSELR_MAX) return -ENOENT; if (get_user(newval, uval)) return -EFAULT; - /* This is also invariant: you can't change it. */ - if (newval != get_ccsidr(val)) - return -EINVAL; - return 0; + return set_ccsidr(vcpu, val, newval); default: return -ENOENT; } @@ -2886,7 +2998,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg int err; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) - return demux_c15_get(reg->id, uaddr); + return demux_c15_get(vcpu, reg->id, uaddr); err = get_invariant_sys_reg(reg->id, uaddr); if (err != -ENOENT) @@ -2930,7 +3042,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg int err; if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) - return demux_c15_set(reg->id, uaddr); + return demux_c15_set(vcpu, reg->id, uaddr); err = set_invariant_sys_reg(reg->id, uaddr); if (err != -ENOENT) @@ -2942,13 +3054,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg static unsigned int num_demux_regs(void) { - unsigned int i, count = 0; - - for (i = 0; i < CSSELR_MAX; i++) - if (is_valid_cache(i)) - count++; - - return count; + return CSSELR_MAX; } static int write_demux_regids(u64 __user *uindices) @@ -2958,8 +3064,6 @@ static int write_demux_regids(u64 __user *uindices) val |= KVM_REG_ARM_DEMUX_ID_CCSIDR; for (i = 0; i < CSSELR_MAX; i++) { - if (!is_valid_cache(i)) - continue; if (put_user(val | i, uindices)) return -EFAULT; uindices++; @@ -3057,11 +3161,10 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -int kvm_sys_reg_table_init(void) +int __init kvm_sys_reg_table_init(void) { bool valid = true; unsigned int i; - struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false); @@ -3078,23 +3181,5 @@ int kvm_sys_reg_table_init(void) for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]); - /* - * CLIDR format is awkward, so clean it up. See ARM B4.1.20: - * - * If software reads the Cache Type fields from Ctype1 - * upwards, once it has seen a value of 0b000, no caches - * exist at further-out levels of the hierarchy. So, for - * example, if Ctype3 is the first Cache Type field with a - * value of 0b000, the values of Ctype4 to Ctype7 must be - * ignored. - */ - get_clidr_el1(NULL, &clidr); /* Ugly... */ - cache_levels = clidr.val; - for (i = 0; i < 7; i++) - if (((cache_levels >> (i*3)) & 7) == 0) - break; - /* Clear all higher bits. */ - cache_levels &= (1 << (i*3))-1; - return 0; } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index f6d4f4052555..cd134db41a57 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -465,17 +465,15 @@ out: /* GENERIC PROBE */ -static int vgic_init_cpu_starting(unsigned int cpu) +void kvm_vgic_cpu_up(void) { enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); - return 0; } -static int vgic_init_cpu_dying(unsigned int cpu) +void kvm_vgic_cpu_down(void) { disable_percpu_irq(kvm_vgic_global_state.maint_irq); - return 0; } static irqreturn_t vgic_maintenance_handler(int irq, void *data) @@ -572,7 +570,7 @@ int kvm_vgic_hyp_init(void) if (ret) return ret; - if (!has_mask) + if (!has_mask && !kvm_vgic_global_state.maint_irq) return 0; ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, @@ -584,19 +582,6 @@ int kvm_vgic_hyp_init(void) return ret; } - ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - "kvm/arm/vgic:starting", - vgic_init_cpu_starting, vgic_init_cpu_dying); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); return 0; - -out_free_irq: - free_percpu_irq(kvm_vgic_global_state.maint_irq, - kvm_get_running_vcpus()); - return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index efb2726efbb3..9d63149d9e53 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -617,6 +617,8 @@ static const struct midr_range broken_seis[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), + MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), {}, }; diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index d78ae63d7c15..08978d0672e7 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -16,7 +16,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> -unsigned int kvm_arm_vmid_bits; +unsigned int __ro_after_init kvm_arm_vmid_bits; static DEFINE_RAW_SPINLOCK(cpu_vmid_lock); static atomic64_t vmid_generation; @@ -172,7 +172,7 @@ void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) /* * Initialize the VMID allocator */ -int kvm_arm_vmid_alloc_init(void) +int __init kvm_arm_vmid_alloc_init(void) { kvm_arm_vmid_bits = kvm_get_vmid_bits(); @@ -190,7 +190,7 @@ int kvm_arm_vmid_alloc_init(void) return 0; } -void kvm_arm_vmid_alloc_free(void) +void __init kvm_arm_vmid_alloc_free(void) { kfree(vmid_map); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 35e9a468d13e..95364e8bdc19 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -559,3 +559,24 @@ bool __init arch_hugetlb_valid_size(unsigned long size) { return __hugetlb_valid_size(size); } + +pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) && + cpus_have_const_cap(ARM64_WORKAROUND_2645198)) { + /* + * Break-before-make (BBM) is required for all user space mappings + * when the permission changes from executable to non-executable + * in cases where cpu is affected with errata #2645198. + */ + if (pte_user_exec(READ_ONCE(*ptep))) + return huge_ptep_clear_flush(vma, addr, ptep); + } + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); +} + +void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 14c87e8d69d8..d77c9f56b7b4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1630,3 +1630,24 @@ static int __init prevent_bootmem_remove_init(void) } early_initcall(prevent_bootmem_remove_init); #endif + +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ + if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) && + cpus_have_const_cap(ARM64_WORKAROUND_2645198)) { + /* + * Break-before-make (BBM) is required for all user space mappings + * when the permission changes from executable to non-executable + * in cases where cpu is affected with errata #2645198. + */ + if (pte_user_exec(READ_ONCE(*ptep))) + return ptep_clear_flush(vma, addr, ptep); + } + return ptep_get_and_clear(vma->vm_mm, addr, ptep); +} + +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + set_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index a86ee376920a..37b6a1d8d8b0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -50,6 +50,7 @@ MTE MTE_ASYMM SME SME_FA64 +SME2 SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 @@ -71,6 +72,7 @@ WORKAROUND_2038923 WORKAROUND_2064142 WORKAROUND_2077057 WORKAROUND_2457168 +WORKAROUND_2645198 WORKAROUND_2658417 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE diff --git a/arch/arm64/tools/gen-sysreg.awk b/arch/arm64/tools/gen-sysreg.awk index c350164a3955..e1df4b956596 100755 --- a/arch/arm64/tools/gen-sysreg.awk +++ b/arch/arm64/tools/gen-sysreg.awk @@ -98,6 +98,7 @@ END { res0 = "UL(0)" res1 = "UL(0)" + unkn = "UL(0)" next_bit = 63 @@ -112,11 +113,13 @@ END { define(reg "_RES0", "(" res0 ")") define(reg "_RES1", "(" res1 ")") + define(reg "_UNKN", "(" unkn ")") print "" reg = null res0 = null res1 = null + unkn = null next } @@ -134,6 +137,7 @@ END { res0 = "UL(0)" res1 = "UL(0)" + unkn = "UL(0)" define("REG_" reg, "S" op0 "_" op1 "_C" crn "_C" crm "_" op2) define("SYS_" reg, "sys_reg(" op0 ", " op1 ", " crn ", " crm ", " op2 ")") @@ -161,7 +165,9 @@ END { define(reg "_RES0", "(" res0 ")") if (res1 != null) define(reg "_RES1", "(" res1 ")") - if (res0 != null || res1 != null) + if (unkn != null) + define(reg "_UNKN", "(" unkn ")") + if (res0 != null || res1 != null || unkn != null) print "" reg = null @@ -172,6 +178,7 @@ END { op2 = null res0 = null res1 = null + unkn = null next } @@ -190,6 +197,7 @@ END { next_bit = 0 res0 = null res1 = null + unkn = null next } @@ -215,6 +223,16 @@ END { next } +/^Unkn/ && (block == "Sysreg" || block == "SysregFields") { + expect_fields(2) + parse_bitdef(reg, "UNKN", $2) + field = "UNKN_" msb "_" lsb + + unkn = unkn " | GENMASK_ULL(" msb ", " lsb ")" + + next +} + /^Field/ && (block == "Sysreg" || block == "SysregFields") { expect_fields(3) field = $3 diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 184e58fd5631..330569fb2336 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -15,6 +15,8 @@ # Res1 <msb>[:<lsb>] +# Unkn <msb>[:<lsb>] + # Field <msb>[:<lsb>] <name> # Enum <msb>[:<lsb>] <name> @@ -894,6 +896,7 @@ EndEnum Enum 27:24 SME 0b0000 NI 0b0001 IMP + 0b0010 SME2 EndEnum Res0 23:20 Enum 19:16 MPAM_frac @@ -975,7 +978,9 @@ Enum 63 FA64 EndEnum Res0 62:60 Enum 59:56 SMEver - 0b0000 IMP + 0b0000 SME + 0b0001 SME2 + 0b0010 SME2p1 EndEnum Enum 55:52 I16I64 0b0000 NI @@ -986,7 +991,19 @@ Enum 48 F64F64 0b0 NI 0b1 IMP EndEnum -Res0 47:40 +Enum 47:44 I16I32 + 0b0000 NI + 0b0101 IMP +EndEnum +Enum 43 B16B16 + 0b0 NI + 0b1 IMP +EndEnum +Enum 42 F16F16 + 0b0 NI + 0b1 IMP +EndEnum +Res0 41:40 Enum 39:36 I8I32 0b0000 NI 0b1111 IMP @@ -999,7 +1016,10 @@ Enum 34 B16F32 0b0 NI 0b1 IMP EndEnum -Res0 33 +Enum 33 BI32I32 + 0b0 NI + 0b1 IMP +EndEnum Enum 32 F32F32 0b0 NI 0b1 IMP @@ -1599,7 +1619,8 @@ EndSysreg SysregFields SMCR_ELx Res0 63:32 Field 31 FA64 -Res0 30:9 +Field 30 EZT0 +Res0 29:9 Raz 8:4 Field 3:0 LEN EndSysregFields @@ -1635,6 +1656,16 @@ Sysreg SCXTNUM_EL1 3 0 13 0 7 Field 63:0 SoftwareContextNumber EndSysreg +# The bit layout for CCSIDR_EL1 depends on whether FEAT_CCIDX is implemented. +# The following is for case when FEAT_CCIDX is not implemented. +Sysreg CCSIDR_EL1 3 1 0 0 0 +Res0 63:32 +Unkn 31:28 +Field 27:13 NumSets +Field 12:3 Associativity +Field 2:0 LineSize +EndSysreg + Sysreg CLIDR_EL1 3 1 0 0 1 Res0 63:47 Field 46:33 Ttypen @@ -1651,6 +1682,11 @@ Field 5:3 Ctype2 Field 2:0 Ctype1 EndSysreg +Sysreg CCSIDR2_EL1 3 1 0 0 2 +Res0 63:24 +Field 23:0 NumSets +EndSysreg + Sysreg GMID_EL1 3 1 0 0 4 Res0 63:4 Field 3:0 BS diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 94680521fbf9..8895df121540 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -7,7 +7,7 @@ #include <asm/elf.h> -Elf64_Half elf_core_extra_phdrs(void) +Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return GATE_EHDR->e_phnum; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { const struct elf_phdr *const gate_phdrs = (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5cedb28e8a40..2803c9c21ef9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -758,7 +758,7 @@ struct kvm_mips_callbacks { void (*vcpu_reenter)(struct kvm_vcpu *vcpu); }; extern struct kvm_mips_callbacks *kvm_mips_callbacks; -int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); +int kvm_mips_emulation_init(void); /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); @@ -888,7 +888,6 @@ extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq); -static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 91d197bee9c0..29e51649203b 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select MMU_NOTIFIER select SRCU select INTERVAL_TREE + select KVM_GENERIC_HARDWARE_ENABLING help Support for hosting Guest kernels. diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 21ff75bcdbc4..805aeea2166e 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -17,4 +17,4 @@ kvm-$(CONFIG_CPU_LOONGSON64) += loongson_ipi.o kvm-y += vz.o obj-$(CONFIG_KVM) += kvm.o -obj-y += callback.o tlb.o +obj-y += tlb.o diff --git a/arch/mips/kvm/callback.c b/arch/mips/kvm/callback.c deleted file mode 100644 index d88aa2173fb0..000000000000 --- a/arch/mips/kvm/callback.c +++ /dev/null @@ -1,14 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. - * Authors: Yann Le Du <ledu@kymasys.com> - */ - -#include <linux/export.h> -#include <linux/kvm_host.h> - -struct kvm_mips_callbacks *kvm_mips_callbacks; -EXPORT_SYMBOL_GPL(kvm_mips_callbacks); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index a25e0b73ee70..36c8991b5d39 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -135,16 +135,6 @@ void kvm_arch_hardware_disable(void) kvm_mips_callbacks->hardware_disable(); } -int kvm_arch_hardware_setup(void *opaque) -{ - return 0; -} - -int kvm_arch_check_processor_compat(void *opaque) -{ - return 0; -} - extern void kvm_init_loongson_ipi(struct kvm *kvm); int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) @@ -1015,21 +1005,6 @@ long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return r; } -int kvm_arch_init(void *opaque) -{ - if (kvm_mips_callbacks) { - kvm_err("kvm: module already exists\n"); - return -EEXIST; - } - - return kvm_mips_emulation_init(&kvm_mips_callbacks); -} - -void kvm_arch_exit(void) -{ - kvm_mips_callbacks = NULL; -} - int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -1646,16 +1621,21 @@ static int __init kvm_mips_init(void) if (ret) return ret; - ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - + ret = kvm_mips_emulation_init(); if (ret) return ret; + if (boot_cpu_type() == CPU_LOONGSON64) kvm_priority_to_irq = kvm_loongson3_priority_to_irq; register_die_notifier(&kvm_mips_csr_die_notifier); + ret = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (ret) { + unregister_die_notifier(&kvm_mips_csr_die_notifier); + return ret; + } return 0; } diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index c706f5890a05..dafab003ea0d 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -3304,7 +3304,10 @@ static struct kvm_mips_callbacks kvm_vz_callbacks = { .vcpu_reenter = kvm_vz_vcpu_reenter, }; -int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) +/* FIXME: Get rid of the callbacks now that trap-and-emulate is gone. */ +struct kvm_mips_callbacks *kvm_mips_callbacks = &kvm_vz_callbacks; + +int kvm_mips_emulation_init(void) { if (!cpu_has_vz) return -ENODEV; @@ -3318,7 +3321,5 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) return -ENODEV; pr_info("Starting KVM with MIPS VZ extensions\n"); - - *install_callbacks = &kvm_vz_callbacks; return 0; } diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index af04cea82b94..352d7de24018 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -210,6 +210,10 @@ ld_version() gsub(".*version ", ""); gsub("-.*", ""); split($1,a, "."); + if( length(a[3]) == "8" ) + # a[3] is probably a date of format yyyymmdd used for release snapshots. We + # can assume it to be zero as it does not signify a new version as such. + a[3] = 0; print a[1]*100000000 + a[2]*1000000 + a[3]*10000; exit }' diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4f897993b710..699a88584ae1 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -137,7 +137,7 @@ struct imc_pmu { * are inited. */ struct imc_pmu_ref { - struct mutex lock; + spinlock_t lock; unsigned int id; int refc; }; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index caea15dcb91d..959f566a455c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -876,13 +876,10 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void) {} -static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} -static inline void kvm_arch_exit(void) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index eae9619b6190..6bef23d6d0e3 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -118,7 +118,6 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu); extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); -extern int kvmppc_core_check_processor_compat(void); extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 6d525285dbe8..57f4e7896d67 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -999,16 +999,6 @@ int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store); -int kvmppc_core_check_processor_compat(void) -{ - /* - * We always return 0 for book3s. We check - * for compatibility while loading the HV - * or PR module - */ - return 0; -} - int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) { return kvm->arch.kvm_ops->hcall_implemented(hcall); @@ -1062,7 +1052,7 @@ static int kvmppc_book3s_init(void) { int r; - r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (r) return r; #ifdef CONFIG_KVM_BOOK3S_32_HANDLER diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index c8b2b4478545..b0f695428733 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -314,7 +314,7 @@ static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu) kvmppc_booke_vcpu_put(vcpu); } -int kvmppc_core_check_processor_compat(void) +static int kvmppc_e500_check_processor_compat(void) { int r; @@ -507,7 +507,7 @@ static int __init kvmppc_e500_init(void) unsigned long handler_len; unsigned long max_ivor = 0; - r = kvmppc_core_check_processor_compat(); + r = kvmppc_e500_check_processor_compat(); if (r) goto err_out; @@ -531,7 +531,7 @@ static int __init kvmppc_e500_init(void) flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + ivor[max_ivor] + handler_len); - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); if (r) goto err_out; kvm_ops_e500.owner = THIS_MODULE; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 57e0ad6a2ca3..611532a0dedc 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -388,6 +388,10 @@ static int __init kvmppc_e500mc_init(void) { int r; + r = kvmppc_e500mc_check_processor_compat(); + if (r) + return kvmppc_e500mc; + r = kvmppc_booke_init(); if (r) goto err_out; @@ -400,7 +404,7 @@ static int __init kvmppc_e500mc_init(void) */ kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); if (r) goto err_out; kvm_ops_e500mc.owner = THIS_MODULE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 04494a4fb37a..4c5405fc5538 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -435,21 +435,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void) -{ - return 0; -} - -int kvm_arch_hardware_setup(void *opaque) -{ - return 0; -} - -int kvm_arch_check_processor_compat(void *opaque) -{ - return kvmppc_core_check_processor_compat(); -} - int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { struct kvmppc_ops *kvm_ops = NULL; @@ -2544,11 +2529,6 @@ void kvmppc_init_lpid(unsigned long nr_lpids_param) } EXPORT_SYMBOL_GPL(kvmppc_init_lpid); -int kvm_arch_init(void *opaque) -{ - return 0; -} - EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 80a148c57de8..44a35ed4f686 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1012,7 +1012,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, void hpt_clear_stress(void); static struct timer_list stress_hpt_timer; -void stress_hpt_timer_fn(struct timer_list *timer) +static void stress_hpt_timer_fn(struct timer_list *timer) { int next_cpu; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d517aba94d1b..100e97daf76b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -14,6 +14,7 @@ #include <asm/cputhreads.h> #include <asm/smp.h> #include <linux/string.h> +#include <linux/spinlock.h> /* Nest IMC data structures and variables */ @@ -21,7 +22,7 @@ * Used to avoid races in counting the nest-pmu units during hotplug * register and unregister */ -static DEFINE_MUTEX(nest_init_lock); +static DEFINE_SPINLOCK(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; @@ -50,7 +51,7 @@ static int trace_imc_mem_size; * core and trace-imc */ static struct imc_pmu_ref imc_global_refc = { - .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), .id = 0, .refc = 0, }; @@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) get_hard_smp_processor_id(cpu)); /* * If this is the last cpu in this chip then, skip the reference - * count mutex lock and make the reference count on this chip zero. + * count lock and make the reference count on this chip zero. */ ref = get_nest_pmu_ref(cpu); if (!ref) @@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event) /* * See if we need to disable the nest PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the nest counters. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return; - /* Take the mutex lock for this node and then decrement the reference count */ - mutex_lock(&ref->lock); + /* Take the lock for this node and then decrement the reference count */ + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that node. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id); return; } @@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event) WARN(1, "nest-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); } static int nest_imc_event_init(struct perf_event *event) @@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event) /* * Get the imc_pmu_ref struct for this node. - * Take the mutex lock and then increment the count of nest pmu events - * inited. + * Take the lock and then increment the count of nest pmu events inited. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to start the counters for node %d\n", node_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); event->destroy = nest_imc_counters_release; return 0; @@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size) return -ENOMEM; mem_info->vbase = page_address(page); - /* Init the mutex */ core_imc_refc[core_id].id = core_id; - mutex_init(&core_imc_refc[core_id].lock); + spin_lock_init(&core_imc_refc[core_id].lock); rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, __pa((void *)mem_info->vbase), @@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); } else { /* - * If this is the last cpu in this core then, skip taking refernce - * count mutex lock for this core and directly zero "refc" for - * this core. + * If this is the last cpu in this core then skip taking reference + * count lock for this core and directly zero "refc" for this core. */ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); @@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) * last cpu in this core and core-imc event running * in this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_CORE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } return 0; } @@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void) static void reset_global_refc(struct perf_event *event) { - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); imc_global_refc.refc--; /* @@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event) imc_global_refc.refc = 0; imc_global_refc.id = 0; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } static void core_imc_counters_release(struct perf_event *event) @@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event) /* * See if we need to disable the IMC PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the core counters. */ core_id = event->cpu / threads_per_core; - /* Take the mutex lock and decrement the refernce count for this core */ + /* Take the lock and decrement the refernce count for this core */ ref = &core_imc_refc[core_id]; if (!ref) return; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that core. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("IMC: Unable to stop the counters for core %d\n", core_id); return; } @@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event) WARN(1, "core-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); reset_global_refc(event); } @@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event) if ((!pcmi->vbase)) return -ENODEV; - /* Get the core_imc mutex for this core */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; @@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event) /* * Core pmu units are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the core counters. + * If yes, take the lock and enable the core counters. * If not, just increment the count in core_imc_refc struct. */ - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("core-imc: Unable to start the counters for core %d\n", core_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* * Since the system can run either in accumulation or trace-mode @@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event) * to know whether any other trace/thread imc * events are running. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { /* * No other trace/thread imc events are running in @@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_CORE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; @@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); /* Reduce the refc if thread-imc event running on this cpu */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_THREAD) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); /* * Check if any other trace/core imc events are running in the * system, if not set the global id to thread-imc. @@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_THREAD; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; @@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags) /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. + * If yes, take the lock and enable the counters. * If not, just increment the count in ref count struct. */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to start the counter\ for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to stop the counters\ for core %d\n", core_id); return; @@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); @@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size) } } - /* Init the mutex, if not already */ trace_imc_refc[core_id].id = core_id; - mutex_init(&trace_imc_refc[core_id].lock); + spin_lock_init(&trace_imc_refc[core_id].lock); mtspr(SPRN_LDBAR, 0); return 0; @@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu) * Reduce the refc if any trace-imc event running * on this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_TRACE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags) } mtspr(SPRN_LDBAR, ldbar_value); - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); return; } } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); trace_imc_event_stop(event, flags); } @@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event) * no other thread is running any core/thread imc * events */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { /* * No core/thread imc events are running in the @@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_TRACE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.idx = -1; @@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void) i = 0; for_each_node(nid) { /* - * Mutex lock to avoid races while tracking the number of + * Take the lock to avoid races while tracking the number of * sessions using the chip's nest pmu units. */ - mutex_init(&nest_imc_refc[i].lock); + spin_lock_init(&nest_imc_refc[i].lock); /* * Loop to init the "id" with the node_id. Variable "i" initialized to @@ -1633,7 +1629,7 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr) static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) { if (pmu_ptr->domain == IMC_DOMAIN_NEST) { - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); @@ -1643,7 +1639,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus > 0) nest_pmus--; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); } /* Free core_imc memory */ @@ -1800,11 +1796,11 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id * rest. To handle the cpuhotplug callback unregister, we track * the number of nest pmus in "nest_pmus". */ - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 0) { ret = init_nest_pmu_ref(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; goto err_free_mem; @@ -1812,7 +1808,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id /* Register for cpu hotplug notification. */ ret = nest_pmu_cpumask_init(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(nest_imc_refc); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; @@ -1820,7 +1816,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id } } nest_pmus++; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); break; case IMC_DOMAIN_CORE: ret = core_imc_pmu_cpumask_init(); diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 93f43a3e7886..cf24da85b13c 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -230,7 +230,6 @@ struct kvm_vcpu_arch { bool pause; }; -static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} @@ -297,11 +296,11 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm); void kvm_riscv_gstage_free_pgd(struct kvm *kvm); void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu); -void kvm_riscv_gstage_mode_detect(void); -unsigned long kvm_riscv_gstage_mode(void); +void __init kvm_riscv_gstage_mode_detect(void); +unsigned long __init kvm_riscv_gstage_mode(void); int kvm_riscv_gstage_gpa_bits(void); -void kvm_riscv_gstage_vmid_detect(void); +void __init kvm_riscv_gstage_vmid_detect(void); unsigned long kvm_riscv_gstage_vmid_bits(void); int kvm_riscv_gstage_vmid_init(struct kvm *kvm); bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid); diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index f36a737d5f96..d5a658a047a7 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -20,6 +20,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" depends on RISCV_SBI && MMU + select KVM_GENERIC_HARDWARE_ENABLING select MMU_NOTIFIER select PREEMPT_NOTIFIERS select KVM_MMIO diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 58c5489d3031..e2da56ed9069 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -20,16 +20,6 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_check_processor_compat(void *opaque) -{ - return 0; -} - -int kvm_arch_hardware_setup(void *opaque) -{ - return 0; -} - int kvm_arch_hardware_enable(void) { unsigned long hideleg, hedeleg; @@ -70,7 +60,7 @@ void kvm_arch_hardware_disable(void) csr_write(CSR_HIDELEG, 0); } -int kvm_arch_init(void *opaque) +static int __init riscv_kvm_init(void) { const char *str; @@ -115,16 +105,7 @@ int kvm_arch_init(void *opaque) kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); - return 0; -} - -void kvm_arch_exit(void) -{ -} - -static int __init riscv_kvm_init(void) -{ - return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); } module_init(riscv_kvm_init); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 34b57e0be2ef..66ef19676fe4 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -20,12 +20,12 @@ #include <asm/pgtable.h> #ifdef CONFIG_64BIT -static unsigned long gstage_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); -static unsigned long gstage_pgd_levels = 3; +static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); +static unsigned long gstage_pgd_levels __ro_after_init = 3; #define gstage_index_bits 9 #else -static unsigned long gstage_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); -static unsigned long gstage_pgd_levels = 2; +static unsigned long gstage_mode __ro_after_init = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); +static unsigned long gstage_pgd_levels __ro_after_init = 2; #define gstage_index_bits 10 #endif @@ -758,7 +758,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) kvm_riscv_local_hfence_gvma_all(); } -void kvm_riscv_gstage_mode_detect(void) +void __init kvm_riscv_gstage_mode_detect(void) { #ifdef CONFIG_64BIT /* Try Sv57x4 G-stage mode */ @@ -782,7 +782,7 @@ skip_sv48x4_test: #endif } -unsigned long kvm_riscv_gstage_mode(void) +unsigned long __init kvm_riscv_gstage_mode(void) { return gstage_mode >> HGATP_MODE_SHIFT; } diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 6cd93995fb65..5246da1c9167 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -17,10 +17,10 @@ static unsigned long vmid_version = 1; static unsigned long vmid_next; -static unsigned long vmid_bits; +static unsigned long vmid_bits __ro_after_init; static DEFINE_SPINLOCK(vmid_lock); -void kvm_riscv_gstage_vmid_detect(void) +void __init kvm_riscv_gstage_vmid_detect(void) { unsigned long old; diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index e27c2140d620..8dcd7af2911a 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -23,9 +23,9 @@ #define memmove memmove #define memzero(s, n) memset((s), 0, (n)) -#ifdef CONFIG_KERNEL_BZIP2 +#if defined(CONFIG_KERNEL_BZIP2) #define BOOT_HEAP_SIZE 0x400000 -#elif CONFIG_KERNEL_ZSTD +#elif defined(CONFIG_KERNEL_ZSTD) #define BOOT_HEAP_SIZE 0x30000 #else #define BOOT_HEAP_SIZE 0x10000 diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index a7b4e1d82758..74b35ec2ad28 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -190,7 +190,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -569,6 +568,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -660,6 +660,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -705,6 +706,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -781,6 +783,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m @@ -848,7 +851,6 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set @@ -870,7 +872,6 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y -CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 2bc2d0fe5774..cec71268e3bc 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -181,7 +181,6 @@ CONFIG_NFT_CT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m -CONFIG_NFT_OBJREF=m CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m @@ -559,6 +558,7 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 +# CONFIG_LEGACY_TIOCSTI is not set CONFIG_VIRTIO_CONSOLE=m CONFIG_HW_RANDOM_VIRTIO=m CONFIG_HANGCHECK_TIMER=m @@ -645,6 +645,7 @@ CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y @@ -688,6 +689,7 @@ CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y CONFIG_INTEGRITY_SIGNATURE=y CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_INTEGRITY_PLATFORM_KEYRING=y CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y @@ -766,6 +768,7 @@ CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m @@ -798,7 +801,6 @@ CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ae14ab0b864d..a9c0c81d1de9 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -13,7 +13,6 @@ CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 CONFIG_HZ_100=y -# CONFIG_RELOCATABLE is not set # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y @@ -50,6 +49,7 @@ CONFIG_ZFCP=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_TIOCSTI is not set # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index feaba12dbecb..efa103b52a1a 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -131,19 +131,21 @@ struct hws_combined_entry { struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ } __packed; -struct hws_trailer_entry { - union { - struct { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned int :29; /* 3 - 31: Reserved */ - unsigned int bsdes:16; /* 32-47: size of basic SDE */ - unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ - }; - unsigned long long flags; /* 0 - 63: All indicators */ +union hws_trailer_header { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + unsigned long long overflow; /* 64 - Overflow Count */ }; - unsigned long long overflow; /* 64 - sample Overflow count */ + __uint128_t val; +}; + +struct hws_trailer_entry { + union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ @@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, return USEC_PER_SEC * qsi->cpu_speed / rate; } -#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL -#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL - /* Return TOD timestamp contained in an trailer entry */ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) { /* TOD in STCKE format */ - if (te->t) + if (te->header.t) return *((unsigned long long *) &te->timestamp[1]); /* TOD in STCK format */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 77f24262c25c..ac665b9670c5 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 1999, 2020 */ -#ifndef DEBUG_H -#define DEBUG_H +#ifndef _ASM_S390_DEBUG_H +#define _ASM_S390_DEBUG_H #include <linux/string.h> #include <linux/spinlock.h> @@ -487,4 +487,4 @@ void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas); #endif /* MODULE */ -#endif /* DEBUG_H */ +#endif /* _ASM_S390_DEBUG_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d67ce719d16a..2bbc3d54959d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1031,7 +1031,6 @@ extern char sie_exit; extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index cb5fc0690435..081837b391e3 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ *ptr__; \ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ - prev__ = *ptr__; \ + prev__ = READ_ONCE(*ptr__); \ do { \ old__ = prev__; \ new__ = old__ op (val); \ diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fc6d5f58debe..2df94d32140c 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; ptr = (void *)ipl_cert_list_addr; end = ptr + ipl_cert_list_size; @@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + ret = kexec_add_buffer(&buf); out: return ret; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 332a49965130..ce886a03545a 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, "%s: Found unknown" " sampling data entry: te->f %i" " basic.def %#4x (%p)\n", __func__, - te->f, sample->def, sample); + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; +} + /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) /* Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", __func__, i, range_scan, aux->head); @@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " @@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ea3830af0cc..cbf9c1b0beda 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -17,6 +17,8 @@ /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA +#define RUNTIME_DISCARD_EXIT + #define EMITS_PT_NOTE #include <asm-generic/vmlinux.lds.h> @@ -79,6 +81,7 @@ SECTIONS _end_amode31_refs = .; } + . = ALIGN(PAGE_SIZE); _edata = .; /* End of data section */ /* will be freed after init */ @@ -193,6 +196,7 @@ SECTIONS BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + . = ALIGN(PAGE_SIZE); _end = . ; /* diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1dae78deddf2..8edb3bee74b6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -83,8 +83,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union esca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -95,8 +96,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union bsca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -126,16 +128,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old = *sigp_ctrl; + union esca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old = *sigp_ctrl; + union bsca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } @@ -3411,7 +3415,7 @@ void kvm_s390_gib_destroy(void) gib = NULL; } -int kvm_s390_gib_init(u8 nisc) +int __init kvm_s390_gib_init(u8 nisc) { int rc = 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e4890e04b210..bd25076aa19b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -256,17 +256,6 @@ debug_info_t *kvm_s390_dbf; debug_info_t *kvm_s390_dbf_uv; /* Section: not file related */ -int kvm_arch_hardware_enable(void) -{ - /* every s390 is virtualization enabled ;-) */ - return 0; -} - -int kvm_arch_check_processor_compat(void *opaque) -{ - return 0; -} - /* forward declarations */ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); @@ -329,25 +318,6 @@ static struct notifier_block kvm_clock_notifier = { .notifier_call = kvm_clock_sync, }; -int kvm_arch_hardware_setup(void *opaque) -{ - gmap_notifier.notifier_call = kvm_gmap_notifier; - gmap_register_pte_notifier(&gmap_notifier); - vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; - gmap_register_pte_notifier(&vsie_gmap_notifier); - atomic_notifier_chain_register(&s390_epoch_delta_notifier, - &kvm_clock_notifier); - return 0; -} - -void kvm_arch_hardware_unsetup(void) -{ - gmap_unregister_pte_notifier(&gmap_notifier); - gmap_unregister_pte_notifier(&vsie_gmap_notifier); - atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, - &kvm_clock_notifier); -} - static void allow_cpu_feat(unsigned long nr) { set_bit_inv(nr, kvm_s390_available_cpu_feat); @@ -385,7 +355,7 @@ static __always_inline void __insn32_query(unsigned int opcode, u8 *query) #define INSN_SORTL 0xb938 #define INSN_DFLTCC 0xb939 -static void kvm_s390_cpu_feat_init(void) +static void __init kvm_s390_cpu_feat_init(void) { int i; @@ -488,7 +458,7 @@ static void kvm_s390_cpu_feat_init(void) */ } -int kvm_arch_init(void *opaque) +static int __init __kvm_s390_init(void) { int rc = -ENOMEM; @@ -498,11 +468,11 @@ int kvm_arch_init(void *opaque) kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf_uv) - goto out; + goto err_kvm_uv; if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) - goto out; + goto err_debug_view; kvm_s390_cpu_feat_init(); @@ -510,30 +480,49 @@ int kvm_arch_init(void *opaque) rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); if (rc) { pr_err("A FLIC registration call failed with rc=%d\n", rc); - goto out; + goto err_flic; } if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) { rc = kvm_s390_pci_init(); if (rc) { pr_err("Unable to allocate AIFT for PCI\n"); - goto out; + goto err_pci; } } rc = kvm_s390_gib_init(GAL_ISC); if (rc) - goto out; + goto err_gib; + + gmap_notifier.notifier_call = kvm_gmap_notifier; + gmap_register_pte_notifier(&gmap_notifier); + vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; + gmap_register_pte_notifier(&vsie_gmap_notifier); + atomic_notifier_chain_register(&s390_epoch_delta_notifier, + &kvm_clock_notifier); return 0; -out: - kvm_arch_exit(); +err_gib: + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) + kvm_s390_pci_exit(); +err_pci: +err_flic: +err_debug_view: + debug_unregister(kvm_s390_dbf_uv); +err_kvm_uv: + debug_unregister(kvm_s390_dbf); return rc; } -void kvm_arch_exit(void) +static void __kvm_s390_exit(void) { + gmap_unregister_pte_notifier(&gmap_notifier); + gmap_unregister_pte_notifier(&vsie_gmap_notifier); + atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, + &kvm_clock_notifier); + kvm_s390_gib_destroy(); if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) kvm_s390_pci_exit(); @@ -5696,7 +5685,7 @@ static inline unsigned long nonhyp_mask(int i) static int __init kvm_s390_init(void) { - int i; + int i, r; if (!sclp.has_sief2) { pr_info("SIE is not available\n"); @@ -5712,12 +5701,23 @@ static int __init kvm_s390_init(void) kvm_s390_fac_base[i] |= stfle_fac_list[i] & nonhyp_mask(i); - return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + r = __kvm_s390_init(); + if (r) + return r; + + r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (r) { + __kvm_s390_exit(); + return r; + } + return 0; } static void __exit kvm_s390_exit(void) { kvm_exit(); + + __kvm_s390_exit(); } module_init(kvm_s390_init); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index d48588c207d8..0261d42c7d01 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -470,7 +470,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm); void kvm_s390_gisa_destroy(struct kvm *kvm); void kvm_s390_gisa_disable(struct kvm *kvm); void kvm_s390_gisa_enable(struct kvm *kvm); -int kvm_s390_gib_init(u8 nisc); +int __init kvm_s390_gib_init(u8 nisc); void kvm_s390_gib_destroy(void); /* implemented in guestdbg.c */ diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c index ec51e810e381..b124d586db55 100644 --- a/arch/s390/kvm/pci.c +++ b/arch/s390/kvm/pci.c @@ -672,7 +672,7 @@ out: return r; } -int kvm_s390_pci_init(void) +int __init kvm_s390_pci_init(void) { zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; diff --git a/arch/s390/kvm/pci.h b/arch/s390/kvm/pci.h index 486d06ef563f..ff0972dd5e71 100644 --- a/arch/s390/kvm/pci.h +++ b/arch/s390/kvm/pci.h @@ -60,7 +60,7 @@ void kvm_s390_pci_clear_list(struct kvm *kvm); int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args); -int kvm_s390_pci_init(void); +int __init kvm_s390_pci_init(void); void kvm_s390_pci_exit(void); static inline bool kvm_s390_pci_interp_allowed(void) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index a889a3a938ba..d1ce73f3bd85 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -28,7 +28,7 @@ #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) -typedef struct { +typedef union { struct { unsigned long pmd_low; unsigned long pmd_high; diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 5521ea12f44e..aa9b96457584 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -32,7 +32,7 @@ intcall: movw %dx, %si movw %sp, %di movw $11, %cx - rep; movsd + rep; movsl /* Pop full state from the stack */ popal @@ -67,7 +67,7 @@ intcall: jz 4f movw %sp, %si movw $11, %cx - rep; movsd + rep; movsl 4: addw $44, %sp /* Restore state and return */ diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index a2834bc93149..3019fb1926e3 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,6 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL + * MTL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,50 +52,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL + * ICL,TGL,RKL,ADL,RPL,MTL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR + * RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR + * TGL,TNT,RKL,ADL,RPL,SPR,MTL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL + * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL + * ADL,RPL,MTL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL + * TNT,RKL,ADL,RPL,MTL * Scope: Package (physical package) * */ @@ -686,6 +687,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6f1ccc57a692..459b1aafd4d4 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1833,6 +1833,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ecced3a52668..c65d8906cbcf 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -69,6 +69,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_EMERALDRAPIDS_X: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: @@ -107,6 +108,8 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 08e822bd7aa6..1e2fe398b66d 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -255,6 +255,9 @@ enum hv_isolation_type { /* TSC invariant control */ #define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 +/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ +#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0) + /* Register name aliases for temporary compatibility */ #define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT #define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index abccd51dcfca..dba2909e5ae2 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -14,6 +14,7 @@ BUILD_BUG_ON(1) * to make a definition optional, but in this case the default will * be __static_call_return0. */ +KVM_X86_OP(check_processor_compatibility) KVM_X86_OP(hardware_enable) KVM_X86_OP(hardware_disable) KVM_X86_OP(hardware_unsetup) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 46e50cb6c9ca..5024ba7fe8b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1086,6 +1086,7 @@ struct kvm_hv { u64 hv_reenlightenment_control; u64 hv_tsc_emulation_control; u64 hv_tsc_emulation_status; + u64 hv_invtsc_control; /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; @@ -1109,6 +1110,7 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { + struct mutex xen_lock; u32 xen_version; bool long_mode; bool runstate_update_flag; @@ -1339,21 +1341,12 @@ struct kvm_arch { struct task_struct *nx_huge_page_recovery_thread; #ifdef CONFIG_X86_64 - /* - * Whether the TDP MMU is enabled for this VM. This contains a - * snapshot of the TDP MMU module parameter from when the VM was - * created and remains unchanged for the life of the VM. If this is - * true, TDP MMU handler functions will run for various MMU - * operations. - */ - bool tdp_mmu_enabled; - /* The number of TDP MMU pages across all roots. */ atomic64_t tdp_mmu_pages; /* - * List of kvm_mmu_page structs being used as roots. - * All kvm_mmu_page structs in the list should have + * List of struct kvm_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have * tdp_mmu_page set. * * For reads, this list is protected by: @@ -1517,6 +1510,8 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) struct kvm_x86_ops { const char *name; + int (*check_processor_compatibility)(void); + int (*hardware_enable)(void); void (*hardware_disable)(void); void (*hardware_unsetup)(void); @@ -1728,9 +1723,6 @@ struct kvm_x86_nested_ops { }; struct kvm_x86_init_ops { - int (*cpu_has_kvm_support)(void); - int (*disabled_by_bios)(void); - int (*check_processor_compatibility)(void); int (*hardware_setup)(void); unsigned int (*handle_intel_pt_intr)(void); @@ -1757,6 +1749,9 @@ extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP #include <asm/kvm-x86-ops.h> +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); +void kvm_x86_vendor_exit(void); + #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 46668e255421..bfbdc072017d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -388,7 +388,7 @@ static void __init ms_hyperv_init_platform(void) * setting of this MSR bit should happen before init_intel() * is called. */ - wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1); + wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_EXPOSE_INVARIANT_TSC); setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); } diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12..77538abeb72a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e787..5993da21d822 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->rmid, to->mon.rmid); /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or * schedule before the smp function call takes place. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index fbeaa9ddef59..8e578311ca9d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -49,6 +49,7 @@ config KVM select SRCU select INTERVAL_TREE select HAVE_KVM_PM_NOTIFIER if PM + select KVM_GENERIC_HARDWARE_ENABLING help Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b14653b61470..2a9f1e200dbc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -8,6 +8,7 @@ * Copyright 2011 Red Hat, Inc. and/or its affiliates. * Copyright IBM Corporation, 2008 */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/export.h> @@ -701,6 +702,10 @@ void kvm_set_cpu_caps(void) if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) kvm_cpu_cap_set(X86_FEATURE_GBPAGES); + kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX, + SF(CONSTANT_TSC) + ); + kvm_cpu_cap_mask(CPUID_8000_0008_EBX, F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | @@ -770,16 +775,22 @@ struct kvm_cpuid_array { int nent; }; +static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) +{ + if (array->nent >= array->maxnent) + return NULL; + + return &array->entries[array->nent++]; +} + static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, u32 function, u32 index) { - struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); - if (array->nent >= array->maxnent) + if (!entry) return NULL; - entry = &array->entries[array->nent++]; - memset(entry, 0, sizeof(*entry)); entry->function = function; entry->index = index; @@ -956,22 +967,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = edx.full; break; } - /* - * Per Intel's SDM, the 0x1f is a superset of 0xb, - * thus they can be handled by common code. - */ case 0x1f: case 0xb: /* - * Populate entries until the level type (ECX[15:8]) of the - * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is - * the starting entry, filled by the primary do_host_cpuid(). + * No topology; a valid topology is indicated by the presence + * of subleaf 1. */ - for (i = 1; entry->ecx & 0xff00; ++i) { - entry = do_host_cpuid(array, function, i); - if (!entry) - goto out; - } + entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm(); @@ -1151,8 +1153,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx &= ~GENMASK(17, 16); break; case 0x80000007: /* Advanced power management */ - /* invariant TSC is CPUID.80000007H:EDX[8] */ - entry->edx &= (1 << 8); + cpuid_entry_override(entry, CPUID_8000_0007_EDX); + /* mask against host */ entry->edx &= boot_cpu_data.x86_power; entry->eax = entry->ebx = entry->ecx = 0; @@ -1202,6 +1204,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: + /* Do not return host topology information. */ + entry->eax = entry->ebx = entry->ecx = 0; + entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { @@ -1482,6 +1487,9 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) && (data & TSX_CTRL_CPUID_CLEAR)) *ebx &= ~(F(RTM) | F(HLE)); + } else if (function == 0x80000007) { + if (kvm_hv_invtsc_suppressed(vcpu)) + *edx &= ~SF(CONSTANT_TSC); } } else { *eax = *ebx = *ecx = *edx = 0; diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index c1390357126a..ee8c4c3496ed 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -4,6 +4,8 @@ * * Copyright 2016 Red Hat, Inc. and/or its affiliates. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kvm_host.h> #include <linux/debugfs.h> #include "lapic.h" diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5cc3efa0e21c..c3443045cd93 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -17,6 +17,7 @@ * * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "kvm_cache_regs.h" diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index e8296942a868..71aff0edc0ed 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -17,6 +17,7 @@ * Ben-Ami Yassour <benami@il.ibm.com> * Andrey Smetanin <asmetanin@virtuozzo.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "x86.h" #include "lapic.h" @@ -999,6 +1000,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_REENLIGHTENMENT_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: + case HV_X64_MSR_TSC_INVARIANT_CONTROL: case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: r = true; @@ -1283,6 +1285,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) case HV_X64_MSR_TSC_EMULATION_STATUS: return hv_vcpu->cpuid_cache.features_eax & HV_ACCESS_REENLIGHTENMENT; + case HV_X64_MSR_TSC_INVARIANT_CONTROL: + return hv_vcpu->cpuid_cache.features_eax & + HV_ACCESS_TSC_INVARIANT; case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: return hv_vcpu->cpuid_cache.features_edx & @@ -1410,6 +1415,17 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, if (!host) return 1; break; + case HV_X64_MSR_TSC_INVARIANT_CONTROL: + /* Only bit 0 is supported */ + if (data & ~HV_EXPOSE_INVARIANT_TSC) + return 1; + + /* The feature can't be disabled from the guest */ + if (!host && hv->hv_invtsc_control && !data) + return 1; + + hv->hv_invtsc_control = data; + break; case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: return syndbg_set_msr(vcpu, msr, data, host); @@ -1585,6 +1601,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, case HV_X64_MSR_TSC_EMULATION_STATUS: data = hv->hv_tsc_emulation_status; break; + case HV_X64_MSR_TSC_INVARIANT_CONTROL: + data = hv->hv_invtsc_control; + break; case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: return syndbg_get_msr(vcpu, msr, pdata, host); @@ -2733,6 +2752,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; ent->eax |= HV_ACCESS_FREQUENCY_MSRS; ent->eax |= HV_ACCESS_REENLIGHTENMENT; + ent->eax |= HV_ACCESS_TSC_INVARIANT; ent->ebx |= HV_POST_MESSAGES; ent->ebx |= HV_SIGNAL_EVENTS; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 9f96414a31c5..f83b8db72b11 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -136,6 +136,33 @@ static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) HV_SYNIC_STIMER_COUNT); } +/* + * With HV_ACCESS_TSC_INVARIANT feature, invariant TSC (CPUID.80000007H:EDX[8]) + * is only observed after HV_X64_MSR_TSC_INVARIANT_CONTROL was written to. + */ +static inline bool kvm_hv_invtsc_suppressed(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + /* + * If Hyper-V's invariant TSC control is not exposed to the guest, + * the invariant TSC CPUID flag is not suppressed, Windows guests were + * observed to be able to handle it correctly. Going forward, VMMs are + * encouraged to enable Hyper-V's invariant TSC control when invariant + * TSC CPUID flag is set to make KVM's behavior match genuine Hyper-V. + */ + if (!hv_vcpu || + !(hv_vcpu->cpuid_cache.features_eax & HV_ACCESS_TSC_INVARIANT)) + return false; + + /* + * If Hyper-V's invariant TSC control is exposed to the guest, KVM is + * responsible for suppressing the invariant TSC CPUID flag if the + * Hyper-V control is not enabled. + */ + return !(to_kvm_hv(vcpu->kvm)->hv_invtsc_control & HV_EXPOSE_INVARIANT_TSC); +} + void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index e0a7a0e7a73c..cd57a517d04a 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -30,7 +30,7 @@ * Based on QEMU and Xen. */ -#define pr_fmt(fmt) "pit: " fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/slab.h> @@ -351,7 +351,7 @@ static void create_pit_timer(struct kvm_pit *pit, u32 val, int is_period) if (ps->period < min_period) { pr_info_ratelimited( - "kvm: requested %lld ns " + "requested %lld ns " "i8254 timer period limited to %lld ns\n", ps->period, min_period); ps->period = min_period; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index e1bb6218bb96..4756bcb5724f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -26,6 +26,8 @@ * Yaozu (Eddie) Dong <Eddie.dong@intel.com> * Port from Qemu. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/mm.h> #include <linux/slab.h> #include <linux/bitops.h> @@ -35,7 +37,7 @@ #include "trace.h" #define pr_pic_unimpl(fmt, ...) \ - pr_err_ratelimited("kvm: pic: " fmt, ## __VA_ARGS__) + pr_err_ratelimited("pic: " fmt, ## __VA_ARGS__) static void pic_irq_request(struct kvm *kvm, int level); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 765943d7cfa5..042dee556125 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -26,6 +26,7 @@ * Yaozu (Eddie) Dong <eddie.dong@intel.com> * Based on Xen 3.1 code. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/kvm.h> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index a70952eca905..b2c397dd2bc6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -7,6 +7,7 @@ * Authors: * Yaozu (Eddie) Dong <Eddie.dong@intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/export.h> #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3742d9adacfc..16d076a1b91a 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -8,6 +8,7 @@ * * Copyright 2010 Red Hat, Inc. and/or its affiliates. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/slab.h> @@ -56,7 +57,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (irq->dest_mode == APIC_DEST_PHYSICAL && irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) { - printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + pr_info("apic: phys broadcast and lowest prio\n"); irq->delivery_mode = APIC_DM_FIXED; } @@ -199,7 +200,7 @@ int kvm_request_irq_source_id(struct kvm *kvm) irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); if (irq_source_id >= BITS_PER_LONG) { - printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); + pr_warn("exhausted allocatable IRQ sources!\n"); irq_source_id = -EFAULT; goto unlock; } @@ -221,7 +222,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) mutex_lock(&kvm->irq_lock); if (irq_source_id < 0 || irq_source_id >= BITS_PER_LONG) { - printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); + pr_err("IRQ source ID out of range!\n"); goto unlock; } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c index ee4f696a0782..482d6639ef88 100644 --- a/arch/x86/kvm/kvm_onhyperv.c +++ b/arch/x86/kvm/kvm_onhyperv.c @@ -2,6 +2,7 @@ /* * KVM L1 hypervisor optimizations on Hyper-V. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <asm/mshyperv.h> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4efdb4a4d72c..cfaf1d8c64ca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -15,6 +15,7 @@ * * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/kvm.h> @@ -941,8 +942,7 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm) { if (!kvm->arch.disabled_lapic_found) { kvm->arch.disabled_lapic_found = true; - printk(KERN_INFO - "Disabled LAPIC found during irq injection\n"); + pr_info("Disabled LAPIC found during irq injection\n"); } } @@ -1560,7 +1560,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) if (apic->lapic_timer.period < min_period) { pr_info_ratelimited( - "kvm: vcpu %i: requested %lld ns " + "vcpu %i: requested %lld ns " "lapic timer period limited to %lld ns\n", apic->vcpu->vcpu_id, apic->lapic_timer.period, min_period); @@ -1845,7 +1845,7 @@ static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) deadline = apic->lapic_timer.period; else if (unlikely(deadline > apic->lapic_timer.period)) { pr_info_ratelimited( - "kvm: vcpu %i: requested lapic timer restore with " + "vcpu %i: requested lapic timer restore with " "starting count register %#x=%u (%lld ns) > initial count (%lld ns). " "Using initial count to start timer.\n", apic->vcpu->vcpu_id, diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 6bdaacb6faa0..168c46fd8dd1 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -230,14 +230,14 @@ static inline bool kvm_shadow_root_allocated(struct kvm *kvm) } #ifdef CONFIG_X86_64 -static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; } +extern bool tdp_mmu_enabled; #else -static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; } +#define tdp_mmu_enabled false #endif static inline bool kvm_memslots_have_rmaps(struct kvm *kvm) { - return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm); + return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm); } static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 835426254e76..aeb240b339f5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -14,6 +14,7 @@ * Yaniv Kamay <yaniv@qumranet.com> * Avi Kivity <avi@qumranet.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "irq.h" #include "ioapic.h" @@ -99,6 +100,13 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); */ bool tdp_enabled = false; +bool __ro_after_init tdp_mmu_allowed; + +#ifdef CONFIG_X86_64 +bool __read_mostly tdp_mmu_enabled = true; +module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0444); +#endif + static int max_huge_page_level __read_mostly; static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; @@ -609,9 +617,14 @@ static bool mmu_spte_age(u64 *sptep) return true; } +static inline bool is_tdp_mmu_active(struct kvm_vcpu *vcpu) +{ + return tdp_mmu_enabled && vcpu->arch.mmu->root_role.direct; +} + static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { - if (is_tdp_mmu(vcpu->arch.mmu)) { + if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_begin(); } else { /* @@ -630,7 +643,7 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { - if (is_tdp_mmu(vcpu->arch.mmu)) { + if (is_tdp_mmu_active(vcpu)) { kvm_tdp_mmu_walk_lockless_end(); } else { /* @@ -1279,7 +1292,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, { struct kvm_rmap_head *rmap_head; - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, true); @@ -1312,7 +1325,7 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, { struct kvm_rmap_head *rmap_head; - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot, slot->base_gfn + gfn_offset, mask, false); @@ -1395,7 +1408,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, } } - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) write_protected |= kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level); @@ -1558,7 +1571,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) flush = kvm_handle_gfn_range(kvm, range, kvm_zap_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); return flush; @@ -1571,7 +1584,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range); return flush; @@ -1646,7 +1659,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_age_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) young |= kvm_tdp_mmu_age_gfn_range(kvm, range); return young; @@ -1659,7 +1672,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) if (kvm_memslots_have_rmaps(kvm)) young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmap); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) young |= kvm_tdp_mmu_test_age_gfn(kvm, range); return young; @@ -1921,7 +1934,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) return true; /* TDP MMU pages do not use the MMU generation. */ - return !sp->tdp_mmu_page && + return !is_tdp_mmu_page(sp) && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } @@ -2355,7 +2368,16 @@ static void __link_shadow_page(struct kvm *kvm, mmu_page_add_parent_pte(cache, sp, sptep); - if (sp->unsync_children || sp->unsync) + /* + * The non-direct sub-pagetable must be updated before linking. For + * L1 sp, the pagetable is updated via kvm_sync_page() in + * kvm_mmu_find_shadow_page() without write-protecting the gfn, + * so sp->unsync can be true or false. For higher level non-direct + * sp, the pagetable is updated/synced via mmu_sync_children() in + * FNAME(fetch)(), so sp->unsync_children can only be false. + * WARN_ON_ONCE() if anything happens unexpectedly. + */ + if (WARN_ON_ONCE(sp->unsync_children) || sp->unsync) mark_unsync(sptep); } @@ -3116,11 +3138,11 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ !is_large_pte(spte) && spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* - * A small SPTE exists for this pfn, but FNAME(fetch) - * and __direct_map would like to create a large PTE - * instead: just force them to go down another level, - * patching back for them into pfn the next 9 bits of - * the address. + * A small SPTE exists for this pfn, but FNAME(fetch), + * direct_map(), or kvm_tdp_mmu_map() would like to create a + * large PTE instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of the + * address. */ u64 page_mask = KVM_PAGES_PER_HPAGE(cur_level) - KVM_PAGES_PER_HPAGE(cur_level - 1); @@ -3129,7 +3151,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ } } -static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) +static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; @@ -3173,14 +3195,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) return ret; } -static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) +static void kvm_send_hwpoison_signal(struct kvm_memory_slot *slot, gfn_t gfn) { - send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); + unsigned long hva = gfn_to_hva_memslot(slot, gfn); + + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, PAGE_SHIFT, current); } -static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - if (is_sigpending_pfn(pfn)) { + if (is_sigpending_pfn(fault->pfn)) { kvm_handle_signal_exit(vcpu); return -EINTR; } @@ -3190,43 +3214,43 @@ static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) * into the spte otherwise read access on readonly gfn also can * caused mmio page fault and treat it as mmio access. */ - if (pfn == KVM_PFN_ERR_RO_FAULT) + if (fault->pfn == KVM_PFN_ERR_RO_FAULT) return RET_PF_EMULATE; - if (pfn == KVM_PFN_ERR_HWPOISON) { - kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current); + if (fault->pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(fault->slot, fault->gfn); return RET_PF_RETRY; } return -EFAULT; } -static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, - unsigned int access) +static int kvm_handle_noslot_fault(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault, + unsigned int access) { - /* The pfn is invalid, report the error! */ - if (unlikely(is_error_pfn(fault->pfn))) - return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn); + gva_t gva = fault->is_tdp ? 0 : fault->addr; - if (unlikely(!fault->slot)) { - gva_t gva = fault->is_tdp ? 0 : fault->addr; + vcpu_cache_mmio_info(vcpu, gva, fault->gfn, + access & shadow_mmio_access_mask); - vcpu_cache_mmio_info(vcpu, gva, fault->gfn, - access & shadow_mmio_access_mask); - /* - * If MMIO caching is disabled, emulate immediately without - * touching the shadow page tables as attempting to install an - * MMIO SPTE will just be an expensive nop. Do not cache MMIO - * whose gfn is greater than host.MAXPHYADDR, any guest that - * generates such gfns is running nested and is being tricked - * by L0 userspace (you can observe gfn > L1.MAXPHYADDR if - * and only if L1's MAXPHYADDR is inaccurate with respect to - * the hardware's). - */ - if (unlikely(!enable_mmio_caching) || - unlikely(fault->gfn > kvm_mmu_max_gfn())) - return RET_PF_EMULATE; - } + /* + * If MMIO caching is disabled, emulate immediately without + * touching the shadow page tables as attempting to install an + * MMIO SPTE will just be an expensive nop. + */ + if (unlikely(!enable_mmio_caching)) + return RET_PF_EMULATE; + + /* + * Do not create an MMIO SPTE for a gfn greater than host.MAXPHYADDR, + * any guest that generates such gfns is running nested and is being + * tricked by L0 userspace (you can observe gfn > L1.MAXPHYADDR if and + * only if L1's MAXPHYADDR is inaccurate with respect to the + * hardware's). + */ + if (unlikely(fault->gfn > kvm_mmu_max_gfn())) + return RET_PF_EMULATE; return RET_PF_CONTINUE; } @@ -3350,7 +3374,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) do { u64 new_spte; - if (is_tdp_mmu(vcpu->arch.mmu)) + if (tdp_mmu_enabled) sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, fault->addr, &spte); else sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte); @@ -3433,8 +3457,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } if (++retry_count > 4) { - printk_once(KERN_WARNING - "kvm: Fast #PF retrying more than 4 times.\n"); + pr_warn_once("Fast #PF retrying more than 4 times.\n"); break; } @@ -3596,7 +3619,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (r < 0) goto out_unlock; - if (is_tdp_mmu_enabled(vcpu->kvm)) { + if (tdp_mmu_enabled) { root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu); mmu->root.hpa = root; } else if (shadow_root_level >= PT64_ROOT_4LEVEL) { @@ -4026,7 +4049,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) walk_shadow_page_lockless_begin(vcpu); - if (is_tdp_mmu(vcpu->arch.mmu)) + if (is_tdp_mmu_active(vcpu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else leaf = get_walk(vcpu, addr, sptes, &root); @@ -4174,7 +4197,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); } -static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) +static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_memory_slot *slot = fault->slot; bool async; @@ -4235,12 +4258,33 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) return RET_PF_CONTINUE; } +static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, + unsigned int access) +{ + int ret; + + fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq; + smp_rmb(); + + ret = __kvm_faultin_pfn(vcpu, fault); + if (ret != RET_PF_CONTINUE) + return ret; + + if (unlikely(is_error_pfn(fault->pfn))) + return kvm_handle_error_pfn(vcpu, fault); + + if (unlikely(!fault->slot)) + return kvm_handle_noslot_fault(vcpu, fault, access); + + return RET_PF_CONTINUE; +} + /* * Returns true if the page fault is stale and needs to be retried, i.e. if the * root was invalidated by a memslot update or a relevant mmu_notifier fired. */ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, - struct kvm_page_fault *fault, int mmu_seq) + struct kvm_page_fault *fault) { struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa); @@ -4260,19 +4304,13 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu, return true; return fault->slot && - mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva); + mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva); } static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { - bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu); - - unsigned long mmu_seq; int r; - fault->gfn = fault->addr >> PAGE_SHIFT; - fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn); - if (page_fault_handle_page_track(vcpu, fault)) return RET_PF_EMULATE; @@ -4284,41 +4322,24 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - mmu_seq = vcpu->kvm->mmu_invalidate_seq; - smp_rmb(); - - r = kvm_faultin_pfn(vcpu, fault); - if (r != RET_PF_CONTINUE) - return r; - - r = handle_abnormal_pfn(vcpu, fault, ACC_ALL); + r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); if (r != RET_PF_CONTINUE) return r; r = RET_PF_RETRY; + write_lock(&vcpu->kvm->mmu_lock); - if (is_tdp_mmu_fault) - read_lock(&vcpu->kvm->mmu_lock); - else - write_lock(&vcpu->kvm->mmu_lock); + if (is_page_fault_stale(vcpu, fault)) + goto out_unlock; - if (is_page_fault_stale(vcpu, fault, mmu_seq)) + r = make_mmu_pages_available(vcpu); + if (r) goto out_unlock; - if (is_tdp_mmu_fault) { - r = kvm_tdp_mmu_map(vcpu, fault); - } else { - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - r = __direct_map(vcpu, fault); - } + r = direct_map(vcpu, fault); out_unlock: - if (is_tdp_mmu_fault) - read_unlock(&vcpu->kvm->mmu_lock); - else - write_unlock(&vcpu->kvm->mmu_lock); + write_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(fault->pfn); return r; } @@ -4366,6 +4387,42 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, } EXPORT_SYMBOL_GPL(kvm_handle_page_fault); +#ifdef CONFIG_X86_64 +static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu, + struct kvm_page_fault *fault) +{ + int r; + + if (page_fault_handle_page_track(vcpu, fault)) + return RET_PF_EMULATE; + + r = fast_page_fault(vcpu, fault); + if (r != RET_PF_INVALID) + return r; + + r = mmu_topup_memory_caches(vcpu, false); + if (r) + return r; + + r = kvm_faultin_pfn(vcpu, fault, ACC_ALL); + if (r != RET_PF_CONTINUE) + return r; + + r = RET_PF_RETRY; + read_lock(&vcpu->kvm->mmu_lock); + + if (is_page_fault_stale(vcpu, fault)) + goto out_unlock; + + r = kvm_tdp_mmu_map(vcpu, fault); + +out_unlock: + read_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(fault->pfn); + return r; +} +#endif + int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { /* @@ -4383,13 +4440,18 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) { for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) { int page_num = KVM_PAGES_PER_HPAGE(fault->max_level); - gfn_t base = (fault->addr >> PAGE_SHIFT) & ~(page_num - 1); + gfn_t base = fault->gfn & ~(page_num - 1); if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num)) break; } } +#ifdef CONFIG_X86_64 + if (tdp_mmu_enabled) + return kvm_tdp_mmu_page_fault(vcpu, fault); +#endif + return direct_page_fault(vcpu, fault); } @@ -5719,6 +5781,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; +#ifdef CONFIG_X86_64 + tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled; +#endif /* * max_huge_page_level reflects KVM's MMU capabilities irrespective * of kernel support, e.g. KVM may be capable of using 1GB pages when @@ -5966,7 +6031,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * write and in the same critical section as making the reload request, * e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield. */ - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_invalidate_all_roots(kvm); /* @@ -5991,7 +6056,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm) * Deferring the zap until the final reference to the root is put would * lead to use-after-free. */ - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_zap_invalidated_roots(kvm); } @@ -6017,9 +6082,11 @@ int kvm_mmu_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - r = kvm_mmu_init_tdp_mmu(kvm); - if (r < 0) - return r; + if (tdp_mmu_enabled) { + r = kvm_mmu_init_tdp_mmu(kvm); + if (r < 0) + return r; + } node->track_write = kvm_mmu_pte_write; node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; @@ -6049,7 +6116,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) kvm_page_track_unregister_notifier(kvm, node); - kvm_mmu_uninit_tdp_mmu(kvm); + if (tdp_mmu_enabled) + kvm_mmu_uninit_tdp_mmu(kvm); mmu_free_vm_memory_caches(kvm); } @@ -6103,7 +6171,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end); - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, gfn_end, true, flush); @@ -6136,7 +6204,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); read_unlock(&kvm->mmu_lock); @@ -6379,7 +6447,7 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm, u64 start, u64 end, int target_level) { - if (!is_tdp_mmu_enabled(kvm)) + if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) @@ -6400,7 +6468,7 @@ void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, u64 start = memslot->base_gfn; u64 end = start + memslot->npages; - if (!is_tdp_mmu_enabled(kvm)) + if (!tdp_mmu_enabled) return; if (kvm_memslots_have_rmaps(kvm)) { @@ -6483,7 +6551,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot); read_unlock(&kvm->mmu_lock); @@ -6518,7 +6586,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } - if (is_tdp_mmu_enabled(kvm)) { + if (tdp_mmu_enabled) { read_lock(&kvm->mmu_lock); kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); read_unlock(&kvm->mmu_lock); @@ -6553,7 +6621,7 @@ restart: kvm_mmu_commit_zap_page(kvm, &invalid_list); - if (is_tdp_mmu_enabled(kvm)) + if (tdp_mmu_enabled) kvm_tdp_mmu_zap_all(kvm); write_unlock(&kvm->mmu_lock); @@ -6579,7 +6647,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) * zap all shadow pages. */ if (unlikely(gen == 0)) { - kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); + kvm_debug_ratelimited("zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_zap_all_fast(kvm); } } @@ -6718,6 +6786,13 @@ void __init kvm_mmu_x86_module_init(void) if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); + /* + * Snapshot userspace's desire to enable the TDP MMU. Whether or not the + * TDP MMU is actually enabled is determined in kvm_configure_mmu() + * when the vendor module is loaded. + */ + tdp_mmu_allowed = tdp_mmu_enabled; + kvm_mmu_spte_module_init(); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index dbaf6755c5a7..ac00bfbf32f6 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -199,7 +199,7 @@ struct kvm_page_fault { /* * Maximum page size that can be created for this fault; input to - * FNAME(fetch), __direct_map and kvm_tdp_mmu_map. + * FNAME(fetch), direct_map() and kvm_tdp_mmu_map(). */ u8 max_level; @@ -222,6 +222,7 @@ struct kvm_page_fault { struct kvm_memory_slot *slot; /* Outputs of kvm_faultin_pfn. */ + unsigned long mmu_seq; kvm_pfn_t pfn; hva_t hva; bool map_writable; @@ -279,6 +280,11 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, }; int r; + if (vcpu->arch.mmu->root_role.direct) { + fault.gfn = fault.addr >> PAGE_SHIFT; + fault.slot = kvm_vcpu_gfn_to_memslot(vcpu, fault.gfn); + } + /* * Async #PF "faults", a.k.a. prefetch faults, are not faults from the * guest perspective and have already been counted at the time of the diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 2e09d1b6249f..0a2ac438d647 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -10,6 +10,7 @@ * Author: * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <linux/rculist.h> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 0f6455072055..e5662dbd519c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -791,7 +791,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault { struct guest_walker walker; int r; - unsigned long mmu_seq; bool is_self_change_mapping; pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code); @@ -838,14 +837,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault else fault->max_level = walker.level; - mmu_seq = vcpu->kvm->mmu_invalidate_seq; - smp_rmb(); - - r = kvm_faultin_pfn(vcpu, fault); - if (r != RET_PF_CONTINUE) - return r; - - r = handle_abnormal_pfn(vcpu, fault, walker.pte_access); + r = kvm_faultin_pfn(vcpu, fault, walker.pte_access); if (r != RET_PF_CONTINUE) return r; @@ -871,7 +863,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault r = RET_PF_RETRY; write_lock(&vcpu->kvm->mmu_lock); - if (is_page_fault_stale(vcpu, fault, mmu_seq)) + if (is_page_fault_stale(vcpu, fault)) goto out_unlock; r = make_mmu_pages_available(vcpu); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index c0fd7e049b4e..fce6f047399f 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -7,7 +7,7 @@ * Copyright (C) 2006 Qumranet, Inc. * Copyright 2020 Red Hat, Inc. and/or its affiliates. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "mmu.h" @@ -352,7 +352,7 @@ u64 mark_spte_for_access_track(u64 spte) WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), - "kvm: Access Tracking saved bit locations are not zero\n"); + "Access Tracking saved bit locations are not zero\n"); spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 6f54dc9409c9..0d8deefee66c 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -435,11 +435,11 @@ static inline void check_spte_writable_invariants(u64 spte) { if (spte & shadow_mmu_writable_mask) WARN_ONCE(!(spte & shadow_host_writable_mask), - "kvm: MMU-writable SPTE is not Host-writable: %llx", + KBUILD_MODNAME ": MMU-writable SPTE is not Host-writable: %llx", spte); else WARN_ONCE(is_writable_pte(spte), - "kvm: Writable SPTE is not MMU-writable: %llx", spte); + KBUILD_MODNAME ": Writable SPTE is not MMU-writable: %llx", spte); } static inline bool is_mmu_writable_spte(u64 spte) diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index 39b48e7d7d1a..e26e744df1d1 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "mmu_internal.h" #include "tdp_iter.h" diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d6df38d371a0..bba33aea0fb0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "mmu.h" #include "mmu_internal.h" @@ -10,23 +11,15 @@ #include <asm/cmpxchg.h> #include <trace/events/kvm.h> -static bool __read_mostly tdp_mmu_enabled = true; -module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); - /* Initializes the TDP MMU for the VM, if enabled. */ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { struct workqueue_struct *wq; - if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled)) - return 0; - wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); if (!wq) return -ENOMEM; - /* This should not be changed for the lifetime of the VM. */ - kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); kvm->arch.tdp_mmu_zap_wq = wq; @@ -47,9 +40,6 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm, void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) { - if (!kvm->arch.tdp_mmu_enabled) - return; - /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); @@ -144,7 +134,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) return; - WARN_ON(!root->tdp_mmu_page); + WARN_ON(!is_tdp_mmu_page(root)); /* * The root now has refcount=0. It is valid, but readers already diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index d3714200b932..0a63b1afabd3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -7,6 +7,9 @@ #include "spte.h" +int kvm_mmu_init_tdp_mmu(struct kvm *kvm); +void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); + hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) @@ -68,31 +71,9 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, u64 *spte); #ifdef CONFIG_X86_64 -int kvm_mmu_init_tdp_mmu(struct kvm *kvm); -void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; } - -static inline bool is_tdp_mmu(struct kvm_mmu *mmu) -{ - struct kvm_mmu_page *sp; - hpa_t hpa = mmu->root.hpa; - - if (WARN_ON(!VALID_PAGE(hpa))) - return false; - - /* - * A NULL shadow page is legal when shadowing a non-paging guest with - * PAE paging, as the MMU will be direct with root_hpa pointing at the - * pae_root page, not a shadow page. - */ - sp = to_shadow_page(hpa); - return sp && is_tdp_mmu_page(sp) && sp->root_count; -} #else -static inline int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return 0; } -static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {} static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; } -static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; } #endif #endif /* __KVM_X86_MMU_TDP_MMU_H */ diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index a8502e02f479..9fac1ec03463 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -13,6 +13,7 @@ * Paolo Bonzini <pbonzini@redhat.com> * Xiao Guangrong <guangrong.xiao@linux.intel.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include <asm/mtrr.h> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index eb594620dd75..d939d3b84e6f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -9,6 +9,7 @@ * Gleb Natapov <gleb@redhat.com> * Wei Huang <wei@redhat.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 042d0aca3c92..4945456fd646 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -14,6 +14,7 @@ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, CPUID_7_1_EDX, + CPUID_8000_0007_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -43,6 +44,9 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) #define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) +/* CPUID level 0x80000007 (EDX). */ +#define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) + struct cpuid_reg { u32 function; u32 index; @@ -68,6 +72,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, + [CPUID_8000_0007_EDX] = {0x80000007, 0, CPUID_EDX}, }; /* @@ -100,6 +105,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX2; else if (x86_feature == X86_FEATURE_SGX_EDECCSSA) return KVM_X86_FEATURE_SGX_EDECCSSA; + else if (x86_feature == X86_FEATURE_CONSTANT_TSC) + return KVM_X86_FEATURE_CONSTANT_TSC; return x86_feature; } diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index a9c1c2af8d94..cc43638d48a3 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "x86.h" diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 6919dee69f18..f52f5e0dd465 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -12,7 +12,7 @@ * Avi Kivity <avi@qumranet.com> */ -#define pr_fmt(fmt) "SVM: " fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_types.h> #include <linux/hashtable.h> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bc9cd7086fa9..500da957e590 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -12,7 +12,7 @@ * Avi Kivity <avi@qumranet.com> */ -#define pr_fmt(fmt) "SVM: " fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_types.h> #include <linux/kvm_host.h> @@ -138,15 +138,13 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercepts[i] = h->intercepts[i]; if (g->int_ctl & V_INTR_MASKING_MASK) { - /* We only want the cr8 intercept bits of L1 */ - vmcb_clr_intercept(c, INTERCEPT_CR8_READ); - vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); - /* - * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not - * affect any interrupt we may want to inject; therefore, - * interrupt window vmexits are irrelevant to L0. + * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8 + * does not affect any interrupt we may want to inject; + * therefore, writes to CR8 are irrelevant to L0, as are + * interrupt window vmexits. */ + vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); vmcb_clr_intercept(c, INTERCEPT_VINTR); } diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 0e313fbae055..1ff068f23841 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -9,6 +9,8 @@ * * Implementation is based on pmu_intel.c file */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/kvm_host.h> #include <linux/perf_event.h> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 86d6897f4806..273cba809328 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -6,6 +6,7 @@ * * Copyright 2010 Red Hat, Inc. and/or its affiliates. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_types.h> #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9a194aa1a75a..799b24801d31 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1,4 +1,4 @@ -#define pr_fmt(fmt) "SVM: " fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> @@ -519,21 +519,37 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu) vcpu->arch.osvw.status |= 1; } -static int has_svm(void) +static bool kvm_is_svm_supported(void) { + int cpu = raw_smp_processor_id(); const char *msg; + u64 vm_cr; if (!cpu_has_svm(&msg)) { - printk(KERN_INFO "has_svm: %s\n", msg); - return 0; + pr_err("SVM not supported by CPU %d, %s\n", cpu, msg); + return false; } if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { pr_info("KVM is unsupported when running as an SEV guest\n"); - return 0; + return false; } - return 1; + rdmsrl(MSR_VM_CR, vm_cr); + if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) { + pr_err("SVM disabled (by BIOS) in MSR_VM_CR on CPU %d\n", cpu); + return false; + } + + return true; +} + +static int svm_check_processor_compat(void) +{ + if (!kvm_is_svm_supported()) + return -EIO; + + return 0; } void __svm_write_tsc_multiplier(u64 multiplier) @@ -572,10 +588,6 @@ static int svm_hardware_enable(void) if (efer & EFER_SVME) return -EBUSY; - if (!has_svm()) { - pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); - return -EINVAL; - } sd = per_cpu_ptr(&svm_data, me); sd->asid_generation = 1; sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; @@ -2076,7 +2088,7 @@ static void svm_handle_mce(struct kvm_vcpu *vcpu) * Erratum 383 triggered. Guest state is corrupt so kill the * guest. */ - pr_err("KVM: Guest triggered AMD Erratum 383\n"); + pr_err("Guest triggered AMD Erratum 383\n"); kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); @@ -4076,17 +4088,6 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, vmcb_mark_dirty(svm->vmcb, VMCB_CR); } -static int is_disabled(void) -{ - u64 vm_cr; - - rdmsrl(MSR_VM_CR, vm_cr); - if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) - return 1; - - return 0; -} - static void svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) { @@ -4098,11 +4099,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xd9; } -static int __init svm_check_processor_compat(void) -{ - return 0; -} - /* * The kvm parameter can be NULL (module initialization, or invocation before * VM creation). Be sure to check the kvm parameter before using it. @@ -4629,7 +4625,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, smap = cr4 & X86_CR4_SMAP; is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { - pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); + pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n"); /* * If the fault occurred in userspace, arbitrarily inject #GP @@ -4701,7 +4697,9 @@ static int svm_vm_init(struct kvm *kvm) } static struct kvm_x86_ops svm_x86_ops __initdata = { - .name = "kvm_amd", + .name = KBUILD_MODNAME, + + .check_processor_compatibility = svm_check_processor_compat, .hardware_unsetup = svm_hardware_unsetup, .hardware_enable = svm_hardware_enable, @@ -4978,7 +4976,7 @@ static __init int svm_hardware_setup(void) } if (nested) { - printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); + pr_info("Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } @@ -4996,7 +4994,7 @@ static __init int svm_hardware_setup(void) /* Force VM NPT level equal to the host's paging level */ kvm_configure_mmu(npt_enabled, get_npt_level(), get_npt_level(), PG_LEVEL_1G); - pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); + pr_info("Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); /* Setup shadow_me_value and shadow_me_mask */ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); @@ -5086,10 +5084,7 @@ err: static struct kvm_x86_init_ops svm_init_ops __initdata = { - .cpu_has_kvm_support = has_svm, - .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, - .check_processor_compatibility = svm_check_processor_compat, .runtime_ops = &svm_x86_ops, .pmu_ops = &amd_pmu_ops, @@ -5097,15 +5092,37 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = { static int __init svm_init(void) { + int r; + __unused_size_checks(); - return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm), - __alignof__(struct vcpu_svm), THIS_MODULE); + if (!kvm_is_svm_supported()) + return -EOPNOTSUPP; + + r = kvm_x86_vendor_init(&svm_init_ops); + if (r) + return r; + + /* + * Common KVM initialization _must_ come last, after this, /dev/kvm is + * exposed to userspace! + */ + r = kvm_init(sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm), + THIS_MODULE); + if (r) + goto err_kvm_init; + + return 0; + +err_kvm_init: + kvm_x86_vendor_exit(); + return r; } static void __exit svm_exit(void) { kvm_exit(); + kvm_x86_vendor_exit(); } module_init(svm_init) diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 26a89d0da93e..7af8422d3382 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -2,6 +2,7 @@ /* * KVM L1 hypervisor optimizations on Hyper-V for SVM. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 45faf84476ce..6981c1e9a809 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -34,7 +34,7 @@ static inline void svm_hv_hardware_setup(void) { if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) { - pr_info("kvm: Hyper-V enlightened NPT TLB flush enabled\n"); + pr_info(KBUILD_MODNAME ": Hyper-V enlightened NPT TLB flush enabled\n"); svm_x86_ops.tlb_remote_flush = hv_remote_flush_tlb; svm_x86_ops.tlb_remote_flush_with_range = hv_remote_flush_tlb_with_range; @@ -43,7 +43,7 @@ static inline void svm_hv_hardware_setup(void) if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) { int cpu; - pr_info("kvm: Hyper-V Direct TLB Flush enabled\n"); + pr_info(KBUILD_MODNAME ": Hyper-V Direct TLB Flush enabled\n"); for_each_online_cpu(cpu) { struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(cpu); diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index cd2ac9536c99..45162c1bcd8f 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -66,13 +66,13 @@ struct vmcs_config { u64 misc; struct nested_vmx_msrs nested; }; -extern struct vmcs_config vmcs_config; +extern struct vmcs_config vmcs_config __ro_after_init; struct vmx_capability { u32 ept; u32 vpid; }; -extern struct vmx_capability vmx_capability; +extern struct vmx_capability vmx_capability __ro_after_init; static inline bool cpu_has_vmx_basic_inout(void) { diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c index ae03d1fe0355..22daca752797 100644 --- a/arch/x86/kvm/vmx/hyperv.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> #include <linux/smp.h> @@ -361,35 +362,43 @@ enum evmcs_revision { enum evmcs_ctrl_type { EVMCS_EXIT_CTRLS, EVMCS_ENTRY_CTRLS, + EVMCS_EXEC_CTRL, EVMCS_2NDEXEC, + EVMCS_3RDEXEC, EVMCS_PINCTRL, EVMCS_VMFUNC, NR_EVMCS_CTRLS, }; -static const u32 evmcs_unsupported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = { +static const u32 evmcs_supported_ctrls[NR_EVMCS_CTRLS][NR_EVMCS_REVISIONS] = { [EVMCS_EXIT_CTRLS] = { - [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL, + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMEXIT_CTRL, }, [EVMCS_ENTRY_CTRLS] = { - [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL, + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMENTRY_CTRL, + }, + [EVMCS_EXEC_CTRL] = { + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_EXEC_CTRL, }, [EVMCS_2NDEXEC] = { - [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_2NDEXEC, + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_2NDEXEC & ~SECONDARY_EXEC_TSC_SCALING, + }, + [EVMCS_3RDEXEC] = { + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_3RDEXEC, }, [EVMCS_PINCTRL] = { - [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_PINCTRL, + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_PINCTRL, }, [EVMCS_VMFUNC] = { - [EVMCSv1_LEGACY] = EVMCS1_UNSUPPORTED_VMFUNC, + [EVMCSv1_LEGACY] = EVMCS1_SUPPORTED_VMFUNC, }, }; -static u32 evmcs_get_unsupported_ctls(enum evmcs_ctrl_type ctrl_type) +static u32 evmcs_get_supported_ctls(enum evmcs_ctrl_type ctrl_type) { enum evmcs_revision evmcs_rev = EVMCSv1_LEGACY; - return evmcs_unsupported_ctrls[ctrl_type][evmcs_rev]; + return evmcs_supported_ctrls[ctrl_type][evmcs_rev]; } static bool evmcs_has_perf_global_ctrl(struct kvm_vcpu *vcpu) @@ -413,7 +422,7 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 * { u32 ctl_low = (u32)*pdata; u32 ctl_high = (u32)(*pdata >> 32); - u32 unsupported_ctrls; + u32 supported_ctrls; /* * Hyper-V 2016 and 2019 try using these features even when eVMCS @@ -422,27 +431,31 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 * switch (msr_index) { case MSR_IA32_VMX_EXIT_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS: - unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_EXIT_CTRLS); + supported_ctrls = evmcs_get_supported_ctls(EVMCS_EXIT_CTRLS); if (!evmcs_has_perf_global_ctrl(vcpu)) - unsupported_ctrls |= VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; - ctl_high &= ~unsupported_ctrls; + supported_ctrls &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= supported_ctrls; break; case MSR_IA32_VMX_ENTRY_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - unsupported_ctrls = evmcs_get_unsupported_ctls(EVMCS_ENTRY_CTRLS); + supported_ctrls = evmcs_get_supported_ctls(EVMCS_ENTRY_CTRLS); if (!evmcs_has_perf_global_ctrl(vcpu)) - unsupported_ctrls |= VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; - ctl_high &= ~unsupported_ctrls; + supported_ctrls &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= supported_ctrls; + break; + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + ctl_high &= evmcs_get_supported_ctls(EVMCS_EXEC_CTRL); break; case MSR_IA32_VMX_PROCBASED_CTLS2: - ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_2NDEXEC); + ctl_high &= evmcs_get_supported_ctls(EVMCS_2NDEXEC); break; case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: - ctl_high &= ~evmcs_get_unsupported_ctls(EVMCS_PINCTRL); + ctl_high &= evmcs_get_supported_ctls(EVMCS_PINCTRL); break; case MSR_IA32_VMX_VMFUNC: - ctl_low &= ~evmcs_get_unsupported_ctls(EVMCS_VMFUNC); + ctl_low &= evmcs_get_supported_ctls(EVMCS_VMFUNC); break; } @@ -452,7 +465,7 @@ void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 * static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type ctrl_type, u32 val) { - return !(val & evmcs_get_unsupported_ctls(ctrl_type)); + return !(val & ~evmcs_get_supported_ctls(ctrl_type)); } int nested_evmcs_check_controls(struct vmcs12 *vmcs12) @@ -461,6 +474,10 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12) vmcs12->pin_based_vm_exec_control))) return -EINVAL; + if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXEC_CTRL, + vmcs12->cpu_based_vm_exec_control))) + return -EINVAL; + if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC, vmcs12->secondary_vm_exec_control))) return -EINVAL; @@ -488,6 +505,38 @@ int nested_evmcs_check_controls(struct vmcs12 *vmcs12) return 0; } +#if IS_ENABLED(CONFIG_HYPERV) +/* + * KVM on Hyper-V always uses the latest known eVMCSv1 revision, the assumption + * is: in case a feature has corresponding fields in eVMCS described and it was + * exposed in VMX feature MSRs, KVM is free to use it. Warn if KVM meets a + * feature which has no corresponding eVMCS field, this likely means that KVM + * needs to be updated. + */ +#define evmcs_check_vmcs_conf(field, ctrl) \ + do { \ + typeof(vmcs_conf->field) unsupported; \ + \ + unsupported = vmcs_conf->field & ~EVMCS1_SUPPORTED_ ## ctrl; \ + if (unsupported) { \ + pr_warn_once(#field " unsupported with eVMCS: 0x%llx\n",\ + (u64)unsupported); \ + vmcs_conf->field &= EVMCS1_SUPPORTED_ ## ctrl; \ + } \ + } \ + while (0) + +void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +{ + evmcs_check_vmcs_conf(cpu_based_exec_ctrl, EXEC_CTRL); + evmcs_check_vmcs_conf(pin_based_exec_ctrl, PINCTRL); + evmcs_check_vmcs_conf(cpu_based_2nd_exec_ctrl, 2NDEXEC); + evmcs_check_vmcs_conf(cpu_based_3rd_exec_ctrl, 3RDEXEC); + evmcs_check_vmcs_conf(vmentry_ctrl, VMENTRY_CTRL); + evmcs_check_vmcs_conf(vmexit_ctrl, VMEXIT_CTRL); +} +#endif + int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h index 571e7929d14e..ab08a9b9ab7d 100644 --- a/arch/x86/kvm/vmx/hyperv.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -48,22 +48,84 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); * Currently unsupported in KVM: * GUEST_IA32_RTIT_CTL = 0x00002814, */ -#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \ - PIN_BASED_VMX_PREEMPTION_TIMER) -#define EVMCS1_UNSUPPORTED_EXEC_CTRL (CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) -#define EVMCS1_UNSUPPORTED_2NDEXEC \ - (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \ - SECONDARY_EXEC_APIC_REGISTER_VIRT | \ - SECONDARY_EXEC_ENABLE_PML | \ - SECONDARY_EXEC_ENABLE_VMFUNC | \ - SECONDARY_EXEC_SHADOW_VMCS | \ +#define EVMCS1_SUPPORTED_PINCTRL \ + (PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \ + PIN_BASED_EXT_INTR_MASK | \ + PIN_BASED_NMI_EXITING | \ + PIN_BASED_VIRTUAL_NMIS) + +#define EVMCS1_SUPPORTED_EXEC_CTRL \ + (CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | \ + CPU_BASED_HLT_EXITING | \ + CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING | \ + CPU_BASED_UNCOND_IO_EXITING | \ + CPU_BASED_MOV_DR_EXITING | \ + CPU_BASED_USE_TSC_OFFSETTING | \ + CPU_BASED_MWAIT_EXITING | \ + CPU_BASED_MONITOR_EXITING | \ + CPU_BASED_INVLPG_EXITING | \ + CPU_BASED_RDPMC_EXITING | \ + CPU_BASED_INTR_WINDOW_EXITING | \ + CPU_BASED_CR8_LOAD_EXITING | \ + CPU_BASED_CR8_STORE_EXITING | \ + CPU_BASED_RDTSC_EXITING | \ + CPU_BASED_TPR_SHADOW | \ + CPU_BASED_USE_IO_BITMAPS | \ + CPU_BASED_MONITOR_TRAP_FLAG | \ + CPU_BASED_USE_MSR_BITMAPS | \ + CPU_BASED_NMI_WINDOW_EXITING | \ + CPU_BASED_PAUSE_EXITING | \ + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) + +#define EVMCS1_SUPPORTED_2NDEXEC \ + (SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | \ + SECONDARY_EXEC_WBINVD_EXITING | \ + SECONDARY_EXEC_ENABLE_VPID | \ + SECONDARY_EXEC_ENABLE_EPT | \ + SECONDARY_EXEC_UNRESTRICTED_GUEST | \ + SECONDARY_EXEC_DESC | \ + SECONDARY_EXEC_ENABLE_RDTSCP | \ + SECONDARY_EXEC_ENABLE_INVPCID | \ + SECONDARY_EXEC_XSAVES | \ + SECONDARY_EXEC_RDSEED_EXITING | \ + SECONDARY_EXEC_RDRAND_EXITING | \ SECONDARY_EXEC_TSC_SCALING | \ - SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ - (VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) -#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0) -#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | \ + SECONDARY_EXEC_PT_USE_GPA | \ + SECONDARY_EXEC_PT_CONCEAL_VMX | \ + SECONDARY_EXEC_BUS_LOCK_DETECTION | \ + SECONDARY_EXEC_NOTIFY_VM_EXITING | \ + SECONDARY_EXEC_ENCLS_EXITING) + +#define EVMCS1_SUPPORTED_3RDEXEC (0ULL) + +#define EVMCS1_SUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | \ + VM_EXIT_SAVE_DEBUG_CONTROLS | \ + VM_EXIT_ACK_INTR_ON_EXIT | \ + VM_EXIT_HOST_ADDR_SPACE_SIZE | \ + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_IA32_PAT | \ + VM_EXIT_LOAD_IA32_PAT | \ + VM_EXIT_SAVE_IA32_EFER | \ + VM_EXIT_LOAD_IA32_EFER | \ + VM_EXIT_CLEAR_BNDCFGS | \ + VM_EXIT_PT_CONCEAL_PIP | \ + VM_EXIT_CLEAR_IA32_RTIT_CTL) + +#define EVMCS1_SUPPORTED_VMENTRY_CTRL \ + (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | \ + VM_ENTRY_LOAD_DEBUG_CONTROLS | \ + VM_ENTRY_IA32E_MODE | \ + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_ENTRY_LOAD_IA32_PAT | \ + VM_ENTRY_LOAD_IA32_EFER | \ + VM_ENTRY_LOAD_BNDCFGS | \ + VM_ENTRY_PT_CONCEAL_PIP | \ + VM_ENTRY_LOAD_IA32_RTIT_CTL) + +#define EVMCS1_SUPPORTED_VMFUNC (0) struct evmcs_field { u16 offset; @@ -117,9 +179,7 @@ static __always_inline int get_evmcs_offset(unsigned long field, { int offset = evmcs_field_offset(field, clean_field); - WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n", - field); - + WARN_ONCE(offset < 0, "accessing unsupported EVMCS field %lx\n", field); return offset; } @@ -211,6 +271,7 @@ static inline void evmcs_load(u64 phys_addr) vp_ap->enlighten_vmentry = 1; } +void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); #else /* !IS_ENABLED(CONFIG_HYPERV) */ static __always_inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d93c715cda6a..557b9c468734 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/objtool.h> #include <linux/percpu.h> @@ -203,7 +204,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) { /* TODO: not to reset guest simply here. */ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); + pr_debug_ratelimited("nested vmx abort, indicator %d\n", indicator); } static inline bool vmx_control_verify(u32 control, u32 low, u32 high) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index e5cec07ca8d9..efce9ad70e4e 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -8,6 +8,8 @@ * Avi Kivity <avi@redhat.com> * Gleb Natapov <gleb@redhat.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/types.h> #include <linux/kvm_host.h> #include <linux/perf_event.h> @@ -762,8 +764,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu) return; warn: - pr_warn_ratelimited("kvm: vcpu-%d: fail to passthrough LBR.\n", - vcpu->vcpu_id); + pr_warn_ratelimited("vcpu-%d: fail to passthrough LBR.\n", vcpu->vcpu_id); } static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 1b56c5e5c9fb..94c38bea60e7 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kvm_host.h> #include <asm/irq_remapping.h> diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index b12da2a6dec9..aa53c98034bf 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2021 Intel Corporation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <asm/sgx.h> @@ -164,7 +165,7 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, if (!vcpu->kvm->arch.sgx_provisioning_allowed && (attributes & SGX_ATTR_PROVISIONKEY)) { if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) - pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n"); + pr_warn_once("SGX PROVISIONKEY advertised but not allowed\n"); kvm_inject_gp(vcpu, 0); return 1; } @@ -381,7 +382,7 @@ int handle_encls(struct kvm_vcpu *vcpu) return handle_encls_ecreate(vcpu); if (leaf == EINIT) return handle_encls_einit(vcpu); - WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf); + WARN_ONCE(1, "unexpected exit on ENCLS[%u]", leaf); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS; return 0; diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 2251b60920f8..106a72c923ca 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "vmcs12.h" diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fc9008dbed33..73005d7e4e43 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -12,6 +12,7 @@ * Avi Kivity <avi@qumranet.com> * Yaniv Kamay <yaniv@qumranet.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/highmem.h> #include <linux/hrtimer.h> @@ -444,36 +445,36 @@ void vmread_error(unsigned long field, bool fault) if (fault) kvm_spurious_fault(); else - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); + vmx_insn_failed("vmread failed: field=%lx\n", field); } noinline void vmwrite_error(unsigned long field, unsigned long value) { - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%u\n", + vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n", field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); } noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) { - vmx_insn_failed("kvm: vmclear failed: %p/%llx err=%u\n", + vmx_insn_failed("vmclear failed: %p/%llx err=%u\n", vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR)); } noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) { - vmx_insn_failed("kvm: vmptrld failed: %p/%llx err=%u\n", + vmx_insn_failed("vmptrld failed: %p/%llx err=%u\n", vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR)); } noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) { - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + vmx_insn_failed("invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", ext, vpid, gva); } noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) { - vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", ext, eptp, gpa); } @@ -488,8 +489,8 @@ static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); -struct vmcs_config vmcs_config; -struct vmx_capability vmx_capability; +struct vmcs_config vmcs_config __ro_after_init; +struct vmx_capability vmx_capability __ro_after_init; #define VMX_SEGMENT_FIELD(seg) \ [VCPU_SREG_##seg] = { \ @@ -523,6 +524,8 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; #if IS_ENABLED(CONFIG_HYPERV) +static struct kvm_x86_ops vmx_x86_ops __initdata; + static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -551,6 +554,71 @@ static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) return 0; } +static __init void hv_init_evmcs(void) +{ + int cpu; + + if (!enlightened_vmcs) + return; + + /* + * Enlightened VMCS usage should be recommended and the host needs + * to support eVMCS v1 or above. + */ + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && + (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= + KVM_EVMCS_VERSION) { + + /* Check that we have assist pages on all online CPUs */ + for_each_online_cpu(cpu) { + if (!hv_get_vp_assist_page(cpu)) { + enlightened_vmcs = false; + break; + } + } + + if (enlightened_vmcs) { + pr_info("Using Hyper-V Enlightened VMCS\n"); + static_branch_enable(&enable_evmcs); + } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_l2_tlb_flush + = hv_enable_l2_tlb_flush; + + } else { + enlightened_vmcs = false; + } +} + +static void hv_reset_evmcs(void) +{ + struct hv_vp_assist_page *vp_ap; + + if (!static_branch_unlikely(&enable_evmcs)) + return; + + /* + * KVM should enable eVMCS if and only if all CPUs have a VP assist + * page, and should reject CPU onlining if eVMCS is enabled the CPU + * doesn't have a VP assist page allocated. + */ + vp_ap = hv_get_vp_assist_page(smp_processor_id()); + if (WARN_ON_ONCE(!vp_ap)) + return; + + /* + * Reset everything to support using non-enlightened VMCS access later + * (e.g. when we reload the module with enlightened_vmcs=0) + */ + vp_ap->nested_control.features.directhypercall = 0; + vp_ap->current_nested_vmcs = 0; + vp_ap->enlighten_vmentry = 0; +} + +#else /* IS_ENABLED(CONFIG_HYPERV) */ +static void hv_init_evmcs(void) {} +static void hv_reset_evmcs(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -1613,8 +1681,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) if (!instr_len) goto rip_updated; - WARN(exit_reason.enclave_mode, - "KVM: skipping instruction after SGX enclave VM-Exit"); + WARN_ONCE(exit_reason.enclave_mode, + "skipping instruction after SGX enclave VM-Exit"); orig_rip = kvm_rip_read(vcpu); rip = orig_rip + instr_len; @@ -2448,88 +2516,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static __init int cpu_has_kvm_support(void) -{ - return cpu_has_vmx(); -} - -static __init int vmx_disabled_by_bios(void) -{ - return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || - !boot_cpu_has(X86_FEATURE_VMX); -} - -static int kvm_cpu_vmxon(u64 vmxon_pointer) -{ - u64 msr; - - cr4_set_bits(X86_CR4_VMXE); - - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" - _ASM_EXTABLE(1b, %l[fault]) - : : [vmxon_pointer] "m"(vmxon_pointer) - : : fault); - return 0; - -fault: - WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", - rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr); - cr4_clear_bits(X86_CR4_VMXE); - - return -EFAULT; -} - -static int vmx_hardware_enable(void) -{ - int cpu = raw_smp_processor_id(); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); - int r; - - if (cr4_read_shadow() & X86_CR4_VMXE) - return -EBUSY; - - /* - * This can happen if we hot-added a CPU but failed to allocate - * VP assist page for it. - */ - if (static_branch_unlikely(&enable_evmcs) && - !hv_get_vp_assist_page(cpu)) - return -EFAULT; - - intel_pt_handle_vmx(1); - - r = kvm_cpu_vmxon(phys_addr); - if (r) { - intel_pt_handle_vmx(0); - return r; - } - - if (enable_ept) - ept_sync_global(); - - return 0; -} - -static void vmclear_local_loaded_vmcss(void) -{ - int cpu = raw_smp_processor_id(); - struct loaded_vmcs *v, *n; - - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), - loaded_vmcss_on_cpu_link) - __loaded_vmcs_clear(v); -} - -static void vmx_hardware_disable(void) -{ - vmclear_local_loaded_vmcss(); - - if (cpu_vmxoff()) - kvm_spurious_fault(); - - intel_pt_handle_vmx(0); -} - /* * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID * directly instead of going through cpu_has(), to ensure KVM is trapping @@ -2565,8 +2551,7 @@ static bool cpu_has_perf_global_ctrl_bug(void) return false; } -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, - u32 msr, u32 *result) +static int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { u32 vmx_msr_low, vmx_msr_high; u32 ctl = ctl_min | ctl_opt; @@ -2584,7 +2569,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, return 0; } -static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) +static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) { u64 allowed; @@ -2593,8 +2578,8 @@ static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) return ctl_opt & allowed; } -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, - struct vmx_capability *vmx_cap) +static int setup_vmcs_config(struct vmcs_config *vmcs_conf, + struct vmx_capability *vmx_cap) { u32 vmx_msr_low, vmx_msr_high; u32 _pin_based_exec_control = 0; @@ -2752,9 +2737,127 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmentry_ctrl = _vmentry_control; vmcs_conf->misc = misc_msr; +#if IS_ENABLED(CONFIG_HYPERV) + if (enlightened_vmcs) + evmcs_sanitize_exec_ctrls(vmcs_conf); +#endif + + return 0; +} + +static bool kvm_is_vmx_supported(void) +{ + int cpu = raw_smp_processor_id(); + + if (!cpu_has_vmx()) { + pr_err("VMX not supported by CPU %d\n", cpu); + return false; + } + + if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || + !this_cpu_has(X86_FEATURE_VMX)) { + pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu); + return false; + } + + return true; +} + +static int vmx_check_processor_compat(void) +{ + int cpu = raw_smp_processor_id(); + struct vmcs_config vmcs_conf; + struct vmx_capability vmx_cap; + + if (!kvm_is_vmx_supported()) + return -EIO; + + if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) { + pr_err("Failed to setup VMCS config on CPU %d\n", cpu); + return -EIO; + } + if (nested) + nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept); + if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) { + pr_err("Inconsistent VMCS config on CPU %d\n", cpu); + return -EIO; + } + return 0; +} + +static int kvm_cpu_vmxon(u64 vmxon_pointer) +{ + u64 msr; + + cr4_set_bits(X86_CR4_VMXE); + + asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" + _ASM_EXTABLE(1b, %l[fault]) + : : [vmxon_pointer] "m"(vmxon_pointer) + : : fault); + return 0; + +fault: + WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n", + rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr); + cr4_clear_bits(X86_CR4_VMXE); + + return -EFAULT; +} + +static int vmx_hardware_enable(void) +{ + int cpu = raw_smp_processor_id(); + u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); + int r; + + if (cr4_read_shadow() & X86_CR4_VMXE) + return -EBUSY; + + /* + * This can happen if we hot-added a CPU but failed to allocate + * VP assist page for it. + */ + if (static_branch_unlikely(&enable_evmcs) && + !hv_get_vp_assist_page(cpu)) + return -EFAULT; + + intel_pt_handle_vmx(1); + + r = kvm_cpu_vmxon(phys_addr); + if (r) { + intel_pt_handle_vmx(0); + return r; + } + + if (enable_ept) + ept_sync_global(); + return 0; } +static void vmclear_local_loaded_vmcss(void) +{ + int cpu = raw_smp_processor_id(); + struct loaded_vmcs *v, *n; + + list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu), + loaded_vmcss_on_cpu_link) + __loaded_vmcs_clear(v); +} + +static void vmx_hardware_disable(void) +{ + vmclear_local_loaded_vmcss(); + + if (cpu_vmxoff()) + kvm_spurious_fault(); + + hv_reset_evmcs(); + + intel_pt_handle_vmx(0); +} + struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) { int node = cpu_to_node(cpu); @@ -2950,9 +3053,8 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save) var.type = 0x3; var.avl = 0; if (save->base & 0xf) - printk_once(KERN_WARNING "kvm: segment base is not " - "paragraph aligned when entering " - "protected mode (seg=%d)", seg); + pr_warn_once("segment base is not paragraph aligned " + "when entering protected mode (seg=%d)", seg); } vmcs_write16(sf->selector, var.selector); @@ -2982,8 +3084,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) * vcpu. Warn the user that an update is overdue. */ if (!kvm_vmx->tss_addr) - printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " - "called before entering vcpu\n"); + pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n"); vmx_segment_cache_clear(vmx); @@ -6851,7 +6952,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) gate_desc *desc = (gate_desc *)host_idt_base + vector; if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, - "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) + "unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); @@ -7449,29 +7550,6 @@ static int vmx_vm_init(struct kvm *kvm) return 0; } -static int __init vmx_check_processor_compat(void) -{ - struct vmcs_config vmcs_conf; - struct vmx_capability vmx_cap; - - if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) || - !this_cpu_has(X86_FEATURE_VMX)) { - pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id()); - return -EIO; - } - - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - return -EIO; - if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept); - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", - smp_processor_id()); - return -EIO; - } - return 0; -} - static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { u8 cache; @@ -8071,7 +8149,9 @@ static void vmx_vm_destroy(struct kvm *kvm) } static struct kvm_x86_ops vmx_x86_ops __initdata = { - .name = "kvm_intel", + .name = KBUILD_MODNAME, + + .check_processor_compatibility = vmx_check_processor_compat, .hardware_unsetup = vmx_hardware_unsetup, @@ -8291,7 +8371,7 @@ static __init int hardware_setup(void) return -EIO; if (cpu_has_perf_global_ctrl_bug()) - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " + pr_warn_once("VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " "does not work properly. Using workaround\n"); if (boot_cpu_has(X86_FEATURE_NX)) @@ -8299,7 +8379,7 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_MPX)) { rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); + WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost"); } if (!cpu_has_vmx_mpx()) @@ -8318,7 +8398,7 @@ static __init int hardware_setup(void) /* NX support is required for shadow paging. */ if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) { - pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n"); + pr_err_ratelimited("NX (Execute Disable) not supported\n"); return -EOPNOTSUPP; } @@ -8470,9 +8550,6 @@ static __init int hardware_setup(void) } static struct kvm_x86_init_ops vmx_init_ops __initdata = { - .cpu_has_kvm_support = cpu_has_kvm_support, - .disabled_by_bios = vmx_disabled_by_bios, - .check_processor_compatibility = vmx_check_processor_compat, .hardware_setup = hardware_setup, .handle_intel_pt_intr = NULL, @@ -8490,41 +8567,23 @@ static void vmx_cleanup_l1d_flush(void) l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO; } -static void vmx_exit(void) +static void __vmx_exit(void) { + allow_smaller_maxphyaddr = false; + #ifdef CONFIG_KEXEC_CORE RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); #endif + vmx_cleanup_l1d_flush(); +} +static void vmx_exit(void) +{ kvm_exit(); + kvm_x86_vendor_exit(); -#if IS_ENABLED(CONFIG_HYPERV) - if (static_branch_unlikely(&enable_evmcs)) { - int cpu; - struct hv_vp_assist_page *vp_ap; - /* - * Reset everything to support using non-enlightened VMCS - * access later (e.g. when we reload the module with - * enlightened_vmcs=0) - */ - for_each_online_cpu(cpu) { - vp_ap = hv_get_vp_assist_page(cpu); - - if (!vp_ap) - continue; - - vp_ap->nested_control.features.directhypercall = 0; - vp_ap->current_nested_vmcs = 0; - vp_ap->enlighten_vmentry = 0; - } - - static_branch_disable(&enable_evmcs); - } -#endif - vmx_cleanup_l1d_flush(); - - allow_smaller_maxphyaddr = false; + __vmx_exit(); } module_exit(vmx_exit); @@ -8532,56 +8591,29 @@ static int __init vmx_init(void) { int r, cpu; -#if IS_ENABLED(CONFIG_HYPERV) + if (!kvm_is_vmx_supported()) + return -EOPNOTSUPP; + /* - * Enlightened VMCS usage should be recommended and the host needs - * to support eVMCS v1 or above. We can also disable eVMCS support - * with module parameter. + * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing + * to unwind if a later step fails. */ - if (enlightened_vmcs && - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= - KVM_EVMCS_VERSION) { - - /* Check that we have assist pages on all online CPUs */ - for_each_online_cpu(cpu) { - if (!hv_get_vp_assist_page(cpu)) { - enlightened_vmcs = false; - break; - } - } - - if (enlightened_vmcs) { - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); - static_branch_enable(&enable_evmcs); - } + hv_init_evmcs(); - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_l2_tlb_flush - = hv_enable_l2_tlb_flush; - - } else { - enlightened_vmcs = false; - } -#endif - - r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx), - __alignof__(struct vcpu_vmx), THIS_MODULE); + r = kvm_x86_vendor_init(&vmx_init_ops); if (r) return r; /* - * Must be called after kvm_init() so enable_ept is properly set + * Must be called after common x86 init so enable_ept is properly set * up. Hand the parameter mitigation value in which was stored in * the pre module init parser. If no parameter was given, it will * contain 'auto' which will be turned into the default 'cond' * mitigation mode. */ r = vmx_setup_l1d_flush(vmentry_l1d_flush_param); - if (r) { - vmx_exit(); - return r; - } + if (r) + goto err_l1d_flush; vmx_setup_fb_clear_ctrl(); @@ -8605,6 +8637,21 @@ static int __init vmx_init(void) if (!enable_ept) allow_smaller_maxphyaddr = true; + /* + * Common KVM initialization _must_ come last, after this, /dev/kvm is + * exposed to userspace! + */ + r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), + THIS_MODULE); + if (r) + goto err_kvm_init; + return 0; + +err_kvm_init: + __vmx_exit(); +err_l1d_flush: + kvm_x86_vendor_exit(); + return r; } module_init(vmx_init); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 842dc898c972..a5282014616c 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -100,8 +100,8 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) return value; do_fail: - WARN_ONCE(1, "kvm: vmread failed: field=%lx\n", field); - pr_warn_ratelimited("kvm: vmread failed: field=%lx\n", field); + WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field); + pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field); return 0; do_exception: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index da4bbd043a7b..c3ac88036b52 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -15,6 +15,7 @@ * Amit Shah <amit.shah@qumranet.com> * Ben-Ami Yassour <benami@il.ibm.com> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kvm_host.h> #include "irq.h" @@ -128,6 +129,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu); static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2); +static DEFINE_MUTEX(vendor_module_lock); struct kvm_x86_ops kvm_x86_ops __read_mostly; #define KVM_X86_OP(func) \ @@ -1480,7 +1482,7 @@ static const u32 emulated_msrs_all[] = { HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_VP_ASSIST_PAGE, HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, - HV_X64_MSR_TSC_EMULATION_STATUS, + HV_X64_MSR_TSC_EMULATION_STATUS, HV_X64_MSR_TSC_INVARIANT_CONTROL, HV_X64_MSR_SYNDBG_OPTIONS, HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, @@ -2086,7 +2088,7 @@ static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn) !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT)) return kvm_handle_invalid_op(vcpu); - pr_warn_once("kvm: %s instruction emulated as NOP!\n", insn); + pr_warn_once("%s instruction emulated as NOP!\n", insn); return kvm_emulate_as_nop(vcpu); } int kvm_emulate_mwait(struct kvm_vcpu *vcpu) @@ -2433,7 +2435,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { - pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); + pr_debug("requested TSC rate %u falls outside tolerance [%u,%u]\n", + user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } return set_tsc_khz(vcpu, user_tsc_khz, use_scaling); @@ -3821,6 +3824,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_REENLIGHTENMENT_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: + case HV_X64_MSR_TSC_INVARIANT_CONTROL: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -4191,6 +4195,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_REENLIGHTENMENT_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: + case HV_X64_MSR_TSC_INVARIANT_CONTROL: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data, msr_info->host_initiated); @@ -7699,7 +7704,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; emul_write: - printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); + pr_warn_once("emulating exchange as write\n"); return emulator_write_emulated(ctxt, addr, new, bytes, exception); } @@ -8260,7 +8265,7 @@ static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu) ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT); if (!ctxt) { - pr_err("kvm: failed to allocate vcpu's emulator\n"); + pr_err("failed to allocate vcpu's emulator\n"); return NULL; } @@ -9271,35 +9276,66 @@ static struct notifier_block pvclock_gtod_notifier = { }; #endif -int kvm_arch_init(void *opaque) +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) +{ + memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); + +#define __KVM_X86_OP(func) \ + static_call_update(kvm_x86_##func, kvm_x86_ops.func); +#define KVM_X86_OP(func) \ + WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP +#define KVM_X86_OP_OPTIONAL_RET0(func) \ + static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ + (void *)__static_call_return0); +#include <asm/kvm-x86-ops.h> +#undef __KVM_X86_OP + + kvm_pmu_ops_update(ops->pmu_ops); +} + +static int kvm_x86_check_processor_compatibility(void) +{ + int cpu = smp_processor_id(); + struct cpuinfo_x86 *c = &cpu_data(cpu); + + /* + * Compatibility checks are done when loading KVM and when enabling + * hardware, e.g. during CPU hotplug, to ensure all online CPUs are + * compatible, i.e. KVM should never perform a compatibility check on + * an offline CPU. + */ + WARN_ON(!cpu_online(cpu)); + + if (__cr4_reserved_bits(cpu_has, c) != + __cr4_reserved_bits(cpu_has, &boot_cpu_data)) + return -EIO; + + return static_call(kvm_x86_check_processor_compatibility)(); +} + +static void kvm_x86_check_cpu_compat(void *ret) +{ + *(int *)ret = kvm_x86_check_processor_compatibility(); +} + +static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) { - struct kvm_x86_init_ops *ops = opaque; u64 host_pat; - int r; + int r, cpu; if (kvm_x86_ops.hardware_enable) { - pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name); + pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name); return -EEXIST; } - if (!ops->cpu_has_kvm_support()) { - pr_err_ratelimited("kvm: no hardware support for '%s'\n", - ops->runtime_ops->name); - return -EOPNOTSUPP; - } - if (ops->disabled_by_bios()) { - pr_err_ratelimited("kvm: support for '%s' disabled by bios\n", - ops->runtime_ops->name); - return -EOPNOTSUPP; - } - /* * KVM explicitly assumes that the guest has an FPU and * FXSAVE/FXRSTOR. For example, the KVM_GET_FPU explicitly casts the * vCPU's FPU state as a fxregs_state struct. */ if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) { - printk(KERN_ERR "kvm: inadequate fpu\n"); + pr_err("inadequate fpu\n"); return -EOPNOTSUPP; } @@ -9317,19 +9353,19 @@ int kvm_arch_init(void *opaque) */ if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) || (host_pat & GENMASK(2, 0)) != 6) { - pr_err("kvm: host PAT[0] is not WB\n"); + pr_err("host PAT[0] is not WB\n"); return -EIO; } x86_emulator_cache = kvm_alloc_emulator_cache(); if (!x86_emulator_cache) { - pr_err("kvm: failed to allocate cache for x86 emulator\n"); + pr_err("failed to allocate cache for x86 emulator\n"); return -ENOMEM; } user_return_msrs = alloc_percpu(struct kvm_user_return_msrs); if (!user_return_msrs) { - printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n"); + pr_err("failed to allocate percpu kvm_user_return_msrs\n"); r = -ENOMEM; goto out_free_x86_emulator_cache; } @@ -9339,13 +9375,37 @@ int kvm_arch_init(void *opaque) if (r) goto out_free_percpu; - kvm_timer_init(); - if (boot_cpu_has(X86_FEATURE_XSAVE)) { host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; } + rdmsrl_safe(MSR_EFER, &host_efer); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + rdmsrl(MSR_IA32_XSS, host_xss); + + kvm_init_pmu_capability(); + + r = ops->hardware_setup(); + if (r != 0) + goto out_mmu_exit; + + kvm_ops_update(ops); + + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, kvm_x86_check_cpu_compat, &r, 1); + if (r < 0) + goto out_unwind_ops; + } + + /* + * Point of no return! DO NOT add error paths below this point unless + * absolutely necessary, as most operations from this point forward + * require unwinding. + */ + kvm_timer_init(); + if (pi_inject_timer == -1) pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER); #ifdef CONFIG_X86_64 @@ -9355,8 +9415,35 @@ int kvm_arch_init(void *opaque) set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); #endif + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); + + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) + kvm_caps.supported_xss = 0; + +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) + cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); +#undef __kvm_cpu_cap_has + + if (kvm_caps.has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); + kvm_caps.max_guest_tsc_khz = max; + } + kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; + kvm_init_msr_list(); return 0; +out_unwind_ops: + kvm_x86_ops.hardware_enable = NULL; + static_call(kvm_x86_hardware_unsetup)(); +out_mmu_exit: + kvm_mmu_vendor_module_exit(); out_free_percpu: free_percpu(user_return_msrs); out_free_x86_emulator_cache: @@ -9364,8 +9451,22 @@ out_free_x86_emulator_cache: return r; } -void kvm_arch_exit(void) +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) +{ + int r; + + mutex_lock(&vendor_module_lock); + r = __kvm_x86_vendor_init(ops); + mutex_unlock(&vendor_module_lock); + + return r; +} +EXPORT_SYMBOL_GPL(kvm_x86_vendor_init); + +void kvm_x86_vendor_exit(void) { + kvm_unregister_perf_callbacks(); + #ifdef CONFIG_X86_64 if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) clear_hv_tscchange_cb(); @@ -9382,7 +9483,7 @@ void kvm_arch_exit(void) irq_work_sync(&pvclock_irq_work); cancel_work_sync(&pvclock_gtod_work); #endif - kvm_x86_ops.hardware_enable = NULL; + static_call(kvm_x86_hardware_unsetup)(); kvm_mmu_vendor_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_emulator_cache); @@ -9390,7 +9491,11 @@ void kvm_arch_exit(void) static_key_deferred_flush(&kvm_xen_enabled); WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); #endif + mutex_lock(&vendor_module_lock); + kvm_x86_ops.hardware_enable = NULL; + mutex_unlock(&vendor_module_lock); } +EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit); static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) { @@ -11531,7 +11636,7 @@ static int sync_regs(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { if (kvm_check_tsc_unstable() && kvm->created_vcpus) - pr_warn_once("kvm: SMP vm created on host with unstable TSC; " + pr_warn_once("SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); if (!kvm->arch.max_vcpu_ids) @@ -11608,7 +11713,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto free_wbinvd_dirty_mask; if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) { - pr_err("kvm: failed to allocate vcpu's fpu\n"); + pr_err("failed to allocate vcpu's fpu\n"); goto free_emulate_ctxt; } @@ -11882,6 +11987,11 @@ int kvm_arch_hardware_enable(void) bool stable, backwards_tsc = false; kvm_user_return_msr_cpu_online(); + + ret = kvm_x86_check_processor_compatibility(); + if (ret) + return ret; + ret = static_call(kvm_x86_hardware_enable)(); if (ret != 0) return ret; @@ -11968,88 +12078,6 @@ void kvm_arch_hardware_disable(void) drop_user_return_notifiers(); } -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops) -{ - memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); - -#define __KVM_X86_OP(func) \ - static_call_update(kvm_x86_##func, kvm_x86_ops.func); -#define KVM_X86_OP(func) \ - WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func) -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP -#define KVM_X86_OP_OPTIONAL_RET0(func) \ - static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \ - (void *)__static_call_return0); -#include <asm/kvm-x86-ops.h> -#undef __KVM_X86_OP - - kvm_pmu_ops_update(ops->pmu_ops); -} - -int kvm_arch_hardware_setup(void *opaque) -{ - struct kvm_x86_init_ops *ops = opaque; - int r; - - rdmsrl_safe(MSR_EFER, &host_efer); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - rdmsrl(MSR_IA32_XSS, host_xss); - - kvm_init_pmu_capability(); - - r = ops->hardware_setup(); - if (r != 0) - return r; - - kvm_ops_update(ops); - - kvm_register_perf_callbacks(ops->handle_intel_pt_intr); - - if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) - kvm_caps.supported_xss = 0; - -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) - cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); -#undef __kvm_cpu_cap_has - - if (kvm_caps.has_tsc_control) { - /* - * Make sure the user can only configure tsc_khz values that - * fit into a signed integer. - * A min value is not calculated because it will always - * be 1 on all machines. - */ - u64 max = min(0x7fffffffULL, - __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz)); - kvm_caps.max_guest_tsc_khz = max; - } - kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; - kvm_init_msr_list(); - return 0; -} - -void kvm_arch_hardware_unsetup(void) -{ - kvm_unregister_perf_callbacks(); - - static_call(kvm_x86_hardware_unsetup)(); -} - -int kvm_arch_check_processor_compat(void *opaque) -{ - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); - struct kvm_x86_init_ops *ops = opaque; - - WARN_ON(!irqs_disabled()); - - if (__cr4_reserved_bits(cpu_has, c) != - __cr4_reserved_bits(cpu_has, &boot_cpu_data)) - return -EIO; - - return ops->check_processor_compatibility(); -} - bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2e29bdc2949c..2681e2007e39 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -5,6 +5,7 @@ * * KVM Xen emulation */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "x86.h" #include "xen.h" @@ -271,7 +272,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * Attempt to obtain the GPC lock on *both* (if there are two) * gfn_to_pfn caches that cover the region. */ - read_lock_irqsave(&gpc1->lock, flags); + if (atomic) { + local_irq_save(flags); + if (!read_trylock(&gpc1->lock)) { + local_irq_restore(flags); + return; + } + } else { + read_lock_irqsave(&gpc1->lock, flags); + } while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); @@ -304,9 +313,18 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else - * takes them more than one at a time. + * takes them more than one at a time. Set a subclass on the + * gpc1 lock to make lockdep shut up about it. */ - read_lock(&gpc2->lock); + lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); + if (atomic) { + if (!read_trylock(&gpc2->lock)) { + read_unlock_irqrestore(&gpc1->lock, flags); + return; + } + } else { + read_lock(&gpc2->lock); + } if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); @@ -590,26 +608,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) { r = -EINVAL; } else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.long_mode = !!data->u.long_mode; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); break; case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.upcall_vector = data->u.vector; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; @@ -619,9 +637,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_XEN_VERSION: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.xen_version = data->u.xen_version; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -630,9 +648,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = -EOPNOTSUPP; break; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -647,7 +665,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { int r = -ENOENT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_ATTR_TYPE_LONG_MODE: @@ -686,7 +704,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return r; } @@ -694,7 +712,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int idx, r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); idx = srcu_read_lock(&vcpu->kvm->srcu); switch (data->type) { @@ -922,7 +940,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) } srcu_read_unlock(&vcpu->kvm->srcu, idx); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -930,7 +948,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: @@ -1013,7 +1031,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -1106,7 +1124,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); if (xhc->msr && !kvm->arch.xen_hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); @@ -1115,7 +1133,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return 0; } @@ -1658,15 +1676,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) mm_borrowed = true; } - /* - * For the irqfd workqueue, using the main kvm->lock mutex is - * fine since this function is invoked from kvm_set_irq() with - * no other lock held, no srcu. In future if it will be called - * directly from a vCPU thread (e.g. on hypercall for an IPI) - * then it may need to switch to using a leaf-node mutex for - * serializing the shared_info mapping. - */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * It is theoretically possible for the page to be unmapped @@ -1695,7 +1705,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } while(!rc); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (mm_borrowed) kthread_unuse_mm(kvm->mm); @@ -1811,7 +1821,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, int ret; /* Protect writes to evtchnfd as well as the idr lookup. */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); ret = -ENOENT; @@ -1842,7 +1852,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, } ret = 0; out_unlock: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return ret; } @@ -1905,10 +1915,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm, evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, GFP_KERNEL); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (ret >= 0) return 0; @@ -1926,9 +1936,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) { struct evtchnfd *evtchnfd; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (!evtchnfd) return -ENOENT; @@ -1946,7 +1956,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) int i; int n = 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * Because synchronize_srcu() cannot be called inside the @@ -1958,7 +1968,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); if (!all_evtchnfds) { - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return -ENOMEM; } @@ -1967,7 +1977,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); synchronize_srcu(&kvm->srcu); @@ -2069,6 +2079,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { + mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d3987359d441..cb258f58fdc8 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -26,6 +26,7 @@ #include <asm/pti.h> #include <asm/text-patching.h> #include <asm/memtype.h> +#include <asm/paravirt.h> /* * We need to define the tracepoints somewhere, and tlb.c @@ -804,6 +805,9 @@ void __init poking_init(void) poking_mm = mm_alloc(); BUG_ON(!poking_mm); + /* Xen PV guests need the PGD to be pinned. */ + paravirt_arch_dup_mmap(NULL, poking_mm); + /* * Randomize the poking address, but make sure that the following page * will be mapped at the same PMD. We need 2 pages, so find space for 3, diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 46de9cf5c91d..fb4b1b5e0dea 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -387,7 +387,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) + if (mtrr_type != MTRR_TYPE_WRBACK && + mtrr_type != MTRR_TYPE_INVALID) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 758cbfe55daa..4b3efaa82ab7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -12,6 +12,7 @@ */ #include <linux/acpi.h> +#include <linux/efi.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/bitmap.h> @@ -442,17 +443,42 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used) return mcfg_res.flags; } +static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used) +{ +#ifdef CONFIG_EFI + efi_memory_desc_t *md; + u64 size, mmio_start, mmio_end; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + mmio_start = md->phys_addr; + mmio_end = mmio_start + size; + + /* + * N.B. Caller supplies (start, start + size), + * so to match, mmio_end is the first address + * *past* the EFI_MEMORY_MAPPED_IO area. + */ + if (mmio_start <= start && end <= mmio_end) + return true; + } + } +#endif + + return false; +} + typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, struct pci_mmcfg_region *cfg, - struct device *dev, int with_e820) + struct device *dev, const char *method) { u64 addr = cfg->res.start; u64 size = resource_size(&cfg->res); u64 old_size = size; int num_buses; - char *method = with_e820 ? "E820" : "ACPI motherboard resources"; while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) { size >>= 1; @@ -464,10 +490,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, return false; if (dev) - dev_info(dev, "MMCONFIG at %pR reserved in %s\n", + dev_info(dev, "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); else - pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n", + pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n", &cfg->res, method); if (old_size != size) { @@ -500,7 +526,8 @@ static bool __ref pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early) { if (!early && !acpi_disabled) { - if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) + if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, + "ACPI motherboard resource")) return true; if (dev) @@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e "MMCONFIG at %pR not reserved in " "ACPI motherboard resources\n", &cfg->res); + + if (is_mmconf_reserved(is_efi_mmio, cfg, dev, + "EfiMemoryMappedIO")) + return true; } /* @@ -527,7 +558,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1); + return is_mmconf_reserved(e820__mapped_all, cfg, dev, + "E820 entry"); return false; } diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 48a3eb09d951..650cdbbdaf45 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -7,7 +7,7 @@ #include <asm/elf.h> -Elf32_Half elf_core_extra_phdrs(void) +Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { if ( vsyscall_ehdr ) { const struct elfhdr *const ehdrp = diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 58db86f7b384..9bdc3b656b2c 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -134,11 +134,6 @@ static inline unsigned p2m_mid_index(unsigned long pfn) return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; } -static inline unsigned p2m_index(unsigned long pfn) -{ - return pfn % P2M_PER_PAGE; -} - static void p2m_top_mfn_init(unsigned long *top) { unsigned i; diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 228e4dff5fb2..a6d09fe04831 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -154,11 +154,6 @@ struct thread_struct { unsigned long ra; /* kernel's a0: return address and window call size */ unsigned long sp; /* kernel's a1: stack pointer */ - /* struct xtensa_cpuinfo info; */ - - unsigned long bad_vaddr; /* last user fault */ - unsigned long bad_uaddr; /* last kernel fault accessing user space */ - unsigned long error_code; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bp[XCHAL_NUM_IBREAK]; struct perf_event *ptrace_wp[XCHAL_NUM_DBREAK]; @@ -176,10 +171,6 @@ struct thread_struct { { \ ra: 0, \ sp: sizeof(init_stack) + (long) &init_stack, \ - /*info: {0}, */ \ - bad_vaddr: 0, \ - bad_uaddr: 0, \ - error_code: 0, \ } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 0c25e035ff10..cd98366a9b23 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -362,8 +362,6 @@ static void do_unaligned_user(struct pt_regs *regs) __die_if_kernel("Unhandled unaligned exception in kernel", regs, SIGKILL); - current->thread.bad_vaddr = regs->excvaddr; - current->thread.error_code = -3; pr_info_ratelimited("Unaligned memory access to %08lx in '%s' " "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 8c781b05c0bd..faf7cf35a0ee 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -206,8 +206,6 @@ good_area: bad_area: mmap_read_unlock(mm); if (user_mode(regs)) { - current->thread.bad_vaddr = address; - current->thread.error_code = is_write; force_sig_fault(SIGSEGV, code, (void *) address); return; } @@ -232,7 +230,6 @@ do_sigbus: /* Send a sigbus, regardless of whether we were in kernel * or user mode. */ - current->thread.bad_vaddr = address; force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address); /* Kernel mode? Handle exceptions or die */ @@ -252,7 +249,6 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if ((entry = search_exception_tables(regs->pc)) != NULL) { pr_debug("%s: Exception at pc=%#010lx (%lx)\n", current->comm, regs->pc, entry->fixup); - current->thread.bad_uaddr = address; regs->pc = entry->fixup; return; } diff --git a/block/blk-core.c b/block/blk-core.c index 9321767470dc..b5098355d8b2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -283,12 +283,9 @@ static void blk_free_queue(struct request_queue *q) * * Decrements the refcount of the request_queue and free it when the refcount * reaches 0. - * - * Context: Can sleep. */ void blk_put_queue(struct request_queue *q) { - might_sleep(); if (refcount_dec_and_test(&q->refs)) blk_free_queue(q); } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 204fe94c7e45..a194f30876c5 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) } #define FIND_CHILD_MIN_SCORE 1 -#define FIND_CHILD_MAX_SCORE 2 +#define FIND_CHILD_MID_SCORE 2 +#define FIND_CHILD_MAX_SCORE 3 static int match_any(struct acpi_device *adev, void *not_used) { @@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) + if (status == AE_NOT_FOUND) { + /* + * Special case: backlight device objects without _STA are + * preferred to other objects with the same _ADR value, because + * it is more likely that they are actually useful. + */ + if (adev->pnp.type.backlight) + return FIND_CHILD_MID_SCORE; + return FIND_CHILD_MIN_SCORE; + } if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 16dcd31d124f..192d1784e409 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -433,6 +433,13 @@ static const struct dmi_system_id asus_laptop[] = { }, }, { + .ident = "Asus ExpertBook B2402CBA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B2402CBA"), + }, + }, + { .ident = "Asus ExpertBook B2502", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 274344434282..0c6f06abe3f4 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, * Some devices don't reliably have _HIDs & _CIDs, so add * synthetic HIDs to make sure drivers can find them. */ - if (acpi_is_video_device(handle)) + if (acpi_is_video_device(handle)) { acpi_add_id(pnp, ACPI_VIDEO_HID); - else if (acpi_bay_match(handle)) + pnp->type.backlight = 1; + break; + } + if (acpi_bay_match(handle)) acpi_add_id(pnp, ACPI_BAY_HID); else if (acpi_dock_match(handle)) acpi_add_id(pnp, ACPI_DOCK_HID); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 1b78c7434492..8a541efc5675 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -50,6 +50,10 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_video; if (!strcmp("native", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_native; + if (!strcmp("nvidia_wmi_ec", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; + if (!strcmp("apple_gmux", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_apple_gmux; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index eceaec33af65..9695c4404e26 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -640,6 +640,7 @@ config PATA_CS5530 config PATA_CS5535 tristate "CS5535 PATA support (Experimental)" depends on PCI && (X86_32 || (X86_64 && COMPILE_TEST)) + depends on !UML help This option enables support for the NatSemi/AMD CS5535 companion chip used with the Geode processor family. diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c0227dfa4688..4807af1d5805 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -524,7 +524,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, return 0; } -static int xen_blkbk_remove(struct xenbus_device *dev) +static void xen_blkbk_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -547,8 +547,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) /* Put the reference we set in xen_blkif_alloc(). */ xen_blkif_put(be->blkif); } - - return 0; } int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b28489290323..23ed258b57f0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2467,7 +2467,7 @@ static void blkback_changed(struct xenbus_device *dev, } } -static int blkfront_remove(struct xenbus_device *xbdev) +static void blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); @@ -2488,7 +2488,6 @@ static int blkfront_remove(struct xenbus_device *xbdev) } kfree(info); - return 0; } static int blkfront_is_ready(struct xenbus_device *dev) diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 379291826261..80cca3b83b22 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -360,14 +360,13 @@ static int tpmfront_probe(struct xenbus_device *dev, return tpm_chip_register(priv->chip); } -static int tpmfront_remove(struct xenbus_device *dev) +static void tpmfront_remove(struct xenbus_device *dev) { struct tpm_chip *chip = dev_get_drvdata(&dev->dev); struct tpm_private *priv = dev_get_drvdata(&chip->dev); tpm_chip_unregister(chip); ring_free(priv); dev_set_drvdata(&chip->dev, NULL); - return 0; } static int tpmfront_resume(struct xenbus_device *dev) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 204e39006dda..c17bd845f5fc 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,6 +307,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + cpufreq_cpu_put(policy); } static int amd_get_min_freq(struct amd_cpudata *cpudata) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index d1801281cdd9..c11d22fd84c3 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -280,6 +280,7 @@ static int apple_soc_cpufreq_init(struct cpufreq_policy *policy) policy->cpuinfo.transition_latency = transition_latency; policy->dvfs_possible_from_any_cpu = true; policy->fast_switch_possible = true; + policy->suspend_freq = freq_table[0].frequency; if (policy_has_boost_freq(policy)) { ret = cpufreq_enable_boost_support(); @@ -321,7 +322,6 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = { .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, - .attr = cpufreq_generic_attr, .get = apple_soc_cpufreq_get_rate, .init = apple_soc_cpufreq_init, .exit = apple_soc_cpufreq_exit, @@ -329,6 +329,7 @@ static struct cpufreq_driver apple_soc_cpufreq_driver = { .fast_switch = apple_soc_cpufreq_fast_switch, .register_em = cpufreq_register_em_with_opp, .attr = apple_soc_cpufreq_hw_attr, + .suspend = cpufreq_generic_suspend, }; static int __init apple_soc_cpufreq_module_init(void) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index c10fc33b29b1..b74289a95a17 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -445,7 +445,7 @@ static int __init armada37xx_cpufreq_driver_init(void) return -ENODEV; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { dev_err(cpu_dev, "Cannot get clock for CPU0\n"); return PTR_ERR(clk); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 432dfb4e8027..022e3555407c 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -487,7 +487,8 @@ static unsigned int get_perf_level_count(struct cpufreq_policy *policy) cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu); - min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf); + min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); if ((min_cap == 0) || (max_cap < min_cap)) return 0; return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP; @@ -519,10 +520,10 @@ static int cppc_get_cpu_power(struct device *cpu_dev, cpu_data = policy->driver_data; perf_caps = &cpu_data->perf_caps; max_cap = arch_scale_cpu_capacity(cpu_dev->id); - min_cap = div_u64(max_cap * perf_caps->lowest_perf, - perf_caps->highest_perf); - - perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap; + min_cap = div_u64((u64)max_cap * perf_caps->lowest_perf, + perf_caps->highest_perf); + perf_step = div_u64((u64)CPPC_EM_CAP_STEP * perf_caps->highest_perf, + max_cap); min_step = min_cap / CPPC_EM_CAP_STEP; max_step = max_cap / CPPC_EM_CAP_STEP; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 8ab672883043..e85703651098 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -137,6 +137,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "nvidia,tegra30", }, { .compatible = "nvidia,tegra124", }, { .compatible = "nvidia,tegra210", }, + { .compatible = "nvidia,tegra234", }, { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, @@ -150,6 +151,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sdm845", }, { .compatible = "qcom,sm6115", }, { .compatible = "qcom,sm6350", }, + { .compatible = "qcom,sm6375", }, { .compatible = "qcom,sm8150", }, { .compatible = "qcom,sm8250", }, { .compatible = "qcom,sm8350", }, diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 340fed35e45d..9505a812d6a1 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -649,9 +649,10 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) { struct clk_hw_onecell_data *clk_data; struct device *dev = &pdev->dev; + struct device_node *soc_node; struct device *cpu_dev; struct clk *clk; - int ret, i, num_domains; + int ret, i, num_domains, reg_sz; clk = clk_get(dev, "xo"); if (IS_ERR(clk)) @@ -679,7 +680,21 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return ret; /* Allocate qcom_cpufreq_data based on the available frequency domains in DT */ - num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * 4); + soc_node = of_get_parent(dev->of_node); + if (!soc_node) + return -EINVAL; + + ret = of_property_read_u32(soc_node, "#address-cells", ®_sz); + if (ret) + goto of_exit; + + ret = of_property_read_u32(soc_node, "#size-cells", &i); + if (ret) + goto of_exit; + + reg_sz += i; + + num_domains = of_property_count_elems_of_size(dev->of_node, "reg", sizeof(u32) * reg_sz); if (num_domains <= 0) return num_domains; @@ -743,6 +758,9 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) else dev_dbg(dev, "QCOM CPUFreq HW driver initialized\n"); +of_exit: + of_node_put(soc_node); + return ret; } diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 53100fb9b07b..12205e2b53b4 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #include <linux/delay.h> @@ -411,6 +411,6 @@ static void __exit atmel_ecc_exit(void) module_init(atmel_ecc_init); module_exit(atmel_ecc_exit); -MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>"); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 81ce09bedda8..55bff1e13142 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #include <linux/bitrev.h> @@ -390,6 +390,6 @@ static void __exit atmel_i2c_exit(void) module_init(atmel_i2c_init); module_exit(atmel_i2c_exit); -MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>"); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 48929efe2a5b..35f7857a7f7c 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus <tudor.ambarus@microchip.com> + * Author: Tudor Ambarus */ #ifndef __ATMEL_I2C_H__ diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 19522c568aa5..878deb4880cd 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -394,17 +394,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) * Then restart the workq on the new delay */ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, - unsigned long value) + unsigned long msec) { - unsigned long jiffs = msecs_to_jiffies(value); - - if (value == 1000) - jiffs = round_jiffies_relative(value); - - edac_dev->poll_msec = value; - edac_dev->delay = jiffs; + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); - edac_mod_work(&edac_dev->work, jiffs); + /* See comment in edac_device_workq_setup() above */ + if (edac_dev->poll_msec == 1000) + edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_mod_work(&edac_dev->work, edac_dev->delay); } int edac_device_alloc_index(void) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 763c076d96f2..47593afdc234 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -53,7 +53,7 @@ bool edac_stop_work(struct delayed_work *work); bool edac_mod_work(struct delayed_work *work, unsigned long delay); extern void edac_device_reset_delay_period(struct edac_device_ctl_info - *edac_dev, unsigned long value); + *edac_dev, unsigned long msec); extern void edac_mc_reset_delay_period(unsigned long value); /* diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 61b76ec226af..19fba258ae10 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev) drvdata = mci->pvt_info; platform_set_drvdata(pdev, mci); - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) - return -ENOMEM; + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + res = -ENOMEM; + goto free; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -243,6 +245,7 @@ err2: edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); +free: edac_mc_free(mci); return res; } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 09716eebe8ac..a2b0cbc8741c 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -394,8 +394,8 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); - destroy_workqueue(efi_rts_wq); - return -ENOMEM; + error = -ENOMEM; + goto err_destroy_wq; } if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | @@ -443,7 +443,10 @@ err_unregister: err_put: kobject_put(efi_kobj); efi_kobj = NULL; - destroy_workqueue(efi_rts_wq); +err_destroy_wq: + if (efi_rts_wq) + destroy_workqueue(efi_rts_wq); + return error; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 7feee3d9c2bf..1fba4e09cdcf 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work; \ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ pr_warn_once("EFI Runtime Services are disabled!\n"); \ + efi_rts_work.status = EFI_DEVICE_ERROR; \ goto exit; \ } \ \ diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index 2652c396c423..33ae94745aef 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -93,14 +93,19 @@ static int coreboot_table_populate(struct device *dev, void *ptr) for (i = 0; i < header->table_entries; i++) { entry = ptr_entry; - device = kzalloc(sizeof(struct device) + entry->size, GFP_KERNEL); + if (entry->size < sizeof(*entry)) { + dev_warn(dev, "coreboot table entry too small!\n"); + return -EINVAL; + } + + device = kzalloc(sizeof(device->dev) + entry->size, GFP_KERNEL); if (!device) return -ENOMEM; device->dev.parent = dev; device->dev.bus = &coreboot_bus_type; device->dev.release = coreboot_device_release; - memcpy(&device->entry, ptr_entry, entry->size); + memcpy(device->raw, ptr_entry, entry->size); switch (device->entry.tag) { case LB_TAG_CBMEM_ENTRY: diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h index 37f4d335a606..d814dca33a08 100644 --- a/drivers/firmware/google/coreboot_table.h +++ b/drivers/firmware/google/coreboot_table.h @@ -79,6 +79,7 @@ struct coreboot_device { struct lb_cbmem_ref cbmem_ref; struct lb_cbmem_entry cbmem_entry; struct lb_framebuffer framebuffer; + DECLARE_FLEX_ARRAY(u8, raw); }; }; diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e7bcfca4159f..447ee4ea5c90 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -440,6 +440,9 @@ static const struct file_operations psci_debugfs_ops = { static int __init psci_debugfs_init(void) { + if (!invoke_psci_fn || !psci_ops.get_version) + return 0; + return PTR_ERR_OR_ZERO(debugfs_create_file("psci", 0444, NULL, NULL, &psci_debugfs_ops)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b15091d8310d..3b5c53712d31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2099,7 +2099,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b } amdgpu_amdkfd_remove_eviction_fence( - bo, bo->kfd_bo->process_info->eviction_fence); + bo, bo->vm_bo->vm->process_info->eviction_fence); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8516c814bc9b..7b5ce00f0602 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -61,6 +61,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_ctx_put(p->ctx); return -ECANCELED; } + + amdgpu_sync_create(&p->sync); return 0; } @@ -452,18 +454,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, } r = amdgpu_sync_fence(&p->sync, fence); - if (r) - goto error; - - /* - * When we have an explicit dependency it might be necessary to insert a - * pipeline sync to make sure that all caches etc are flushed and the - * next job actually sees the results from the previous one. - */ - if (fence->context == p->gang_leader->base.entity->fence_context) - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); - -error: dma_fence_put(fence); return r; } @@ -1188,10 +1178,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct drm_gpu_scheduler *sched; struct amdgpu_bo_list_entry *e; + struct dma_fence *fence; unsigned int i; int r; + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + return r; + } + list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); struct dma_resv *resv = bo->tbo.base.resv; @@ -1211,10 +1210,24 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); - if (r && r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; + sched = p->gang_leader->base.entity->rq->sched; + while ((fence = amdgpu_sync_get_fence(&p->sync))) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + + /* + * When we have an dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one + * before we start executing on the same scheduler ring. + */ + if (!s_fence || s_fence->sched != sched) + continue; + + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + if (r) + return r; + } + return 0; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1254,9 +1267,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, continue; fence = &p->jobs[i]->base.s_fence->scheduled; + dma_fence_get(fence); r = drm_sched_job_add_dependency(&leader->base, fence); - if (r) + if (r) { + dma_fence_put(fence); goto error_cleanup; + } } if (p->gang_size > 1) { @@ -1344,6 +1360,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { unsigned i; + amdgpu_sync_free(&parser->sync); for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); kfree(parser->post_deps[i].chain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index afe6af9c0138..2f28a8c02f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,6 +36,7 @@ #include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> +#include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> @@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +static const struct drm_driver amdgpu_kms_driver; + const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -3687,6 +3690,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Get rid of things like offb */ + r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); + if (r) + return r; + /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1353ffd08988..cd4caaa29528 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,6 @@ */ #include <drm/amdgpu_drm.h> -#include <drm/drm_aperture.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_gem.h> @@ -2122,11 +2121,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif - /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); - if (ret) - return ret; - adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) return PTR_ERR(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 4e684c2afc70..25a68d8888e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -470,8 +470,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, return true; fail: - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + if (man) + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, + man->size); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bac7976975bd..dcd8c066bc1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -391,8 +391,10 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) dma_fence_get(f); r = drm_sched_job_add_dependency(&job->base, f); - if (r) + if (r) { + dma_fence_put(f); return r; + } } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index faa12146635c..9fa1d814508a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + drm_buddy_free_list(&mgr->mm, &rsv->allocated); kfree(rsv); } drm_buddy_fini(&mgr->mm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ecb4c3abc629..c06ada0844ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -200,7 +200,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; if (q->wptr_bo) { - wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va; + wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 814f99888ab1..b94d2c1422ad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -570,6 +570,15 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, goto reserve_bo_failed; } + if (clear) { + r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); + if (r) { + pr_debug("failed %d to sync bo\n", r); + amdgpu_bo_unreserve(bo); + goto reserve_bo_failed; + } + } + r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve bo\n", r); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e54b760b875b..b4373b6568ae 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1261,7 +1261,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; - uint32_t tach_period, crystal_clock_freq; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; int ret; if (!speed) @@ -1271,7 +1272,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, if (ret) return ret; - crystal_clock_freq = amdgpu_asic_get_xclk(adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, regCG_TACH_CTRL, REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL), @@ -2298,6 +2298,10 @@ bool smu_v13_0_baco_is_support(struct smu_context *smu) !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) return false; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 9643b21c636a..4c20d17e7416 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -213,6 +213,7 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 5c6c6ad011ca..e87db7e02e8a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -192,6 +192,7 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 11bb59399471..3d1f50f481cf 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm, kmem_cache_free(slab_blocks, block); } +static void list_insert_sorted(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *node; + struct list_head *head; + + head = &mm->free_list[drm_buddy_block_order(block)]; + if (list_empty(head)) { + list_add(&block->link, head); + return; + } + + list_for_each_entry(node, head, link) + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node)) + break; + + __list_add(&block->link, node->link.prev, &node->link); +} + static void mark_allocated(struct drm_buddy_block *block) { block->header &= ~DRM_BUDDY_HEADER_STATE; @@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm, block->header &= ~DRM_BUDDY_HEADER_STATE; block->header |= DRM_BUDDY_FREE; - list_add(&block->link, - &mm->free_list[drm_buddy_block_order(block)]); + list_insert_sorted(mm, block); } static void mark_split(struct drm_buddy_block *block) @@ -387,20 +405,26 @@ err_undo: } static struct drm_buddy_block * -get_maxblock(struct list_head *head) +get_maxblock(struct drm_buddy *mm, unsigned int order) { struct drm_buddy_block *max_block = NULL, *node; + unsigned int i; - max_block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!max_block) - return NULL; + for (i = order; i <= mm->max_order; ++i) { + if (!list_empty(&mm->free_list[i])) { + node = list_last_entry(&mm->free_list[i], + struct drm_buddy_block, + link); + if (!max_block) { + max_block = node; + continue; + } - list_for_each_entry(node, head, link) { - if (drm_buddy_block_offset(node) > - drm_buddy_block_offset(max_block)) - max_block = node; + if (drm_buddy_block_offset(node) > + drm_buddy_block_offset(max_block)) { + max_block = node; + } + } } return max_block; @@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm, unsigned long flags) { struct drm_buddy_block *block = NULL; - unsigned int i; + unsigned int tmp; int err; - for (i = order; i <= mm->max_order; ++i) { - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(&mm->free_list[i]); - if (block) - break; - } else { - block = list_first_entry_or_null(&mm->free_list[i], - struct drm_buddy_block, - link); - if (block) - break; + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(mm, order); + if (block) + /* Store the obtained block order */ + tmp = drm_buddy_block_order(block); + } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + if (!list_empty(&mm->free_list[tmp])) { + block = list_last_entry(&mm->free_list[tmp], + struct drm_buddy_block, + link); + if (block) + break; + } } } @@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm, BUG_ON(!drm_buddy_block_is_free(block)); - while (i != order) { + while (tmp != order) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; block = block->right; - i--; + tmp--; } return block; err_undo: - if (i != order) + if (tmp != order) __drm_buddy_free(mm, block); return ERR_PTR(err); } diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 52d8800a8ab8..3659f0465a72 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -304,6 +304,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Ideapad D330-10IGL (HD) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Lenovo Yoga Book X90F / X91F / X91L */ .matches = { /* Non exact match to match all versions */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7f2831efc798..6250de9b9196 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } +/* + * Note that this implicitly consumes the ctx reference, by placing + * the ctx in the context_xa. + */ static void gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 id) @@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx, snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); - /* And finally expose ourselves to userspace via the idr */ - old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); - WARN_ON(old); - spin_lock(&ctx->client->ctx_lock); list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); spin_unlock(&ctx->client->ctx_lock); @@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx, spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); + + /* And finally expose ourselves to userspace via the idr */ + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); } int i915_gem_context_open(struct drm_i915_private *i915, @@ -2199,14 +2203,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, if (IS_ERR(ctx)) return ctx; + /* + * One for the xarray and one for the caller. We need to grab + * the reference *prior* to making the ctx visble to userspace + * in gem_context_register(), as at any point after that + * userspace can try to race us with another thread destroying + * the context under our feet. + */ + i915_gem_context_get(ctx); + gem_context_register(ctx, file_priv, id); old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); proto_context_close(file_priv->dev_priv, pc); - /* One for the xarray and one for the caller */ - return i915_gem_context_get(ctx); + return ctx; } struct i915_gem_context * diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index c3cd92691795..4a14f87e441e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -406,10 +406,10 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) -#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define CHICKEN_RASTER_1 MCR_REG(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) -#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) #define VFLSKPD MCR_REG(0x62a8) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 24736ebee17c..78dc5e493c62 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -278,6 +278,7 @@ out: static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; + int loops = 2; int err; /* @@ -285,18 +286,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) * for fifo space for the write or forcewake the chip for * the read */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + do { + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); + /* + * Wait for the device to ack the reset requests. + * + * On some platforms, e.g. Jasperlake, we see that the + * engine register state is not cleared until shortly after + * GDRST reports completion, causing a failure as we try + * to immediately resume while the internal state is still + * in flux. If we immediately repeat the reset, the second + * reset appears to serialise with the first, and since + * it is a no-op, the registers should retain their reset + * value. However, there is still a concern that upon + * leaving the second reset, the internal engine state + * is still in flux and not ready for resuming. + */ + err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, + hw_domain_mask, 0, + 2000, 0, + NULL); + } while (err == 0 && --loops); if (err) GT_TRACE(gt, "Wait for 0x%08x engines reset failed\n", hw_domain_mask); + /* + * As we have observed that the engine state is still volatile + * after GDRST is acked, impose a small delay to let everything settle. + */ + udelay(50); + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2afb4f80a954..5be2f91801fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -645,7 +645,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); + wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_mcr_add(wal, @@ -775,7 +775,7 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_15010599737:dg2 */ - wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 3a33be5401ed..135390d975b6 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -2116,7 +2116,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_fences(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 2); if (err) return -EBUSY; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 6484b97c5344..f3c9600221d4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -876,7 +876,8 @@ static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu) #define GBIF_CLIENT_HALT_MASK BIT(0) #define GBIF_ARB_HALT_MASK BIT(1) -static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) +static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, + bool gx_off) { struct msm_gpu *gpu = &adreno_gpu->base; @@ -889,9 +890,11 @@ static void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu) return; } - /* Halt the gx side of GBIF */ - gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); - spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); + if (gx_off) { + /* Halt the gx side of GBIF */ + gpu_write(gpu, REG_A6XX_RBBM_GBIF_HALT, 1); + spin_until(gpu_read(gpu, REG_A6XX_RBBM_GBIF_HALT_ACK) & 1); + } /* Halt new client requests on GBIF */ gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); @@ -929,7 +932,7 @@ static void a6xx_gmu_force_off(struct a6xx_gmu *gmu) /* Halt the gmu cm3 core */ gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1); - a6xx_bus_clear_pending_transactions(adreno_gpu); + a6xx_bus_clear_pending_transactions(adreno_gpu, true); /* Reset GPU core blocks */ gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, 1); @@ -1083,7 +1086,7 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu) return; } - a6xx_bus_clear_pending_transactions(adreno_gpu); + a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung); /* tell the GMU we want to slumber */ ret = a6xx_gmu_notify_slumber(gmu); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 36c8fb699b56..3be0f2928b57 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1270,6 +1270,12 @@ static void a6xx_recover(struct msm_gpu *gpu) if (hang_debug) a6xx_dump(gpu); + /* + * To handle recovery specific sequences during the rpm suspend we are + * about to trigger + */ + a6xx_gpu->hung = true; + /* Halt SQE first */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 3); @@ -1312,6 +1318,7 @@ static void a6xx_recover(struct msm_gpu *gpu) mutex_unlock(&gpu->active_lock); msm_gpu_hw_init(gpu); + a6xx_gpu->hung = false; } static const char *a6xx_uche_fault_block(struct msm_gpu *gpu, u32 mid) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index ab853f61db63..eea2e60ce3b7 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -32,6 +32,7 @@ struct a6xx_gpu { void *llc_slice; void *htw_llc_slice; bool have_mmu500; + bool hung; }; #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 5d4b1c95033f..b4f9b1343d63 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -29,11 +29,9 @@ enum { ADRENO_FW_MAX, }; -enum adreno_quirks { - ADRENO_QUIRK_TWO_PASS_USE_WFI = 1, - ADRENO_QUIRK_FAULT_DETECT_MASK = 2, - ADRENO_QUIRK_LMLOADKILL_DISABLE = 3, -}; +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1) +#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) struct adreno_rev { uint8_t core; @@ -65,7 +63,7 @@ struct adreno_info { const char *name; const char *fw[ADRENO_FW_MAX]; uint32_t gmem; - enum adreno_quirks quirks; + u64 quirks; struct msm_gpu *(*init)(struct drm_device *dev); const char *zapfw; u32 inactive_period; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 7cbcef6efe17..62f6ff6abf41 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -132,7 +132,6 @@ static void dpu_encoder_phys_wb_set_qos(struct dpu_encoder_phys *phys_enc) * dpu_encoder_phys_wb_setup_fb - setup output framebuffer * @phys_enc: Pointer to physical encoder * @fb: Pointer to output framebuffer - * @wb_roi: Pointer to output region of interest */ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, struct drm_framebuffer *fb) @@ -692,7 +691,7 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder - * @init: Pointer to init info structure with initialization params + * @p: Pointer to init info structure with initialization params */ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( struct dpu_enc_phys_init_params *p) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index d030a93a08c3..cc3efed593aa 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -423,6 +423,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux) isr = dp_catalog_aux_get_irq(aux->catalog); + /* no interrupts pending, return immediately */ + if (!isr) + return; + if (!aux->cmd_busy) return; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 4d3fdc806bef..97372bb241d8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -532,11 +532,19 @@ static int msm_hdmi_dev_probe(struct platform_device *pdev) ret = devm_pm_runtime_enable(&pdev->dev); if (ret) - return ret; + goto err_put_phy; platform_set_drvdata(pdev, hdmi); - return component_add(&pdev->dev, &msm_hdmi_ops); + ret = component_add(&pdev->dev, &msm_hdmi_ops); + if (ret) + goto err_put_phy; + + return 0; + +err_put_phy: + msm_hdmi_put_phy(hdmi); + return ret; } static int msm_hdmi_dev_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 8b0b0ac74a6f..45e81eb148a8 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1278,7 +1278,7 @@ void msm_drv_shutdown(struct platform_device *pdev) * msm_drm_init, drm_dev->registered is used as an indicator that the * shutdown will be successful. */ - if (drm && drm->registered) + if (drm && drm->registered && priv->kms) drm_atomic_helper_shutdown(drm); } diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 86b28add1fff..2527afef9c19 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -47,15 +47,17 @@ struct msm_mdss { static int msm_mdss_parse_data_bus_icc_path(struct device *dev, struct msm_mdss *msm_mdss) { - struct icc_path *path0 = of_icc_get(dev, "mdp0-mem"); - struct icc_path *path1 = of_icc_get(dev, "mdp1-mem"); + struct icc_path *path0; + struct icc_path *path1; + path0 = of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); msm_mdss->path[0] = path0; msm_mdss->num_paths = 1; + path1 = of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { msm_mdss->path[1] = path1; msm_mdss->num_paths++; diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c deleted file mode 100644 index e87de7906f78..000000000000 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright © 2007 David Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * David Airlie - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/tty.h> -#include <linux/sysrq.h> -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/screen_info.h> -#include <linux/vga_switcheroo.h> -#include <linux/console.h> - -#include <drm/drm_crtc.h> -#include <drm/drm_crtc_helper.h> -#include <drm/drm_probe_helper.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_fourcc.h> -#include <drm/drm_atomic.h> - -#include "nouveau_drv.h" -#include "nouveau_gem.h" -#include "nouveau_bo.h" -#include "nouveau_fbcon.h" -#include "nouveau_chan.h" -#include "nouveau_vmm.h" - -#include "nouveau_crtc.h" - -MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration"); -int nouveau_nofbaccel = 0; -module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400); - -MODULE_PARM_DESC(fbcon_bpp, "fbcon bits-per-pixel (default: auto)"); -static int nouveau_fbcon_bpp; -module_param_named(fbcon_bpp, nouveau_fbcon_bpp, int, 0400); - -static void -nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_fillrect(info, rect); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_fillrect(info, rect); - else - ret = nvc0_fbcon_fillrect(info, rect); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_fillrect(info, rect); -} - -static void -nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_copyarea(info, image); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_copyarea(info, image); - else - ret = nvc0_fbcon_copyarea(info, image); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_copyarea(info, image); -} - -static void -nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nvif_device *device = &drm->client.device; - int ret; - - if (info->state != FBINFO_STATE_RUNNING) - return; - - ret = -ENODEV; - if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && - mutex_trylock(&drm->client.mutex)) { - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_imageblit(info, image); - else - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_imageblit(info, image); - else - ret = nvc0_fbcon_imageblit(info, image); - mutex_unlock(&drm->client.mutex); - } - - if (ret == 0) - return; - - if (ret != -ENODEV) - nouveau_fbcon_gpu_lockup(info); - drm_fb_helper_cfb_imageblit(info, image); -} - -static int -nouveau_fbcon_sync(struct fb_info *info) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - struct nouveau_channel *chan = drm->channel; - int ret; - - if (!chan || !chan->accel_done || in_interrupt() || - info->state != FBINFO_STATE_RUNNING || - info->flags & FBINFO_HWACCEL_DISABLED) - return 0; - - if (!mutex_trylock(&drm->client.mutex)) - return 0; - - ret = nouveau_channel_idle(chan); - mutex_unlock(&drm->client.mutex); - if (ret) { - nouveau_fbcon_gpu_lockup(info); - return 0; - } - - chan->accel_done = false; - return 0; -} - -static int -nouveau_fbcon_open(struct fb_info *info, int user) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - int ret = pm_runtime_get_sync(drm->dev->dev); - if (ret < 0 && ret != -EACCES) { - pm_runtime_put(drm->dev->dev); - return ret; - } - return 0; -} - -static int -nouveau_fbcon_release(struct fb_info *info, int user) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - pm_runtime_put(drm->dev->dev); - return 0; -} - -static const struct fb_ops nouveau_fbcon_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = nouveau_fbcon_open, - .fb_release = nouveau_fbcon_release, - .fb_fillrect = nouveau_fbcon_fillrect, - .fb_copyarea = nouveau_fbcon_copyarea, - .fb_imageblit = nouveau_fbcon_imageblit, - .fb_sync = nouveau_fbcon_sync, -}; - -static const struct fb_ops nouveau_fbcon_sw_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = nouveau_fbcon_open, - .fb_release = nouveau_fbcon_release, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - -void -nouveau_fbcon_accel_save_disable(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon && drm->fbcon->helper.info) { - drm->fbcon->saved_flags = drm->fbcon->helper.info->flags; - drm->fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED; - } -} - -void -nouveau_fbcon_accel_restore(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon && drm->fbcon->helper.info) - drm->fbcon->helper.info->flags = drm->fbcon->saved_flags; -} - -static void -nouveau_fbcon_accel_fini(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - if (fbcon && drm->channel) { - console_lock(); - if (fbcon->helper.info) - fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED; - console_unlock(); - nouveau_channel_idle(drm->channel); - nvif_object_dtor(&fbcon->twod); - nvif_object_dtor(&fbcon->blit); - nvif_object_dtor(&fbcon->gdi); - nvif_object_dtor(&fbcon->patt); - nvif_object_dtor(&fbcon->rop); - nvif_object_dtor(&fbcon->clip); - nvif_object_dtor(&fbcon->surf2d); - } -} - -static void -nouveau_fbcon_accel_init(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - struct fb_info *info = fbcon->helper.info; - int ret; - - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) - ret = nv04_fbcon_accel_init(info); - else - if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI) - ret = nv50_fbcon_accel_init(info); - else - ret = nvc0_fbcon_accel_init(info); - - if (ret == 0) - info->fbops = &nouveau_fbcon_ops; -} - -static void -nouveau_fbcon_zfill(struct drm_device *dev, struct nouveau_fbdev *fbcon) -{ - struct fb_info *info = fbcon->helper.info; - struct fb_fillrect rect; - - /* Clear the entire fbcon. The drm will program every connector - * with it's preferred mode. If the sizes differ, one display will - * quite likely have garbage around the console. - */ - rect.dx = rect.dy = 0; - rect.width = info->var.xres_virtual; - rect.height = info->var.yres_virtual; - rect.color = 0; - rect.rop = ROP_COPY; - info->fbops->fb_fillrect(info, &rect); -} - -static int -nouveau_fbcon_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct nouveau_fbdev *fbcon = - container_of(helper, struct nouveau_fbdev, helper); - struct drm_device *dev = fbcon->helper.dev; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nvif_device *device = &drm->client.device; - struct fb_info *info; - struct drm_framebuffer *fb; - struct nouveau_channel *chan; - struct nouveau_bo *nvbo; - struct drm_mode_fb_cmd2 mode_cmd = {}; - int ret; - - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - mode_cmd.pitches[0] = mode_cmd.width * (sizes->surface_bpp >> 3); - mode_cmd.pitches[0] = roundup(mode_cmd.pitches[0], 256); - - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] * - mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM, - 0, 0x0000, &nvbo); - if (ret) { - NV_ERROR(drm, "failed to allocate framebuffer\n"); - goto out; - } - - ret = nouveau_framebuffer_new(dev, &mode_cmd, &nvbo->bo.base, &fb); - if (ret) - goto out_unref; - - ret = nouveau_bo_pin(nvbo, NOUVEAU_GEM_DOMAIN_VRAM, false); - if (ret) { - NV_ERROR(drm, "failed to pin fb: %d\n", ret); - goto out_unref; - } - - ret = nouveau_bo_map(nvbo); - if (ret) { - NV_ERROR(drm, "failed to map fb: %d\n", ret); - goto out_unpin; - } - - chan = nouveau_nofbaccel ? NULL : drm->channel; - if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(nvbo, chan->vmm, &fbcon->vma); - if (ret) { - NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); - chan = NULL; - } - } - - info = drm_fb_helper_alloc_info(helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out_unlock; - } - - /* setup helper */ - fbcon->helper.fb = fb; - - if (!chan) - info->flags = FBINFO_HWACCEL_DISABLED; - else - info->flags = FBINFO_HWACCEL_COPYAREA | - FBINFO_HWACCEL_FILLRECT | - FBINFO_HWACCEL_IMAGEBLIT; - info->fbops = &nouveau_fbcon_sw_ops; - info->fix.smem_start = nvbo->bo.resource->bus.offset; - info->fix.smem_len = nvbo->bo.base.size; - - info->screen_base = nvbo_kmap_obj_iovirtual(nvbo); - info->screen_size = nvbo->bo.base.size; - - drm_fb_helper_fill_info(info, &fbcon->helper, sizes); - - /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - - if (chan) - nouveau_fbcon_accel_init(dev); - nouveau_fbcon_zfill(dev, fbcon); - - /* To allow resizeing without swapping buffers */ - NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n", - fb->width, fb->height, nvbo->offset, nvbo); - - if (dev_is_pci(dev->dev)) - vga_switcheroo_client_fb_set(to_pci_dev(dev->dev), info); - - return 0; - -out_unlock: - if (chan) - nouveau_vma_del(&fbcon->vma); - nouveau_bo_unmap(nvbo); -out_unpin: - nouveau_bo_unpin(nvbo); -out_unref: - nouveau_bo_ref(NULL, &nvbo); -out: - return ret; -} - -static int -nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) -{ - struct drm_framebuffer *fb = fbcon->helper.fb; - struct nouveau_bo *nvbo; - - drm_fb_helper_unregister_info(&fbcon->helper); - drm_fb_helper_fini(&fbcon->helper); - - if (fb && fb->obj[0]) { - nvbo = nouveau_gem_object(fb->obj[0]); - nouveau_vma_del(&fbcon->vma); - nouveau_bo_unmap(nvbo); - nouveau_bo_unpin(nvbo); - drm_framebuffer_put(fb); - } - - return 0; -} - -void nouveau_fbcon_gpu_lockup(struct fb_info *info) -{ - struct nouveau_fbdev *fbcon = info->par; - struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev); - - NV_ERROR(drm, "GPU lockup - switching to software fbcon\n"); - info->flags |= FBINFO_HWACCEL_DISABLED; -} - -static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = { - .fb_probe = nouveau_fbcon_create, -}; - -static void -nouveau_fbcon_set_suspend_work(struct work_struct *work) -{ - struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work); - int state = READ_ONCE(drm->fbcon_new_state); - - if (state == FBINFO_STATE_RUNNING) - pm_runtime_get_sync(drm->dev->dev); - - console_lock(); - if (state == FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_restore(drm->dev); - drm_fb_helper_set_suspend(&drm->fbcon->helper, state); - if (state != FBINFO_STATE_RUNNING) - nouveau_fbcon_accel_save_disable(drm->dev); - console_unlock(); - - if (state == FBINFO_STATE_RUNNING) { - nouveau_fbcon_hotplug_resume(drm->fbcon); - pm_runtime_mark_last_busy(drm->dev->dev); - pm_runtime_put_autosuspend(drm->dev->dev); - } -} - -void -nouveau_fbcon_set_suspend(struct drm_device *dev, int state) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - - if (!drm->fbcon) - return; - - drm->fbcon_new_state = state; - /* Since runtime resume can happen as a result of a sysfs operation, - * it's possible we already have the console locked. So handle fbcon - * init/deinit from a seperate work thread - */ - schedule_work(&drm->fbcon_work); -} - -void -nouveau_fbcon_output_poll_changed(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon = drm->fbcon; - int ret; - - if (!fbcon) - return; - - mutex_lock(&fbcon->hotplug_lock); - - ret = pm_runtime_get(dev->dev); - if (ret == 1 || ret == -EACCES) { - drm_fb_helper_hotplug_event(&fbcon->helper); - - pm_runtime_mark_last_busy(dev->dev); - pm_runtime_put_autosuspend(dev->dev); - } else if (ret == 0) { - /* If the GPU was already in the process of suspending before - * this event happened, then we can't block here as we'll - * deadlock the runtime pmops since they wait for us to - * finish. So, just defer this event for when we runtime - * resume again. It will be handled by fbcon_work. - */ - NV_DEBUG(drm, "fbcon HPD event deferred until runtime resume\n"); - fbcon->hotplug_waiting = true; - pm_runtime_put_noidle(drm->dev->dev); - } else { - DRM_WARN("fbcon HPD event lost due to RPM failure: %d\n", - ret); - } - - mutex_unlock(&fbcon->hotplug_lock); -} - -void -nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon) -{ - struct nouveau_drm *drm; - - if (!fbcon) - return; - drm = nouveau_drm(fbcon->helper.dev); - - mutex_lock(&fbcon->hotplug_lock); - if (fbcon->hotplug_waiting) { - fbcon->hotplug_waiting = false; - - NV_DEBUG(drm, "Handling deferred fbcon HPD events\n"); - drm_fb_helper_hotplug_event(&fbcon->helper); - } - mutex_unlock(&fbcon->hotplug_lock); -} - -int -nouveau_fbcon_init(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_fbdev *fbcon; - int preferred_bpp = nouveau_fbcon_bpp; - int ret; - - if (!dev->mode_config.num_crtc || - (to_pci_dev(dev->dev)->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return 0; - - fbcon = kzalloc(sizeof(struct nouveau_fbdev), GFP_KERNEL); - if (!fbcon) - return -ENOMEM; - - drm->fbcon = fbcon; - INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work); - mutex_init(&fbcon->hotplug_lock); - - drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs); - - ret = drm_fb_helper_init(dev, &fbcon->helper); - if (ret) - goto free; - - if (preferred_bpp != 8 && preferred_bpp != 16 && preferred_bpp != 32) { - if (drm->client.device.info.ram_size <= 32 * 1024 * 1024) - preferred_bpp = 8; - else - if (drm->client.device.info.ram_size <= 64 * 1024 * 1024) - preferred_bpp = 16; - else - preferred_bpp = 32; - } - - /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!drm_drv_uses_atomic_modeset(dev)) - drm_helper_disable_unused_functions(dev); - - ret = drm_fb_helper_initial_config(&fbcon->helper, preferred_bpp); - if (ret) - goto fini; - - if (fbcon->helper.info) - fbcon->helper.info->pixmap.buf_align = 4; - return 0; - -fini: - drm_fb_helper_fini(&fbcon->helper); -free: - kfree(fbcon); - drm->fbcon = NULL; - return ret; -} - -void -nouveau_fbcon_fini(struct drm_device *dev) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - - if (!drm->fbcon) - return; - - drm_kms_helper_poll_fini(dev); - nouveau_fbcon_accel_fini(dev); - nouveau_fbcon_destroy(dev, drm->fbcon); - kfree(drm->fbcon); - drm->fbcon = NULL; -} diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index ba3aa0a0fc43..da5493f789df 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -173,7 +173,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) - ttm_move_memcpy(clear, ttm->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, PFN_UP(dst_mem->size), dst_iter, src_iter); if (!src_iter->ops->maps_tt) ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5d05093014ac..9f4a90493aea 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -358,10 +358,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */ rc->bo_handle = handle; + + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } @@ -723,11 +731,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc_blob->res_handle = bo->hw_res_handle; rc_blob->bo_handle = handle; + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 932b125ebf3d..ddf8373c1d77 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -254,40 +254,6 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) kref_put(&base->refcount, ttm_release_base); } -/** - * ttm_base_object_noref_lookup - look up a base object without reference - * @tfile: The struct ttm_object_file the object is registered with. - * @key: The object handle. - * - * This function looks up a ttm base object and returns a pointer to it - * without refcounting the pointer. The returned pointer is only valid - * until ttm_base_object_noref_release() is called, and the object - * pointed to by the returned pointer may be doomed. Any persistent usage - * of the object requires a refcount to be taken using kref_get_unless_zero(). - * Iff this function returns successfully it needs to be paired with - * ttm_base_object_noref_release() and no sleeping- or scheduling functions - * may be called inbetween these function callse. - * - * Return: A pointer to the object if successful or NULL otherwise. - */ -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) -{ - struct vmwgfx_hash_item *hash; - int ret; - - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); - if (ret) { - rcu_read_unlock(); - return NULL; - } - - __release(RCU); - return hlist_entry(hash, struct ttm_ref_object, hash)->obj; -} -EXPORT_SYMBOL(ttm_base_object_noref_lookup); - struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint64_t key) { @@ -295,15 +261,16 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, struct vmwgfx_hash_item *hash; int ret; - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); + spin_lock(&tfile->lock); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (likely(ret == 0)) { base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } - rcu_read_unlock(); + spin_unlock(&tfile->lock); + return base; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index f0ebbe340ad6..8098a3846bae 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -307,18 +307,4 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, #define ttm_prime_object_kfree(__obj, __prime) \ kfree_rcu(__obj, __prime.base.rhead) -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); - -/** - * ttm_base_object_noref_release - release a base object pointer looked up - * without reference - * - * Releases a base object pointer looked up with ttm_base_object_noref_lookup(). - */ -static inline void ttm_base_object_noref_release(void) -{ - __acquire(RCU); - rcu_read_unlock(); -} #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 321c551784a1..aa1cd5126a32 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -716,44 +716,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, } /** - * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference - * @filp: The TTM object file the handle is registered with. - * @handle: The user buffer object handle. - * - * This function looks up a struct vmw_bo and returns a pointer to the - * struct vmw_buffer_object it derives from without refcounting the pointer. - * The returned pointer is only valid until vmw_user_bo_noref_release() is - * called, and the object pointed to by the returned pointer may be doomed. - * Any persistent usage of the object requires a refcount to be taken using - * ttm_bo_reference_unless_doomed(). Iff this function returns successfully it - * needs to be paired with vmw_user_bo_noref_release() and no sleeping- - * or scheduling functions may be called in between these function calls. - * - * Return: A struct vmw_buffer_object pointer if successful or negative - * error pointer on failure. - */ -struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle) -{ - struct vmw_buffer_object *vmw_bo; - struct ttm_buffer_object *bo; - struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle); - - if (!gobj) { - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return ERR_PTR(-ESRCH); - } - vmw_bo = gem_to_vmw_bo(gobj); - bo = ttm_bo_get_unless_zero(&vmw_bo->base); - vmw_bo = vmw_buffer_object(bo); - drm_gem_object_put(gobj); - - return vmw_bo; -} - - -/** * vmw_bo_fence_single - Utility function to fence a single TTM buffer * object without unreserving it. * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b062b020b378..5acbf5849b27 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -830,12 +830,7 @@ extern int vmw_user_resource_lookup_handle( uint32_t handle, const struct vmw_user_resource_conv *converter, struct vmw_resource **p_res); -extern struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv * - converter); + extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, @@ -875,15 +870,6 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) } /** - * vmw_user_resource_noref_release - release a user resource pointer looked up - * without reference - */ -static inline void vmw_user_resource_noref_release(void) -{ - ttm_base_object_noref_release(); -} - -/** * Buffer object helper functions - vmwgfx_bo.c */ extern int vmw_bo_pin_in_placement(struct vmw_private *vmw_priv, @@ -934,8 +920,6 @@ extern void vmw_bo_unmap(struct vmw_buffer_object *vbo); extern void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -extern struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle); /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index a5379f6fb5ab..a44d53e33cdb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -290,20 +290,26 @@ static void vmw_execbuf_rcache_update(struct vmw_res_cache_entry *rcache, rcache->valid_handle = 0; } +enum vmw_val_add_flags { + vmw_val_add_flag_none = 0, + vmw_val_add_flag_noctx = 1 << 0, +}; + /** - * vmw_execbuf_res_noref_val_add - Add a resource described by an unreferenced - * rcu-protected pointer to the validation list. + * vmw_execbuf_res_val_add - Add a resource to the validation list. * * @sw_context: Pointer to the software context. * @res: Unreferenced rcu-protected pointer to the resource. * @dirty: Whether to change dirty status. + * @flags: specifies whether to use the context or not * * Returns: 0 on success. Negative error code on failure. Typical error codes * are %-EINVAL on inconsistency and %-ESRCH if the resource was doomed. */ -static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) +static int vmw_execbuf_res_val_add(struct vmw_sw_context *sw_context, + struct vmw_resource *res, + u32 dirty, + u32 flags) { struct vmw_private *dev_priv = res->dev_priv; int ret; @@ -318,24 +324,30 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, if (dirty) vmw_validation_res_set_dirty(sw_context->ctx, rcache->private, dirty); - vmw_user_resource_noref_release(); return 0; } - priv_size = vmw_execbuf_res_size(dev_priv, res_type); - ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, - dirty, (void **)&ctx_info, - &first_usage); - vmw_user_resource_noref_release(); - if (ret) - return ret; + if ((flags & vmw_val_add_flag_noctx) != 0) { + ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, + (void **)&ctx_info, NULL); + if (ret) + return ret; - if (priv_size && first_usage) { - ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, - ctx_info); - if (ret) { - VMW_DEBUG_USER("Failed first usage context setup.\n"); + } else { + priv_size = vmw_execbuf_res_size(dev_priv, res_type); + ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, + dirty, (void **)&ctx_info, + &first_usage); + if (ret) return ret; + + if (priv_size && first_usage) { + ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, + ctx_info); + if (ret) { + VMW_DEBUG_USER("Failed first usage context setup.\n"); + return ret; + } } } @@ -344,43 +356,6 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, } /** - * vmw_execbuf_res_noctx_val_add - Add a non-context resource to the resource - * validation list if it's not already on it - * - * @sw_context: Pointer to the software context. - * @res: Pointer to the resource. - * @dirty: Whether to change dirty status. - * - * Returns: Zero on success. Negative error code on failure. - */ -static int vmw_execbuf_res_noctx_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) -{ - struct vmw_res_cache_entry *rcache; - enum vmw_res_type res_type = vmw_res_type(res); - void *ptr; - int ret; - - rcache = &sw_context->res_cache[res_type]; - if (likely(rcache->valid && rcache->res == res)) { - if (dirty) - vmw_validation_res_set_dirty(sw_context->ctx, - rcache->private, dirty); - return 0; - } - - ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, - &ptr, NULL); - if (ret) - return ret; - - vmw_execbuf_rcache_update(rcache, res, ptr); - - return 0; -} - -/** * vmw_view_res_val_add - Add a view and the surface it's pointing to to the * validation list * @@ -398,13 +373,13 @@ static int vmw_view_res_val_add(struct vmw_sw_context *sw_context, * First add the resource the view is pointing to, otherwise it may be * swapped out when the view is validated. */ - ret = vmw_execbuf_res_noctx_val_add(sw_context, vmw_view_srf(view), - vmw_view_dirtying(view)); + ret = vmw_execbuf_res_val_add(sw_context, vmw_view_srf(view), + vmw_view_dirtying(view), vmw_val_add_flag_noctx); if (ret) return ret; - return vmw_execbuf_res_noctx_val_add(sw_context, view, - VMW_RES_DIRTY_NONE); + return vmw_execbuf_res_val_add(sw_context, view, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); } /** @@ -475,8 +450,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (IS_ERR(res)) continue; - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_SET); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_SET, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; } @@ -490,9 +466,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (vmw_res_type(entry->res) == vmw_res_view) ret = vmw_view_res_val_add(sw_context, entry->res); else - ret = vmw_execbuf_res_noctx_val_add - (sw_context, entry->res, - vmw_binding_dirtying(entry->bt)); + ret = vmw_execbuf_res_val_add(sw_context, entry->res, + vmw_binding_dirtying(entry->bt), + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) break; } @@ -658,7 +634,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; struct vmw_resource *res; - int ret; + int ret = 0; + bool needs_unref = false; if (p_res) *p_res = NULL; @@ -683,17 +660,18 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle - (dev_priv, sw_context->fp->tfile, *id_loc, converter); - if (IS_ERR(res)) { + ret = vmw_user_resource_lookup_handle + (dev_priv, sw_context->fp->tfile, *id_loc, converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find/use resource 0x%08x.\n", (unsigned int) *id_loc); - return PTR_ERR(res); + return ret; } + needs_unref = true; - ret = vmw_execbuf_res_noref_val_add(sw_context, res, dirty); + ret = vmw_execbuf_res_val_add(sw_context, res, dirty, vmw_val_add_flag_none); if (unlikely(ret != 0)) - return ret; + goto res_check_done; if (rcache->valid && rcache->res == res) { rcache->valid_handle = true; @@ -708,7 +686,11 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (p_res) *p_res = res; - return 0; +res_check_done: + if (needs_unref) + vmw_resource_unreference(&res); + + return ret; } /** @@ -1171,9 +1153,9 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use MOB buffer.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); @@ -1225,9 +1207,9 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use GMR region.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); @@ -2025,8 +2007,9 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, res = vmw_shader_lookup(vmw_context_res_man(ctx), cmd->body.shid, cmd->body.type); if (!IS_ERR(res)) { - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; @@ -2273,8 +2256,9 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) return ret; } @@ -2777,8 +2761,8 @@ static int vmw_cmd_dx_bind_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { VMW_DEBUG_USER("Error creating resource validation node.\n"); return ret; @@ -3098,8 +3082,8 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, vmw_dx_streamoutput_set_size(res, cmd->body.sizeInBytes); - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -3148,8 +3132,8 @@ static int vmw_cmd_dx_set_streamoutput(struct vmw_private *dev_priv, return 0; } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -4066,22 +4050,26 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle + ret = vmw_user_resource_lookup_handle (dev_priv, sw_context->fp->tfile, handle, - user_context_converter); - if (IS_ERR(res)) { + user_context_converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find or user DX context 0x%08x.\n", (unsigned int) handle); - return PTR_ERR(res); + return ret; } - ret = vmw_execbuf_res_noref_val_add(sw_context, res, VMW_RES_DIRTY_SET); - if (unlikely(ret != 0)) + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_SET, + vmw_val_add_flag_none); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); return ret; + } sw_context->dx_ctx_node = vmw_execbuf_info_from_res(sw_context, res); sw_context->man = vmw_context_res_man(res); + vmw_resource_unreference(&res); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index f66caa540e14..c7d645e5ec7b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -281,39 +281,6 @@ out_bad_resource: return ret; } -/** - * vmw_user_resource_noref_lookup_handle - lookup a struct resource from a - * TTM user-space handle and perform basic type checks - * - * @dev_priv: Pointer to a device private struct - * @tfile: Pointer to a struct ttm_object_file identifying the caller - * @handle: The TTM user-space handle - * @converter: Pointer to an object describing the resource type - * - * If the handle can't be found or is associated with an incorrect resource - * type, -EINVAL will be returned. - */ -struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv - *converter) -{ - struct ttm_base_object *base; - - base = ttm_base_object_noref_lookup(tfile, handle); - if (!base) - return ERR_PTR(-ESRCH); - - if (unlikely(ttm_base_object_type(base) != converter->object_type)) { - ttm_base_object_noref_release(); - return ERR_PTR(-EINVAL); - } - - return converter->base_obj_to_res(base); -} - /* * Helper function that looks either a surface or bo. * diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index 0d8e6bd1ccbf..90996c108146 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -717,7 +717,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -751,7 +751,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_drm_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_driver_ids[] = { diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 8d8ebdc2039b..67f1c7364c95 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -51,7 +51,7 @@ module_param_array(ptr_size, int, NULL, 0444); MODULE_PARM_DESC(ptr_size, "Pointing device width, height in pixels (default 800,600)"); -static int xenkbd_remove(struct xenbus_device *); +static void xenkbd_remove(struct xenbus_device *); static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); static void xenkbd_disconnect_backend(struct xenkbd_info *); @@ -404,7 +404,7 @@ static int xenkbd_resume(struct xenbus_device *dev) return xenkbd_connect_backend(dev, info); } -static int xenkbd_remove(struct xenbus_device *dev) +static void xenkbd_remove(struct xenbus_device *dev) { struct xenkbd_info *info = dev_get_drvdata(&dev->dev); @@ -417,7 +417,6 @@ static int xenkbd_remove(struct xenbus_device *dev) input_unregister_device(info->mtouch); free_page((unsigned long)info->page); kfree(info); - return 0; } static int xenkbd_connect_backend(struct xenbus_device *dev, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab160198edd6..f2425b0f0cd6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3858,7 +3858,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) static void arm_smmu_device_shutdown(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + arm_smmu_device_disable(smmu); } static const struct of_device_id arm_smmu_of_match[] = { diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 719fbca1fe52..2ff7a72cf377 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1316,8 +1316,14 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - /* Assume that a coherent TCU implies coherent TBUs */ - return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; + /* + * It's overwhelmingly the case in practice that when the pagetable + * walk interface is connected to a coherent interconnect, all the + * translation interfaces are too. Furthermore if the device is + * natively coherent, then its translation interface must also be. + */ + return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK || + device_get_dma_attr(dev) == DEV_DMA_COHERENT; case IOMMU_CAP_NOEXEC: return true; default: @@ -2185,19 +2191,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return -ENODEV; + return; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); - iommu_device_unregister(&smmu->iommu); - iommu_device_sysfs_remove(&smmu->iommu); - arm_smmu_rpm_get(smmu); /* Turn the thing off */ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); @@ -2209,12 +2212,21 @@ static int arm_smmu_device_remove(struct platform_device *pdev) clk_bulk_disable(smmu->num_clks, smmu->clks); clk_bulk_unprepare(smmu->num_clks, smmu->clks); - return 0; } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + if (!smmu) + return -ENODEV; + + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + + arm_smmu_device_shutdown(pdev); + + return 0; } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index de91dd88705b..5f6a85aea501 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3185,14 +3185,16 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); */ int iommu_device_claim_dma_owner(struct device *dev, void *owner) { - struct iommu_group *group = iommu_group_get(dev); + struct iommu_group *group; int ret = 0; - if (!group) - return -ENODEV; if (WARN_ON(!owner)) return -EINVAL; + group = iommu_group_get(dev); + if (!group) + return -ENODEV; + mutex_lock(&group->mutex); if (group->owner_cnt) { if (group->owner != owner) { diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a44ad92fc5eb..fe452ce46642 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -197,7 +197,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = to_iova(curr); - retry_pfn = curr_iova->pfn_hi + 1; + retry_pfn = curr_iova->pfn_hi; retry: do { @@ -211,7 +211,7 @@ retry: if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { high_pfn = limit_pfn; - low_pfn = retry_pfn; + low_pfn = retry_pfn + 1; curr = iova_find_limit(iovad, limit_pfn); curr_iova = to_iova(curr); goto retry; diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 69682ee068d2..ca581ff1c769 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -683,7 +683,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - return ret; + goto out_clk_unprepare; ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev); if (ret) @@ -698,6 +698,8 @@ out_dev_unreg: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); +out_clk_unprepare: + clk_disable_unprepare(data->bclk); return ret; } diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index ae3437f03e6c..76ee7c5e7b7e 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -210,7 +210,6 @@ FIELD_PREP(AIC_EVENT_NUM, x)) #define AIC_HWIRQ_IRQ(x) FIELD_GET(AIC_EVENT_NUM, x) #define AIC_HWIRQ_DIE(x) FIELD_GET(AIC_EVENT_DIE, x) -#define AIC_NR_FIQ 6 #define AIC_NR_SWIPI 32 /* @@ -222,11 +221,18 @@ * running at EL2 (with VHE). When the kernel is running at EL1, the * mapping differs and aic_irq_domain_translate() performs the remapping. */ - -#define AIC_TMR_EL0_PHYS AIC_TMR_HV_PHYS -#define AIC_TMR_EL0_VIRT AIC_TMR_HV_VIRT -#define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS -#define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT +enum fiq_hwirq { + /* Must be ordered as in apple-aic.h */ + AIC_TMR_EL0_PHYS = AIC_TMR_HV_PHYS, + AIC_TMR_EL0_VIRT = AIC_TMR_HV_VIRT, + AIC_TMR_EL02_PHYS = AIC_TMR_GUEST_PHYS, + AIC_TMR_EL02_VIRT = AIC_TMR_GUEST_VIRT, + AIC_CPU_PMU_Effi = AIC_CPU_PMU_E, + AIC_CPU_PMU_Perf = AIC_CPU_PMU_P, + /* No need for this to be discovered from DT */ + AIC_VGIC_MI, + AIC_NR_FIQ +}; static DEFINE_STATIC_KEY_TRUE(use_fast_ipi); @@ -384,14 +390,20 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs) /* * vGIC maintenance interrupts end up here too, so we need to check - * for them separately. This should never trigger if KVM is working - * properly, because it will have already taken care of clearing it - * on guest exit before this handler runs. + * for them separately. It should however only trigger when NV is + * in use, and be cleared when coming back from the handler. */ - if (is_kernel_in_hyp_mode() && (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) && - read_sysreg_s(SYS_ICH_MISR_EL2) != 0) { - pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n"); - sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0); + if (is_kernel_in_hyp_mode() && + (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) && + read_sysreg_s(SYS_ICH_MISR_EL2) != 0) { + generic_handle_domain_irq(aic_irqc->hw_domain, + AIC_FIQ_HWIRQ(AIC_VGIC_MI)); + + if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) && + read_sysreg_s(SYS_ICH_MISR_EL2))) { + pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n"); + sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0); + } } } @@ -1178,6 +1190,21 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p "irqchip/apple-aic/ipi:starting", aic_init_cpu, NULL); + if (is_kernel_in_hyp_mode()) { + struct irq_fwspec mi = { + .fwnode = of_node_to_fwnode(node), + .param_count = 3, + .param = { + [0] = AIC_FIQ, /* This is a lie */ + [1] = AIC_VGIC_MI, + [2] = IRQ_TYPE_LEVEL_HIGH, + }, + }; + + vgic_info.maint_irq = irq_create_fwspec_mapping(&mi); + WARN_ON(!vgic_info.maint_irq); + } + vgic_set_kvm_info(&vgic_info); pr_info("Initialized with %d/%d IRQs * %d/%d die(s), %d FIQs, %d vIPIs", diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4..6e5e11c37078 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c index f601e7bd8627..1c689dafca2a 100644 --- a/drivers/mtd/parsers/tplink_safeloader.c +++ b/drivers/mtd/parsers/tplink_safeloader.c @@ -91,7 +91,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, buf = mtd_parser_tplink_safeloader_read_table(mtd); if (!buf) { err = -ENOENT; - goto err_out; + goto err_free_parts; } for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET; @@ -118,6 +118,8 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, err_free: for (idx -= 1; idx >= 0; idx--) kfree(parts[idx].name); +err_free_parts: + kfree(parts); err_out: return err; }; diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index d8703d7dfd0a..d67c926bca8b 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include <linux/err.h> #include <linux/errno.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/mutex.h> #include <linux/math64.h> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 16ce7a90610c..240a7e8a7652 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -993,7 +993,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); @@ -1031,7 +1031,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 081bd2c3f289..e84e5be8e59e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3130,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, hclgevf_update_rss_size(handle, new_tqps_num); - hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map, + hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map, tc_offset, tc_valid, tc_size); ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset, tc_valid, tc_size); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index c4e451ef7942..adc02adef83a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3850,7 +3850,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(match.mask->dst)); + be32_to_cpu(match.mask->src)); return -EINVAL; } } diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b5a7f246d230..43e199b5b513 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) /* Send the data out to a hardware port */ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); if (!write_buf) { + kfree(cmd_buf); err = -ENOMEM; goto exit; } @@ -460,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]), GFP_KERNEL); + if (!pf->gnss_tty_port[i]) + goto err_out; + pf->gnss_serial[i] = NULL; tty_port_init(pf->gnss_tty_port[i]); @@ -469,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) err = tty_register_driver(tty_driver); if (err) { dev_err(dev, "Failed to register TTY driver err=%d\n", err); - - for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { - tty_port_destroy(pf->gnss_tty_port[i]); - kfree(pf->gnss_tty_port[i]); - } - kfree(ttydrv_name); - tty_driver_kref_put(pf->ice_gnss_tty_driver); - - return NULL; + goto err_out; } for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) dev_info(dev, "%s%d registered\n", ttydrv_name, i); return tty_driver; + +err_out: + while (i--) { + tty_port_destroy(pf->gnss_tty_port[i]); + kfree(pf->gnss_tty_port[i]); + } + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; } /** diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index a7b22639cfcd..e9747ec5ac0b 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -475,7 +475,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 8dbb9f903ca7..c34734d432e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 24aa97f993ca..123dca9ce468 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); if (rp_pdev && rp_pdev->subordinate) { bus = rp_pdev->subordinate->number; + pci_dev_put(rp_pdev); return pci_get_domain_bus_and_slot(0, bus, 0); } + pci_dev_put(rp_pdev); return NULL; } @@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) struct ixgbe_adapter *adapter = hw->back; struct pci_dev *pdev = adapter->pdev; struct pci_dev *func0_pdev; + bool has_mii = false; /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 @@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); if (func0_pdev) { if (func0_pdev == pdev) - return true; - else - return false; + has_mii = true; + goto out; } func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); if (func0_pdev == pdev) - return true; + has_mii = true; - return false; +out: + pci_dev_put(func0_pdev); + return has_mii; } /** diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index b2b71fe80d61..724df6398bbe 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -774,9 +774,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); if (enable) - cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; else - cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index fb2d37676d84..5a20d93004c7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -26,7 +26,6 @@ #define CMR_P2X_SEL_SHIFT 59ULL #define CMR_P2X_SEL_NIX0 1ULL #define CMR_P2X_SEL_NIX1 2ULL -#define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) #define CGX_LMAC_TYPE_SHIFT 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 86653bb8e403..7f8ffbf79cf7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev) if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); otx2_ptp_destroy(vf); + otx2_mcam_flow_del(vf); + otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d3ca745d107d..c837103a9ee3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL); - if (!cmd->stats) - return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) { - err = -ENOMEM; - goto dma_pool_err; - } + if (!cmd->pool) + return -ENOMEM; err = alloc_cmd_page(dev, cmd); if (err) @@ -2268,8 +2262,6 @@ err_free_page: err_free_pool: dma_pool_destroy(cmd->pool); -dma_pool_err: - kvfree(cmd->stats); return err; } @@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); - kvfree(cmd->stats); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index 512d43148922..c4378afdec09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -34,12 +34,6 @@ static int police_act_validate(const struct flow_action_entry *act, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c index 8d7d761482d2..50b60fd00946 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -127,6 +127,7 @@ mlx5e_post_meter_add_rule(struct mlx5e_priv *priv, attr->counter = act_counter; attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + attr->inner_match_level = MLX5_MATCH_NONE; attr->outer_match_level = MLX5_MATCH_NONE; attr->chain = 0; attr->prio = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index fd07c4cbfd1d..1f62c702b625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], struct udphdr *udp = (struct udphdr *)(buf); struct vxlanhdr *vxh; + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 9369a580743e..7f6b940830b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -62,6 +62,7 @@ struct mlx5e_macsec_sa { u32 enc_key_id; u32 next_pn; sci_t sci; + ssci_t ssci; salt_t salt; struct rhash_head hash; @@ -358,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; union mlx5e_macsec_rule *macsec_rule; - struct macsec_key *key; int err; obj_attrs.next_pn = sa->next_pn; @@ -368,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; - if (sa->epn_state.epn_enabled) { - obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : - cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); } obj_attrs.replay_window = ctx->secy->replay_window; @@ -499,10 +495,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, } static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, - const pn_t *next_pn_halves) + const pn_t *next_pn_halves, ssci_t ssci) { struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + sa->ssci = ssci; sa->salt = key->salt; epn_state->epn_enabled = 1; epn_state->epn_msb = next_pn_halves->upper; @@ -550,7 +547,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) tx_sa->assoc_num = assoc_num; if (secy->xpn) - update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, @@ -945,7 +943,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) - update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cff5f2e29e1e..abcc614b6191 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4084,6 +4084,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + if (!netif_device_present(netdev)) + return features; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 75b9e1528fd2..7d90e5b72854 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -191,7 +191,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) if (err) { netdev_warn(priv->netdev, "vport %d error %d reading stats\n", rep->vport, err); - return; + goto out; } #define MLX5_GET_CTR(p, x) \ @@ -241,6 +241,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) rep_stats->tx_vport_rdma_multicast_bytes = MLX5_GET_CTR(out, received_ib_multicast.octets); +out: kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c8820ab22169..3df455f6b168 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2419,7 +2419,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = rq->stats; + stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9af2aa2922f5..dbadaf166487 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1301,7 +1301,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1359,8 +1358,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } mutex_unlock(&tc->t_lock); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_detach_mod_hdr(priv, flow); + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(priv->mdev, attr->counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e455b215c708..c981fa77f439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -143,7 +143,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (attr && !attr->chain && esw_attr->int_port) + if (!attr->chain && esw_attr && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else @@ -4143,8 +4143,6 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, @@ -4236,8 +4234,6 @@ int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, } hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), - MLX5_UN_SZ_BYTES(hca_cap_union)); MLX5_SET(cmd_hca_cap, hca_caps, roce, enable); err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index c247cca154e9..eff92dc0927c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev, static int mlx5i_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } - return mlx5e_ethtool_set_channels(priv, ch); + return mlx5e_ethtool_set_channels(epriv, ch); } static void mlx5i_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 2c73c8445e63..911cf4d23964 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -160,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = sstats->tx_queue_dropped; } +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 99d46fda9f82..f3f2af972020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -54,9 +54,11 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ u32 qpn; bool sub_interface; + u32 num_sub_interfaces; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; char *mlx5e_priv[]; }; @@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +/* Reference management for child to parent interfaces. */ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 4d9c9e49645c..03e681297937 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -158,21 +158,28 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv, *parent_ipriv; struct net_device *parent_dev; - int parent_ifindex; ipriv = priv->ppriv; - /* Get QPN to netdevice hash table from parent */ - parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); - parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); if (!parent_dev) { mlx5_core_warn(priv->mdev, "failed to get parent device\n"); return -EINVAL; } + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; - dev_put(parent_dev); return mlx5i_dev_init(dev); } @@ -184,6 +191,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { + mlx5i_parent_put(netdev); return mlx5i_dev_cleanup(netdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 69cfe60c558a..69318b143268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -681,7 +681,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, .name = "mlx5_ptp", - .max_adj = 100000000, + .max_adj = 50000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 74cbe53ee9db..b851141e03de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -3,7 +3,12 @@ #include "dr_types.h" +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN < 2048) +/* don't try to optimize STE allocation if the stack is too constaraining */ +#define DR_RULE_MAX_STES_OPTIMIZED 0 +#else #define DR_RULE_MAX_STES_OPTIMIZED 5 +#endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn, @@ -1218,10 +1223,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_unlock(nic_dmn); - if (unlikely(!hw_ste_arr_is_opt)) - kfree(hw_ste_arr); - - return 0; + goto out; free_rule: dr_rule_clean_rule_members(rule, nic_rule); @@ -1238,6 +1240,7 @@ remove_from_nic_tbl: free_hw_ste: mlx5dr_domain_nic_unlock(nic_dmn); +out: if (unlikely(!hw_ste_arr_is_opt)) kfree(hw_ste_arr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c22c3ac4e2a1..09e32778b012 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2951,7 +2951,7 @@ struct mlxsw_sp_nexthop_group_info { gateway:1, /* routes using the group use a gateway */ is_resilient:1; struct list_head list; /* member in nh_res_grp_list */ - struct mlxsw_sp_nexthop nexthops[0]; + struct mlxsw_sp_nexthop nexthops[]; #define nh_rif nexthops[0].rif }; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index f9ebfaafbebc..a8348437dd87 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -1073,6 +1073,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x) struct lan966x_port *port; int i; + if (!lan966x->ptp) + return; + for (i = 0; i < lan966x->num_phys_ports; i++) { port = lan966x->ports[i]; if (!port) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index d8dc9fbb81e1..a54c0426a35f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -95,10 +95,7 @@ lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup, bool found = false; u32 val; - /* Check if the port keyset selection is enabled */ val = lan_rd(lan966x, ANA_VCAP_S2_CFG(port->chip_port)); - if (!ANA_VCAP_S2_CFG_ENA_GET(val)) - return -ENOENT; /* Collect all keysets for the port in a list */ if (l3_proto == ETH_P_ALL) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 24592d972523..dadd61bccfe7 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1996,10 +1996,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168F family. */ { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - */ + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, /* 8168E family. */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d42e1afb6521..2f7d8e4561d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data { struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); - void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) return 0; } -static void mt8195_fix_mac_speed(void *priv, unsigned int speed) -{ - struct mediatek_dwmac_plat_data *priv_plat = priv; - - if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { - /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL, - * when link speed is 1Gbps with RGMII interface, - * Fall back to delay macro circuit for 10/100Mbps link speed. - */ - if (speed == SPEED_1000) - regmap_update_bits(priv_plat->peri_regmap, - MT8195_PERI_ETH_CTRL0, - MT8195_RGMII_TXC_PHASE_CTRL | - MT8195_DLY_GTXC_ENABLE | - MT8195_DLY_GTXC_INV | - MT8195_DLY_GTXC_STAGES, - MT8195_RGMII_TXC_PHASE_CTRL); - else - mt8195_set_delay(priv_plat); - } -} - static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_phy_interface = mt8195_set_interface, .dwmac_set_delay = mt8195_set_delay, - .dwmac_fix_mac_speed = mt8195_fix_mac_speed, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), .dma_bit_mask = 35, @@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; - if (priv_plat->variant->dwmac_fix_mac_speed) - plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index fc06ddeac0d5..b4388ca8d211 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -210,7 +210,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); - ret = 0; + /* wait for auxts fifo clear to finish */ + ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, + !(acr_value & PTP_ACR_ATSFC), + 10, 10000); break; default: diff --git a/drivers/net/ipa/data/ipa_data-v4.7.c b/drivers/net/ipa/data/ipa_data-v4.7.c index 7552c400961e..b83390c48615 100644 --- a/drivers/net/ipa/data/ipa_data-v4.7.c +++ b/drivers/net/ipa/data/ipa_data-v4.7.c @@ -357,7 +357,7 @@ static const struct ipa_mem ipa_mem_local_data[] = { static const struct ipa_mem_data ipa_mem_data = { .local_count = ARRAY_SIZE(ipa_mem_local_data), .local = ipa_mem_local_data, - .imem_addr = 0x146a9000, + .imem_addr = 0x146a8000, .imem_size = 0x00002000, .smem_id = 497, .smem_size = 0x00009000, diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8911cd2ed534..c140edb4b648 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -1008,6 +1008,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* Cinterion PLS62-W modem by GEMALTO/THALES */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x005b, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a481a1d831e2..23da1d9dafd1 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9836,6 +9836,7 @@ static const struct usb_device_id rtl8152_table[] = { REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab), REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6), REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), + REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0c5e), REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index a83699de01ec..fdd0c9abc1a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Apple ARM64 platforms have their own idea of board type, passed in * via the device tree. They also have an antenna SKU parameter */ - if (!of_property_read_string(np, "brcm,board-type", &prop)) + err = of_property_read_string(np, "brcm,board-type", &prop); + if (!err) settings->board_type = prop; if (!of_property_read_string(np, "apple,antenna-sku", &prop)) @@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); - if (root && !settings->board_type) { + if (root && err) { char *board_type; const char *tmp; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index c1ba4294f364..001636901dda 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -977,7 +977,7 @@ static int read_xenbus_vif_flags(struct backend_info *be) return 0; } -static int netback_remove(struct xenbus_device *dev) +static void netback_remove(struct xenbus_device *dev) { struct backend_info *be = dev_get_drvdata(&dev->dev); @@ -992,7 +992,6 @@ static int netback_remove(struct xenbus_device *dev) kfree(be->hotplug_script); kfree(be); dev_set_drvdata(&dev->dev, NULL); - return 0; } /* diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 14aec417fa06..12b074286df9 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2646,7 +2646,7 @@ static void xennet_bus_close(struct xenbus_device *dev) } while (!ret); } -static int xennet_remove(struct xenbus_device *dev) +static void xennet_remove(struct xenbus_device *dev) { struct netfront_info *info = dev_get_drvdata(&dev->dev); @@ -2662,8 +2662,6 @@ static int xennet_remove(struct xenbus_device *dev) rtnl_unlock(); } xennet_free_netdev(info->netdev); - - return 0; } static const struct xenbus_device_id netfront_ids[] = { diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 6f71ac72012e..ed9c5e2cf3ad 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) return usb_submit_urb(phy->ack_urb, flags); } +struct pn533_out_arg { + struct pn533_usb_phy *phy; + struct completion done; +}; + static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; + struct pn533_out_arg arg; + void *cntx; int rc; if (phy->priv == NULL) @@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; + wait_for_completion(&arg.done); + phy->out_urb->context = cntx; + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); @@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) return arg.rc; } -static void pn533_send_complete(struct urb *urb) +static void pn533_out_complete(struct urb *urb) +{ + struct pn533_out_arg *arg = urb->context; + struct pn533_usb_phy *phy = arg->phy; + + switch (urb->status) { + case 0: + break; /* success */ + case -ECONNRESET: + case -ENOENT: + dev_dbg(&phy->udev->dev, + "The urb has been stopped (status %d)\n", + urb->status); + break; + case -ESHUTDOWN: + default: + nfc_err(&phy->udev->dev, + "Urb failure (status %d)\n", + urb->status); + } + + complete(&arg->done); +} + +static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; @@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index e36aeb50b4ed..bf1c60edb7f9 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1493,7 +1493,7 @@ static int apple_nvme_probe(struct platform_device *pdev) } ret = nvme_init_ctrl(&anv->ctrl, anv->dev, &nvme_ctrl_ops, - NVME_QUIRK_SKIP_CID_GEN); + NVME_QUIRK_SKIP_CID_GEN | NVME_QUIRK_IDENTIFY_CNS); if (ret) { dev_err_probe(dev, ret, "Failed to initialize nvme_ctrl"); goto put_dev; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index a8639919237e..06f52db34be9 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -8,8 +8,13 @@ #include <linux/io_uring.h> #include "nvme.h" +enum { + NVME_IOCTL_VEC = (1 << 0), + NVME_IOCTL_PARTITION = (1 << 1), +}; + static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, - fmode_t mode) + unsigned int flags, fmode_t mode) { u32 effects; @@ -17,6 +22,13 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, return true; /* + * Do not allow unprivileged passthrough on partitions, as that allows an + * escape from the containment of the partition. + */ + if (flags & NVME_IOCTL_PARTITION) + return false; + + /* * Do not allow unprivileged processes to send vendor specific or fabrics * commands as we can't be sure about their effects. */ @@ -150,7 +162,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, static int nvme_map_user_request(struct request *req, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd, - bool vec) + unsigned int flags) { struct request_queue *q = req->q; struct nvme_ns *ns = q->queuedata; @@ -163,7 +175,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(vec)) + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) return -EINVAL; ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); @@ -172,8 +184,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL); } else { ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer), - bufflen, GFP_KERNEL, vec, 0, 0, - rq_data_dir(req)); + bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0, + 0, rq_data_dir(req)); } if (ret) @@ -203,9 +215,9 @@ out: } static int nvme_submit_user_cmd(struct request_queue *q, - struct nvme_command *cmd, u64 ubuffer, - unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u64 *result, unsigned timeout, bool vec) + struct nvme_command *cmd, u64 ubuffer, unsigned bufflen, + void __user *meta_buffer, unsigned meta_len, u32 meta_seed, + u64 *result, unsigned timeout, unsigned int flags) { struct nvme_ctrl *ctrl; struct request *req; @@ -221,7 +233,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, req->timeout = timeout; if (ubuffer && bufflen) { ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer, - meta_len, meta_seed, &meta, NULL, vec); + meta_len, meta_seed, &meta, NULL, flags); if (ret) return ret; } @@ -304,10 +316,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - return nvme_submit_user_cmd(ns->queue, &c, - io.addr, length, - metadata, meta_len, lower_32_bits(io.slba), NULL, 0, - false); + return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata, + meta_len, lower_32_bits(io.slba), NULL, 0, 0); } static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, @@ -325,7 +335,8 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd, fmode_t mode) + struct nvme_passthru_cmd __user *ucmd, unsigned int flags, + fmode_t mode) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -353,16 +364,15 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, mode)) + if (!nvme_cmd_allowed(ns, &c, 0, mode)) return -EACCES; if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - cmd.addr, cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &result, timeout, false); + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), + cmd.metadata_len, 0, &result, timeout, 0); if (status >= 0) { if (put_user(result, &ucmd->result)) @@ -373,8 +383,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, } static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd, bool vec, - fmode_t mode) + struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, + fmode_t mode) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; @@ -401,16 +411,15 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15); - if (!nvme_cmd_allowed(ns, &c, mode)) + if (!nvme_cmd_allowed(ns, &c, flags, mode)) return -EACCES; if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, - cmd.addr, cmd.data_len, - nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, - 0, &cmd.result, timeout, vec); + cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata), + cmd.metadata_len, 0, &cmd.result, timeout, flags); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) @@ -571,7 +580,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15)); - if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode)) + if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode)) return -EACCES; d.metadata = READ_ONCE(cmd->metadata); @@ -641,9 +650,9 @@ static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false, mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, mode); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -668,14 +677,14 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, fmode_t mode) + void __user *argp, unsigned int flags, fmode_t mode) { switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp, mode); + return nvme_user_cmd(ns->ctrl, ns, argp, flags, mode); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -686,37 +695,40 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, #endif case NVME_IOCTL_SUBMIT_IO: return nvme_submit_io(ns, argp); - case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode); case NVME_IOCTL_IO64_CMD_VEC: - return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode); + flags |= NVME_IOCTL_VEC; + fallthrough; + case NVME_IOCTL_IO64_CMD: + return nvme_user_cmd64(ns->ctrl, ns, argp, flags, mode); default: return -ENOTTY; } } -static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg, - fmode_t mode) -{ - if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode); - return nvme_ns_ioctl(ns, cmd, arg, mode); -} - int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = bdev->bd_disk->private_data; + void __user *argp = (void __user *)arg; + unsigned int flags = 0; - return __nvme_ioctl(ns, cmd, (void __user *)arg, mode); + if (bdev_is_partition(bdev)) + flags |= NVME_IOCTL_PARTITION; + + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode); + return nvme_ns_ioctl(ns, cmd, argp, flags, mode); } long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); + void __user *argp = (void __user *)arg; - return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode); + if (is_ctrl_ioctl(cmd)) + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, file->f_mode); + return nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); } static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -806,6 +818,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, void __user *argp = (void __user *)arg; struct nvme_ns *ns; int srcu_idx, ret = -EWOULDBLOCK; + unsigned int flags = 0; + + if (bdev_is_partition(bdev)) + flags |= NVME_IOCTL_PARTITION; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); @@ -821,7 +837,7 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, mode); - ret = nvme_ns_ioctl(ns, cmd, argp, mode); + ret = nvme_ns_ioctl(ns, cmd, argp, flags, mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -846,7 +862,7 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, file->f_mode); - ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode); + ret = nvme_ns_ioctl(ns, cmd, argp, 0, file->f_mode); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -945,7 +961,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem); - ret = nvme_user_cmd(ctrl, ns, argp, mode); + ret = nvme_user_cmd(ctrl, ns, argp, 0, mode); nvme_put_ns(ns); return ret; @@ -962,9 +978,9 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, file->f_mode); + return nvme_user_cmd(ctrl, NULL, argp, 0, file->f_mode); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode); + return nvme_user_cmd64(ctrl, NULL, argp, 0, file->f_mode); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp, file->f_mode); case NVME_IOCTL_RESET: diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b13baccedb4a..a2553b7d9bb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2533,7 +2533,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) */ result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (result < 0) - return result; + goto disable; dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -2586,8 +2586,13 @@ static int nvme_pci_enable(struct nvme_dev *dev) pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); - return nvme_pci_configure_admin_queue(dev); + result = nvme_pci_configure_admin_queue(dev); + if (result) + goto free_irq; + return result; + free_irq: + pci_free_irq_vectors(pdev); disable: pci_disable_device(pdev); return result; @@ -3495,7 +3500,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | - NVME_QUIRK_SKIP_CID_GEN }, + NVME_QUIRK_SKIP_CID_GEN | + NVME_QUIRK_IDENTIFY_CNS }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index a0d2713f0e88..99ec91e2a5cf 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -225,7 +225,7 @@ config PCIE_ARTPEC6_EP config PCIE_BT1 tristate "Baikal-T1 PCIe controller" depends on MIPS_BAIKAL_T1 || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Enables support for the PCIe controller in the Baikal-T1 SoC to work diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 7378e2f3e525..fcd029ca2eb1 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -1055,14 +1055,12 @@ out: return err; } -static int pcifront_xenbus_remove(struct xenbus_device *xdev) +static void pcifront_xenbus_remove(struct xenbus_device *xdev) { struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); if (pdev) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xenpci_ids[] = { diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 43e765199137..c6537a1b3a2e 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl, return status; status = ssam_request_sync_init(rqst, spec->flags); - if (status) + if (status) { + ssam_request_sync_free(rqst); return status; + } ssam_request_sync_set_resp(rqst, rsp); diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c index f5565570f16c..69132976d297 100644 --- a/drivers/platform/surface/aggregator/ssh_request_layer.c +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c @@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data) if (sshp_parse_command(dev, data, &command, &command_data)) return; + /* + * Check if the message was intended for us. If not, drop it. + * + * Note: We will need to change this to handle debug messages. On newer + * generation devices, these seem to be sent to tid_out=0x03. We as + * host can still receive them as they can be forwarded via an override + * option on SAM, but doing so does not change tid_out=0x00. + */ + if (command->tid_out != 0x00) { + rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n", + command->tid_out); + return; + } + if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid))) ssh_rtl_rx_event(rtl, command, &command_data); else diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 439d282aafd1..8d924986381b 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -932,7 +932,7 @@ static int amd_pmc_probe(struct platform_device *pdev) if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) { err = amd_pmc_s2d_init(dev); if (err) - return err; + goto err_pci_dev_put; } platform_set_drvdata(pdev, dev); diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index c685a705b73d..cb15acdf14a3 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -121,6 +121,10 @@ static struct quirk_entry quirk_asus_tablet_mode = { .tablet_switch_mode = asus_wmi_lid_flip_rog_devid, }; +static struct quirk_entry quirk_asus_ignore_fan = { + .wmi_ignore_fan = true, +}; + static int dmi_matched(const struct dmi_system_id *dmi) { pr_info("Identified laptop model '%s'\n", dmi->ident); @@ -473,6 +477,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_tablet_mode, }, + { + .callback = dmi_matched, + .ident = "ASUS VivoBook E410MA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E410MA"), + }, + .driver_data = &quirk_asus_ignore_fan, + }, {}, }; @@ -511,6 +524,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, + { KE_KEY, 0x33, { KEY_SCREENLOCK } }, { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, @@ -544,6 +558,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ { KE_KEY, 0x82, { KEY_CAMERA } }, + { KE_KEY, 0x85, { KEY_CAMERA } }, { KE_KEY, 0x86, { KEY_PROG1 } }, /* MyASUS Key */ { KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ { KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 6f81b2844dcb..104188d70988 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2243,7 +2243,9 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) asus->fan_type = FAN_TYPE_NONE; asus->agfn_pwm = -1; - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL)) + if (asus->driver->quirks->wmi_ignore_fan) + asus->fan_type = FAN_TYPE_NONE; + else if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL)) asus->fan_type = FAN_TYPE_SPEC83; else if (asus_wmi_has_agfn_fan(asus)) asus->fan_type = FAN_TYPE_AGFN; @@ -2436,6 +2438,9 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, *available = false; + if (asus->fan_type == FAN_TYPE_NONE) + return 0; + err = fan_curve_get_factory_default(asus, fan_dev); if (err) { return 0; diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 65316998b898..a478ebfd34df 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -38,6 +38,7 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_set_devstate; bool wmi_force_als_set; + bool wmi_ignore_fan; enum asus_wmi_tablet_switch_mode tablet_switch_mode; int wapf; /* diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index c82b3d6867c5..c517bd45dd32 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* privacy mic mute */ { KE_KEY, 0x0001, { KEY_MICMUTE } }, /* privacy camera mute */ - { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } }, + { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } }, { KE_END, 0}, }; @@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status) switch (code) { case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */ - case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ priv->last_status = status; sparse_keymap_report_entry(priv->input_dev, key, 1, true); ret = true; break; + case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ + priv->last_status = status; + sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false); + ret = true; + break; default: dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code); } @@ -292,7 +296,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) { struct privacy_wmi_data *priv; struct key_entry *keymap; - int ret, i; + int ret, i, j; ret = wmi_has_guid(DELL_PRIVACY_GUID); if (!ret) @@ -304,6 +308,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) dev_set_drvdata(&wdev->dev, priv); priv->wdev = wdev; + + ret = get_current_status(priv->wdev); + if (ret) + return ret; + /* create evdev passing interface */ priv->input_dev = devm_input_allocate_device(&wdev->dev); if (!priv->input_dev) @@ -318,9 +327,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) /* remap the keymap code with Dell privacy key type 0x12 as prefix * KEY_MICMUTE scancode will be reported as 0x120001 */ - for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { - keymap[i] = dell_wmi_keymap_type_0012[i]; - keymap[i].code |= (0x0012 << 16); + for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { + /* + * Unlike keys where only presses matter, userspace may act + * on switches in both of their positions. Only register + * SW_CAMERA_LENS_COVER if it is actually there. + */ + if (dell_wmi_keymap_type_0012[i].type == KE_VSW && + dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER && + !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))) + continue; + + keymap[j] = dell_wmi_keymap_type_0012[i]; + keymap[j].code |= (0x0012 << 16); + j++; } ret = sparse_keymap_setup(priv->input_dev, keymap, NULL); kfree(keymap); @@ -331,11 +351,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) priv->input_dev->name = "Dell Privacy Driver"; priv->input_dev->id.bustype = BUS_HOST; - ret = input_register_device(priv->input_dev); - if (ret) - return ret; + /* Report initial camera-cover status */ + if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)) + input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER, + !(priv->last_status & CAMERA_STATUS)); - ret = get_current_status(priv->wdev); + ret = input_register_device(priv->input_dev); if (ret) return ret; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 435d2d3d903b..0eb5bfdd823a 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1621,6 +1621,12 @@ static const struct dmi_system_id set_fn_lock_led_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"), + } + }, {} }; diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index b2342b3d78c7..74dc2cff799e 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -181,6 +181,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, return PTR_ERR(int3472->regulator.gpio); } + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->regulator.gpio, 0); + cfg.dev = &int3472->adev->dev; cfg.init_data = &init_data; cfg.ena_gpiod = int3472->regulator.gpio; diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 974a132db651..c42c3faa2c32 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -168,6 +168,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.ena_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.ena_gpio, 0); break; case INT3472_GPIO_TYPE_PRIVACY_LED: gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led"); @@ -175,6 +177,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.led_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.led_gpio, 0); break; default: dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type); diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index f1d802f6ec3f..3a15d32d7644 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1029,6 +1029,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, adl_core_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, adl_core_init), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, mtl_core_init), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, mtl_core_init), {} }; diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c index ca76076fc706..b3622419cd1a 100644 --- a/drivers/platform/x86/simatic-ipc.c +++ b/drivers/platform/x86/simatic-ipc.c @@ -46,7 +46,8 @@ static struct { {SIMATIC_IPC_IPC427D, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_NONE}, {SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E}, {SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E}, - {SIMATIC_IPC_IPC427G, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G}, + {SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G}, + {SIMATIC_IPC_IPCPX_39A, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_227G}, }; static int register_platform_devices(u32 station_id) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 7156ae2ad196..537d6a2d0781 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1887,14 +1887,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd, break; } - ret = sony_call_snc_handle(handle, probe_base, &result); - if (ret) - return ret; + /* + * Only probe if there is a separate probe_base, otherwise the probe call + * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in + * the keyboard backlight being turned off. + */ + if (probe_base) { + ret = sony_call_snc_handle(handle, probe_base, &result); + if (ret) + return ret; - if ((handle == 0x0137 && !(result & 0x02)) || - !(result & 0x01)) { - dprintk("no backlight keyboard found\n"); - return 0; + if ((handle == 0x0137 && !(result & 0x02)) || + !(result & 0x01)) { + dprintk("no backlight keyboard found\n"); + return 0; + } } kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1195293b22fd..a95946800ae9 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10311,9 +10311,11 @@ static DEFINE_MUTEX(dytc_mutex); static int dytc_capabilities; static bool dytc_mmc_get_available; -static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) +static int convert_dytc_to_profile(int funcmode, int dytcmode, + enum platform_profile_option *profile) { - if (dytc_capabilities & BIT(DYTC_FC_MMC)) { + switch (funcmode) { + case DYTC_FUNCTION_MMC: switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10329,8 +10331,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p return -EINVAL; } return 0; - } - if (dytc_capabilities & BIT(DYTC_FC_PSC)) { + case DYTC_FUNCTION_PSC: switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10344,6 +10345,14 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p default: /* Unknown mode */ return -EINVAL; } + return 0; + case DYTC_FUNCTION_AMT: + /* For now return balanced. It's the closest we have to 'auto' */ + *profile = PLATFORM_PROFILE_BALANCED; + return 0; + default: + /* Unknown function */ + return -EOPNOTSUPP; } return 0; } @@ -10492,6 +10501,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; + /* system supports AMT, activate it when on balanced */ if (dytc_capabilities & BIT(DYTC_FC_AMT)) dytc_control_amt(profile == PLATFORM_PROFILE_BALANCED); @@ -10507,7 +10517,7 @@ static void dytc_profile_refresh(void) { enum platform_profile_option profile; int output, err = 0; - int perfmode; + int perfmode, funcmode; mutex_lock(&dytc_mutex); if (dytc_capabilities & BIT(DYTC_FC_MMC)) { @@ -10522,8 +10532,9 @@ static void dytc_profile_refresh(void) if (err) return; + funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(perfmode, &profile); + convert_dytc_to_profile(funcmode, perfmode, &profile); if (profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index baae3120efd0..f00995390fdf 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -264,6 +264,23 @@ static const struct ts_dmi_data connect_tablet9_data = { .properties = connect_tablet9_props, }; +static const struct property_entry csl_panther_tab_hd_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 1), + PROPERTY_ENTRY_U32("touchscreen-min-y", 20), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1526), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data csl_panther_tab_hd_data = { + .acpi_name = "MSSL1680:00", + .properties = csl_panther_tab_hd_props, +}; + static const struct property_entry cube_iwork8_air_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 1), PROPERTY_ENTRY_U32("touchscreen-min-y", 3), @@ -1125,6 +1142,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, }, { + /* CSL Panther Tab HD */ + .driver_data = (void *)&csl_panther_tab_hd_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"), + }, + }, + { /* CUBE iwork8 Air */ .driver_data = (void *)&cube_iwork8_air_data, .matches = { diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index e01b32d1fa17..00828f5baa97 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) chip->chip_irq = i2c->irq; + ret = da9211_regulator_init(chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); + return ret; + } + if (chip->chip_irq != 0) { ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, da9211_irq_handler, @@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) dev_warn(chip->dev, "No IRQ configured\n"); } - ret = da9211_regulator_init(chip); - - if (ret < 0) - dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); - return ret; } diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 43b5b9377714..ae6021390143 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1016,7 +1016,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"), RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"), - RPMH_VREG("ldo11", "ldo%s11", &pmic5_pldo, "vdd-l11"), + RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"), RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 41ba22f6c7f0..8c038ccf1c09 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -162,7 +162,7 @@ static void hisi_sas_slot_index_clear(struct hisi_hba *hisi_hba, int slot_idx) static void hisi_sas_slot_index_free(struct hisi_hba *hisi_hba, int slot_idx) { if (hisi_hba->hw->slot_index_alloc || - slot_idx >= HISI_SAS_UNRESERVED_IPTT) { + slot_idx < HISI_SAS_RESERVED_IPTT) { spin_lock(&hisi_hba->lock); hisi_sas_slot_index_clear(hisi_hba, slot_idx); spin_unlock(&hisi_hba->lock); @@ -704,7 +704,7 @@ static int hisi_sas_init_device(struct domain_device *device) int_to_scsilun(0, &lun); while (retry-- > 0) { - rc = sas_clear_task_set(device, lun.scsi_lun); + rc = sas_abort_task_set(device, lun.scsi_lun); if (rc == TMF_RESP_FUNC_COMPLETE) { hisi_sas_release_task(hisi_hba, device); break; @@ -1316,7 +1316,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) device->linkrate = phy->sas_phy.linkrate; hisi_hba->hw->setup_itct(hisi_hba, sas_dev); - } else + } else if (!port->port_attached) port->id = 0xff; } } diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 1ccce706167a..5e80225b5308 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -889,7 +889,9 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) { struct ata_port *ap = device->sata_dev.ap; struct ata_link *link = &ap->link; + unsigned long flags; + spin_lock_irqsave(ap->lock, flags); device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */ device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */ @@ -897,6 +899,7 @@ void sas_ata_device_link_abort(struct domain_device *device, bool force_reset) if (force_reset) link->eh_info.action |= ATA_EH_RESET; ata_link_abort(link); + spin_unlock_irqrestore(ap->lock, flags); } EXPORT_SYMBOL_GPL(sas_ata_device_link_abort); diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile index ef86ca46646b..3bf8cf34e1c3 100644 --- a/drivers/scsi/mpi3mr/Makefile +++ b/drivers/scsi/mpi3mr/Makefile @@ -1,5 +1,5 @@ # mpi3mr makefile -obj-m += mpi3mr.o +obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o mpi3mr-y += mpi3mr_os.o \ mpi3mr_fw.o \ mpi3mr_app.o \ diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 0c4aabaefdcc..286a44506578 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -3633,8 +3633,7 @@ int mpi3mr_setup_resources(struct mpi3mr_ioc *mrioc) int i, retval = 0, capb = 0; u16 message_control; u64 dma_mask = mrioc->dma_mask ? mrioc->dma_mask : - (((dma_get_required_mask(&pdev->dev) > DMA_BIT_MASK(32)) && - (sizeof(dma_addr_t) > 4)) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); + ((sizeof(dma_addr_t) > 4) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32)); if (pci_enable_device_mem(pdev)) { ioc_err(mrioc, "pci_enable_device_mem: failed\n"); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 4e981ccaac41..69061545d9d2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2992,8 +2992,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) struct sysinfo s; u64 coherent_dma_mask, dma_mask; - if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || - dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) { + if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4) { ioc->dma_mask = 32; coherent_dma_mask = dma_mask = DMA_BIT_MASK(32); /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index cc6953809a24..8553277effb3 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1511,8 +1511,6 @@ static int inquiry_vpd_b0(unsigned char *arr) put_unaligned_be64(sdebug_write_same_length, &arr[32]); return 0x3c; /* Mandatory page length for Logical Block Provisioning */ - - return sizeof(vpdb0_data); } /* Block device characteristics VPD page (SBC-3) */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index a7960ad2d386..2aa2c2aee6e7 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -231,6 +231,11 @@ scsi_abort_command(struct scsi_cmnd *scmd) struct Scsi_Host *shost = sdev->host; unsigned long flags; + if (!shost->hostt->eh_abort_handler) { + /* No abort handler, fail command directly */ + return FAILED; + } + if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { /* * Retry after abort failed, escalate to next level. diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 13cfd3e317cc..b9b97300e3b3 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1677,6 +1677,13 @@ static const char *iscsi_session_state_name(int state) return name; } +static char *iscsi_session_target_state_name[] = { + [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", + [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", + [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", + [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", +}; + int iscsi_session_chkready(struct iscsi_cls_session *session) { int err; @@ -1786,9 +1793,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data) if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || - scan_data->id == id)) + scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_SCANNED; + spin_unlock_irqrestore(&session->lock, flags); + } } user_scan_exit: @@ -1961,31 +1972,41 @@ static void __iscsi_unbind_session(struct work_struct *work) struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; + bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); - if (session->target_id == ISCSI_MAX_TARGET) { + if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { + remove_target = false; + } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - goto unbind_session_exit; + ISCSI_DBG_TRANS_SESSION(session, + "Skipping target unbinding: Session is unbound/unbinding.\n"); + return; } + session->target_state = ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - scsi_remove_target(&session->dev); + if (remove_target) + scsi_remove_target(&session->dev); if (session->ida_used) ida_free(&iscsi_sess_ida, target_id); -unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); + + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_UNBOUND; + spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) @@ -2062,6 +2083,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) session->ida_used = true; } else session->target_id = target_id; + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; + spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); @@ -4370,6 +4394,19 @@ iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); static ssize_t +show_priv_session_target_state(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + + return sysfs_emit(buf, "%s\n", + iscsi_session_target_state_name[session->target_state]); +} + +static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, + show_priv_session_target_state, NULL); + +static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { @@ -4471,6 +4508,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, + &dev_attr_priv_sess_target_state.attr, &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, @@ -4584,6 +4622,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_state.attr) + return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index d7a84c0bfaeb..22705eb781b0 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1823,6 +1823,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { /* no more space */ ret = SCSI_MLQUEUE_DEVICE_BUSY; diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 66b316d173b0..71a3bb83984c 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -995,7 +995,7 @@ static int scsifront_suspend(struct xenbus_device *dev) return err; } -static int scsifront_remove(struct xenbus_device *dev) +static void scsifront_remove(struct xenbus_device *dev) { struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev); @@ -1011,8 +1011,6 @@ static int scsifront_remove(struct xenbus_device *dev) scsifront_free_ring(info); scsi_host_put(info->host); - - return 0; } static void scsifront_disconnect(struct vscsifrnt_info *info) diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c index 520b4cc69cdc..91db3c973167 100644 --- a/drivers/spi/spi-cadence-xspi.c +++ b/drivers/spi/spi-cadence-xspi.c @@ -177,7 +177,10 @@ #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_DCNT_H, \ ((op)->data.nbytes >> 16) & 0xffff) | \ - FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, (op)->dummy.nbytes * 8)) + FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, \ + (op)->dummy.buswidth != 0 ? \ + (((op)->dummy.nbytes * 8) / (op)->dummy.buswidth) : \ + 0)) #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op, chipsel) ( \ FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R4_BANK, chipsel) | \ diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 6de8360e5c2a..9eab6c20dbc5 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1253,6 +1253,11 @@ static int mtk_spi_probe(struct platform_device *pdev) dev_notice(dev, "SPI dma_set_mask(%d) failed, ret:%d\n", addr_bits, ret); + ret = devm_request_irq(dev, irq, mtk_spi_interrupt, + IRQF_TRIGGER_NONE, dev_name(dev), master); + if (ret) + return dev_err_probe(dev, ret, "failed to register irq\n"); + pm_runtime_enable(dev); ret = devm_spi_register_master(dev, master); @@ -1261,13 +1266,6 @@ static int mtk_spi_probe(struct platform_device *pdev) return dev_err_probe(dev, ret, "failed to register master\n"); } - ret = devm_request_irq(dev, irq, mtk_spi_interrupt, - IRQF_TRIGGER_NONE, dev_name(dev), master); - if (ret) { - pm_runtime_disable(dev); - return dev_err_probe(dev, ret, "failed to register irq\n"); - } - return 0; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 3cc7bb4d03de..15f174f4e056 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2310,7 +2310,7 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; - if (!of_property_read_u16(nc, "spi-cs-setup-ns", &cs_setup)) { + if (!of_property_read_u16(nc, "spi-cs-setup-delay-ns", &cs_setup)) { spi->cs_setup.value = cs_setup; spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS; } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 6313e7d0cdf8..1935ca613447 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -68,7 +68,7 @@ static_assert(N_SPI_MINORS > 0 && N_SPI_MINORS <= 256); struct spidev_data { dev_t devt; - spinlock_t spi_lock; + struct mutex spi_lock; struct spi_device *spi; struct list_head device_entry; @@ -95,9 +95,8 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) int status; struct spi_device *spi; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spidev->spi; - spin_unlock_irq(&spidev->spi_lock); if (spi == NULL) status = -ESHUTDOWN; @@ -107,6 +106,7 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message) if (status == 0) status = message->actual_length; + mutex_unlock(&spidev->spi_lock); return status; } @@ -359,12 +359,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) * we issue this ioctl. */ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* use the buffer lock here for triple duty: * - prevent I/O (from us) so calling spi_setup() is safe; @@ -508,6 +508,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -529,12 +530,12 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, * we issue this ioctl. */ spidev = filp->private_data; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spi = spi_dev_get(spidev->spi); - spin_unlock_irq(&spidev->spi_lock); - - if (spi == NULL) + if (spi == NULL) { + mutex_unlock(&spidev->spi_lock); return -ESHUTDOWN; + } /* SPI_IOC_MESSAGE needs the buffer locked "normally" */ mutex_lock(&spidev->buf_lock); @@ -561,6 +562,7 @@ spidev_compat_ioc_message(struct file *filp, unsigned int cmd, done: mutex_unlock(&spidev->buf_lock); spi_dev_put(spi); + mutex_unlock(&spidev->spi_lock); return retval; } @@ -601,7 +603,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->tx_buffer) { spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->tx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_find_dev; } @@ -610,7 +611,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->rx_buffer) { spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->rx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_alloc_rx_buf; } @@ -640,10 +640,10 @@ static int spidev_release(struct inode *inode, struct file *filp) spidev = filp->private_data; filp->private_data = NULL; - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); /* ... after we unbound from the underlying device? */ dofree = (spidev->spi == NULL); - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); /* last close? */ spidev->users--; @@ -776,7 +776,7 @@ static int spidev_probe(struct spi_device *spi) /* Initialize the driver data */ spidev->spi = spi; - spin_lock_init(&spidev->spi_lock); + mutex_init(&spidev->spi_lock); mutex_init(&spidev->buf_lock); INIT_LIST_HEAD(&spidev->device_entry); @@ -821,9 +821,9 @@ static void spidev_remove(struct spi_device *spi) /* prevent new opens */ mutex_lock(&device_list_lock); /* make sure ops on existing fds can abort cleanly */ - spin_lock_irq(&spidev->spi_lock); + mutex_lock(&spidev->spi_lock); spidev->spi = NULL; - spin_unlock_irq(&spidev->spi_lock); + mutex_unlock(&spidev->spi_lock); list_del(&spidev->device_entry); device_destroy(spidev_class, spidev->devt); diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7c23112dc923..5bddb2f5e931 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); static struct xencons_info *vtermno_to_xencons(int vtermno) { - struct xencons_info *entry, *n, *ret = NULL; + struct xencons_info *entry, *ret = NULL; + unsigned long flags; - if (list_empty(&xenconsoles)) - return NULL; + spin_lock_irqsave(&xencons_lock, flags); + if (list_empty(&xenconsoles)) { + spin_unlock_irqrestore(&xencons_lock, flags); + return NULL; + } - list_for_each_entry_safe(entry, n, &xenconsoles, list) { + list_for_each_entry(entry, &xenconsoles, list) { if (entry->vtermno == vtermno) { ret = entry; break; } } + spin_unlock_irqrestore(&xencons_lock, flags); return ret; } @@ -223,7 +228,7 @@ static int xen_hvm_console_init(void) { int r; uint64_t v = 0; - unsigned long gfn; + unsigned long gfn, flags; struct xencons_info *info; if (!xen_hvm_domain()) @@ -258,9 +263,9 @@ static int xen_hvm_console_init(void) goto err; info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; err: @@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) static int xen_pv_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_pv_domain()) return -ENODEV; @@ -299,9 +305,9 @@ static int xen_pv_console_init(void) /* already configured */ return 0; } - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); xencons_info_pv_init(info, HVC_COOKIE); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -309,6 +315,7 @@ static int xen_pv_console_init(void) static int xen_initial_domain_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_initial_domain()) return -ENODEV; @@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void) info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info) static int xen_console_remove(struct xencons_info *info) { + unsigned long flags; + xencons_disconnect_backend(info); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->xbdev != NULL) xencons_free(info); else { @@ -394,9 +403,9 @@ static int xen_console_remove(struct xencons_info *info) return 0; } -static int xencons_remove(struct xenbus_device *dev) +static void xencons_remove(struct xenbus_device *dev) { - return xen_console_remove(dev_get_drvdata(&dev->dev)); + xen_console_remove(dev_get_drvdata(&dev->dev)); } static int xencons_connect_backend(struct xenbus_device *dev, @@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, { int ret, devid; struct xencons_info *info; + unsigned long flags; devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; if (devid == 0) @@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, ret = xencons_connect_backend(dev, info); if (ret < 0) goto error; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; @@ -584,10 +594,12 @@ static int __init xen_hvc_init(void) info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); if (IS_ERR(info->hvc)) { + unsigned long flags; + r = PTR_ERR(info->hvc); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) unbind_from_irqhandler(info->irq, NULL); kfree(info); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index e18c9f4463ec..bda61be5f035 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6056,6 +6056,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba) } } +static void ufshcd_force_error_recovery(struct ufs_hba *hba) +{ + spin_lock_irq(hba->host->host_lock); + hba->force_reset = true; + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -9083,6 +9091,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto enable_scaling; } @@ -9094,6 +9111,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto set_dev_active; diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c index de1b09158318..46fdab940092 100644 --- a/drivers/usb/host/xen-hcd.c +++ b/drivers/usb/host/xen-hcd.c @@ -1530,15 +1530,13 @@ static void xenhcd_backend_changed(struct xenbus_device *dev, } } -static int xenhcd_remove(struct xenbus_device *dev) +static void xenhcd_remove(struct xenbus_device *dev) { struct xenhcd_info *info = dev_get_drvdata(&dev->dev); struct usb_hcd *hcd = xenhcd_info_to_hcd(info); xenhcd_destroy_rings(info); usb_put_hcd(hcd); - - return 0; } static int xenhcd_probe(struct xenbus_device *dev, diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c index 8752d389e382..d7f3e6281ce4 100644 --- a/drivers/video/fbdev/xen-fbfront.c +++ b/drivers/video/fbdev/xen-fbfront.c @@ -67,7 +67,7 @@ MODULE_PARM_DESC(video, "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); -static int xenfb_remove(struct xenbus_device *); +static void xenfb_remove(struct xenbus_device *); static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); @@ -523,7 +523,7 @@ static int xenfb_resume(struct xenbus_device *dev) return xenfb_connect_backend(dev, info); } -static int xenfb_remove(struct xenbus_device *dev) +static void xenfb_remove(struct xenbus_device *dev) { struct xenfb_info *info = dev_get_drvdata(&dev->dev); @@ -538,8 +538,6 @@ static int xenfb_remove(struct xenbus_device *dev) vfree(info->gfns); vfree(info->fb); kfree(info); - - return 0; } static unsigned long vmalloc_to_gfn(void *address) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 28b2a1fa25ab..0d4f8f4f4948 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -1181,9 +1181,8 @@ static void pvcalls_back_changed(struct xenbus_device *dev, } } -static int pvcalls_back_remove(struct xenbus_device *dev) +static void pvcalls_back_remove(struct xenbus_device *dev) { - return 0; } static int pvcalls_back_uevent(struct xenbus_device *xdev, diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 1826e8e67125..d5d589bda243 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -225,6 +225,8 @@ again: return IRQ_HANDLED; } +static void free_active_ring(struct sock_mapping *map); + static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, struct sock_mapping *map) { @@ -240,7 +242,7 @@ static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) gnttab_end_foreign_access(map->active.ring->ref[i], NULL); gnttab_end_foreign_access(map->active.ref, NULL); - free_page((unsigned long)map->active.ring); + free_active_ring(map); kfree(map); } @@ -1085,7 +1087,7 @@ static const struct xenbus_device_id pvcalls_front_ids[] = { { "" } }; -static int pvcalls_front_remove(struct xenbus_device *dev) +static void pvcalls_front_remove(struct xenbus_device *dev) { struct pvcalls_bedata *bedata; struct sock_mapping *map = NULL, *n; @@ -1121,7 +1123,6 @@ static int pvcalls_front_remove(struct xenbus_device *dev) kfree(bedata->ring.sring); kfree(bedata); xenbus_switch_state(dev, XenbusStateClosed); - return 0; } static int pvcalls_front_probe(struct xenbus_device *dev, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index d171091eec12..b11e401f1b1e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -716,14 +716,12 @@ out: return err; } -static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) +static void xen_pcibk_xenbus_remove(struct xenbus_device *dev) { struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev); if (pdev != NULL) free_pdev(pdev); - - return 0; } static const struct xenbus_device_id xen_pcibk_ids[] = { diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 6106ed93817d..954188b0b858 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1249,7 +1249,7 @@ static void scsiback_release_translation_entry(struct vscsibk_info *info) spin_unlock_irqrestore(&info->v2p_lock, flags); } -static int scsiback_remove(struct xenbus_device *dev) +static void scsiback_remove(struct xenbus_device *dev) { struct vscsibk_info *info = dev_get_drvdata(&dev->dev); @@ -1261,8 +1261,6 @@ static int scsiback_remove(struct xenbus_device *dev) gnttab_page_cache_shrink(&info->free_pages, 0); dev_set_drvdata(&dev->dev, NULL); - - return 0; } static int scsiback_probe(struct xenbus_device *dev, diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 7dcd59693a0c..d4ddb20d6732 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> static int afs_deliver_cb_init_call_back_state(struct afs_call *); static int afs_deliver_cb_init_call_back_state3(struct afs_call *); @@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call) * Abort a service call from within an action function. */ static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, - const char *why) + enum rxrpc_abort_reason why) { rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, error, why); @@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - afs_abort_service_call(call, 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative); afs_put_call(call); _leave(""); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index c62939e5ea1f..7817e2b860e5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -13,6 +13,8 @@ #include "internal.h" #include "afs_cm.h" #include "protocol_yfs.h" +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> struct workqueue_struct *afs_async_calls; @@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) error_do_abort: if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, - RX_USER_ABORT, ret, "KSD"); + RX_USER_ABORT, ret, + afs_abort_send_data_error); } else { len = 0; iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0); @@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call) case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KIV"); + abort_code, ret, + afs_abort_op_not_supported); goto local_abort; case -EIO: pr_err("kAFS: Call %u in bad state %u\n", @@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call) if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KUM"); + abort_code, ret, + afs_abort_unmarshal_error); goto local_abort; default: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KER"); + abort_code, ret, + afs_abort_general_error); goto local_abort; } } @@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call, /* Kill off the call if it's still live. */ _debug("call interrupted"); if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - RX_USER_ABORT, -EINTR, "KWI")) + RX_USER_ABORT, -EINTR, + afs_abort_interrupted)) afs_set_call_complete(call, -EINTR, 0); } } @@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); fallthrough; default: _leave(" [error]"); @@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, + afs_abort_oom); } _leave(" [error]"); } @@ -900,6 +909,7 @@ int afs_extract_data(struct afs_call *call, bool want_more) ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, &call->iov_len, want_more, &remote_abort, &call->service_id); + trace_afs_receive_data(call, call->iter, want_more, ret); if (ret == 0 || ret == -EAGAIN) return ret; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index de63572a9404..9a780fafc539 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm) * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 096e3520a0b1..a05eafcacfb2 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5db73c0f792a..cbc18b4a9cb2 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -278,6 +278,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) * ( for NTLMSSP_AV_NB_DOMAIN_NAME followed by NTLMSSP_AV_EOL ) + * unicode length of a netbios domain name */ + kfree_sensitive(ses->auth_key.response); ses->auth_key.len = size + 2 * dlen; ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d371259d6808..b2a04b4e89a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2606,11 +2606,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, smb2_query_server_interfaces); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (ses->server->dialect >= SMB30_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + /* schedule query interfaces poll */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 43ad1176dcb9..e20f8880363f 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1299,7 +1299,6 @@ static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, c * Resolve share's hostname and check if server address matches. Otherwise just ignore it * as we could not have upcall to resolve hostname or failed to convert ip address. */ - match = true; extract_unc_hostname(s1, &host, &hostlen); scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bd374feeccaa..a5a097a69983 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0b842a07e157..c47b254f0d1e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -815,6 +815,7 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, return -EINVAL; } if (tilen) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(bcc_ptr + tioffset, tilen, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1428,6 +1429,7 @@ sess_auth_kerberos(struct sess_data *sess_data) goto out_put_spnego_key; } + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50480751e521..4cb364454e13 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); - if (!rc) - move_cifs_info_to_smb2(&data->fi, &fi); *adjustTZ = true; } - if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) { + if (!rc) { int tmprc; int oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; + move_cifs_info_to_smb2(&data->fi, &fi); + + if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) + return 0; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; @@ -716,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) { - FILE_ALL_INFO *fi = buf; + struct cifs_open_info_data *data = buf; + FILE_ALL_INFO fi = {}; + int rc; if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, oparms->tcon, oparms->path, - oparms->disposition, - oparms->desired_access, - oparms->create_options, - &oparms->fid->netfid, oplock, fi, - oparms->cifs_sb->local_nls, - cifs_remap(oparms->cifs_sb)); - return CIFS_open(xid, oparms, oplock, fi); + rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, oplock, &fi, + oparms->cifs_sb->local_nls, + cifs_remap(oparms->cifs_sb)); + else + rc = CIFS_open(xid, oparms, oplock, &fi); + + if (!rc && data) + move_cifs_info_to_smb2(&data->fi, &fi); + + return rc; } static void @@ -1050,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - FILE_ALL_INFO *buf = NULL; + struct cifs_open_info_data buf = {}; struct cifs_io_parms io_parms; __u32 oplock = 0; struct cifs_fid fid; @@ -1082,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode, cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc) - goto out; + return rc; rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (rc == 0) d_instantiate(dentry, newinode); - goto out; + return rc; } /* @@ -1097,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode, * support block and char device (no socket & fifo) */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - goto out; + return rc; if (!S_ISCHR(mode) && !S_ISBLK(mode)) - goto out; + return rc; cifs_dbg(FYI, "sfu compat create special file\n"); - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - rc = -ENOMEM; - goto out; - } - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; @@ -1124,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode, oplock = REQ_OPLOCK; else oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); if (rc) - goto out; + return rc; /* * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)buf; + pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = buf; + iov[1].iov_base = &buf.fi; iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); @@ -1157,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_drop(dentry); /* FIXME: add code here to set EAs */ -out: - kfree(buf); + + cifs_free_open_info(&buf); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2c484d47c592..4b71f4a92f76 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1453,6 +1453,7 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) /* keep session key if binding */ if (!is_binding) { + kfree_sensitive(ses->auth_key.response); ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, GFP_KERNEL); if (!ses->auth_key.response) { @@ -1482,8 +1483,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) out_put_spnego_key: key_invalidate(spnego_key); key_put(spnego_key); - if (rc) + if (rc) { kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } out: sess_data->result = rc; sess_data->func = NULL; diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 2a39ffb8423b..6e61b5bc7d86 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); dn_len = le16_to_cpu(authblob->DomainName.Length); - if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || + nt_len < CIFS_ENCPWD_SIZE) return -EINVAL; /* TODO : use domain name that imported from configuration file */ diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 12be8386446a..fd0a288af299 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -316,9 +316,12 @@ int ksmbd_conn_handler_loop(void *p) /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, GFP_KERNEL); + conn->request_buf = kvmalloc(size, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); if (!conn->request_buf) - continue; + break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); if (!ksmbd_smb_request(conn)) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 14d7f3599c63..38fbda52e06f 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1928,13 +1928,13 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; -out_err1: rsp->StructureSize = cpu_to_le16(16); + inc_rfc1001_len(work->response_buf, 16); +out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; - inc_rfc1001_len(work->response_buf, 16); if (!IS_ERR(treename)) kfree(treename); @@ -1967,6 +1967,9 @@ out_err1: rsp->hdr.Status = STATUS_ACCESS_DENIED; } + if (status.ret != KSMBD_TREE_CONN_STATUS_OK) + smb2_set_err_rsp(work); + return rc; } diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 63d55f543bd2..4c6bd0b69979 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -295,6 +295,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; + int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -321,9 +322,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if (length == -ERESTARTSYS || length == -EAGAIN) { + } else if ((length == -ERESTARTSYS || length == -EAGAIN) && + max_retry) { usleep_range(1000, 2000); length = 0; + max_retry--; continue; } else if (length <= 0) { total_read = -EAGAIN; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 45b2c9e3f636..0ef070349014 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1147,8 +1147,7 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1158,20 +1157,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1207,7 +1209,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1228,28 +1230,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bd880d55f565..9b81d012666e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2607,12 +2607,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2734,7 +2733,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b2ee535ade8..4809ae0f0138 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ebb4d02a42ce..97edb32be77f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3f..9744443c3965 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c852ae8eaf37..8f9c82d9e075 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4c16c8c31fcb..35f574421670 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4666,7 +4666,12 @@ xfs_btree_space_to_height( const unsigned int *limits, unsigned long long leaf_blocks) { - unsigned long long node_blocks = limits[1]; + /* + * The root btree block can have fewer than minrecs pointers in it + * because the tree might not be big enough to require that amount of + * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. + */ + unsigned long long node_blocks = 2; unsigned long long blocks_left = leaf_blocks - 1; unsigned int height = 1; diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index ad22a003f959..f3d328e4a440 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -236,6 +236,7 @@ xfs_extent_busy_update_extent( * */ busyp->bno = fend; + busyp->length = bend - fend; } else if (bbno < fbno) { /* * Case 8: diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f35e2cee5265..ddeaccc04aec 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1853,12 +1853,20 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + unsigned int nofs_flag; WRITE_ONCE(gc->items, 0); if (!node) return; + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + ip = llist_entry(node, struct xfs_inode, i_gclist); trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); @@ -1867,6 +1875,8 @@ xfs_inodegc_worker( xfs_iflags_set(ip, XFS_INACTIVATING); xfs_inodegc_inactivate(ip); } + + memalloc_nofs_restore(nofs_flag); } /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 13f1b2add390..736510bc241b 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a7..fc1946f80a4a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -83,7 +83,7 @@ xfs_iomap_valid( return true; } -const struct iomap_page_ops xfs_iomap_page_ops = { +static const struct iomap_page_ops xfs_iomap_page_ops = { .iomap_valid = xfs_iomap_valid, }; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ff53d40a2dae..e2c542f6dcd4 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -68,7 +68,7 @@ restart: while (1) { struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; + int error; int i; mutex_lock(&qi->qi_tree_lock); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index fe46bce8cae6..5535778a98f9 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole( goto convert; } - ASSERT(cmap->br_startoff > imap->br_startoff); - /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index cd3b75e08ec3..e44be31115a6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -230,7 +230,8 @@ struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; - u32 reserved:29; + u32 backlight:1; + u32 reserved:28; }; struct acpi_device_pnp { diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index cd6d8f260eab..71916de7c6c4 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -60,7 +60,7 @@ struct arch_timer_cpu { bool enabled; }; -int kvm_timer_hyp_init(bool); +int __init kvm_timer_hyp_init(bool has_gic); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); @@ -104,4 +104,8 @@ void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, u32 timer_get_ctl(struct arch_timer_context *ctxt); u64 timer_get_cval(struct arch_timer_context *ctxt); +/* CPU HP callbacks */ +void kvm_timer_cpu_up(void); +void kvm_timer_cpu_down(void); + #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9270cd87da3f..0629e3532ad0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -432,4 +432,8 @@ int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); +/* CPU HP callbacks */ +void kvm_vgic_cpu_up(void); +void kvm_vgic_cpu_down(void); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -187,10 +187,6 @@ enum cpuhp_state { CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, - CPUHP_AP_KVM_STARTING, - CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - CPUHP_AP_KVM_ARM_VGIC_STARTING, - CPUHP_AP_KVM_ARM_TIMER_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, @@ -205,6 +201,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 9ec81290e3c8..bd5560542c79 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu); * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the gate DSO was being used. */ -extern Elf_Half elf_core_extra_phdrs(void); +extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); -extern size_t elf_core_extra_data_size(void); +extern size_t elf_core_extra_data_size(struct coredump_params *cprm); #else -static inline Elf_Half elf_core_extra_phdrs(void) +static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return 0; } @@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -static inline size_t elf_core_extra_data_size(void) +static inline size_t elf_core_extra_data_size(struct coredump_params *cprm) { return 0; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4f26b244f6d0..109b18e2789c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -956,8 +956,7 @@ static inline void kvm_irqfd_exit(void) { } #endif -int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - struct module *module); +int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); @@ -1423,9 +1422,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -int kvm_arch_init(void *opaque); -void kvm_arch_exit(void); - void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); @@ -1445,11 +1441,10 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_hardware_setup(void *opaque); -void kvm_arch_hardware_unsetup(void); -int kvm_arch_check_processor_compat(void *opaque); +#endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); @@ -2081,7 +2076,9 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool kvm_rebooting; +#endif extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..76ef2e4fde38 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -315,7 +315,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 25765556223a..a3f8cdca90c8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include <linux/bitops.h> -#include <linux/mtd/cfi.h> #include <linux/mtd/mtd.h> #include <linux/spi/spi-mem.h> diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h index 632320ec8f08..a48bb5240977 100644 --- a/include/linux/platform_data/x86/simatic-ipc.h +++ b/include/linux/platform_data/x86/simatic-ipc.h @@ -32,7 +32,8 @@ enum simatic_ipc_station_ids { SIMATIC_IPC_IPC477E = 0x00000A02, SIMATIC_IPC_IPC127E = 0x00000D01, SIMATIC_IPC_IPC227G = 0x00000F01, - SIMATIC_IPC_IPC427G = 0x00001001, + SIMATIC_IPC_IPCBX_39A = 0x00001001, + SIMATIC_IPC_IPCPX_39A = 0x00001002, }; static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 20c0ff54b7a0..7d68a5cc5881 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev * The loop below will unmap these fields if the log is larger than * one page, so save them here for reference: */ - count = READ_ONCE(event->count); - event_type = READ_ONCE(event->event_type); + count = event->count; + event_type = event->event_type; /* Verify that it's the log header */ if (event_header->pcr_idx != 0 || diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index d5a5ae926380..ba717eac0229 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -15,6 +15,7 @@ struct key; struct sock; struct socket; struct rxrpc_call; +enum rxrpc_abort_reason; enum rxrpc_interruptibility { RXRPC_INTERRUPTIBLE, /* Call is interruptible */ @@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, struct iov_iter *, size_t *, bool, u32 *, u16 *); bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, - u32, int, const char *); + u32, int, enum rxrpc_abort_reason); void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index cab52b0f11d0..34c03707fb6e 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -236,6 +236,14 @@ enum { ISCSI_SESSION_FREE, }; +enum { + ISCSI_SESSION_TARGET_UNBOUND, + ISCSI_SESSION_TARGET_ALLOCATED, + ISCSI_SESSION_TARGET_SCANNED, + ISCSI_SESSION_TARGET_UNBINDING, + ISCSI_SESSION_TARGET_MAX, +}; + #define ISCSI_MAX_TARGET -1 struct iscsi_cls_session { @@ -264,6 +272,7 @@ struct iscsi_cls_session { */ pid_t creator; int state; + int target_state; /* session target bind state */ int sid; /* session id */ void *dd_data; /* LLD private data */ struct device dev; /* sysfs transport/container device */ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 5f9dd7389536..283db0ea3db4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,7 +16,107 @@ /* * Declare tracing information enums and their string mappings for display. */ +#define rxrpc_abort_reasons \ + /* AFS errors */ \ + EM(afs_abort_general_error, "afs-error") \ + EM(afs_abort_interrupted, "afs-intr") \ + EM(afs_abort_oom, "afs-oom") \ + EM(afs_abort_op_not_supported, "afs-op-notsupp") \ + EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \ + EM(afs_abort_send_data_error, "afs-send-data") \ + EM(afs_abort_unmarshal_error, "afs-unmarshal") \ + /* rxperf errors */ \ + EM(rxperf_abort_general_error, "rxperf-error") \ + EM(rxperf_abort_oom, "rxperf-oom") \ + EM(rxperf_abort_op_not_supported, "rxperf-op-notsupp") \ + EM(rxperf_abort_unmarshal_error, "rxperf-unmarshal") \ + /* RxKAD security errors */ \ + EM(rxkad_abort_1_short_check, "rxkad1-short-check") \ + EM(rxkad_abort_1_short_data, "rxkad1-short-data") \ + EM(rxkad_abort_1_short_encdata, "rxkad1-short-encdata") \ + EM(rxkad_abort_1_short_header, "rxkad1-short-hdr") \ + EM(rxkad_abort_2_short_check, "rxkad2-short-check") \ + EM(rxkad_abort_2_short_data, "rxkad2-short-data") \ + EM(rxkad_abort_2_short_header, "rxkad2-short-hdr") \ + EM(rxkad_abort_2_short_len, "rxkad2-short-len") \ + EM(rxkad_abort_bad_checksum, "rxkad2-bad-cksum") \ + EM(rxkad_abort_chall_key_expired, "rxkad-chall-key-exp") \ + EM(rxkad_abort_chall_level, "rxkad-chall-level") \ + EM(rxkad_abort_chall_no_key, "rxkad-chall-nokey") \ + EM(rxkad_abort_chall_short, "rxkad-chall-short") \ + EM(rxkad_abort_chall_version, "rxkad-chall-version") \ + EM(rxkad_abort_resp_bad_callid, "rxkad-resp-bad-callid") \ + EM(rxkad_abort_resp_bad_checksum, "rxkad-resp-bad-cksum") \ + EM(rxkad_abort_resp_bad_param, "rxkad-resp-bad-param") \ + EM(rxkad_abort_resp_call_ctr, "rxkad-resp-call-ctr") \ + EM(rxkad_abort_resp_call_state, "rxkad-resp-call-state") \ + EM(rxkad_abort_resp_key_expired, "rxkad-resp-key-exp") \ + EM(rxkad_abort_resp_key_rejected, "rxkad-resp-key-rej") \ + EM(rxkad_abort_resp_level, "rxkad-resp-level") \ + EM(rxkad_abort_resp_nokey, "rxkad-resp-nokey") \ + EM(rxkad_abort_resp_ooseq, "rxkad-resp-ooseq") \ + EM(rxkad_abort_resp_short, "rxkad-resp-short") \ + EM(rxkad_abort_resp_short_tkt, "rxkad-resp-short-tkt") \ + EM(rxkad_abort_resp_tkt_aname, "rxkad-resp-tk-aname") \ + EM(rxkad_abort_resp_tkt_expired, "rxkad-resp-tk-exp") \ + EM(rxkad_abort_resp_tkt_future, "rxkad-resp-tk-future") \ + EM(rxkad_abort_resp_tkt_inst, "rxkad-resp-tk-inst") \ + EM(rxkad_abort_resp_tkt_len, "rxkad-resp-tk-len") \ + EM(rxkad_abort_resp_tkt_realm, "rxkad-resp-tk-realm") \ + EM(rxkad_abort_resp_tkt_short, "rxkad-resp-tk-short") \ + EM(rxkad_abort_resp_tkt_sinst, "rxkad-resp-tk-sinst") \ + EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \ + EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \ + EM(rxkad_abort_resp_version, "rxkad-resp-version") \ + /* rxrpc errors */ \ + EM(rxrpc_abort_call_improper_term, "call-improper-term") \ + EM(rxrpc_abort_call_reset, "call-reset") \ + EM(rxrpc_abort_call_sendmsg, "call-sendmsg") \ + EM(rxrpc_abort_call_sock_release, "call-sock-rel") \ + EM(rxrpc_abort_call_sock_release_tba, "call-sock-rel-tba") \ + EM(rxrpc_abort_call_timeout, "call-timeout") \ + EM(rxrpc_abort_no_service_key, "no-serv-key") \ + EM(rxrpc_abort_nomem, "nomem") \ + EM(rxrpc_abort_service_not_offered, "serv-not-offered") \ + EM(rxrpc_abort_shut_down, "shut-down") \ + EM(rxrpc_abort_unsupported_security, "unsup-sec") \ + EM(rxrpc_badmsg_bad_abort, "bad-abort") \ + EM(rxrpc_badmsg_bad_jumbo, "bad-jumbo") \ + EM(rxrpc_badmsg_short_ack, "short-ack") \ + EM(rxrpc_badmsg_short_ack_info, "short-ack-info") \ + EM(rxrpc_badmsg_short_hdr, "short-hdr") \ + EM(rxrpc_badmsg_unsupported_packet, "unsup-pkt") \ + EM(rxrpc_badmsg_zero_call, "zero-call") \ + EM(rxrpc_badmsg_zero_seq, "zero-seq") \ + EM(rxrpc_badmsg_zero_service, "zero-service") \ + EM(rxrpc_eproto_ackr_outside_window, "ackr-out-win") \ + EM(rxrpc_eproto_ackr_sack_overflow, "ackr-sack-over") \ + EM(rxrpc_eproto_ackr_short_sack, "ackr-short-sack") \ + EM(rxrpc_eproto_ackr_zero, "ackr-zero") \ + EM(rxrpc_eproto_bad_upgrade, "bad-upgrade") \ + EM(rxrpc_eproto_data_after_last, "data-after-last") \ + EM(rxrpc_eproto_different_last, "diff-last") \ + EM(rxrpc_eproto_early_reply, "early-reply") \ + EM(rxrpc_eproto_improper_term, "improper-term") \ + EM(rxrpc_eproto_no_client_call, "no-cl-call") \ + EM(rxrpc_eproto_no_client_conn, "no-cl-conn") \ + EM(rxrpc_eproto_no_service_call, "no-sv-call") \ + EM(rxrpc_eproto_reupgrade, "re-upgrade") \ + EM(rxrpc_eproto_rxnull_challenge, "rxnull-chall") \ + EM(rxrpc_eproto_rxnull_response, "rxnull-resp") \ + EM(rxrpc_eproto_tx_rot_last, "tx-rot-last") \ + EM(rxrpc_eproto_unexpected_ack, "unex-ack") \ + EM(rxrpc_eproto_unexpected_ackall, "unex-ackall") \ + EM(rxrpc_eproto_unexpected_implicit_end, "unex-impl-end") \ + EM(rxrpc_eproto_unexpected_reply, "unex-reply") \ + EM(rxrpc_eproto_wrong_security, "wrong-sec") \ + EM(rxrpc_recvmsg_excess_data, "recvmsg-excess") \ + EM(rxrpc_recvmsg_short_data, "recvmsg-short") \ + E_(rxrpc_sendmsg_late_send, "sendmsg-late") + #define rxrpc_call_poke_traces \ + EM(rxrpc_call_poke_abort, "Abort") \ + EM(rxrpc_call_poke_complete, "Compl") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_start, "Start") \ @@ -26,6 +126,7 @@ #define rxrpc_skb_traces \ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ @@ -35,6 +136,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ @@ -76,7 +178,6 @@ #define rxrpc_peer_traces \ EM(rxrpc_peer_free, "FREE ") \ EM(rxrpc_peer_get_accept, "GET accept ") \ - EM(rxrpc_peer_get_activate_call, "GET act-call") \ EM(rxrpc_peer_get_bundle, "GET bundle ") \ EM(rxrpc_peer_get_client_conn, "GET cln-conn") \ EM(rxrpc_peer_get_input, "GET input ") \ @@ -89,7 +190,6 @@ EM(rxrpc_peer_put_bundle, "PUT bundle ") \ EM(rxrpc_peer_put_call, "PUT call ") \ EM(rxrpc_peer_put_conn, "PUT conn ") \ - EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \ EM(rxrpc_peer_put_input, "PUT input ") \ EM(rxrpc_peer_put_input_error, "PUT inpt-err") \ E_(rxrpc_peer_put_keepalive, "PUT keepaliv") @@ -99,6 +199,7 @@ EM(rxrpc_bundle_get_client_call, "GET clt-call") \ EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \ EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \ + EM(rxrpc_bundle_put_call, "PUT call ") \ EM(rxrpc_bundle_put_conn, "PUT conn ") \ EM(rxrpc_bundle_put_discard, "PUT discard ") \ E_(rxrpc_bundle_new, "NEW ") @@ -109,14 +210,14 @@ EM(rxrpc_conn_get_call_input, "GET inp-call") \ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ - EM(rxrpc_conn_get_poke, "GET poke ") \ + EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ EM(rxrpc_conn_new_service, "NEW service ") \ EM(rxrpc_conn_put_call, "PUT call ") \ EM(rxrpc_conn_put_call_input, "PUT inp-call") \ EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ - EM(rxrpc_conn_put_discard, "PUT discard ") \ EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ @@ -124,10 +225,10 @@ EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ EM(rxrpc_conn_put_unidle, "PUT unidle ") \ + EM(rxrpc_conn_put_work, "PUT work ") \ EM(rxrpc_conn_queue_challenge, "QUE chall ") \ EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ - EM(rxrpc_conn_queue_timer, "QUE timer ") \ EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ E_(rxrpc_conn_see_work, "SEE work ") @@ -138,16 +239,16 @@ EM(rxrpc_client_chan_activate, "ChActv") \ EM(rxrpc_client_chan_disconnect, "ChDisc") \ EM(rxrpc_client_chan_pass, "ChPass") \ - EM(rxrpc_client_chan_wait_failed, "ChWtFl") \ EM(rxrpc_client_cleanup, "Clean ") \ EM(rxrpc_client_discard, "Discar") \ - EM(rxrpc_client_duplicate, "Duplic") \ EM(rxrpc_client_exposed, "Expose") \ EM(rxrpc_client_replace, "Replac") \ + EM(rxrpc_client_queue_new_call, "Q-Call") \ EM(rxrpc_client_to_active, "->Actv") \ E_(rxrpc_client_to_idle, "->Idle") #define rxrpc_call_traces \ + EM(rxrpc_call_get_io_thread, "GET iothread") \ EM(rxrpc_call_get_input, "GET input ") \ EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \ EM(rxrpc_call_get_notify_socket, "GET notify ") \ @@ -160,6 +261,7 @@ EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \ EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \ EM(rxrpc_call_put_discard_error, "PUT disc-err") \ + EM(rxrpc_call_put_io_thread, "PUT iothread") \ EM(rxrpc_call_put_input, "PUT input ") \ EM(rxrpc_call_put_kernel, "PUT kernel ") \ EM(rxrpc_call_put_poke, "PUT poke ") \ @@ -169,10 +271,12 @@ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ EM(rxrpc_call_put_unnotify, "PUT unnotify") \ EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ + EM(rxrpc_call_put_userid, "PUT user-id ") \ EM(rxrpc_call_see_accept, "SEE accept ") \ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_release, "SEE release ") \ @@ -376,6 +480,7 @@ #define EM(a, b) a, #define E_(a, b) a +enum rxrpc_abort_reason { rxrpc_abort_reasons } __mode(byte); enum rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte); enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte); enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte); @@ -404,9 +509,13 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); */ #undef EM #undef E_ + +#ifndef RXRPC_TRACE_ONLY_DEFINE_ENUMS + #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +rxrpc_abort_reasons; rxrpc_bundle_traces; rxrpc_call_poke_traces; rxrpc_call_traces; @@ -657,14 +766,14 @@ TRACE_EVENT(rxrpc_rx_done, ); TRACE_EVENT(rxrpc_abort, - TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id, - rxrpc_seq_t seq, int abort_code, int error), + TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why, + u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error), TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error), TP_STRUCT__entry( __field(unsigned int, call_nr ) - __array(char, why, 4 ) + __field(enum rxrpc_abort_reason, why ) __field(u32, cid ) __field(u32, call_id ) __field(rxrpc_seq_t, seq ) @@ -673,8 +782,8 @@ TRACE_EVENT(rxrpc_abort, ), TP_fast_assign( - memcpy(__entry->why, why, 4); __entry->call_nr = call_nr; + __entry->why = why; __entry->cid = cid; __entry->call_id = call_id; __entry->abort_code = abort_code; @@ -685,7 +794,8 @@ TRACE_EVENT(rxrpc_abort, TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s", __entry->call_nr, __entry->cid, __entry->call_id, __entry->seq, - __entry->abort_code, __entry->error, __entry->why) + __entry->abort_code, __entry->error, + __print_symbolic(__entry->why, rxrpc_abort_reasons)) ); TRACE_EVENT(rxrpc_call_complete, @@ -1521,30 +1631,6 @@ TRACE_EVENT(rxrpc_improper_term, __entry->abort_code) ); -TRACE_EVENT(rxrpc_rx_eproto, - TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, - const char *why), - - TP_ARGS(call, serial, why), - - TP_STRUCT__entry( - __field(unsigned int, call ) - __field(rxrpc_serial_t, serial ) - __field(const char *, why ) - ), - - TP_fast_assign( - __entry->call = call ? call->debug_id : 0; - __entry->serial = serial; - __entry->why = why; - ), - - TP_printk("c=%08x EPROTO %08x %s", - __entry->call, - __entry->serial, - __entry->why) - ); - TRACE_EVENT(rxrpc_connect_call, TP_PROTO(struct rxrpc_call *call), @@ -1842,6 +1928,8 @@ TRACE_EVENT(rxrpc_call_poked, #undef EM #undef E_ + +#endif /* RXRPC_TRACE_ONLY_DEFINE_ENUMS */ #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 4c6a8fa5e7ed..68de6f4c4eee 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -434,6 +434,7 @@ typedef struct elf64_shdr { #define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ #define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ #define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ +#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ #define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ #define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 3511095c2702..42a40ad3fb62 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -58,7 +58,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -67,7 +67,7 @@ #define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index eaa932b99d8a..ad4fb4eab753 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -117,7 +117,7 @@ struct xenbus_driver { const struct xenbus_device_id *id); void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); - int (*remove)(struct xenbus_device *dev); + void (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); diff --git a/init/Kconfig b/init/Kconfig index 7e5c3ddc341d..0958846b005e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -894,13 +894,17 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-12 array-bounds globally. +# Currently, disable gcc-11,12 array-bounds globally. # We may want to target only particular configurations some day. +config GCC11_NO_ARRAY_BOUNDS + def_bool y + config GCC12_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS # diff --git a/init/Makefile b/init/Makefile index 8316c23bead2..26de459006c4 100644 --- a/init/Makefile +++ b/init/Makefile @@ -59,3 +59,4 @@ include/generated/utsversion.h: FORCE $(obj)/version-timestamp.o: include/generated/utsversion.h CFLAGS_version-timestamp.o := -include include/generated/utsversion.h +KASAN_SANITIZE_version-timestamp.o := n diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 2e04850a657b..882bd56b01ed 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -170,12 +170,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, xa_for_each(&ctx->personalities, index, cred) io_uring_show_cred(m, index, cred); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; + struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i]; struct io_kiocb *req; spin_lock(&hb->lock); @@ -183,8 +182,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->task)); spin_unlock(&hb->lock); + + if (!has_lock) + continue; + hlist_for_each_entry(req, &hbl->list, hash_node) + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, + task_work_pending(req->task)); } + if (has_lock) + mutex_unlock(&ctx->uring_lock); + seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 992dcd9f8c4c..411bb2d1acd4 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,7 +1230,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); - kfree(worker); + /* + * Only the worker continuation helper has worker allocated and + * hence needs freeing. + */ + if (cb->func == create_worker_cont) + kfree(worker); } } diff --git a/io_uring/poll.c b/io_uring/poll.c index ee7da6150ec4..32e5fc8365e6 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -223,21 +223,22 @@ enum { IOU_POLL_DONE = 0, IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, + IOU_POLL_REISSUE = 3, }; /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. * - * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require, - * which is either spurious wakeup or multishot CQE is served. - * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res. - * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result - * is stored in req->cqe. + * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action + * require, which is either spurious wakeup or multishot CQE is served. + * IOU_POLL_DONE when it's done with the request, then the mask is stored in + * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot + * poll and that the result is stored in req->cqe. */ static int io_poll_check_events(struct io_kiocb *req, bool *locked) { - int v, ret; + int v; /* req->task == current here, checking PF_EXITING is safe */ if (unlikely(req->task->flags & PF_EXITING)) @@ -276,10 +277,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!req->cqe.res) { struct poll_table_struct pt = { ._key = req->apoll_events }; req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + /* + * We got woken with a mask, but someone else got to + * it first. The above vfs_poll() doesn't add us back + * to the waitqueue, so if we get nothing back, we + * should be safe and attempt a reissue. + */ + if (unlikely(!req->cqe.res)) + return IOU_POLL_REISSUE; } - - if ((unlikely(!req->cqe.res))) - continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; @@ -294,7 +300,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - ret = io_poll_issue(req, locked); + int ret = io_poll_issue(req, locked); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; if (ret < 0) @@ -330,6 +336,9 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) poll = io_kiocb_to_cmd(req, struct io_poll); req->cqe.res = mangle_poll(req->cqe.res & poll->events); + } else if (ret == IOU_POLL_REISSUE) { + io_req_task_submit(req, locked); + return; } else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) { req->cqe.res = ret; req_set_fail(req); @@ -342,7 +351,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_REMOVE_POLL_USE_RES) io_req_task_complete(req, locked); - else if (ret == IOU_POLL_DONE) + else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE) io_req_task_submit(req, locked); else io_req_defer_failed(req, ret); @@ -533,6 +542,14 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req, return pt->owning || io_poll_get_ownership(req); } +static void io_poll_add_hash(struct io_kiocb *req) +{ + if (req->flags & REQ_F_HASH_LOCKED) + io_poll_req_insert_locked(req); + else + io_poll_req_insert(req); +} + /* * Returns 0 when it's handed over for polling. The caller owns the requests if * it returns non-zero, but otherwise should not touch it. Negative values @@ -591,18 +608,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (mask && ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { - if (!io_poll_can_finish_inline(req, ipt)) + if (!io_poll_can_finish_inline(req, ipt)) { + io_poll_add_hash(req); return 0; + } io_poll_remove_entries(req); ipt->result_mask = mask; /* no one else has access to the req, forget about the ref */ return 1; } - if (req->flags & REQ_F_HASH_LOCKED) - io_poll_req_insert_locked(req); - else - io_poll_req_insert(req); + io_poll_add_hash(req); if (mask && (poll->events & EPOLLET) && io_poll_can_finish_inline(req, ipt)) { diff --git a/io_uring/rw.c b/io_uring/rw.c index 8227af2e1c0f..9c3ddd46a1ad 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1062,7 +1062,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - io_fill_cqe_req(req->ctx, req); + if (unlikely(!__io_fill_cqe_req(ctx, req))) { + spin_lock(&ctx->completion_lock); + io_req_cqe_overflow(req); + spin_unlock(&ctx->completion_lock); + } } if (unlikely(!nr_events)) diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index f35d9cc1aab1..bfbc12da3326 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -157,14 +157,11 @@ static void test_kallsyms_compression_ratio(void) static int lookup_name(void *data, const char *name, struct module *mod, unsigned long addr) { u64 t0, t1, t; - unsigned long flags; struct test_stat *stat = (struct test_stat *)data; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); (void)kallsyms_lookup_name(name); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); t = t1 - t0; if (t < stat->min) @@ -234,18 +231,15 @@ static int find_symbol(void *data, const char *name, struct module *mod, unsigne static void test_perf_kallsyms_on_each_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; stat.perf = 1; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_symbol(find_symbol, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_symbol() traverse all: %lld ns\n", t1 - t0); } @@ -270,17 +264,14 @@ static int match_symbol(void *data, unsigned long addr) static void test_perf_kallsyms_on_each_match_symbol(void) { u64 t0, t1; - unsigned long flags; struct test_stat stat; memset(&stat, 0, sizeof(stat)); stat.max = INT_MAX; stat.name = stub_name; - local_irq_save(flags); - t0 = sched_clock(); + t0 = ktime_get_ns(); kallsyms_on_each_match_symbol(match_symbol, stat.name, &stat); - t1 = sched_clock(); - local_irq_restore(flags); + t1 = ktime_get_ns(); pr_info("kallsyms_on_each_match_symbol() traverse all: %lld ns\n", t1 - t0); } diff --git a/kernel/kcsan/kcsan_test.c b/kernel/kcsan/kcsan_test.c index dcec1b743c69..a60c561724be 100644 --- a/kernel/kcsan/kcsan_test.c +++ b/kernel/kcsan/kcsan_test.c @@ -159,7 +159,7 @@ static bool __report_matches(const struct expect_report *r) const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; unsigned long flags; - typeof(observed.lines) expect; + typeof(*observed.lines) *expect; const char *end; char *cur; int i; @@ -168,6 +168,10 @@ static bool __report_matches(const struct expect_report *r) if (!report_available()) return false; + expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); + if (WARN_ON(!expect)) + return false; + /* Generate expected report contents. */ /* Title */ @@ -253,6 +257,7 @@ static bool __report_matches(const struct expect_report *r) strstr(observed.lines[2], expect[1]))); out: spin_unlock_irqrestore(&observed.lock, flags); + kfree(expect); return ret; } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..bb1ee6d7bdde 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2604,27 +2604,71 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) .user_mask = NULL, .flags = SCA_USER, /* clear the user requested mask */ }; + union cpumask_rcuhead { + cpumask_t cpumask; + struct rcu_head rcu; + }; __do_set_cpus_allowed(p, &ac); - kfree(ac.user_mask); + + /* + * Because this is called with p->pi_lock held, it is not possible + * to use kfree() here (when PREEMPT_RT=y), therefore punt to using + * kfree_rcu(). + */ + kfree_rcu((union cpumask_rcuhead *)ac.user_mask, rcu); +} + +static cpumask_t *alloc_user_cpus_ptr(int node) +{ + /* + * See do_set_cpus_allowed() above for the rcu_head usage. + */ + int size = max_t(int, cpumask_size(), sizeof(struct rcu_head)); + + return kmalloc_node(size, GFP_KERNEL, node); } int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { + cpumask_t *user_mask; unsigned long flags; - if (!src->user_cpus_ptr) + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = alloc_user_cpus_ptr(node); + if (!user_mask) return -ENOMEM; - /* Use pi_lock to protect content of user_cpus_ptr */ + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ raw_spin_lock_irqsave(&src->pi_lock, flags); - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } @@ -3581,6 +3625,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) return false; } +static inline cpumask_t *alloc_user_cpus_ptr(int node) +{ + return NULL; +} + #endif /* !CONFIG_SMP */ static void @@ -5504,7 +5553,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); @@ -8239,8 +8290,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (retval) goto out_put_task; - user_mask = kmalloc(cpumask_size(), GFP_KERNEL); - if (!user_mask) { + user_mask = alloc_user_cpus_ptr(NUMA_NO_NODE); + if (IS_ENABLED(CONFIG_SMP) && !user_mask) { retval = -ENOMEM; goto out_put_task; } diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 475ecceda768..5e2c2c26b3cc 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -18,7 +18,7 @@ #include "tick-internal.h" /** - * tick_program_event + * tick_program_event - program the CPU local timer device for the next event */ int tick_program_event(ktime_t expires, int force) { @@ -99,7 +99,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) } /** - * tick_check_oneshot_mode - check whether the system is in oneshot mode + * tick_oneshot_mode_active - check whether the system is in oneshot mode * * returns 1 when either nohz or highres are enabled. otherwise 0. */ diff --git a/kernel/time/time.c b/kernel/time/time.c index 526257b3727c..f4198af60fee 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -462,7 +462,7 @@ struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec) EXPORT_SYMBOL(ns_to_kernel_old_timeval); /** - * set_normalized_timespec - set timespec sec and nsec parts and normalize + * set_normalized_timespec64 - set timespec sec and nsec parts and normalize * * @ts: pointer to timespec variable to be set * @sec: seconds to set @@ -526,7 +526,7 @@ struct timespec64 ns_to_timespec64(s64 nsec) EXPORT_SYMBOL(ns_to_timespec64); /** - * msecs_to_jiffies: - convert milliseconds to jiffies + * __msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds * * conversion is done as follows: @@ -541,12 +541,12 @@ EXPORT_SYMBOL(ns_to_timespec64); * handling any 32-bit overflows. * for the details see __msecs_to_jiffies() * - * msecs_to_jiffies() checks for the passed in value being a constant + * __msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the * code, __msecs_to_jiffies() is called if the value passed does not * allow constant folding and the actual conversion must be done at * runtime. - * the _msecs_to_jiffies helpers are the HZ dependent conversion + * The _msecs_to_jiffies helpers are the HZ dependent conversion * routines found in include/linux/jiffies.h */ unsigned long __msecs_to_jiffies(const unsigned int m) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f72b9f1de178..5579ead449f2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1590,10 +1590,10 @@ void __weak read_persistent_clock64(struct timespec64 *ts) /** * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset * from the boot. + * @wall_time: current time as returned by persistent clock + * @boot_offset: offset that is defined as wall_time - boot_time * * Weak dummy function for arches that do not yet support it. - * @wall_time: - current time as returned by persistent clock - * @boot_offset: - offset that is defined as wall_time - boot_time * * The default function calculates offset based on the current value of * local_clock(). This way architectures that support sched_clock() but don't @@ -1701,7 +1701,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) -/** +/* * We have three kinds of time sources to use for sleep time * injection, the preference order is: * 1) non-stop clocksource @@ -1722,7 +1722,7 @@ bool timekeeping_rtc_skipresume(void) return !suspend_timing_needed; } -/** +/* * 1) can be determined whether to use or not only when doing * timekeeping_resume() which is invoked after rtc_suspend(), * so we can't skip rtc_suspend() surely if system has 1). diff --git a/lib/lockref.c b/lib/lockref.c index 45e93ece8ba0..2afe4c5d8919 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -23,7 +23,6 @@ } \ if (!--retry) \ break; \ - cpu_relax(); \ } \ } while (0) diff --git a/mm/memblock.c b/mm/memblock.c index d036c7861310..685e30e6d27c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,7 +1640,13 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), cursor, 0); + /* + * Reserved pages are always initialized by the end of + * memblock_free_all() (by memmap_init() and, if deferred + * initialization is enabled, memmap_init_reserved_pages()), so + * these pages can be released directly to the buddy allocator. + */ + __free_pages_core(pfn_to_page(cursor), 0); totalram_pages_inc(); } } diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 9630b1275557..82c7005ede65 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -305,13 +305,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) kfree(priv); } -static int xen_9pfs_front_remove(struct xenbus_device *dev) +static void xen_9pfs_front_remove(struct xenbus_device *dev) { struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev); dev_set_drvdata(&dev->dev, NULL); xen_9pfs_front_free(priv); - return 0; } static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, diff --git a/net/core/gro.c b/net/core/gro.c index fd8c6a7e8d3e..506f83d715f8 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -505,8 +505,9 @@ found_ptype: NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) + /* Only support TCP and non DODGY users. */ + if (!skb_is_gso_tcp(skb) || + (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a06a9f847db5..ada087b50541 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -505,6 +505,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index e76d3459d78e..ac5caf5a48e1 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -10,6 +10,7 @@ rxrpc-y := \ call_accept.o \ call_event.o \ call_object.o \ + call_state.o \ conn_client.o \ conn_event.o \ conn_object.o \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7ea576f6ba4b..ebbd4a1c3f86 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) if (service_id) { write_lock(&local->services_lock); - if (rcu_access_pointer(local->service)) + if (local->service) goto service_in_use; rx->local = local; - rcu_assign_pointer(local->service, rx); + local->service = rx; write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; @@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, mutex_unlock(&call->user_mutex); } - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p", call); return call; } @@ -374,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * @sock: The socket the call is on * @call: The call to check * - * Allow a kernel service to find out whether a call is still alive - - * ie. whether it has completed. + * Allow a kernel service to find out whether a call is still alive - whether + * it has completed successfully and all received data has been consumed. */ bool rxrpc_kernel_check_life(const struct socket *sock, const struct rxrpc_call *call) { - return call->state != RXRPC_CALL_COMPLETE; + if (!rxrpc_call_is_complete(call)) + return true; + if (call->completion != RXRPC_CALL_SUCCEEDED) + return false; + return !skb_queue_empty(&call->recvmsg_queue); } EXPORT_SYMBOL(rxrpc_kernel_check_life); @@ -872,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; - if (rx->local && rcu_access_pointer(rx->local->service) == rx) { + if (rx->local && rx->local->service == rx) { write_lock(&rx->local->services_lock); - rcu_assign_pointer(rx->local->service, NULL); + rx->local->service = NULL; write_unlock(&rx->local->services_lock); } @@ -957,16 +960,9 @@ static const struct net_proto_family rxrpc_family_ops = { static int __init af_rxrpc_init(void) { int ret = -1; - unsigned int tmp; BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb)); - get_random_bytes(&tmp, sizeof(tmp)); - tmp &= 0x3fffffff; - if (tmp == 0) - tmp = 1; - idr_set_cursor(&rxrpc_client_conn_ids, tmp); - ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, @@ -1062,7 +1058,6 @@ static void __exit af_rxrpc_exit(void) * are released. */ rcu_barrier(); - rxrpc_destroy_client_conn_ids(); destroy_workqueue(rxrpc_workqueue); rxrpc_exit_security(); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 18092526d3c8..433060cade03 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -38,6 +38,7 @@ struct rxrpc_txbuf; enum rxrpc_skb_mark { RXRPC_SKB_MARK_PACKET, /* Received packet */ RXRPC_SKB_MARK_ERROR, /* Error notification */ + RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; @@ -75,13 +76,7 @@ struct rxrpc_net { bool live; - bool kill_all_client_conns; atomic_t nr_client_conns; - spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ - struct mutex client_conn_discard_lock; /* Prevent multiple discarders */ - struct list_head idle_client_conns; - struct work_struct client_conn_reaper; - struct timer_list client_conn_reap_timer; struct hlist_head local_endpoints; struct mutex local_mutex; /* Lock for ->local_endpoints */ @@ -202,6 +197,7 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { + struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ u16 offset; /* Offset of data */ u16 len; /* Length of data */ u8 flags; @@ -262,13 +258,11 @@ struct rxrpc_security { /* respond to a challenge */ int (*respond_to_challenge)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* verify a response */ int (*verify_response)(struct rxrpc_connection *, - struct sk_buff *, - u32 *); + struct sk_buff *); /* clear connection security */ void (*clear)(struct rxrpc_connection *); @@ -283,22 +277,34 @@ struct rxrpc_local { struct rcu_head rcu; atomic_t active_users; /* Number of users of the local endpoint */ refcount_t ref; /* Number of references to the structure */ - struct rxrpc_net *rxnet; /* The network ns in which this resides */ + struct net *net; /* The network namespace */ + struct rxrpc_net *rxnet; /* Our bits in the network namespace */ struct hlist_node link; struct socket *socket; /* my UDP socket */ struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread started */ - struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ + struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ struct sk_buff_head rx_queue; /* Received packets */ + struct list_head conn_attend_q; /* Conns requiring immediate attention */ struct list_head call_attend_q; /* Calls requiring immediate attention */ + struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ + bool kill_all_client_conns; + struct list_head idle_client_conns; + struct timer_list client_conn_reap_timer; + unsigned long client_conn_flags; +#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */ + spinlock_t lock; /* access lock */ rwlock_t services_lock; /* lock for services list */ int debug_id; /* debug ID for printks */ bool dead; bool service_closed; /* Service socket closed */ + struct idr conn_ids; /* List of connection IDs */ + struct list_head new_client_calls; /* Newly created client calls need connection */ + spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ }; @@ -356,7 +362,6 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ - struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -365,10 +370,21 @@ struct rxrpc_conn_parameters { }; /* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + +/* * Bits in the connection flags. */ enum rxrpc_conn_flag { - RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ @@ -388,6 +404,7 @@ enum rxrpc_conn_flag { */ enum rxrpc_conn_event { RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ + RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */ }; /* @@ -395,13 +412,13 @@ enum rxrpc_conn_event { */ enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ + RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */ RXRPC_CONN_CLIENT, /* Client connection */ RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ - RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ - RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ + RXRPC_CONN_ABORTED, /* Conn aborted */ RXRPC_CONN__NR_STATES }; @@ -412,17 +429,16 @@ struct rxrpc_bundle { struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_peer *peer; /* Remote endpoint */ struct key *key; /* Security details */ + const struct rxrpc_security *security; /* applied security module */ refcount_t ref; atomic_t active; /* Number of active users */ unsigned int debug_id; u32 security_level; /* Security level selected */ u16 service_id; /* Service ID for this connection */ bool try_upgrade; /* True if the bundle is attempting upgrade */ - bool alloc_conn; /* True if someone's getting a conn */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ - short alloc_error; /* Error from last conn allocation */ - spinlock_t channel_lock; + unsigned short alloc_error; /* Error from last conn allocation */ struct rb_node local_node; /* Node in local->client_conns */ struct list_head waiting_calls; /* Calls waiting for channels */ unsigned long avail_chans; /* Mask of available channels */ @@ -440,6 +456,7 @@ struct rxrpc_connection { struct rxrpc_peer *peer; /* Remote endpoint */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ struct key *key; /* Security details */ + struct list_head attend_link; /* Link in local->conn_attend_q */ refcount_t ref; atomic_t active; /* Active count for service conns */ @@ -449,7 +466,7 @@ struct rxrpc_connection { unsigned char act_chans; /* Mask of active channels */ struct rxrpc_channel { unsigned long final_ack_at; /* Time at which to issue final ACK */ - struct rxrpc_call __rcu *call; /* Active call */ + struct rxrpc_call *call; /* Active call */ unsigned int call_debug_id; /* call->debug_id */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ @@ -470,6 +487,7 @@ struct rxrpc_connection { struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ + struct mutex security_lock; /* Lock for security management */ const struct rxrpc_security *security; /* applied security module */ union { struct { @@ -483,7 +501,8 @@ struct rxrpc_connection { unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_proto_state state; /* current state of connection */ - u32 abort_code; /* Abort code of connection abort */ + enum rxrpc_call_completion completion; /* Completion condition */ + s32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ @@ -527,7 +546,8 @@ enum rxrpc_call_flag { RXRPC_CALL_KERNEL, /* The call was made by the kernel */ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ - RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */ + RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ + RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ }; /* @@ -558,18 +578,6 @@ enum rxrpc_call_state { }; /* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - -/* * Call Tx congestion management modes. */ enum rxrpc_congest_mode { @@ -587,6 +595,7 @@ enum rxrpc_congest_mode { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_bundle *bundle; /* Connection bundle to use */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ @@ -609,7 +618,7 @@ struct rxrpc_call { struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ - struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ + struct list_head wait_link; /* Link in local->new_client_calls */ struct hlist_node error_link; /* link in error distribution list */ struct list_head accept_link; /* Link in rx->acceptq */ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ @@ -623,10 +632,13 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t notify_lock; /* Kernel notification lock */ - rwlock_t state_lock; /* lock for state transition */ - u32 abort_code; /* Local/remote abort code */ + unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */ + s32 send_abort; /* Abort code to be sent */ + short send_abort_err; /* Error to be associated with the abort */ + rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */ + s32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_state _state; /* Current state of call (needs barrier) */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; u8 security_ix; /* Security type */ @@ -812,9 +824,11 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); -int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *, - struct rxrpc_connection *, struct sockaddr_rxrpc *, - struct sk_buff *); +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb); void rxrpc_accept_incoming_calls(struct rxrpc_local *); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); @@ -834,7 +848,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long now, enum rxrpc_timer_trace why); -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* * call_object.c @@ -851,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); +void rxrpc_start_call_timer(struct rxrpc_call *call); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); @@ -873,32 +888,88 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) } /* + * call_state.c + */ +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error); +bool rxrpc_call_completed(struct rxrpc_call *call); +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why); +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error); + +static inline void rxrpc_set_call_state(struct rxrpc_call *call, + enum rxrpc_call_state state) +{ + /* Order write of completion info before write of ->state. */ + smp_store_release(&call->_state, state); + wake_up(&call->waitq); +} + +static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call) +{ + return call->_state; /* Only inside I/O thread */ +} + +static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} + +static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call) +{ + /* Order read ->state before read of completion info. */ + return smp_load_acquire(&call->_state); +} + +static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call) +{ + return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE; +} + +static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call) +{ + return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED; +} + +/* * conn_client.c */ extern unsigned int rxrpc_reap_client_connections; extern unsigned long rxrpc_conn_idle_client_expiry; extern unsigned long rxrpc_conn_idle_client_fast_expiry; -extern struct idr rxrpc_client_conn_ids; -void rxrpc_destroy_client_conn_ids(void); +void rxrpc_purge_client_connections(struct rxrpc_local *local); struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); -int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, - struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - gfp_t); +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp); +void rxrpc_connect_client_calls(struct rxrpc_local *local); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); -void rxrpc_discard_expired_client_conns(struct work_struct *); -void rxrpc_destroy_all_client_connections(struct rxrpc_net *); +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local); void rxrpc_clean_up_local_conns(struct rxrpc_local *); /* * conn_event.c */ +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb, + unsigned int channel); +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, enum rxrpc_abort_reason why); void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb); + +static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn) +{ + /* Order reading the abort info after the state check. */ + return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED; +} /* * conn_object.c @@ -906,6 +977,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why); struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, struct sockaddr_rxrpc *, @@ -961,12 +1033,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); */ int rxrpc_encap_rcv(struct sock *, struct sk_buff *); void rxrpc_error_report(struct sock *); +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err); int rxrpc_io_thread(void *data); static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) { wake_up_process(local->io_thread); } +static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO); +} + /* * insecure.c */ @@ -1048,6 +1127,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); +void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); @@ -1063,12 +1143,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *); */ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct sockaddr_rxrpc *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, gfp_t gfp); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, enum rxrpc_peer_trace); -void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); @@ -1086,33 +1165,22 @@ extern const struct seq_operations rxrpc_local_seq_ops; * recvmsg.c */ void rxrpc_notify_socket(struct rxrpc_call *); -bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); -bool __rxrpc_call_completed(struct rxrpc_call *); -bool rxrpc_call_completed(struct rxrpc_call *); -bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); -bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* * Abort a call due to a protocol error. */ -static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, - struct sk_buff *skb, - const char *eproto_why, - const char *why, - u32 abort_code) +static inline int rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + s32 abort_code, + enum rxrpc_abort_reason why) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); - return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); + rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why); + return -EPROTO; } -#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ - __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ - (abort_why), (abort_code)) - /* * rtt.c */ @@ -1144,6 +1212,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *, /* * sendmsg.c */ +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why); int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c02401656fa9..3e8689fdc437 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); - call->state = RXRPC_CALL_SERVER_PREALLOC; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), @@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_new_incoming_peer(rx, local, peer); + rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ @@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ -int rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_peer *peer, - struct rxrpc_connection *conn, - struct sockaddr_rxrpc *peer_srx, - struct sk_buff *skb) +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -339,18 +339,17 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _enter(""); - /* Don't set up a call for anything other than the first DATA packet. */ - if (sp->hdr.seq != 1 || - sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - return 0; /* Just discard */ + /* Don't set up a call for anything other than a DATA packet. */ + if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); - rcu_read_lock(); + read_lock(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. */ - rx = rcu_dereference(local->service); + rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { @@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) - goto reject; + goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, - sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_shut_down, + RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } @@ -402,7 +399,7 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); - rcu_read_unlock(); + read_unlock(&local->services_lock); if (hlist_unhashed(&call->error_link)) { spin_lock(&call->peer->lock); @@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local, _leave(" = %p{%d}", call, call->debug_id); rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; + return true; unsupported_service: - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->priority = RX_INVALID_OPERATION; - goto reject; + read_unlock(&local->services_lock); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EOPNOTSUPP); +unsupported_security: + read_unlock(&local->services_lock); + return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, + RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); -reject: - rcu_read_unlock(); + read_unlock(&local->services_lock); _leave(" = f [%u]", skb->mark); - return -EPROTO; + return false; discard: - rcu_read_unlock(); - return 0; + read_unlock(&local->services_lock); + return true; } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index b2cf448fb02c..1abdef15debc 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -251,6 +251,41 @@ out: _leave(""); } +/* + * Start transmitting the reply to a service. This cancels the need to ACK the + * request if we haven't yet done so. + */ +static void rxrpc_begin_service_reply(struct rxrpc_call *call) +{ + unsigned long now = jiffies; + + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY); + WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); +} + +/* + * Close the transmission phase. After this point there is no more data to be + * transmitted in the call. + */ +static void rxrpc_close_tx_phase(struct rxrpc_call *call) +{ + _debug("________awaiting reply/ACK__________"); + + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK); + break; + default: + break; + } +} + static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) { unsigned int winsize = min_t(unsigned int, call->tx_winsize, @@ -270,9 +305,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) { struct rxrpc_txbuf *txb; - if (rxrpc_is_client_call(call) && - !test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + if (list_empty(&call->tx_sendmsg)) + return; rxrpc_expose_client_call(call); + } while ((txb = list_first_entry_or_null(&call->tx_sendmsg, struct rxrpc_txbuf, call_link))) { @@ -283,6 +320,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) call->tx_top = txb->seq; list_add_tail(&txb->call_link, &call->tx_buffer); + if (txb->wire.flags & RXRPC_LAST_PACKET) + rxrpc_close_tx_phase(call); + rxrpc_transmit_one(call, txb); if (!rxrpc_tx_window_has_space(call)) @@ -292,16 +332,15 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) static void rxrpc_transmit_some_data(struct rxrpc_call *call) { - switch (call->state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: if (list_empty(&call->tx_sendmsg)) return; + rxrpc_begin_service_reply(call); fallthrough; case RXRPC_CALL_SERVER_SEND_REPLY: - case RXRPC_CALL_SERVER_AWAIT_ACK: case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_CLIENT_AWAIT_REPLY: if (!rxrpc_tx_window_has_space(call)) return; if (list_empty(&call->tx_sendmsg)) { @@ -331,21 +370,31 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) /* * Handle retransmission and deferred ACK/abort generation. */ -void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) +bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { unsigned long now, next, t; rxrpc_serial_t ackr_serial; bool resend = false, expired = false; + s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); //printk("\n--------------------\n"); _enter("{%d,%s,%lx}", - call->debug_id, rxrpc_call_states[call->state], call->events); + call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], + call->events); - if (call->state == RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) goto out; + /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ + abort_code = smp_load_acquire(&call->send_abort); + if (abort_code) { + rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err, + call->send_abort_why); + goto out; + } + if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) goto out; @@ -358,7 +407,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } t = READ_ONCE(call->expect_req_by); - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); expired = true; @@ -429,11 +478,12 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); - rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_reset); } else { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME, + rxrpc_abort_call_timeout); } - rxrpc_send_abort_packet(call); goto out; } @@ -441,7 +491,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY) + if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY) rxrpc_resend(call, NULL); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) @@ -453,7 +503,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_input_data); /* Make sure the timer is restarted */ - if (call->state != RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } @@ -474,9 +524,15 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } out: - if (call->state == RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) { del_timer_sync(&call->timer); + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); + } if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); _leave(""); + return true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 89dcf60b1158..3ded5a24627c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) struct rxrpc_local *local = call->local; bool busy; - if (call->state < RXRPC_CALL_COMPLETE) { + if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) { spin_lock_bh(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); @@ -69,7 +69,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t) _enter("%d", call->debug_id); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { trace_rxrpc_timer_expired(call, jiffies); rxrpc_poke_call(call, rxrpc_call_poke_timer); } @@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, timer_setup(&call->timer, rxrpc_call_timer_expired, 0); INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); - INIT_LIST_HEAD(&call->chan_wait_link); + INIT_LIST_HEAD(&call->wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); @@ -162,7 +162,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); spin_lock_init(&call->tx_lock); - rwlock_init(&call->state_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; @@ -211,7 +210,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; call->dest_srx = *srx; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; @@ -227,11 +225,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, ret = rxrpc_init_client_call_security(call); if (ret < 0) { - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); rxrpc_put_call(call, rxrpc_call_put_discard_error); return ERR_PTR(ret); } + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), p->user_call_ID, rxrpc_call_new_client); @@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, /* * Initiate the call ack/resend/expiry timer. */ -static void rxrpc_start_call_timer(struct rxrpc_call *call) +void rxrpc_start_call_timer(struct rxrpc_call *call) { unsigned long now = jiffies; unsigned long j = now + MAX_JIFFY_OFFSET; @@ -287,6 +287,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call) } /* + * Start the process of connecting a call. We obtain a peer and a connection + * bundle, but the actual association of a call with a connection is offloaded + * to the I/O thread to simplify locking. + */ +static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) +{ + struct rxrpc_local *local = call->local; + int ret = 0; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); + if (!call->peer) + goto error; + + ret = rxrpc_look_up_bundle(call, gfp); + if (ret < 0) + goto error; + + trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call); + rxrpc_get_call(call, rxrpc_call_get_io_thread); + spin_lock(&local->client_call_lock); + list_add_tail(&call->wait_link, &local->new_client_calls); + spin_unlock(&local->client_call_lock); + rxrpc_wake_up_io_thread(local); + return 0; + +error: + __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + return ret; +} + +/* * Set up a call for the given parameters. * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. @@ -365,14 +398,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. */ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); + ret = rxrpc_connect_call(call, gfp); if (ret < 0) goto error_attached_to_socket; - rxrpc_see_call(call, rxrpc_call_see_connected); - - rxrpc_start_call_timer(call); - _leave(" = %p [new]", call); return call; @@ -384,27 +413,23 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, -EEXIST); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, rxrpc_call_see_userid_exists); - rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); /* We got an error, but the call is attached to the socket and is in - * need of release. However, we might now race with recvmsg() when - * completing the call queues it. Return 0 from sys_sendmsg() and + * need of release. However, we might now race with recvmsg() when it + * completion notifies the socket. Return 0 from sys_sendmsg() and * leave the error to recvmsg() to deal with. */ error_attached_to_socket: trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, rxrpc_call_see_connect_failed); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); _leave(" = c=%08x [err]", call->debug_id); return call; } @@ -427,32 +452,32 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->call_id = sp->hdr.callNumber; call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; - call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; + __set_bit(RXRPC_CALL_EXPOSED, &call->flags); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - call->state = RXRPC_CALL_SERVER_SECURING; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); break; case RXRPC_CONN_SERVICE: - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; - case RXRPC_CONN_REMOTELY_ABORTED: - __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->abort_code, conn->error); - break; - case RXRPC_CONN_LOCALLY_ABORTED: - __rxrpc_abort_call("CON", call, 1, - conn->abort_code, conn->error); + case RXRPC_CONN_ABORTED: + rxrpc_set_call_completion(call, conn->completion, + conn->abort_code, conn->error); break; default: BUG(); } + rxrpc_get_call(call, rxrpc_call_get_io_thread); + /* Set the channel for this call. We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really @@ -462,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, chan = sp->hdr.cid & RXRPC_CHANNELMASK; conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); + conn->channels[chan].call = call; spin_unlock(&conn->state_lock); spin_lock(&conn->peer->lock); @@ -522,20 +547,17 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; - bool put = false; + bool put = false, putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), call->flags, rxrpc_call_see_release); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); rxrpc_put_call_slot(call); - del_timer_sync(&call->timer); /* Make sure we don't get any more notifications */ write_lock(&rx->recvmsg_lock); @@ -560,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - rxrpc_put_call(call, rxrpc_call_put_userid_exists); + putu = true; } list_del(&call->sock_link); @@ -568,10 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) - rxrpc_disconnect_call(call); - if (call->security) - call->security->free_call_crypto(call); + if (putu) + rxrpc_put_call(call, rxrpc_call_put_userid); + _leave(""); } @@ -588,7 +609,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release_tba); rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } @@ -596,8 +618,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_get_release_sock); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); - rxrpc_send_abort_packet(call); + rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, + rxrpc_abort_call_sock_release); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_release_sock); } @@ -620,7 +642,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) dead = __refcount_dec_and_test(&call->ref, &r); trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { spin_lock(&rxnet->call_lock); @@ -669,6 +691,8 @@ static void rxrpc_destroy_call(struct work_struct *work) rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); + rxrpc_deactivate_bundle(call->bundle); + rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call); rxrpc_put_peer(call->peer, rxrpc_peer_put_call); rxrpc_put_local(call->local, rxrpc_local_put_call); call_rcu(&call->rcu, rxrpc_rcu_free_call); @@ -681,7 +705,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) { memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); del_timer(&call->timer); @@ -719,7 +743,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); spin_unlock(&rxnet->call_lock); diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c new file mode 100644 index 000000000000..6afb54373ebb --- /dev/null +++ b/net/rxrpc/call_state.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Call state changing functions. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include "ar-internal.h" + +/* + * Transition a call to the complete state. + */ +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE) + return false; + + call->abort_code = abort_code; + call->error = error; + call->completion = compl; + /* Allow reader of completion state to operate locklessly */ + rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE); + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; +} + +/* + * Record that a call successfully completed. + */ +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +/* + * Record that a call is locally aborted. + */ +bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq, + u32 abort_code, int error, enum rxrpc_abort_reason why) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error)) + return false; + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_send_abort_packet(call); + return true; +} + +/* + * Record that a call errored out before even getting off the ground, thereby + * setting the state to allow it to be destroyed. + */ +void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl, + int error) +{ + call->abort_code = RX_CALL_DEAD; + call->error = error; + call->completion = compl; + call->_state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)); +} diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 87efa0373aed..981ca5b98bcb 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -34,104 +34,59 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900; __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; -/* - * We use machine-unique IDs for our client connections. - */ -DEFINE_IDR(rxrpc_client_conn_ids); -static DEFINE_SPINLOCK(rxrpc_conn_id_lock); - -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); - -/* - * Get a connection ID and epoch for a client connection from the global pool. - * The connection struct pointer is then recorded in the idr radix tree. The - * epoch doesn't change until the client is rebooted (or, at least, unless the - * module is unloaded). - */ -static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, - gfp_t gfp) +static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_net *rxnet = conn->rxnet; - int id; - - _enter(""); - - idr_preload(gfp); - spin_lock(&rxrpc_conn_id_lock); - - id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - - spin_unlock(&rxrpc_conn_id_lock); - idr_preload_end(); - - conn->proto.epoch = rxnet->epoch; - conn->proto.cid = id << RXRPC_CIDSHIFT; - set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x]", conn->proto.cid); - return 0; - -error: - spin_unlock(&rxrpc_conn_id_lock); - idr_preload_end(); - _leave(" = %d", id); - return id; + atomic_inc(&bundle->active); } /* - * Release a connection ID for a client connection from the global pool. + * Release a connection ID for a client connection. */ -static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn) +static void rxrpc_put_client_connection_id(struct rxrpc_local *local, + struct rxrpc_connection *conn) { - if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) { - spin_lock(&rxrpc_conn_id_lock); - idr_remove(&rxrpc_client_conn_ids, - conn->proto.cid >> RXRPC_CIDSHIFT); - spin_unlock(&rxrpc_conn_id_lock); - } + idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT); } /* * Destroy the client connection ID tree. */ -void rxrpc_destroy_client_conn_ids(void) +static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local) { struct rxrpc_connection *conn; int id; - if (!idr_is_empty(&rxrpc_client_conn_ids)) { - idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { + if (!idr_is_empty(&local->conn_ids)) { + idr_for_each_entry(&local->conn_ids, conn, id) { pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", conn, refcount_read(&conn->ref)); } BUG(); } - idr_destroy(&rxrpc_client_conn_ids); + idr_destroy(&local->conn_ids); } /* * Allocate a connection bundle. */ -static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, +static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_bundle *bundle; bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { - bundle->local = cp->local; - bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle); - bundle->key = cp->key; - bundle->exclusive = cp->exclusive; - bundle->upgrade = cp->upgrade; - bundle->service_id = cp->service_id; - bundle->security_level = cp->security_level; + bundle->local = call->local; + bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle); + bundle->key = key_get(call->key); + bundle->security = call->security; + bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); + bundle->service_id = call->dest_srx.srx_service; + bundle->security_level = call->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); - spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); } @@ -152,84 +107,87 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); + key_put(bundle->key); kfree(bundle); } void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { - unsigned int id = bundle->debug_id; + unsigned int id; bool dead; int r; - dead = __refcount_dec_and_test(&bundle->ref, &r); - trace_rxrpc_bundle(id, r - 1, why); - if (dead) - rxrpc_free_bundle(bundle); + if (bundle) { + id = bundle->debug_id; + dead = __refcount_dec_and_test(&bundle->ref, &r); + trace_rxrpc_bundle(id, r - 1, why); + if (dead) + rxrpc_free_bundle(bundle); + } +} + +/* + * Get rid of outstanding client connection preallocations when a local + * endpoint is destroyed. + */ +void rxrpc_purge_client_connections(struct rxrpc_local *local) +{ + rxrpc_destroy_client_conn_ids(local); } /* * Allocate a client connection. */ static struct rxrpc_connection * -rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) +rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = bundle->local->rxnet; - int ret; + struct rxrpc_local *local = bundle->local; + struct rxrpc_net *rxnet = local->rxnet; + int id; _enter(""); - conn = rxrpc_alloc_connection(rxnet, gfp); - if (!conn) { - _leave(" = -ENOMEM"); + conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN); + if (!conn) return ERR_PTR(-ENOMEM); + + id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000, + GFP_ATOMIC | __GFP_NOWARN); + if (id < 0) { + kfree(conn); + return ERR_PTR(id); } refcount_set(&conn->ref, 1); - conn->bundle = bundle; - conn->local = bundle->local; - conn->peer = bundle->peer; - conn->key = bundle->key; + conn->proto.cid = id << RXRPC_CIDSHIFT; + conn->proto.epoch = local->rxnet->epoch; + conn->out_clientflag = RXRPC_CLIENT_INITIATED; + conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); + conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn); + conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn); + conn->key = key_get(bundle->key); + conn->security = bundle->security; conn->exclusive = bundle->exclusive; conn->upgrade = bundle->upgrade; conn->orig_service_id = bundle->service_id; conn->security_level = bundle->security_level; - conn->out_clientflag = RXRPC_CLIENT_INITIATED; - conn->state = RXRPC_CONN_CLIENT; + conn->state = RXRPC_CONN_CLIENT_UNSECURED; conn->service_id = conn->orig_service_id; - ret = rxrpc_get_client_connection_id(conn, gfp); - if (ret < 0) - goto error_0; - - ret = rxrpc_init_client_conn_security(conn); - if (ret < 0) - goto error_1; + if (conn->security == &rxrpc_no_security) + conn->state = RXRPC_CONN_CLIENT; atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); - rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn); - rxrpc_get_local(conn->local, rxrpc_local_get_client_conn); - key_get(conn->key); - - trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), - rxrpc_conn_new_client); + rxrpc_see_connection(conn, rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); - _leave(" = %p", conn); return conn; - -error_1: - rxrpc_put_client_connection_id(conn); -error_0: - kfree(conn); - _leave(" = %d", ret); - return ERR_PTR(ret); } /* @@ -247,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; - if (conn->state != RXRPC_CONN_CLIENT || + if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED && + conn->state != RXRPC_CONN_CLIENT) || conn->proto.epoch != rxnet->epoch) goto mark_dont_reuse; @@ -257,7 +216,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) * times the maximum number of client conns away from the current * allocation point to try and keep the IDs concentrated. */ - id_cursor = idr_get_cursor(&rxrpc_client_conn_ids); + id_cursor = idr_get_cursor(&conn->local->conn_ids); id = conn->proto.cid >> RXRPC_CIDSHIFT; distance = id - id_cursor; if (distance < 0) @@ -278,20 +237,23 @@ dont_reuse: * Look up the conn bundle that matches the connection parameters, adding it if * it doesn't yet exist. */ -static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp, - gfp_t gfp) +int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp) { static atomic_t rxrpc_bundle_id; struct rxrpc_bundle *bundle, *candidate; - struct rxrpc_local *local = cp->local; + struct rxrpc_local *local = call->local; struct rb_node *p, **pp, *parent; long diff; + bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags); _enter("{%px,%x,%u,%u}", - cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade); + call->peer, key_serial(call->key), call->security_level, + upgrade); - if (cp->exclusive) - return rxrpc_alloc_bundle(cp, gfp); + if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) { + call->bundle = rxrpc_alloc_bundle(call, gfp); + return call->bundle ? 0 : -ENOMEM; + } /* First, see if the bundle is already there. */ _debug("search 1"); @@ -300,11 +262,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) p = p->rb_left; @@ -317,9 +279,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c _debug("not found"); /* It wasn't. We need to add one. */ - candidate = rxrpc_alloc_bundle(cp, gfp); + candidate = rxrpc_alloc_bundle(call, gfp); if (!candidate) - return NULL; + return -ENOMEM; _debug("search 2"); spin_lock(&local->client_bundles_lock); @@ -329,11 +291,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->X - (long)cp->X) - diff = (cmp(peer) ?: - cmp(key) ?: - cmp(security_level) ?: - cmp(upgrade)); +#define cmp(X, Y) ((long)(X) - (long)(Y)) + diff = (cmp(bundle->peer, call->peer) ?: + cmp(bundle->key, call->key) ?: + cmp(bundle->security_level, call->security_level) ?: + cmp(bundle->upgrade, upgrade)); #undef cmp if (diff < 0) pp = &(*pp)->rb_left; @@ -347,178 +309,89 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); - rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); + call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [new]", candidate->debug_id); - return candidate; + _leave(" = B=%u [new]", call->bundle->debug_id); + return 0; found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: - rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); - atomic_inc(&bundle->active); + call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); + rxrpc_activate_bundle(bundle); spin_unlock(&local->client_bundles_lock); - _leave(" = %u [found]", bundle->debug_id); - return bundle; -} - -/* - * Create or find a client bundle to use for a call. - * - * If we return with a connection, the call will be on its waiting list. It's - * left to the caller to assign a channel and wake up the call. - */ -static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) -{ - struct rxrpc_bundle *bundle; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); - if (!cp->peer) - goto error; - - call->tx_last_sent = ktime_get_real(); - call->cong_ssthresh = cp->peer->cong_ssthresh; - if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - else - call->cong_mode = RXRPC_CALL_SLOW_START; - if (cp->upgrade) - __set_bit(RXRPC_CALL_UPGRADE, &call->flags); - - /* Find the client connection bundle. */ - bundle = rxrpc_look_up_bundle(cp, gfp); - if (!bundle) - goto error; - - /* Get this call queued. Someone else may activate it whilst we're - * lining up a new connection, but that's fine. - */ - spin_lock(&bundle->channel_lock); - list_add_tail(&call->chan_wait_link, &bundle->waiting_calls); - spin_unlock(&bundle->channel_lock); - - _leave(" = [B=%x]", bundle->debug_id); - return bundle; - -error: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _leave(" = B=%u [found]", call->bundle->debug_id); + return 0; } /* * Allocate a new connection and add it into a bundle. */ -static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) - __releases(bundle->channel_lock) +static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, + unsigned int slot) { - struct rxrpc_connection *candidate = NULL, *old = NULL; - bool conflict; - int i; - - _enter(""); - - conflict = bundle->alloc_conn; - if (!conflict) - bundle->alloc_conn = true; - spin_unlock(&bundle->channel_lock); - if (conflict) { - _leave(" [conf]"); - return; - } - - candidate = rxrpc_alloc_client_connection(bundle, gfp); - - spin_lock(&bundle->channel_lock); - bundle->alloc_conn = false; - - if (IS_ERR(candidate)) { - bundle->alloc_error = PTR_ERR(candidate); - spin_unlock(&bundle->channel_lock); - _leave(" [err %ld]", PTR_ERR(candidate)); - return; - } - - bundle->alloc_error = 0; - - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { - unsigned int shift = i * RXRPC_MAXCALLS; - int j; - - old = bundle->conns[i]; - if (!rxrpc_may_reuse_conn(old)) { - if (old) - trace_rxrpc_client(old, -1, rxrpc_client_replace); - candidate->bundle_shift = shift; - atomic_inc(&bundle->active); - bundle->conns[i] = candidate; - for (j = 0; j < RXRPC_MAXCALLS; j++) - set_bit(shift + j, &bundle->avail_chans); - candidate = NULL; - break; - } + struct rxrpc_connection *conn, *old; + unsigned int shift = slot * RXRPC_MAXCALLS; + unsigned int i; - old = NULL; + old = bundle->conns[slot]; + if (old) { + bundle->conns[slot] = NULL; + trace_rxrpc_client(old, -1, rxrpc_client_replace); + rxrpc_put_connection(old, rxrpc_conn_put_noreuse); } - spin_unlock(&bundle->channel_lock); - - if (candidate) { - _debug("discard C=%x", candidate->debug_id); - trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate, rxrpc_conn_put_discard); + conn = rxrpc_alloc_client_connection(bundle); + if (IS_ERR(conn)) { + bundle->alloc_error = PTR_ERR(conn); + return false; } - rxrpc_put_connection(old, rxrpc_conn_put_noreuse); - _leave(""); + rxrpc_activate_bundle(bundle); + conn->bundle_shift = shift; + bundle->conns[slot] = conn; + for (i = 0; i < RXRPC_MAXCALLS; i++) + set_bit(shift + i, &bundle->avail_chans); + return true; } /* * Add a connection to a bundle if there are no usable connections or we have * connections waiting for extra capacity. */ -static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp) +static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle) { - struct rxrpc_call *call; - int i, usable; + int slot = -1, i, usable; _enter(""); - spin_lock(&bundle->channel_lock); + bundle->alloc_error = 0; /* See if there are any usable connections. */ usable = 0; - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { if (rxrpc_may_reuse_conn(bundle->conns[i])) usable++; - - if (!usable && !list_empty(&bundle->waiting_calls)) { - call = list_first_entry(&bundle->waiting_calls, - struct rxrpc_call, chan_wait_link); - if (test_bit(RXRPC_CALL_UPGRADE, &call->flags)) - bundle->try_upgrade = true; + else if (slot == -1) + slot = i; } + if (!usable && bundle->upgrade) + bundle->try_upgrade = true; + if (!usable) goto alloc_conn; if (!bundle->avail_chans && !bundle->try_upgrade && - !list_empty(&bundle->waiting_calls) && usable < ARRAY_SIZE(bundle->conns)) goto alloc_conn; - spin_unlock(&bundle->channel_lock); _leave(""); - return; + return usable; alloc_conn: - return rxrpc_add_conn_to_bundle(bundle, gfp); + return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false; } /* @@ -532,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, struct rxrpc_channel *chan = &conn->channels[channel]; struct rxrpc_bundle *bundle = conn->bundle; struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, - struct rxrpc_call, chan_wait_link); + struct rxrpc_call, wait_link); u32 call_id = chan->call_counter + 1; _enter("C=%x,%u", conn->debug_id, channel); + list_del_init(&call->wait_link); + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); /* Cancel the final ACK on the previous call if it hasn't been sent yet @@ -546,68 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); rxrpc_see_call(call, rxrpc_call_see_activate_client); - list_del_init(&call->chan_wait_link); - call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call); call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->dest_srx.srx_service = conn->service_id; - - trace_rxrpc_connect_call(call); - - write_lock(&call->state_lock); - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock(&call->state_lock); - - /* Paired with the read barrier in rxrpc_connect_call(). This orders - * cid and epoch in the connection wrt to call_id without the need to - * take the channel_lock. - * - * We provisionally assign a callNumber at this point, but we don't - * confirm it until the call is about to be exposed. - * - * TODO: Pair with a barrier in the data_ready handler when that looks - * at the call ID through a connection channel. - */ - smp_wmb(); + call->cong_ssthresh = call->peer->cong_ssthresh; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; - rcu_assign_pointer(chan->call, call); + chan->call = call; + + rxrpc_see_call(call, rxrpc_call_see_connected); + trace_rxrpc_connect_call(call); + call->tx_last_sent = ktime_get_real(); + rxrpc_start_call_timer(call); + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST); wake_up(&call->waitq); } /* * Remove a connection from the idle list if it's on it. */ -static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) +static void rxrpc_unidle_conn(struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = bundle->local->rxnet; - bool drop_ref; - if (!list_empty(&conn->cache_link)) { - drop_ref = false; - spin_lock(&rxnet->client_conn_cache_lock); - if (!list_empty(&conn->cache_link)) { - list_del_init(&conn->cache_link); - drop_ref = true; - } - spin_unlock(&rxnet->client_conn_cache_lock); - if (drop_ref) - rxrpc_put_connection(conn, rxrpc_conn_put_unidle); + list_del_init(&conn->cache_link); + rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } /* - * Assign channels and callNumbers to waiting calls with channel_lock - * held by caller. + * Assign channels and callNumbers to waiting calls. */ -static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) +static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) { struct rxrpc_connection *conn; unsigned long avail, mask; unsigned int channel, slot; + trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); + if (bundle->try_upgrade) mask = 1; else @@ -627,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) if (bundle->try_upgrade) set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); - rxrpc_unidle_conn(bundle, conn); + rxrpc_unidle_conn(conn); channel &= (RXRPC_MAXCALLS - 1); conn->act_chans |= 1 << channel; @@ -636,132 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) } /* - * Assign channels and callNumbers to waiting calls. - */ -static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) -{ - _enter("B=%x", bundle->debug_id); - - trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); - - if (!bundle->avail_chans) - return; - - spin_lock(&bundle->channel_lock); - rxrpc_activate_channels_locked(bundle); - spin_unlock(&bundle->channel_lock); - _leave(""); -} - -/* - * Wait for a callNumber and a channel to be granted to a call. - */ -static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, - struct rxrpc_call *call, gfp_t gfp) -{ - DECLARE_WAITQUEUE(myself, current); - int ret = 0; - - _enter("%d", call->debug_id); - - if (!gfpflags_allow_blocking(gfp)) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error ?: -EAGAIN; - goto out; - } - - add_wait_queue_exclusive(&call->waitq, &myself); - for (;;) { - rxrpc_maybe_add_conn(bundle, gfp); - rxrpc_activate_channels(bundle); - ret = bundle->alloc_error; - if (ret < 0) - break; - - switch (call->interruptibility) { - case RXRPC_INTERRUPTIBLE: - case RXRPC_PREINTERRUPTIBLE: - set_current_state(TASK_INTERRUPTIBLE); - break; - case RXRPC_UNINTERRUPTIBLE: - default: - set_current_state(TASK_UNINTERRUPTIBLE); - break; - } - if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN) - break; - if ((call->interruptibility == RXRPC_INTERRUPTIBLE || - call->interruptibility == RXRPC_PREINTERRUPTIBLE) && - signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } - schedule(); - } - remove_wait_queue(&call->waitq, &myself); - __set_current_state(TASK_RUNNING); - -out: - _leave(" = %d", ret); - return ret; -} - -/* - * find a connection for a call - * - called in process context with IRQs enabled + * Connect waiting channels (called from the I/O thread). */ -int rxrpc_connect_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +void rxrpc_connect_client_calls(struct rxrpc_local *local) { - struct rxrpc_bundle *bundle; - struct rxrpc_net *rxnet = cp->local->rxnet; - int ret = 0; - - _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - - rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); + struct rxrpc_call *call; - bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); - if (IS_ERR(bundle)) { - ret = PTR_ERR(bundle); - goto out; - } + while ((call = list_first_entry_or_null(&local->new_client_calls, + struct rxrpc_call, wait_link)) + ) { + struct rxrpc_bundle *bundle = call->bundle; - if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = rxrpc_wait_for_channel(bundle, call, gfp); - if (ret < 0) - goto wait_failed; - } + spin_lock(&local->client_call_lock); + list_move_tail(&call->wait_link, &bundle->waiting_calls); + spin_unlock(&local->client_call_lock); -granted_channel: - /* Paired with the write barrier in rxrpc_activate_one_channel(). */ - smp_rmb(); - -out_put_bundle: - rxrpc_deactivate_bundle(bundle); - rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call); -out: - _leave(" = %d", ret); - return ret; - -wait_failed: - spin_lock(&bundle->channel_lock); - list_del_init(&call->chan_wait_link); - spin_unlock(&bundle->channel_lock); - - if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) { - ret = 0; - goto granted_channel; + if (rxrpc_bundle_has_space(bundle)) + rxrpc_activate_channels(bundle); } - - trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); - rxrpc_disconnect_client_call(bundle, call); - goto out_put_bundle; } /* @@ -794,14 +543,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) /* * Set the reap timer. */ -static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +static void rxrpc_set_client_reap_timer(struct rxrpc_local *local) { - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { unsigned long now = jiffies; unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; - if (rxnet->live) - timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + if (local->rxnet->live) + timer_reduce(&local->client_conn_reap_timer, reap_at); } } @@ -812,16 +561,13 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = bundle->local->rxnet; + struct rxrpc_local *local = bundle->local; unsigned int channel; bool may_reuse; u32 cid; _enter("c=%x", call->debug_id); - spin_lock(&bundle->channel_lock); - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - /* Calls that have never actually been assigned a channel can simply be * discarded. */ @@ -830,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); - list_del_init(&call->chan_wait_link); - goto out; + list_del_init(&call->wait_link); + return; } cid = call->cid; @@ -839,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call chan = &conn->channels[channel]; trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - if (rcu_access_pointer(chan->call) != call) { - spin_unlock(&bundle->channel_lock); - BUG(); - } + if (WARN_ON(chan->call != call)) + return; may_reuse = rxrpc_may_reuse_conn(conn); @@ -863,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call trace_rxrpc_client(conn, channel, rxrpc_client_to_active); bundle->try_upgrade = false; if (may_reuse) - rxrpc_activate_channels_locked(bundle); + rxrpc_activate_channels(bundle); } - } /* See if we can pass the channel directly to another call. */ if (may_reuse && !list_empty(&bundle->waiting_calls)) { trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); rxrpc_activate_one_channel(conn, channel); - goto out; + return; } /* Schedule the final ACK to be transmitted in a short while so that it @@ -890,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call } /* Deactivate the channel. */ - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); conn->act_chans &= ~(1 << channel); @@ -903,17 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call conn->idle_timestamp = jiffies; rxrpc_get_connection(conn, rxrpc_conn_get_idle); - spin_lock(&rxnet->client_conn_cache_lock); - list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); - spin_unlock(&rxnet->client_conn_cache_lock); + list_move_tail(&conn->cache_link, &local->idle_client_conns); - rxrpc_set_client_reap_timer(rxnet); + rxrpc_set_client_reap_timer(local); } - -out: - spin_unlock(&bundle->channel_lock); - _leave(""); - return; } /* @@ -923,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; unsigned int bindex; - bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -931,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) rxrpc_process_delayed_final_acks(conn, true); - spin_lock(&bundle->channel_lock); bindex = conn->bundle_shift / RXRPC_MAXCALLS; if (bundle->conns[bindex] == conn) { _debug("clear slot %u", bindex); bundle->conns[bindex] = NULL; for (i = 0; i < RXRPC_MAXCALLS; i++) clear_bit(conn->bundle_shift + i, &bundle->avail_chans); - need_drop = true; - } - spin_unlock(&bundle->channel_lock); - - if (need_drop) { + rxrpc_put_client_connection_id(bundle->local, conn); rxrpc_deactivate_bundle(bundle); rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } @@ -951,11 +681,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) /* * Drop the active count on a bundle. */ -static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) { - struct rxrpc_local *local = bundle->local; + struct rxrpc_local *local; bool need_put = false; + if (!bundle) + return; + + local = bundle->local; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { if (!bundle->exclusive) { _debug("erase bundle"); @@ -982,7 +716,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); atomic_dec(&rxnet->nr_client_conns); - rxrpc_put_client_connection_id(conn); + rxrpc_put_client_connection_id(local, conn); } /* @@ -992,42 +726,26 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn) * This may be called from conn setup or from a work item so cannot be * considered non-reentrant. */ -void rxrpc_discard_expired_client_conns(struct work_struct *work) +void rxrpc_discard_expired_client_conns(struct rxrpc_local *local) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = - container_of(work, struct rxrpc_net, client_conn_reaper); unsigned long expiry, conn_expires_at, now; unsigned int nr_conns; _enter(""); - if (list_empty(&rxnet->idle_client_conns)) { - _leave(" [empty]"); - return; - } - - /* Don't double up on the discarding */ - if (!mutex_trylock(&rxnet->client_conn_discard_lock)) { - _leave(" [already]"); - return; - } - /* We keep an estimate of what the number of conns ought to be after * we've discarded some so that we don't overdo the discarding. */ - nr_conns = atomic_read(&rxnet->nr_client_conns); + nr_conns = atomic_read(&local->rxnet->nr_client_conns); next: - spin_lock(&rxnet->client_conn_cache_lock); - - if (list_empty(&rxnet->idle_client_conns)) - goto out; - - conn = list_entry(rxnet->idle_client_conns.next, - struct rxrpc_connection, cache_link); + conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link); + if (!conn) + return; - if (!rxnet->kill_all_client_conns) { + if (!local->kill_all_client_conns) { /* If the number of connections is over the reap limit, we * expedite discard by reducing the expiry timeout. We must, * however, have at least a short grace period to be able to do @@ -1050,8 +768,6 @@ next: trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); - spin_unlock(&rxnet->client_conn_cache_lock); - rxrpc_unbundle_conn(conn); /* Drop the ->cache_link ref */ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); @@ -1068,31 +784,8 @@ not_yet_expired: * then things get messier. */ _debug("not yet"); - if (!rxnet->kill_all_client_conns) - timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at); - -out: - spin_unlock(&rxnet->client_conn_cache_lock); - mutex_unlock(&rxnet->client_conn_discard_lock); - _leave(""); -} - -/* - * Preemptively destroy all the client connection records rather than waiting - * for them to time out - */ -void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) -{ - _enter(""); - - spin_lock(&rxnet->client_conn_cache_lock); - rxnet->kill_all_client_conns = true; - spin_unlock(&rxnet->client_conn_cache_lock); - - del_timer_sync(&rxnet->client_conn_reap_timer); - - if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) - _debug("destroy: queue failed"); + if (!local->kill_all_client_conns) + timer_reduce(&local->client_conn_reap_timer, conn_expires_at); _leave(""); } @@ -1102,29 +795,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) */ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) { - struct rxrpc_connection *conn, *tmp; - struct rxrpc_net *rxnet = local->rxnet; - LIST_HEAD(graveyard); + struct rxrpc_connection *conn; _enter(""); - spin_lock(&rxnet->client_conn_cache_lock); - - list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, - cache_link) { - if (conn->local == local) { - atomic_dec(&conn->active); - trace_rxrpc_client(conn, -1, rxrpc_client_discard); - list_move(&conn->cache_link, &graveyard); - } - } + local->kill_all_client_conns = true; - spin_unlock(&rxnet->client_conn_cache_lock); + del_timer_sync(&local->client_conn_reap_timer); - while (!list_empty(&graveyard)) { - conn = list_entry(graveyard.next, - struct rxrpc_connection, cache_link); + while ((conn = list_first_entry_or_null(&local->idle_client_conns, + struct rxrpc_connection, cache_link))) { list_del_init(&conn->cache_link); + atomic_dec(&conn->active); + trace_rxrpc_client(conn, -1, rxrpc_client_discard); rxrpc_unbundle_conn(conn); rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 480364bcbf85..44414e724415 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -17,11 +17,65 @@ #include "ar-internal.h" /* + * Set the completion state on an aborted connection. + */ +static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, + enum rxrpc_call_completion compl) +{ + bool aborted = false; + + if (conn->state != RXRPC_CONN_ABORTED) { + spin_lock(&conn->state_lock); + if (conn->state != RXRPC_CONN_ABORTED) { + conn->abort_code = abort_code; + conn->error = err; + conn->completion = compl; + /* Order the abort info before the state change. */ + smp_store_release(&conn->state, RXRPC_CONN_ABORTED); + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events); + aborted = true; + } + spin_unlock(&conn->state_lock); + } + + return aborted; +} + +/* + * Mark a socket buffer to indicate that the connection it's on should be aborted. + */ +int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, + s32 abort_code, int err, enum rxrpc_abort_reason why) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (rxrpc_set_conn_aborted(conn, skb, abort_code, err, + RXRPC_CALL_LOCALLY_ABORTED)) { + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, abort_code, err); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort); + } + return -EPROTO; +} + +/* + * Mark a connection as being remotely aborted. + */ +static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + RXRPC_CALL_REMOTELY_ABORTED); +} + +/* * Retransmit terminal ACK or ABORT of the previous call. */ -static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, - struct sk_buff *skb, - unsigned int channel) +void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb, + unsigned int channel) { struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; struct rxrpc_channel *chan; @@ -46,9 +100,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* If the last call got moved on whilst we were waiting to run, just * ignore this packet. */ - call_id = READ_ONCE(chan->last_call); - /* Sync with __rxrpc_disconnect_call() */ - smp_rmb(); + call_id = chan->last_call; if (skb && call_id != sp->hdr.callNumber) return; @@ -65,9 +117,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[2].iov_base = &ack_info; iov[2].iov_len = sizeof(ack_info); + serial = atomic_inc_return(&conn->serial); + pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid | channel); pkt.whdr.callNumber = htonl(call_id); + pkt.whdr.serial = htonl(serial); pkt.whdr.seq = 0; pkt.whdr.type = chan->last_type; pkt.whdr.flags = conn->out_clientflag; @@ -104,31 +159,15 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[0].iov_len += sizeof(pkt.ack); len += sizeof(pkt.ack) + 3 + sizeof(ack_info); ioc = 3; - break; - - default: - return; - } - - /* Resync with __rxrpc_disconnect_call() and check that the last call - * didn't get advanced whilst we were filling out the packets. - */ - smp_rmb(); - if (READ_ONCE(chan->last_call) != call_id) - return; - - serial = atomic_inc_return(&conn->serial); - pkt.whdr.serial = htonl(serial); - switch (chan->last_type) { - case RXRPC_PACKET_TYPE_ABORT: - break; - case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), pkt.ack.reason, 0); break; + + default: + return; } ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); @@ -146,131 +185,34 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, /* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, - enum rxrpc_call_completion compl, - rxrpc_serial_t serial) +static void rxrpc_abort_calls(struct rxrpc_connection *conn) { struct rxrpc_call *call; int i; _enter("{%d},%x", conn->debug_id, conn->abort_code); - spin_lock(&conn->bundle->channel_lock); - for (i = 0; i < RXRPC_MAXCALLS; i++) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); - if (call) { - if (compl == RXRPC_CALL_LOCALLY_ABORTED) - trace_rxrpc_abort(call->debug_id, - "CON", call->cid, - call->call_id, 0, + call = conn->channels[i].call; + if (call) + rxrpc_set_call_completion(call, + conn->completion, conn->abort_code, conn->error); - else - trace_rxrpc_rx_abort(call, serial, - conn->abort_code); - rxrpc_set_call_completion(call, compl, - conn->abort_code, - conn->error); - } } - spin_unlock(&conn->bundle->channel_lock); _leave(""); } /* - * generate a connection-level abort - */ -static int rxrpc_abort_connection(struct rxrpc_connection *conn, - int error, u32 abort_code) -{ - struct rxrpc_wire_header whdr; - struct msghdr msg; - struct kvec iov[2]; - __be32 word; - size_t len; - u32 serial; - int ret; - - _enter("%d,,%u,%u", conn->debug_id, error, abort_code); - - /* generate a connection-level abort */ - spin_lock(&conn->state_lock); - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - spin_unlock(&conn->state_lock); - _leave(" = 0 [already dead]"); - return 0; - } - - conn->error = error; - conn->abort_code = abort_code; - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - spin_unlock(&conn->state_lock); - - msg.msg_name = &conn->peer->srx.transport; - msg.msg_namelen = conn->peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(conn->proto.epoch); - whdr.cid = htonl(conn->proto.cid); - whdr.callNumber = 0; - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - whdr.flags = conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(conn->service_id); - - word = htonl(conn->abort_code); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &word; - iov[1].iov_len = sizeof(word); - - len = iov[0].iov_len + iov[1].iov_len; - - serial = atomic_inc_return(&conn->serial); - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); - whdr.serial = htonl(serial); - - ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); - if (ret < 0) { - trace_rxrpc_tx_fail(conn->debug_id, serial, ret, - rxrpc_tx_point_conn_abort); - _debug("sendmsg failed: %d", ret); - return -EAGAIN; - } - - trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - - conn->peer->last_tx_at = ktime_get_seconds(); - - _leave(" = 0"); - return 0; -} - -/* * mark a call as being on a now-secured channel * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - _enter("%p", call); - if (call) { - write_lock(&call->state_lock); - if (call->state == RXRPC_CALL_SERVER_SECURING) { - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - rxrpc_notify_socket(call); - } - write_unlock(&call->state_lock); + if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + rxrpc_notify_socket(call); } } @@ -278,44 +220,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) * connection-level Rx packet processor */ static int rxrpc_process_event(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int loop, ret; + int ret; - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); + if (conn->state == RXRPC_CONN_ABORTED) return -ECONNABORTED; - } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - - case RXRPC_PACKET_TYPE_BUSY: - /* Just ignore BUSY packets for now. */ - return 0; - - case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; - case RXRPC_PACKET_TYPE_CHALLENGE: - return conn->security->respond_to_challenge(conn, skb, - _abort_code); + return conn->security->respond_to_challenge(conn, skb); case RXRPC_PACKET_TYPE_RESPONSE: - ret = conn->security->verify_response(conn, skb, _abort_code); + ret = conn->security->verify_response(conn, skb); if (ret < 0) return ret; @@ -324,27 +244,25 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - spin_lock(&conn->bundle->channel_lock); spin_lock(&conn->state_lock); - - if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { + if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure( - rcu_dereference_protected( - conn->channels[loop].call, - lockdep_is_held(&conn->bundle->channel_lock))); - } else { - spin_unlock(&conn->state_lock); - } + spin_unlock(&conn->state_lock); - spin_unlock(&conn->bundle->channel_lock); + if (conn->state == RXRPC_CONN_SERVICE) { + /* Offload call state flipping to the I/O thread. As + * we've already received the packet, put it on the + * front of the queue. + */ + skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; + rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); + skb_queue_head(&conn->local->rx_queue, skb); + rxrpc_wake_up_io_thread(conn->local); + } return 0; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); + WARN_ON_ONCE(1); return -EPROTO; } } @@ -354,26 +272,9 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, */ static void rxrpc_secure_connection(struct rxrpc_connection *conn) { - u32 abort_code; - int ret; - - _enter("{%d}", conn->debug_id); - - ASSERT(conn->security_ix != 0); - - if (conn->security->issue_challenge(conn) < 0) { - abort_code = RX_CALL_DEAD; - ret = -ENOMEM; - goto abort; - } - - _leave(""); - return; - -abort: - _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, ret, abort_code); - _leave(" [aborted]"); + if (conn->security->issue_challenge(conn) < 0) + rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, + rxrpc_abort_nomem); } /* @@ -395,9 +296,7 @@ again: if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) continue; - smp_rmb(); /* vs rxrpc_disconnect_client_call */ - ack_at = READ_ONCE(chan->final_ack_at); - + ack_at = chan->final_ack_at; if (time_before(j, ack_at) && !force) { if (time_before(ack_at, next_j)) { next_j = ack_at; @@ -424,47 +323,27 @@ again: static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { struct sk_buff *skb; - u32 abort_code = RX_PROTOCOL_ERROR; int ret; if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); - /* Process delayed ACKs whose time has come. */ - if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) - rxrpc_process_delayed_final_acks(conn, false); - /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); - ret = rxrpc_process_event(conn, skb, &abort_code); + ret = rxrpc_process_event(conn, skb); switch (ret) { - case -EPROTO: - case -EKEYEXPIRED: - case -EKEYREJECTED: - goto protocol_error; case -ENOMEM: case -EAGAIN: - goto requeue_and_leave; - case -ECONNABORTED: + skb_queue_head(&conn->rx_queue, skb); + rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work); + break; default: rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); break; } } - - return; - -requeue_and_leave: - skb_queue_head(&conn->rx_queue, skb); - return; - -protocol_error: - if (rxrpc_abort_connection(conn, ret, abort_code) < 0) - goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); - return; } void rxrpc_process_connection(struct work_struct *work) @@ -498,44 +377,59 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, /* * Input a connection-level packet. */ -int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - _leave(" = -ECONNABORTED [%u]", conn->state); - return -ECONNABORTED; - } - - _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_DATA: - case RXRPC_PACKET_TYPE_ACK: - rxrpc_conn_retransmit_call(conn, skb, - sp->hdr.cid & RXRPC_CHANNELMASK); - return 0; - case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets for now. */ - return 0; + return true; case RXRPC_PACKET_TYPE_ABORT: - conn->error = -ECONNABORTED; - conn->abort_code = skb->priority; - conn->state = RXRPC_CONN_REMOTELY_ABORTED; - set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); - return -ECONNABORTED; + if (rxrpc_is_conn_aborted(conn)) + return true; + rxrpc_input_conn_abort(conn, skb); + rxrpc_abort_calls(conn); + return true; case RXRPC_PACKET_TYPE_CHALLENGE: case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_is_conn_aborted(conn)) { + if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED) + rxrpc_send_conn_abort(conn); + return true; + } rxrpc_post_packet_to_conn(conn, skb); - return 0; + return true; default: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_conn_pkt")); - return -EPROTO; + WARN_ON_ONCE(1); + return true; } } + +/* + * Input a connection event. + */ +void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + unsigned int loop; + + if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) + rxrpc_abort_calls(conn); + + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; + + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } + + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, false); +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 3c8f83dacb2b..ac85d4644a3c 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work); static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at); +void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) +{ + struct rxrpc_local *local = conn->local; + bool busy; + + if (WARN_ON_ONCE(!local)) + return; + + spin_lock_bh(&local->lock); + busy = !list_empty(&conn->attend_link); + if (!busy) { + rxrpc_get_connection(conn, why); + list_add_tail(&conn->attend_link, &local->conn_attend_q); + } + spin_unlock_bh(&local->lock); + rxrpc_wake_up_io_thread(local); +} + static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); - rxrpc_queue_conn(conn, rxrpc_conn_queue_timer); + rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer); } /* @@ -49,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + mutex_init(&conn->security_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; @@ -82,10 +101,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - /* Look up client connections by connection ID alone as their IDs are - * unique for this machine. + /* Look up client connections by connection ID alone as their + * IDs are unique for this machine. */ - conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); + conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; @@ -139,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); - if (rcu_access_pointer(chan->call) == call) { + if (chan->call == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ @@ -159,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, break; } - /* Sync with rxrpc_conn_retransmit(). */ - smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; - - rcu_assign_pointer(chan->call, NULL); + chan->call = NULL; } _leave(""); @@ -178,6 +194,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + rxrpc_see_call(call, rxrpc_call_see_disconnected); + call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { @@ -186,18 +205,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) spin_unlock(&call->peer->lock); } - if (rxrpc_is_client_call(call)) - return rxrpc_disconnect_client_call(conn->bundle, call); - - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); + if (rxrpc_is_client_call(call)) { + rxrpc_disconnect_client_call(call->bundle, call); + } else { + __rxrpc_disconnect_call(conn, call); + conn->idle_timestamp = jiffies; + if (atomic_dec_and_test(&conn->active)) + rxrpc_set_service_reap_timer(conn->rxnet, + jiffies + rxrpc_connection_expiry); + } - set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); - conn->idle_timestamp = jiffies; - if (atomic_dec_and_test(&conn->active)) - rxrpc_set_service_reap_timer(conn->rxnet, - jiffies + rxrpc_connection_expiry); + rxrpc_put_call(call, rxrpc_call_put_io_thread); } /* @@ -293,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work) container_of(work, struct rxrpc_connection, destructor); struct rxrpc_net *rxnet = conn->rxnet; - ASSERT(!rcu_access_pointer(conn->channels[0].call) && - !rcu_access_pointer(conn->channels[1].call) && - !rcu_access_pointer(conn->channels[2].call) && - !rcu_access_pointer(conn->channels[3].call)); + ASSERT(!conn->channels[0].call && + !conn->channels[1].call && + !conn->channels[2].call && + !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); del_timer_sync(&conn->timer); @@ -447,7 +465,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) _enter(""); atomic_dec(&rxnet->nr_conns); - rxrpc_destroy_all_client_connections(rxnet); del_timer_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 2a55a88b2a5b..f30323de82bd 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -11,7 +11,6 @@ static struct rxrpc_bundle rxrpc_service_dummy_bundle = { .ref = REFCOUNT_INIT(1), .debug_id = UINT_MAX, - .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), }; /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d0e20e946e48..367927a99881 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -9,11 +9,10 @@ #include "ar-internal.h" -static void rxrpc_proto_abort(const char *why, - struct rxrpc_call *call, rxrpc_seq_t seq) +static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, + enum rxrpc_abort_reason why) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why); } /* @@ -185,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) if (call->cong_mode != RXRPC_CALL_SLOW_START && call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) return; - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); @@ -250,47 +249,34 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, * This occurs when we get an ACKALL packet, the first DATA packet of a reply, * or a final ACK packet. */ -static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, - const char *abort_why) +static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + enum rxrpc_abort_reason abort_why) { - unsigned int state; - ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - write_lock(&call->state_lock); - - state = call->state; - switch (state) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - if (reply_begun) - call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; - else - call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + if (reply_begun) { + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); + break; + } + + rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY); + trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); break; case RXRPC_CALL_SERVER_AWAIT_ACK: - __rxrpc_call_completed(call); - state = call->state; + rxrpc_call_completed(call); + trace_rxrpc_txqueue(call, rxrpc_txqueue_end); break; default: - goto bad_state; + kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]); + rxrpc_proto_abort(call, call->tx_top, abort_why); + break; } - - write_unlock(&call->state_lock); - if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply); - else - trace_rxrpc_txqueue(call, rxrpc_txqueue_end); - _leave(" = ok"); - return true; - -bad_state: - write_unlock(&call->state_lock); - kdebug("end_tx %s", rxrpc_call_states[call->state]); - rxrpc_proto_abort(abort_why, call, call->tx_top); - return false; } /* @@ -305,18 +291,48 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) if (call->ackr_reason) { now = jiffies; timo = now + MAX_JIFFY_OFFSET; - WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->delay_ack_at, timo); trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { if (!rxrpc_rotate_tx_window(call, top, &summary)) { - rxrpc_proto_abort("TXL", call, top); + rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } } - return rxrpc_end_tx_phase(call, true, "ETD"); + + rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply); + return true; +} + +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); + + _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]); + + trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); + + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); + rxrpc_call_completed(call); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST); + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; + rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op); + break; + + default: + break; + } } static void rxrpc_input_update_ack_window(struct rxrpc_call *call, @@ -337,8 +353,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); - trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); + if (last) + rxrpc_end_rx_phase(call, sp->hdr.serial); } /* @@ -366,17 +383,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, if (last) { if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && - seq + 1 != wtop) { - rxrpc_proto_abort("LSN", call, seq); - return; - } + seq + 1 != wtop) + return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last); } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && after_eq(seq, wtop)) { pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n", call->debug_id, seq, window, wtop, wlimit); - rxrpc_proto_abort("LSA", call, seq); - return; + return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last); } } @@ -550,7 +564,6 @@ protocol_error: static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - enum rxrpc_call_state state; rxrpc_serial_t serial = sp->hdr.serial; rxrpc_seq_t seq0 = sp->hdr.seq; @@ -558,11 +571,20 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) atomic64_read(&call->ackr_window), call->rx_highest_seq, skb->len, seq0); - state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) + if (__rxrpc_call_is_complete(call)) return; - if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + /* Received data implicitly ACKs all of the request + * packets we sent when we're acting as a client. + */ + if (!rxrpc_receiving_reply(call)) + goto out_notify; + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; @@ -573,18 +595,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_reduce_call_timer(call, expect_req_by, now, rxrpc_timer_set_for_idle); } + break; } - /* Received data implicitly ACKs all of the request packets we sent - * when we're acting as a client. - */ - if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || - state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && - !rxrpc_receiving_reply(call)) - goto out_notify; + default: + break; + } if (!rxrpc_input_split_jumbo(call, skb)) { - rxrpc_proto_abort("VLD", call, sp->hdr.seq); + rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo); goto out_notify; } skb = NULL; @@ -765,7 +784,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header); if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) - return rxrpc_proto_abort("XAK", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack); offset += sizeof(ack); ack_serial = sp->hdr.serial; @@ -845,7 +864,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(info) && skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) - return rxrpc_proto_abort("XAI", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info); if (nr_acks > 0) skb_condense(skb); @@ -868,10 +887,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_input_ackinfo(call, skb, &info); if (first_soft_ack == 0) - return rxrpc_proto_abort("AK0", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. */ - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_SERVER_SEND_REPLY: @@ -883,20 +902,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (before(hard_ack, call->acks_hard_ack) || after(hard_ack, call->tx_top)) - return rxrpc_proto_abort("AKW", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) - return rxrpc_proto_abort("AKN", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { - rxrpc_end_tx_phase(call, false, "ETA"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); return; } } if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) - return rxrpc_proto_abort("XSA", call, 0); + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, nr_acks, &summary); } @@ -918,7 +937,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) - rxrpc_end_tx_phase(call, false, "ETL"); + rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } /* @@ -963,27 +982,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: - rxrpc_input_data(call, skb); - break; + return rxrpc_input_data(call, skb); case RXRPC_PACKET_TYPE_ACK: - rxrpc_input_ack(call, skb); - break; + return rxrpc_input_ack(call, skb); case RXRPC_PACKET_TYPE_BUSY: /* Just ignore BUSY packets from the server; the retry and * lifespan timers will take care of business. BUSY packets * from the client don't make sense. */ - break; + return; case RXRPC_PACKET_TYPE_ABORT: - rxrpc_input_abort(call, skb); - break; + return rxrpc_input_abort(call, skb); case RXRPC_PACKET_TYPE_ACKALL: - rxrpc_input_ackall(call, skb); - break; + return rxrpc_input_ackall(call, skb); default: break; @@ -998,24 +1013,18 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) */ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_connection *conn = call->conn; - - switch (READ_ONCE(call->state)) { + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); fallthrough; case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) - rxrpc_send_abort_packet(call); + rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN, + rxrpc_eproto_improper_term); trace_rxrpc_improper_term(call); break; } rxrpc_input_call_event(call, skb); - - spin_lock(&conn->bundle->channel_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&conn->bundle->channel_lock); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 0eb8471bfc53..34353b6e584b 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -43,25 +43,17 @@ static void none_free_call_crypto(struct rxrpc_call *call) } static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("chall_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_challenge); } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("resp_none")); - return -EPROTO; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxrpc_eproto_rxnull_response); } static void none_clear(struct rxrpc_connection *conn) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 1ad067d66fb6..9e9dfb2fc559 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -67,9 +67,31 @@ void rxrpc_error_report(struct sock *sk) } /* + * Directly produce an abort from a packet. + */ +bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, + s32 abort_code, int err) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + abort_code, err); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = abort_code; + return false; +} + +static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why) +{ + return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG); +} + +#define just_discard true + +/* * Process event packets targeted at a local endpoint. */ -static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) +static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); char v; @@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) if (v == 0) rxrpc_send_version_request(local, &sp->hdr, skb); } + + return true; } /* * Extract the wire header from a packet and translate the byte order. */ -static noinline -int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, + struct sk_buff *skb) { struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_hdr")); - return -EBADMSG; - } + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); - return 0; + return true; } /* @@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb) /* * Process packets received on the local endpoint */ -static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; struct sk_buff *skb = *_skb; - int ret = 0; + bool ret = false; skb_pull(skb, sizeof(struct udphdr)); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; + if (!rxrpc_extract_header(sp, skb)) + return just_discard; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - return 0; + return just_discard; } } @@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) - return 0; - rxrpc_input_version(local, skb); - return 0; + return just_discard; + return rxrpc_input_version(local, skb); case RXRPC_PACKET_TYPE_BUSY: if (rxrpc_to_server(sp)) - return 0; + return just_discard; fallthrough; case RXRPC_PACKET_TYPE_ACK: case RXRPC_PACKET_TYPE_ACKALL: if (sp->hdr.callNumber == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); break; case RXRPC_PACKET_TYPE_ABORT: if (!rxrpc_extract_abort(skb)) - return 0; /* Just discard if malformed */ + return just_discard; /* Just discard if malformed */ break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0 || - sp->hdr.seq == 0) - goto bad_message; + if (sp->hdr.callNumber == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); + if (sp->hdr.seq == 0) + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); /* Unshare the packet so that it can be modified for in-place * decryption. @@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) if (!skb) { rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); *_skb = NULL; - return 0; + return just_discard; } if (skb != *_skb) { @@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) case RXRPC_PACKET_TYPE_CHALLENGE: if (rxrpc_to_server(sp)) - return 0; + return just_discard; break; case RXRPC_PACKET_TYPE_RESPONSE: if (rxrpc_to_client(sp)) - return 0; + return just_discard; break; /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: - return 0; + return just_discard; default: - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet); } if (sp->hdr.serviceId == 0) - goto bad_message; + return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service); if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) - return true; /* Unsupported address type - discard. */ + return just_discard; /* Unsupported address type. */ if (peer_srx.transport.family != local->srx.transport.family && (peer_srx.transport.family == AF_INET && @@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", peer_srx.transport.family, local->srx.transport.family); - return true; /* Wrong address type - discard. */ + return just_discard; /* Wrong address type. */ } if (rxrpc_to_client(sp)) { @@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); rcu_read_unlock(); - if (!conn) { - trace_rxrpc_abort(0, "NCC", sp->hdr.cid, - sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - goto protocol_error; - } + if (!conn) + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn); ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); rxrpc_put_connection(conn, rxrpc_conn_put_call_input); @@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); rxrpc_put_peer(peer, rxrpc_peer_put_input); - if (ret < 0) - goto reject_packet; - return 0; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(local, skb); - return 0; + return ret; } /* @@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; + bool ret; if (sp->hdr.securityIndex != conn->security_ix) - goto wrong_security; + return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, + RXKADINCONSISTENCY, -EBADMSG); if (sp->hdr.serviceId != conn->service_id) { int old_id; if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade); + old_id = cmpxchg(&conn->service_id, conn->orig_service_id, sp->hdr.serviceId); - if (old_id != conn->orig_service_id && old_id != sp->hdr.serviceId) - goto reupgrade; + return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade); } if (after(sp->hdr.serial, conn->hi_serial)) @@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) - return 0; + return just_discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - return 0; + return just_discard; /* For the previous service call, if completed successfully, we * discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) - return 0; + return just_discard; /* But otherwise we need to retransmit the final packet from * data cached in the connection record. @@ -358,19 +365,17 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, sp->hdr.seq, sp->hdr.serial, sp->hdr.flags); - rxrpc_input_conn_packet(conn, skb); - return 0; + rxrpc_conn_retransmit_call(conn, skb, channel); + return just_discard; } - rcu_read_lock(); - call = rxrpc_try_get_call(rcu_dereference(chan->call), - rxrpc_call_get_input); - rcu_read_unlock(); + call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input); if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { rxrpc_put_call(call, rxrpc_call_put_input); - goto reject_packet; + return rxrpc_protocol_error(skb, + rxrpc_eproto_unexpected_implicit_end); } if (call) { @@ -382,38 +387,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (!call) { if (rxrpc_to_client(sp)) - goto bad_message; - if (rxrpc_new_incoming_call(conn->local, conn->peer, conn, - peer_srx, skb) == 0) - return 0; - goto reject_packet; + return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call); + return rxrpc_new_incoming_call(conn->local, conn->peer, conn, + peer_srx, skb); } - rxrpc_input_call_event(call, skb); + ret = rxrpc_input_call_event(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return 0; - -wrong_security: - trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - skb->priority = RXKADINCONSISTENCY; - goto post_abort; - -reupgrade: - trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); - goto protocol_error; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; -post_abort: - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - rxrpc_reject_packet(conn->local, skb); - return 0; + return ret; } /* @@ -421,6 +402,7 @@ reject_packet: */ int rxrpc_io_thread(void *data) { + struct rxrpc_connection *conn; struct sk_buff_head rx_queue; struct rxrpc_local *local = data; struct rxrpc_call *call; @@ -436,6 +418,24 @@ int rxrpc_io_thread(void *data) for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); + /* Deal with connections that want immediate attention. */ + conn = list_first_entry_or_null(&local->conn_attend_q, + struct rxrpc_connection, + attend_link); + if (conn) { + spin_lock_bh(&local->lock); + list_del_init(&conn->attend_link); + spin_unlock_bh(&local->lock); + + rxrpc_input_conn_event(conn, NULL); + rxrpc_put_connection(conn, rxrpc_conn_put_poke); + continue; + } + + if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) + rxrpc_discard_expired_client_conns(local); + /* Deal with calls that want immediate attention. */ if ((call = list_first_entry_or_null(&local->call_attend_q, struct rxrpc_call, @@ -450,12 +450,17 @@ int rxrpc_io_thread(void *data) continue; } + if (!list_empty(&local->new_client_calls)) + rxrpc_connect_client_calls(local); + /* Process received packets and errors. */ if ((skb = __skb_dequeue(&rx_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; - rxrpc_input_packet(local, &skb); + if (!rxrpc_input_packet(local, &skb)) + rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); break; @@ -463,6 +468,11 @@ int rxrpc_io_thread(void *data) rxrpc_input_error(local, skb); rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + rxrpc_input_conn_event(sp->conn, skb); + rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured); + break; default: WARN_ON_ONCE(1); rxrpc_free_skb(skb, rxrpc_skb_put_unknown); @@ -481,7 +491,11 @@ int rxrpc_io_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || - !list_empty(&local->call_attend_q)) { + !list_empty(&local->call_attend_q) || + !list_empty(&local->conn_attend_q) || + !list_empty(&local->new_client_calls) || + test_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) { __set_current_state(TASK_RUNNING); continue; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 270b63d8f37a..b8eaca5d9f22 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -82,31 +82,59 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, } } +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_local *local = + container_of(timer, struct rxrpc_local, client_conn_reap_timer); + + if (local->kill_all_client_conns && + test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) + rxrpc_wake_up_io_thread(local); +} + /* * Allocate a new local endpoint. */ -static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, +static struct rxrpc_local *rxrpc_alloc_local(struct net *net, const struct sockaddr_rxrpc *srx) { struct rxrpc_local *local; + u32 tmp; local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); if (local) { refcount_set(&local->ref, 1); atomic_set(&local->active_users, 1); - local->rxnet = rxnet; + local->net = net; + local->rxnet = rxrpc_net(net); INIT_HLIST_NODE(&local->link); init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); skb_queue_head_init(&local->rx_queue); + INIT_LIST_HEAD(&local->conn_attend_q); INIT_LIST_HEAD(&local->call_attend_q); + local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); + local->kill_all_client_conns = false; + INIT_LIST_HEAD(&local->idle_client_conns); + timer_setup(&local->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); + spin_lock_init(&local->lock); rwlock_init(&local->services_lock); local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; + idr_init(&local->conn_ids); + get_random_bytes(&tmp, sizeof(tmp)); + tmp &= 0x3fffffff; + if (tmp == 0) + tmp = 1; + idr_set_cursor(&local->conn_ids, tmp); + INIT_LIST_HEAD(&local->new_client_calls); + spin_lock_init(&local->client_call_lock); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } @@ -248,7 +276,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, goto found; } - local = rxrpc_alloc_local(rxnet, srx); + local = rxrpc_alloc_local(net, srx); if (!local) goto nomem; @@ -407,6 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local) * local endpoint. */ rxrpc_purge_queue(&local->rx_queue); + rxrpc_purge_client_connections(local); } /* diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 5905530e2f33..a0319c040c25 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -10,15 +10,6 @@ unsigned int rxrpc_net_id; -static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) -{ - struct rxrpc_net *rxnet = - container_of(timer, struct rxrpc_net, client_conn_reap_timer); - - if (rxnet->live) - rxrpc_queue_work(&rxnet->client_conn_reaper); -} - static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = @@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net) rxrpc_service_conn_reap_timeout, 0); atomic_set(&rxnet->nr_client_conns, 0); - rxnet->kill_all_client_conns = false; - spin_lock_init(&rxnet->client_conn_cache_lock); - mutex_init(&rxnet->client_conn_discard_lock); - INIT_LIST_HEAD(&rxnet->idle_client_conns); - INIT_WORK(&rxnet->client_conn_reaper, - rxrpc_discard_expired_client_conns); - timer_setup(&rxnet->client_conn_reap_timer, - rxrpc_client_conn_reap_timeout, 0); INIT_HLIST_HEAD(&rxnet->local_endpoints); mutex_init(&rxnet->local_mutex); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3d8c9f830ee0..a9746be29634 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -261,7 +261,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_tx_point_call_ack); rxrpc_tx_backoff(call, ret); - if (call->state < RXRPC_CALL_COMPLETE) { + if (!__rxrpc_call_is_complete(call)) { if (ret < 0) rxrpc_cancel_rtt_probe(call, serial, rtt_slot); rxrpc_set_keepalive(call); @@ -545,6 +545,62 @@ send_fragmentable: } /* + * Transmit a connection-level abort. + */ +void rxrpc_send_conn_abort(struct rxrpc_connection *conn) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + __be32 word; + size_t len; + u32 serial; + int ret; + + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(conn->proto.cid); + whdr.callNumber = 0; + whdr.seq = 0; + whdr.type = RXRPC_PACKET_TYPE_ABORT; + whdr.flags = conn->out_clientflag; + whdr.userStatus = 0; + whdr.securityIndex = conn->security_ix; + whdr._rsvd = 0; + whdr.serviceId = htons(conn->service_id); + + word = htonl(conn->abort_code); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &word; + iov[1].iov_len = sizeof(word); + + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + whdr.serial = htonl(serial); + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_conn_abort); + _debug("sendmsg failed: %d", ret); + return; + } + + trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); + + conn->peer->last_tx_at = ktime_get_seconds(); +} + +/* * Reject a packet through the local endpoint. */ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) @@ -667,7 +723,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) static inline void rxrpc_instant_resend(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - if (call->state < RXRPC_CALL_COMPLETE) + if (!__rxrpc_call_is_complete(call)) kdebug("resend"); } diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 4eecea2be307..8d7a715a0bb1 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, * assess the MTU size for the network interface through which this peer is * reached */ -static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, +static void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct net *net = sock_net(&rx->sk); + struct net *net = local->net; struct dst_entry *dst; struct rtable *rt; struct flowi fl; @@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, /* * Initialise peer record. */ -static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, +static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(rx, peer); + rxrpc_assess_MTU_size(local, peer); peer->mtu = peer->if_mtu; peer->rtt_last_req = ktime_get_real(); @@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, /* * Set up a new peer. */ -static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, unsigned long hash_key, gfp_t gfp) @@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); } _leave(" = %p", peer); @@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer) * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. */ -void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, - struct rxrpc_peer *peer) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; hash_key = rxrpc_peer_hash_key(local, &peer->srx); - rxrpc_init_peer(rx, peer, hash_key); + rxrpc_init_peer(local, peer, hash_key); spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); @@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, /* * obtain a remote transport endpoint for the specified address */ -struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, - struct rxrpc_local *local, +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_peer *peer, *candidate; @@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* The peer is not yet present in hash - create a candidate * for a new record and then redo the search. */ - candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); + candidate = rxrpc_create_peer(local, srx, hash_key, gfp); if (!candidate) { _leave(" = NULL [nomem]"); return NULL; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 3a59591ec061..750158a085cd 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -12,13 +12,13 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", + [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ", [RXRPC_CONN_CLIENT] = "Client ", [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", - [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", - [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CONN_ABORTED] = "Aborted ", }; /* @@ -51,6 +51,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_local *local; struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + enum rxrpc_call_state state; unsigned long timeout = 0; rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; @@ -75,7 +76,8 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) sprintf(rbuff, "%pISpc", &call->dest_srx.transport); - if (call->state != RXRPC_CALL_SERVER_PREALLOC) { + state = rxrpc_call_state(call); + if (state != RXRPC_CALL_SERVER_PREALLOC) { timeout = READ_ONCE(call->expect_rx_by); timeout -= jiffies; } @@ -92,7 +94,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", refcount_read(&call->ref), - rxrpc_call_states[call->state], + rxrpc_call_states[state], call->abort_code, call->debug_id, acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, @@ -143,6 +145,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + const char *state; char lbuff[50], rbuff[50]; if (v == &rxnet->conn_proc_list) { @@ -163,9 +166,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) } sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); print: + state = rxrpc_is_conn_aborted(conn) ? + rxrpc_call_completions[conn->completion] : + rxrpc_conn_states[conn->state]; seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d" " %s %08x %08x %08x %08x %08x %08x %08x\n", @@ -176,7 +181,7 @@ print: rxrpc_conn_is_service(conn) ? "Svc" : "Clt", refcount_read(&conn->ref), atomic_read(&conn->active), - rxrpc_conn_states[conn->state], + state, key_serial(conn->key), atomic_read(&conn->serial), conn->hi_serial, diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6ebd6440a2b7..dd54ceee7bcc 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -59,85 +59,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call) } /* - * Transition a call to the complete state. - */ -bool __rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - if (call->state < RXRPC_CALL_COMPLETE) { - call->abort_code = abort_code; - call->error = error; - call->completion = compl; - call->state = RXRPC_CALL_COMPLETE; - trace_rxrpc_call_complete(call); - wake_up(&call->waitq); - rxrpc_notify_socket(call); - return true; - } - return false; -} - -bool rxrpc_set_call_completion(struct rxrpc_call *call, - enum rxrpc_call_completion compl, - u32 abort_code, - int error) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call successfully completed. - */ -bool __rxrpc_call_completed(struct rxrpc_call *call) -{ - return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); -} - -bool rxrpc_call_completed(struct rxrpc_call *call) -{ - bool ret = false; - - if (call->state < RXRPC_CALL_COMPLETE) { - write_lock(&call->state_lock); - ret = __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - } - return ret; -} - -/* - * Record that a call is locally aborted. - */ -bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, - abort_code, error); - return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, - abort_code, error); -} - -bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, - rxrpc_seq_t seq, u32 abort_code, int error) -{ - bool ret; - - write_lock(&call->state_lock); - ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock(&call->state_lock); - return ret; -} - -/* * Pass a call terminating message to userspace. */ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) @@ -168,7 +89,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: - pr_err("Invalid terminal call state %u\n", call->state); + pr_err("Invalid terminal call state %u\n", call->completion); BUG(); break; } @@ -180,41 +101,6 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } /* - * End the packet reception phase. - */ -static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) -{ - rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq); - - _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - - trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh); - - if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) - rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - __rxrpc_call_completed(call); - write_unlock(&call->state_lock); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock(&call->state_lock); - rxrpc_propose_delay_ACK(call, serial, - rxrpc_propose_ack_processing_op); - break; - default: - write_unlock(&call->state_lock); - break; - } -} - -/* * Discard a packet we've used up and advance the Rx window by one. */ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) @@ -244,10 +130,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, serial, call->rx_consumed); - if (last) { - rxrpc_end_rx_phase(call, serial); - return; - } + + if (last) + set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags); /* Check to see if there's an ACK that needs sending. */ acked = atomic_add_return(call->rx_consumed - old_consumed, @@ -272,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb) /* * Deliver messages to a call. This keeps processing packets until the buffer * is filled and we find either more DATA (returns 0) or the end of the DATA - * (returns 1). If more packets are required, it returns -EAGAIN. + * (returns 1). If more packets are required, it returns -EAGAIN and if the + * call has failed it returns -EIO. */ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, struct msghdr *msg, struct iov_iter *iter, @@ -288,7 +174,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_offset = call->rx_pkt_offset; rx_pkt_len = call->rx_pkt_len; - if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + if (rxrpc_call_has_failed(call)) { + seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; + ret = -EIO; + goto done; + } + + if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; ret = 1; goto done; @@ -312,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, if (rx_pkt_offset == 0) { ret2 = rxrpc_verify_data(call, skb); - rx_pkt_offset = sp->offset; - rx_pkt_len = sp->len; trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq, - rx_pkt_offset, rx_pkt_len, ret2); + sp->offset, sp->len, ret2); if (ret2 < 0) { + kdebug("verify = %d", ret2); ret = ret2; goto out; } + rx_pkt_offset = sp->offset; + rx_pkt_len = sp->len; } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -494,36 +387,36 @@ try_again: msg->msg_namelen = len; } - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, - flags, &copied); - if (ret == -EAGAIN) - ret = 0; - - if (!skb_queue_empty(&call->recvmsg_queue)) - rxrpc_notify_socket(call); - break; - default: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) ret = 0; - break; - } - + if (ret == -EIO) + goto call_failed; if (ret < 0) goto error_unlock_call; - if (call->state == RXRPC_CALL_COMPLETE) { - ret = rxrpc_recvmsg_term(call, msg); - if (ret < 0) - goto error_unlock_call; - if (!(flags & MSG_PEEK)) - rxrpc_release_call(rx, call); - msg->msg_flags |= MSG_EOR; - ret = 1; - } + if (rxrpc_call_is_complete(call) && + skb_queue_empty(&call->recvmsg_queue)) + goto call_complete; + if (rxrpc_call_has_failed(call)) + goto call_failed; + rxrpc_notify_socket(call); + goto not_yet_complete; + +call_failed: + rxrpc_purge_queue(&call->recvmsg_queue); +call_complete: + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error_unlock_call; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; + +not_yet_complete: if (ret == 0) msg->msg_flags |= MSG_MORE; else @@ -586,49 +479,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, size_t offset = 0; int ret; - _enter("{%d,%s},%zu,%d", - call->debug_id, rxrpc_call_states[call->state], - *_len, want_more); - - ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); + _enter("{%d},%zu,%d", call->debug_id, *_len, want_more); mutex_lock(&call->user_mutex); - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - ret = rxrpc_recvmsg_data(sock, call, NULL, iter, - *_len, 0, &offset); - *_len -= offset; - if (ret < 0) - goto out; - - /* We can only reach here with a partially full buffer if we - * have reached the end of the data. We must otherwise have a - * full buffer or have been given -EAGAIN. - */ - if (ret == 1) { - if (iov_iter_count(iter) > 0) - goto short_data; - if (!want_more) - goto read_phase_complete; - ret = 0; - goto out; - } - - if (!want_more) - goto excess_data; + ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset); + *_len -= offset; + if (ret == -EIO) + goto call_failed; + if (ret < 0) goto out; - case RXRPC_CALL_COMPLETE: - goto call_complete; - - default: - ret = -EINPROGRESS; + /* We can only reach here with a partially full buffer if we have + * reached the end of the data. We must otherwise have a full buffer + * or have been given -EAGAIN. + */ + if (ret == 1) { + if (iov_iter_count(iter) > 0) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; goto out; } + if (!want_more) + goto excess_data; + goto out; + read_phase_complete: ret = 1; out: @@ -639,14 +517,18 @@ out: return ret; short_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EBADMSG); ret = -EBADMSG; goto out; excess_data: - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); + trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data, + call->cid, call->call_id, call->rx_consumed, + 0, -EMSGSIZE); ret = -EMSGSIZE; goto out; -call_complete: +call_failed: *_abort = call->abort_code; ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index d1233720e05f..1bf571a66e02 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist sg[16]; - bool aborted; u32 data_size, buf; u16 check; int ret; _enter(""); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", - RXKADSEALEDINCON); - goto protocol_error; - } + if (sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, skcipher_request_zero(req); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_encdata); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", - RXKADSEALEDINCON); - goto protocol_error; - } - - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", - RXKADDATALEN); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_1_short_check); + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_1_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - bool aborted; u32 data_size, buf; u16 check; int nsg, ret; _enter(",{%d}", sp->len); - if (sp->len < 8) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", - RXKADSEALEDINCON); - goto protocol_error; - } + if (sp->len < 8) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_header); /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. @@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) - goto nomem; + return -ENOMEM; } sg_init_table(sg, nsg); @@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, kfree(sg); /* Extract the decrypted packet length */ - if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", - RXKADDATALEN); - goto protocol_error; - } + if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_len); sp->offset += sizeof(sechdr); sp->len -= sizeof(sechdr); @@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check = buf >> 16; check ^= seq ^ call->call_id; check &= 0xffff; - if (check != 0) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", - RXKADSEALEDINCON); - goto protocol_error; - } + if (check != 0) + return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_2_short_check); - if (data_size > sp->len) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", - RXKADDATALEN); - goto protocol_error; - } + if (data_size > sp->len) + return rxrpc_abort_eproto(call, skb, RXKADDATALEN, + rxkad_abort_2_short_data); sp->len = data_size; _leave(" = 0 [dlen=%x]", data_size); return 0; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; } /* @@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) __be32 buf[2]; } crypto __aligned(8); rxrpc_seq_t seq = sp->hdr.seq; - bool aborted; int ret; u16 cksum; u32 x, y; @@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) cksum = 1; /* zero checksums are not permitted */ if (cksum != sp->hdr.cksum) { - aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", - RXKADSEALEDINCON); - goto protocol_error; + ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON, + rxkad_abort_bad_checksum); + goto out; } switch (call->conn->security_level) { @@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) break; } +out: skcipher_request_free(req); return ret; - -protocol_error: - if (aborted) - rxrpc_send_abort_packet(call); - return -EPROTO; } /* @@ -821,34 +783,30 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn, * respond to a challenge packet */ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { const struct rxrpc_key_token *token; struct rxkad_challenge challenge; struct rxkad_response *resp; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - const char *eproto; - u32 version, nonce, min_level, abort_code; - int ret; + u32 version, nonce, min_level; + int ret = -EPROTO; _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); - eproto = tracepoint_string("chall_no_key"); - abort_code = RX_PROTOCOL_ERROR; if (!conn->key) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, + rxkad_abort_chall_no_key); - abort_code = RXKADEXPIRED; ret = key_validate(conn->key); if (ret < 0) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_chall_key_expired); - eproto = tracepoint_string("chall_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_chall_short); version = ntohl(challenge.version); nonce = ntohl(challenge.nonce); @@ -856,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); - eproto = tracepoint_string("chall_ver"); - abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) - goto protocol_error; + return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_chall_version); - abort_code = RXKADLEVELFAIL; - ret = -EACCES; if (conn->security_level < min_level) - goto other_error; + return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES, + rxkad_abort_chall_level); token = conn->key->payload.data[0]; @@ -893,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret; - -protocol_error: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - ret = -EPROTO; -other_error: - *_abort_code = abort_code; - return ret; } /* @@ -910,20 +859,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, - time64_t *_expiry, - u32 *_abort_code) + time64_t *_expiry) { struct skcipher_request *req; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; - const char *eproto; time64_t issue, now; bool little_endian; - int ret; - u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(server_key)); @@ -935,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &server_key->payload.data[2], sizeof(iv)); - ret = -ENOMEM; req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS); if (!req) - goto temporary_error; + return -ENOMEM; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -949,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(field) \ - ({ \ - u8 *__str = p; \ - eproto = tracepoint_string("rxkad_bad_"#field); \ - q = memchr(p, 0, end - p); \ - if (!q || q - p > (field##_SZ)) \ - goto bad_ticket; \ - for (; p < q; p++) \ - if (!isprint(*p)) \ - goto bad_ticket; \ - p++; \ - __str; \ +#define Z(field, fieldl) \ + ({ \ + u8 *__str = p; \ + q = memchr(p, 0, end - p); \ + if (!q || q - p > field##_SZ) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + for (; p < q; p++) \ + if (!isprint(*p)) \ + return rxrpc_abort_conn( \ + conn, skb, RXKADBADTICKET, -EPROTO, \ + rxkad_abort_resp_tkt_##fieldl); \ + p++; \ + __str; \ }) /* extract the ticket flags */ @@ -969,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME); + name = Z(ANAME, aname); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST); + name = Z(INST, inst); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM); + name = Z(REALM, realm); _debug("KIV REALM: %s", name); - eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) - goto bad_ticket; + return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO, + rxkad_abort_resp_tkt_short); /* get the IPv4 address of the entity that requested the ticket */ memcpy(&addr, p, sizeof(addr)); @@ -1014,38 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, _debug("KIV ISSUE: %llx [%llx]", issue, now); /* check the ticket is in date */ - if (issue > now) { - abort_code = RXKADNOAUTH; - ret = -EKEYREJECTED; - goto other_error; - } - - if (issue < now - life) { - abort_code = RXKADEXPIRED; - ret = -EKEYEXPIRED; - goto other_error; - } + if (issue > now) + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED, + rxkad_abort_resp_tkt_future); + if (issue < now - life) + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED, + rxkad_abort_resp_tkt_expired); *_expiry = issue + life; /* get the service name */ - name = Z(SNAME); + name = Z(SNAME, sname); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST); + name = Z(INST, sinst); _debug("KIV SINST: %s", name); return 0; - -bad_ticket: - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); - abort_code = RXKADBADTICKET; - ret = -EPROTO; -other_error: - *_abort_code = abort_code; - return ret; -temporary_error: - return ret; } /* @@ -1086,17 +1017,15 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, * verify a response */ static int rxkad_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb) { struct rxkad_response *response; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; struct key *server_key; - const char *eproto; time64_t expiry; void *ticket; - u32 abort_code, version, kvno, ticket_len, level; + u32 version, kvno, ticket_len, level; __be32 csum; int ret, i; @@ -1104,22 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, server_key = rxrpc_look_up_server_security(conn, skb, 0, 0); if (IS_ERR(server_key)) { - switch (PTR_ERR(server_key)) { + ret = PTR_ERR(server_key); + switch (ret) { case -ENOKEY: - abort_code = RXKADUNKNOWNKEY; - break; + return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret, + rxkad_abort_resp_nokey); case -EKEYEXPIRED: - abort_code = RXKADEXPIRED; - break; + return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret, + rxkad_abort_resp_key_expired); default: - abort_code = RXKADNOAUTH; - break; + return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret, + rxkad_abort_resp_key_rejected); } - trace_rxrpc_abort(0, "SVK", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - abort_code, PTR_ERR(server_key)); - *_abort_code = abort_code; - return -EPROTO; } ret = -ENOMEM; @@ -1127,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!response) goto temporary_error; - eproto = tracepoint_string("rxkad_rsp_short"); - abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - response, sizeof(*response)) < 0) + response, sizeof(*response)) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short); goto protocol_error; + } version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); @@ -1139,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); - eproto = tracepoint_string("rxkad_rsp_ver"); - abort_code = RXKADINCONSISTENCY; - if (version != RXKAD_VERSION) + if (version != RXKAD_VERSION) { + rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO, + rxkad_abort_resp_version); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_tktlen"); - abort_code = RXKADTICKETLEN; - if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) + if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) { + rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO, + rxkad_abort_resp_tkt_len); goto protocol_error; + } - eproto = tracepoint_string("rxkad_rsp_unkkey"); - abort_code = RXKADUNKNOWNKEY; - if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) + if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) { + rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO, + rxkad_abort_resp_unknown_tkt); goto protocol_error; + } /* extract the kerberos ticket and decrypt and decode it */ ret = -ENOMEM; @@ -1160,15 +1089,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, if (!ticket) goto temporary_error_free_resp; - eproto = tracepoint_string("rxkad_tkt_short"); - abort_code = RXKADPACKETSHORT; - ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), - ticket, ticket_len); - if (ret < 0) - goto temporary_error_free_ticket; + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response), + ticket, ticket_len) < 0) { + rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO, + rxkad_abort_resp_short_tkt); + goto protocol_error; + } ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len, - &session_key, &expiry, _abort_code); + &session_key, &expiry); if (ret < 0) goto temporary_error_free_ticket; @@ -1176,56 +1105,61 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * response */ rxkad_decrypt_response(conn, response, &session_key); - eproto = tracepoint_string("rxkad_rsp_param"); - abort_code = RXKADSEALEDINCON; - if (ntohl(response->encrypted.epoch) != conn->proto.epoch) - goto protocol_error_free; - if (ntohl(response->encrypted.cid) != conn->proto.cid) - goto protocol_error_free; - if (ntohl(response->encrypted.securityIndex) != conn->security_ix) + if (ntohl(response->encrypted.epoch) != conn->proto.epoch || + ntohl(response->encrypted.cid) != conn->proto.cid || + ntohl(response->encrypted.securityIndex) != conn->security_ix) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_param); goto protocol_error_free; + } + csum = response->encrypted.checksum; response->encrypted.checksum = 0; rxkad_calc_response_checksum(response); - eproto = tracepoint_string("rxkad_rsp_csum"); - if (response->encrypted.checksum != csum) + if (response->encrypted.checksum != csum) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_checksum); goto protocol_error_free; + } - spin_lock(&conn->bundle->channel_lock); for (i = 0; i < RXRPC_MAXCALLS; i++) { - struct rxrpc_call *call; u32 call_id = ntohl(response->encrypted.call_id[i]); + u32 counter = READ_ONCE(conn->channels[i].call_counter); + + if (call_id > INT_MAX) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_bad_callid); + goto protocol_error_free; + } - eproto = tracepoint_string("rxkad_rsp_callid"); - if (call_id > INT_MAX) - goto protocol_error_unlock; - - eproto = tracepoint_string("rxkad_rsp_callctr"); - if (call_id < conn->channels[i].call_counter) - goto protocol_error_unlock; - - eproto = tracepoint_string("rxkad_rsp_callst"); - if (call_id > conn->channels[i].call_counter) { - call = rcu_dereference_protected( - conn->channels[i].call, - lockdep_is_held(&conn->bundle->channel_lock)); - if (call && call->state < RXRPC_CALL_COMPLETE) - goto protocol_error_unlock; + if (call_id < counter) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_ctr); + goto protocol_error_free; + } + + if (call_id > counter) { + if (conn->channels[i].call) { + rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO, + rxkad_abort_resp_call_state); + goto protocol_error_free; + } conn->channels[i].call_counter = call_id; } } - spin_unlock(&conn->bundle->channel_lock); - eproto = tracepoint_string("rxkad_rsp_seq"); - abort_code = RXKADOUTOFSEQUENCE; - if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) + if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) { + rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO, + rxkad_abort_resp_ooseq); goto protocol_error_free; + } - eproto = tracepoint_string("rxkad_rsp_level"); - abort_code = RXKADLEVELFAIL; level = ntohl(response->encrypted.level); - if (level > RXRPC_SECURITY_ENCRYPT) + if (level > RXRPC_SECURITY_ENCRYPT) { + rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO, + rxkad_abort_resp_level); goto protocol_error_free; + } conn->security_level = level; /* create a key to hold the security data and expiration time - after @@ -1240,15 +1174,11 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _leave(" = 0"); return 0; -protocol_error_unlock: - spin_unlock(&conn->bundle->channel_lock); protocol_error_free: kfree(ticket); protocol_error: kfree(response); - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); key_put(server_key); - *_abort_code = abort_code; return -EPROTO; temporary_error_free_ticket: diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index d33a109e846c..16dcabb71ebe 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -10,6 +10,8 @@ #include <linux/slab.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#define RXRPC_TRACE_ONLY_DEFINE_ENUMS +#include <trace/events/rxrpc.h> MODULE_DESCRIPTION("rxperf test server (afs)"); MODULE_AUTHOR("Red Hat, Inc."); @@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -EOPNOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GOP"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -ENOTSUPP: abort_code = RX_USER_ABORT; rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - abort_code, ret, "GUA"); + abort_code, ret, + rxperf_abort_op_not_supported); goto call_complete; case -EIO: pr_err("Call %u in bad state %u\n", @@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work) case -ENOMEM: case -EFAULT: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_UNMARSHAL, ret, "GUM"); + RXGEN_SS_UNMARSHAL, ret, + rxperf_abort_unmarshal_error); goto call_complete; default: rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RX_CALL_DEAD, ret, "GER"); + RX_CALL_DEAD, ret, + rxperf_abort_general_error); goto call_complete; } } @@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call) if (n == -ENOMEM) rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, - RXGEN_SS_MARSHAL, -ENOMEM, "GOM"); + RXGEN_SS_MARSHAL, -ENOMEM, + rxperf_abort_oom); return n; } diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index ab968f65a490..cb8dd1d3b1d4 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -97,38 +97,31 @@ found: */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { - const struct rxrpc_security *sec; struct rxrpc_key_token *token; struct key *key = conn->key; - int ret; + int ret = 0; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); - if (!key) - return 0; - - ret = key_validate(key); - if (ret < 0) - return ret; - for (token = key->payload.data[0]; token; token = token->next) { - sec = rxrpc_security_lookup(token->security_index); - if (sec) + if (token->security_index == conn->security->security_index) goto found; } return -EKEYREJECTED; found: - conn->security = sec; - - ret = conn->security->init_connection_security(conn, token); - if (ret < 0) { - conn->security = &rxrpc_no_security; - return ret; + mutex_lock(&conn->security_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = conn->security->init_connection_security(conn, token); + if (ret == 0) { + spin_lock(&conn->state_lock); + if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) + conn->state = RXRPC_CONN_CLIENT; + spin_unlock(&conn->state_lock); + } } - - _leave(" = 0"); - return 0; + mutex_unlock(&conn->security_lock); + return ret; } /* @@ -144,21 +137,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx, sec = rxrpc_security_lookup(sp->hdr.securityIndex); if (!sec) { - trace_rxrpc_abort(0, "SVS", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; + rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security, + RX_INVALID_OPERATION, -EKEYREJECTED); return NULL; } if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE && !rx->securities) { - trace_rxrpc_abort(0, "SVR", - sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EKEYREJECTED); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = sec->no_key_abort; + rxrpc_direct_abort(skb, rxrpc_abort_no_service_key, + sec->no_key_abort, -EKEYREJECTED); return NULL; } @@ -191,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); - rcu_read_lock(); + read_lock(&conn->local->services_lock); - rx = rcu_dereference(conn->local->service); + rx = conn->local->service; if (!rx) goto out; @@ -215,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, } out: - rcu_read_unlock(); + read_unlock(&conn->local->services_lock); return key; } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index cde1e65f16b4..da49fcf1c456 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -18,6 +18,81 @@ #include "ar-internal.h" /* + * Propose an abort to be made in the I/O thread. + */ +bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error, + enum rxrpc_abort_reason why) +{ + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); + + if (!call->send_abort && !rxrpc_call_is_complete(call)) { + call->send_abort_why = why; + call->send_abort_err = error; + call->send_abort_seq = 0; + /* Request abort locklessly vs rxrpc_input_call_event(). */ + smp_store_release(&call->send_abort, abort_code); + rxrpc_poke_call(call, rxrpc_call_poke_abort); + return true; + } + + return false; +} + +/* + * Wait for a call to become connected. Interruption here doesn't cause the + * call to be aborted. + */ +static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret = 0; + + _enter("%d", call->debug_id); + + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) + return call->error; + + add_wait_queue_exclusive(&call->waitq, &myself); + + for (;;) { + ret = call->error; + if (ret < 0) + break; + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: + set_current_state(TASK_INTERRUPTIBLE); + break; + case RXRPC_UNINTERRUPTIBLE: + default: + set_current_state(TASK_UNINTERRUPTIBLE); + break; + } + if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = call->error; + break; + } + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && + signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + *timeo = schedule_timeout(*timeo); + } + + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + + if (ret == 0 && rxrpc_call_is_complete(call)) + ret = call->error; + + _leave(" = %d", ret); + return ret; +} + +/* * Return true if there's sufficient Tx queue space. */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) @@ -39,7 +114,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (signal_pending(current)) @@ -74,7 +149,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, &tx_win)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; if (timeout == 0 && @@ -103,7 +178,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (rxrpc_check_tx_space(call, NULL)) return 0; - if (call->state >= RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) return call->error; trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); @@ -168,7 +243,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { - unsigned long now; rxrpc_seq_t seq = txb->seq; bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; @@ -191,36 +265,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, poke = list_empty(&call->tx_sendmsg); list_add_tail(&txb->call_link, &call->tx_sendmsg); call->tx_prepared = seq; + if (last) + rxrpc_notify_end_tx(rx, call, notify_end_tx); spin_unlock(&call->tx_lock); - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - now = jiffies; - WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET); - if (call->ackr_reason == RXRPC_ACK_DELAY) - call->ackr_reason = 0; - trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); - if (!last) - break; - fallthrough; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - rxrpc_notify_end_tx(rx, call, notify_end_tx); - break; - default: - break; - } - write_unlock(&call->state_lock); - } - if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } @@ -245,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + ret = rxrpc_wait_to_be_connected(call, &timeo); + if (ret < 0) + return ret; + + if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) { + ret = rxrpc_init_client_conn_security(call->conn); + if (ret < 0) + return ret; + } + /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -252,15 +310,20 @@ reload: ret = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) goto maybe_error; - state = READ_ONCE(call->state); + state = rxrpc_call_state(call); ret = -ESHUTDOWN; if (state >= RXRPC_CALL_COMPLETE) goto maybe_error; ret = -EPROTO; if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && state != RXRPC_CALL_SERVER_ACK_REQUEST && - state != RXRPC_CALL_SERVER_SEND_REPLY) + state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Request phase complete for this client call */ + trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, + call->cid, call->call_id, call->rx_consumed, + 0, -EPROTO); goto maybe_error; + } ret = -EMSGSIZE; if (call->tx_total_len != -1) { @@ -329,7 +392,7 @@ reload: /* check for the far side aborting the call or a network error * occurring */ - if (call->state == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto call_terminated; /* add the packet to the send queue if it's now full */ @@ -354,12 +417,9 @@ reload: success: ret = copied; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { - read_lock(&call->state_lock); - if (call->error < 0) - ret = call->error; - read_unlock(&call->state_lock); - } + if (rxrpc_call_is_complete(call) && + call->error < 0) + ret = call->error; out: call->tx_pending = txb; _leave(" = %d", ret); @@ -543,7 +603,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ - rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p\n", call); return call; } @@ -556,7 +615,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { - enum rxrpc_call_state state; struct rxrpc_call *call; unsigned long now, j; bool dropped_lock = false; @@ -598,10 +656,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ ret = 0; - if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + if (rxrpc_call_is_complete(call)) goto out_put_unlock; } else { - switch (READ_ONCE(call->state)) { + switch (rxrpc_call_state(call)) { case RXRPC_CALL_UNINITIALISED: case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: @@ -655,17 +713,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) break; } - state = READ_ONCE(call->state); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, state, call->conn); - - if (state >= RXRPC_CALL_COMPLETE) { + if (rxrpc_call_is_complete(call)) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (p.command == RXRPC_CMD_SEND_ABORT) { + rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, + rxrpc_abort_call_sendmsg); ret = 0; - if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) - ret = rxrpc_send_abort_packet(call); } else if (p.command != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else { @@ -705,34 +759,17 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, bool dropped_lock = false; int ret; - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + _enter("{%d},", call->debug_id); ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); mutex_lock(&call->user_mutex); - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - switch (READ_ONCE(call->state)) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - case RXRPC_CALL_SERVER_SEND_REPLY: - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, - notify_end_tx, &dropped_lock); - break; - case RXRPC_CALL_COMPLETE: - read_lock(&call->state_lock); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, + notify_end_tx, &dropped_lock); + if (ret == -ESHUTDOWN) ret = call->error; - read_unlock(&call->state_lock); - break; - default: - /* Request phase complete for this client call */ - trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); - ret = -EPROTO; - break; - } if (!dropped_lock) mutex_unlock(&call->user_mutex); @@ -747,24 +784,20 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @call: The call to be aborted * @abort_code: The abort code to stick into the ABORT packet * @error: Local error value - * @why: 3-char string indicating why. + * @why: Indication as to why. * * Allow a kernel service to abort a call, if it's still in an abortable state * and return true if the call was aborted, false if it was already complete. */ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, - u32 abort_code, int error, const char *why) + u32 abort_code, int error, enum rxrpc_abort_reason why) { bool aborted; - _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - - aborted = rxrpc_abort_call(why, call, 0, abort_code, error); - if (aborted) - rxrpc_send_abort_packet(call); - + aborted = rxrpc_propose_abort(call, abort_code, error, why); mutex_unlock(&call->user_mutex); return aborted; } diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index ff47ce4d3968..6b26bdb999d7 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr, { const u32 *label = nla_data(attr); + if (nla_len(attr) != sizeof(*label)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); + return -EINVAL; + } + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); return -EINVAL; @@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr, static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, - [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), + [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + valid_label), [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2317db02c764..72d2c204d5f3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1133,6 +1133,11 @@ skip: return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 148bb0a7fa5b..acb822b23af1 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -923,7 +923,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) @@ -990,7 +990,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85f0c3cfc877..f06622814a95 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1243,10 +1243,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - __clear_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2106003645a7..c2ce12538008 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 015714398007..815baf308236 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); else - __clear_bit(RQ_SECURE, &rqstp->rq_flags); + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - __set_bit(RQ_LOCAL, &rqstp->rq_flags); + set_bit(RQ_LOCAL, &rqstp->rq_flags); else - __clear_bit(RQ_LOCAL, &rqstp->rq_flags); + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 199fa012f18a..94b20fb47135 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) diff --git a/net/tipc/node.c b/net/tipc/node.c index 49ddc484c4fe..5e000fde8067 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. */ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; @@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..10176dec97ea 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -18,6 +18,7 @@ quiet_cmd_cc_o_c = CC $@ $(call if_changed_dep,cc_o_c) ifdef CONFIG_MODULES +KASAN_SANITIZE_.vmlinux.export.o := n targets += .vmlinux.export.o vmlinux: .vmlinux.export.o endif diff --git a/sound/core/control.c b/sound/core/control.c index 50e7ba66f187..82aa1af1d1d8 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1203,14 +1203,19 @@ static int snd_ctl_elem_read(struct snd_card *card, const u32 pattern = 0xdeadbeef; int ret; + down_read(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); - if (kctl == NULL) - return -ENOENT; + if (kctl == NULL) { + ret = -ENOENT; + goto unlock; + } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; - if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) - return -EPERM; + if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) { + ret = -EPERM; + goto unlock; + } snd_ctl_build_ioff(&control->id, kctl, index_offset); @@ -1220,7 +1225,7 @@ static int snd_ctl_elem_read(struct snd_card *card, info.id = control->id; ret = __snd_ctl_elem_info(card, kctl, &info, NULL); if (ret < 0) - return ret; + goto unlock; #endif if (!snd_ctl_skip_validation(&info)) @@ -1230,7 +1235,7 @@ static int snd_ctl_elem_read(struct snd_card *card, ret = kctl->get(kctl, control); snd_power_unref(card); if (ret < 0) - return ret; + goto unlock; if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, @@ -1238,8 +1243,11 @@ static int snd_ctl_elem_read(struct snd_card *card, control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); - return -EINVAL; + ret = -EINVAL; + goto unlock; } +unlock: + up_read(&card->controls_rwsem); return ret; } @@ -1253,9 +1261,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card, if (IS_ERR(control)) return PTR_ERR(control); - down_read(&card->controls_rwsem); result = snd_ctl_elem_read(card, control); - up_read(&card->controls_rwsem); if (result < 0) goto error; diff --git a/sound/core/control_led.c b/sound/core/control_led.c index f975cc85772b..3cadd40100f3 100644 --- a/sound/core/control_led.c +++ b/sound/core/control_led.c @@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si bool attach) { char buf2[256], *s, *os; - size_t len = max(sizeof(s) - 1, count); struct snd_ctl_elem_id id; int err; - strncpy(buf2, buf, len); - buf2[len] = '\0'; + if (strscpy(buf2, buf, sizeof(buf2)) < 0) + return -E2BIG; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; s = buf2; diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 91842c0c8c74..f7815ee24f83 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -598,8 +598,8 @@ static int cs35l41_system_suspend(struct device *dev) dev_dbg(cs35l41->dev, "System Suspend\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Suspend not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Suspend not supported\n"); + return 0; /* don't block the whole system suspend */ } ret = pm_runtime_force_suspend(dev); @@ -624,8 +624,8 @@ static int cs35l41_system_resume(struct device *dev) dev_dbg(cs35l41->dev, "System Resume\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Resume not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Resume not supported\n"); + return 0; /* don't block the whole system resume */ } if (cs35l41->reset_gpio) { @@ -647,6 +647,15 @@ static int cs35l41_system_resume(struct device *dev) return ret; } +static int cs35l41_runtime_idle(struct device *dev) +{ + struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); + + if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) + return -EBUSY; /* suspend not supported yet on this model */ + return 0; +} + static int cs35l41_runtime_suspend(struct device *dev) { struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); @@ -1536,7 +1545,8 @@ void cs35l41_hda_remove(struct device *dev) EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41); const struct dev_pm_ops cs35l41_hda_pm_ops = { - RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, NULL) + RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, + cs35l41_runtime_idle) SYSTEM_SLEEP_PM_OPS(cs35l41_system_suspend, cs35l41_system_resume) }; EXPORT_SYMBOL_NS_GPL(cs35l41_hda_pm_ops, SND_HDA_SCODEC_CS35L41); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 386dd9d9143f..9ea633fe9339 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1981,6 +1981,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), + SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3794b522c222..6fab7c8fc19a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3564,6 +3564,15 @@ static void alc256_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); + alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); + alc_update_coef_idx(codec, 0x08, 7<<4, 0); + alc_update_coef_idx(codec, 0x3b, 1<<15, 0); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + msleep(30); + } + if (!hp_pin) hp_pin = 0x21; @@ -3575,14 +3584,6 @@ static void alc256_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); - alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); - alc_update_coef_idx(codec, 0x08, 7<<4, 0); - alc_update_coef_idx(codec, 0x3b, 1<<15, 0); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - msleep(30); - } snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); @@ -3713,6 +3714,13 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + alc_update_coef_idx(codec, 0x33, 1<<11, 0); + msleep(30); + } + if (spec->codec_variant != ALC269_TYPE_ALC287 && spec->codec_variant != ALC269_TYPE_ALC245) /* required only at boot or S3 and S4 resume time */ @@ -3734,12 +3742,6 @@ static void alc225_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - alc_update_coef_idx(codec, 0x33, 1<<11, 0); - msleep(30); - } if (hp1_pin_sense || spec->ultra_low_power) snd_hda_codec_write(codec, hp_pin, 0, @@ -4644,6 +4646,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc_fixup_hp_gpio_led(codec, action, 0, 0x04); +} + static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4665,6 +4677,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec, alc285_fixup_hp_coef_micmute_led(codec, fix, action); } +static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + alc285_fixup_hp_gpio_micmute_led(codec, fix, action); +} + static void alc236_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7106,6 +7125,7 @@ enum { ALC285_FIXUP_ASUS_G533Z_PINS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, @@ -8486,6 +8506,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_mute_led, + }, [ALC236_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_gpio_led, @@ -9239,6 +9263,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), @@ -9327,6 +9352,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), @@ -9406,6 +9432,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1f0b5527c594..0d283e41f66d 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -209,6 +209,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "M5402RA"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"), } @@ -220,6 +227,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Redmi Book Pro 14 2022"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Razer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Blade 14 (2022) - RZ09-0427"), + } + }, {} }; diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c index 644300e88b4c..fcf4fbaed3c7 100644 --- a/sound/soc/codecs/rt9120.c +++ b/sound/soc/codecs/rt9120.c @@ -177,8 +177,20 @@ static int rt9120_codec_probe(struct snd_soc_component *comp) return 0; } +static int rt9120_codec_suspend(struct snd_soc_component *comp) +{ + return pm_runtime_force_suspend(comp->dev); +} + +static int rt9120_codec_resume(struct snd_soc_component *comp) +{ + return pm_runtime_force_resume(comp->dev); +} + static const struct snd_soc_component_driver rt9120_component_driver = { .probe = rt9120_codec_probe, + .suspend = rt9120_codec_suspend, + .resume = rt9120_codec_resume, .controls = rt9120_snd_controls, .num_controls = ARRAY_SIZE(rt9120_snd_controls), .dapm_widgets = rt9120_dapm_widgets, diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index ca6a01a230af..791d8738d1c0 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, int dcs_mask; int dcs_l, dcs_r; int dcs_l_reg, dcs_r_reg; + int an_out_reg; int timeout; int pwr_reg; @@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1; dcs_r_reg = WM8904_DC_SERVO_8; dcs_l_reg = WM8904_DC_SERVO_9; + an_out_reg = WM8904_ANALOGUE_OUT1_LEFT; dcs_l = 0; dcs_r = 1; break; @@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3; dcs_r_reg = WM8904_DC_SERVO_6; dcs_l_reg = WM8904_DC_SERVO_7; + an_out_reg = WM8904_ANALOGUE_OUT2_LEFT; dcs_l = 2; dcs_r = 3; break; @@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, reg, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP); + + /* Update volume, requires PGA to be powered */ + val = snd_soc_component_read(component, an_out_reg); + snd_soc_component_write(component, an_out_reg, val); break; case SND_SOC_DAPM_POST_PMU: diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index c836848ef0a6..8d14b5593658 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -121,11 +121,11 @@ static const struct snd_soc_dapm_route audio_map[] = { static const struct snd_soc_dapm_route audio_map_ac97[] = { /* 1st half -- Normal DAPM routes */ - {"Playback", NULL, "AC97 Playback"}, - {"AC97 Capture", NULL, "Capture"}, + {"AC97 Playback", NULL, "CPU AC97 Playback"}, + {"CPU AC97 Capture", NULL, "AC97 Capture"}, /* 2nd half -- ASRC DAPM routes */ - {"AC97 Playback", NULL, "ASRC-Playback"}, - {"ASRC-Capture", NULL, "AC97 Capture"}, + {"CPU AC97 Playback", NULL, "ASRC-Playback"}, + {"ASRC-Capture", NULL, "CPU AC97 Capture"}, }; static const struct snd_soc_dapm_route audio_map_tx[] = { diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 7b17f152bbf3..94341e4352b3 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -315,21 +315,21 @@ static int hwvad_detected(struct snd_kcontrol *kcontrol, static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(1), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(2), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(3), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(4), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(5), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv), SOC_ENUM_EXT("MICFIL Quality Select", fsl_micfil_quality_enum, micfil_quality_get, micfil_quality_set), diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index c9e0e31d5b34..46a53551b955 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1189,14 +1189,14 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { .symmetric_channels = 1, .probe = fsl_ssi_dai_probe, .playback = { - .stream_name = "AC97 Playback", + .stream_name = "CPU AC97 Playback", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_48000, .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20, }, .capture = { - .stream_name = "AC97 Capture", + .stream_name = "CPU AC97 Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_48000, diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index a472de1909f4..99308ed85277 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -554,10 +554,12 @@ config SND_SOC_INTEL_SOF_NAU8825_MACH select SND_SOC_RT1015P select SND_SOC_MAX98373_I2C select SND_SOC_MAX98357A + select SND_SOC_NAU8315 select SND_SOC_DMIC select SND_SOC_HDAC_HDMI select SND_SOC_INTEL_HDA_DSP_COMMON select SND_SOC_INTEL_SOF_MAXIM_COMMON + select SND_SOC_INTEL_SOF_REALTEK_COMMON help This adds support for ASoC machine driver for SOF platforms with nau8825 codec. diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c index 27880224359d..a800854c2831 100644 --- a/sound/soc/intel/boards/sof_nau8825.c +++ b/sound/soc/intel/boards/sof_nau8825.c @@ -48,6 +48,7 @@ #define SOF_MAX98373_SPEAKER_AMP_PRESENT BIT(15) #define SOF_MAX98360A_SPEAKER_AMP_PRESENT BIT(16) #define SOF_RT1015P_SPEAKER_AMP_PRESENT BIT(17) +#define SOF_NAU8318_SPEAKER_AMP_PRESENT BIT(18) static unsigned long sof_nau8825_quirk = SOF_NAU8825_SSP_CODEC(0); @@ -338,6 +339,13 @@ static struct snd_soc_dai_link_component rt1019p_component[] = { } }; +static struct snd_soc_dai_link_component nau8318_components[] = { + { + .name = "NVTN2012:00", + .dai_name = "nau8315-hifi", + } +}; + static struct snd_soc_dai_link_component dummy_component[] = { { .name = "snd-soc-dummy", @@ -486,6 +494,11 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, max_98360a_dai_link(&links[id]); } else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) { sof_rt1015p_dai_link(&links[id]); + } else if (sof_nau8825_quirk & + SOF_NAU8318_SPEAKER_AMP_PRESENT) { + links[id].codecs = nau8318_components; + links[id].num_codecs = ARRAY_SIZE(nau8318_components); + links[id].init = speaker_codec_init; } else { goto devm_err; } @@ -618,7 +631,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1019p_nau8825", + .name = "adl_rt1019p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1019P_SPEAKER_AMP_PRESENT | @@ -626,7 +639,7 @@ static const struct platform_device_id board_ids[] = { SOF_NAU8825_NUM_HDMIDEV(4)), }, { - .name = "adl_max98373_nau8825", + .name = "adl_max98373_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98373_SPEAKER_AMP_PRESENT | @@ -637,7 +650,7 @@ static const struct platform_device_id board_ids[] = { }, { /* The limitation of length of char array, shorten the name */ - .name = "adl_mx98360a_nau8825", + .name = "adl_mx98360a_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98360A_SPEAKER_AMP_PRESENT | @@ -648,7 +661,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1015p_nau8825", + .name = "adl_rt1015p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1015P_SPEAKER_AMP_PRESENT | @@ -657,6 +670,16 @@ static const struct platform_device_id board_ids[] = { SOF_BT_OFFLOAD_SSP(2) | SOF_SSP_BT_OFFLOAD_PRESENT), }, + { + .name = "adl_nau8318_8825", + .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | + SOF_SPEAKER_AMP_PRESENT | + SOF_NAU8318_SPEAKER_AMP_PRESENT | + SOF_NAU8825_SSP_AMP(1) | + SOF_NAU8825_NUM_HDMIDEV(4) | + SOF_BT_OFFLOAD_SSP(2) | + SOF_SSP_BT_OFFLOAD_PRESENT), + }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 60aee56f94bd..56ee5fef66a8 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -450,6 +450,11 @@ static const struct snd_soc_acpi_codecs adl_lt6911_hdmi = { .codecs = {"INTC10B0"} }; +static const struct snd_soc_acpi_codecs adl_nau8318_amp = { + .num_codecs = 1, + .codecs = {"NVTN2012"} +}; + struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { { .comp_ids = &adl_rt5682_rt5682s_hp, @@ -474,21 +479,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1019p_nau8825", + .drv_name = "adl_rt1019p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1019p_amp, .sof_tplg_filename = "sof-adl-rt1019-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_max98373_nau8825", + .drv_name = "adl_max98373_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98373_amp, .sof_tplg_filename = "sof-adl-max98373-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_mx98360a_nau8825", + .drv_name = "adl_mx98360a_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98360a_amp, .sof_tplg_filename = "sof-adl-max98360a-nau8825.tplg", @@ -502,13 +507,20 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1015p_nau8825", + .drv_name = "adl_rt1015p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1015p_amp, .sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg", }, { .id = "10508825", + .drv_name = "adl_nau8318_8825", + .machine_quirk = snd_soc_acpi_codec_list, + .quirk_data = &adl_nau8318_amp, + .sof_tplg_filename = "sof-adl-nau8318-nau8825.tplg", + }, + { + .id = "10508825", .drv_name = "sof_nau8825", .sof_tplg_filename = "sof-adl-nau8825.tplg", }, diff --git a/sound/soc/intel/common/soc-acpi-intel-rpl-match.c b/sound/soc/intel/common/soc-acpi-intel-rpl-match.c index 31b43116e3d8..07f96a11ea2f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-rpl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-rpl-match.c @@ -203,6 +203,25 @@ static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link2_rt1316_link01_rt71 {} }; +static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link2_rt1316_link01[] = { + { + .mask = BIT(2), + .num_adr = ARRAY_SIZE(rt711_sdca_2_adr), + .adr_d = rt711_sdca_2_adr, + }, + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt1316_0_group2_adr), + .adr_d = rt1316_0_group2_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt1316_1_group2_adr), + .adr_d = rt1316_1_group2_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12_rt714_link3[] = { { .mask = BIT(0), @@ -227,6 +246,25 @@ static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12_rt71 {} }; +static const struct snd_soc_acpi_link_adr rpl_sdw_rt711_link0_rt1318_link12[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt711_sdca_0_adr), + .adr_d = rt711_sdca_0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(rt1318_1_group1_adr), + .adr_d = rt1318_1_group1_adr, + }, + { + .mask = BIT(2), + .num_adr = ARRAY_SIZE(rt1318_2_group1_adr), + .adr_d = rt1318_2_group1_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr rpl_sdw_rt1316_link12_rt714_link0[] = { { .mask = BIT(1), @@ -272,12 +310,24 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_rpl_sdw_machines[] = { .sof_tplg_filename = "sof-rpl-rt711-l0-rt1318-l12-rt714-l3.tplg", }, { + .link_mask = 0x7, /* rt711 on link0 & two rt1318s on link1 and link2 */ + .links = rpl_sdw_rt711_link0_rt1318_link12, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-rpl-rt711-l0-rt1318-l12.tplg", + }, + { .link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */ .links = rpl_sdw_rt1316_link12_rt714_link0, .drv_name = "sof_sdw", .sof_tplg_filename = "sof-rpl-rt1316-l12-rt714-l0.tplg", }, { + .link_mask = 0x7, /* rt711 on link2 & two rt1316s on link0 and link1 */ + .links = rpl_sdw_rt711_link2_rt1316_link01, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-rpl-rt711-l2-rt1316-l01.tplg", + }, + { .link_mask = 0x1, /* link0 required */ .links = rpl_rvp, .drv_name = "sof_sdw", diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 363fa4d47680..b027fba8233d 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -182,10 +182,12 @@ config SND_SOC_MT8186_MT6366_DA7219_MAX98357 If unsure select "N". config SND_SOC_MT8186_MT6366_RT1019_RT5682S - tristate "ASoC Audio driver for MT8186 with RT1019 RT5682S codec" + tristate "ASoC Audio driver for MT8186 with RT1019 RT5682S MAX98357A/MAX98360 codec" depends on I2C && GPIOLIB depends on SND_SOC_MT8186 && MTK_PMIC_WRAP + select SND_SOC_MAX98357A select SND_SOC_MT6358 + select SND_SOC_MAX98357A select SND_SOC_RT1015P select SND_SOC_RT5682S select SND_SOC_BT_SCO diff --git a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c index 8f77a0bc1dc8..af44e331dae8 100644 --- a/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c +++ b/sound/soc/mediatek/mt8186/mt8186-mt6366-rt1019-rt5682s.c @@ -1083,6 +1083,21 @@ static struct snd_soc_card mt8186_mt6366_rt1019_rt5682s_soc_card = { .num_configs = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_codec_conf), }; +static struct snd_soc_card mt8186_mt6366_rt5682s_max98360_soc_card = { + .name = "mt8186_rt5682s_max98360", + .owner = THIS_MODULE, + .dai_link = mt8186_mt6366_rt1019_rt5682s_dai_links, + .num_links = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_dai_links), + .controls = mt8186_mt6366_rt1019_rt5682s_controls, + .num_controls = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_controls), + .dapm_widgets = mt8186_mt6366_rt1019_rt5682s_widgets, + .num_dapm_widgets = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_widgets), + .dapm_routes = mt8186_mt6366_rt1019_rt5682s_routes, + .num_dapm_routes = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_routes), + .codec_conf = mt8186_mt6366_rt1019_rt5682s_codec_conf, + .num_configs = ARRAY_SIZE(mt8186_mt6366_rt1019_rt5682s_codec_conf), +}; + static int mt8186_mt6366_rt1019_rt5682s_dev_probe(struct platform_device *pdev) { struct snd_soc_card *card; @@ -1232,9 +1247,14 @@ err_adsp_node: #if IS_ENABLED(CONFIG_OF) static const struct of_device_id mt8186_mt6366_rt1019_rt5682s_dt_match[] = { - { .compatible = "mediatek,mt8186-mt6366-rt1019-rt5682s-sound", + { + .compatible = "mediatek,mt8186-mt6366-rt1019-rt5682s-sound", .data = &mt8186_mt6366_rt1019_rt5682s_soc_card, }, + { + .compatible = "mediatek,mt8186-mt6366-rt5682s-max98360-sound", + .data = &mt8186_mt6366_rt5682s_max98360_soc_card, + }, {} }; MODULE_DEVICE_TABLE(of, mt8186_mt6366_rt1019_rt5682s_dt_match); diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 96a6d4731e6f..e7b00d1d9e99 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -2,7 +2,6 @@ menuconfig SND_SOC_QCOM tristate "ASoC support for QCOM platforms" depends on ARCH_QCOM || COMPILE_TEST - imply SND_SOC_QCOM_COMMON help Say Y or M if you want to add support to use audio devices in Qualcomm Technologies SOC-based platforms. @@ -60,14 +59,16 @@ config SND_SOC_STORM config SND_SOC_APQ8016_SBC tristate "SoC Audio support for APQ8016 SBC platforms" select SND_SOC_LPASS_APQ8016 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8016 SOC-based systems. Say Y if you want to use audio devices on MI2S. config SND_SOC_QCOM_COMMON - depends on SOUNDWIRE + tristate + +config SND_SOC_QCOM_SDW tristate config SND_SOC_QDSP6_COMMON @@ -144,7 +145,7 @@ config SND_SOC_MSM8996 depends on QCOM_APR depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8096 SoC-based systems. @@ -155,7 +156,7 @@ config SND_SOC_SDM845 depends on QCOM_APR && I2C && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_RT5663 select SND_SOC_MAX98927 imply SND_SOC_CROS_EC_CODEC @@ -169,7 +170,8 @@ config SND_SOC_SM8250 depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SM8250 SoC-based systems. @@ -180,7 +182,8 @@ config SND_SOC_SC8280XP depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SC8280XP SoC-based systems. @@ -190,7 +193,7 @@ config SND_SOC_SC7180 tristate "SoC Machine driver for SC7180 boards" depends on I2C && GPIOLIB depends on SOUNDWIRE || SOUNDWIRE=n - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7180 select SND_SOC_MAX98357A select SND_SOC_RT5682_I2C @@ -204,7 +207,7 @@ config SND_SOC_SC7180 config SND_SOC_SC7280 tristate "SoC Machine driver for SC7280 boards" depends on I2C && SOUNDWIRE - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7280 select SND_SOC_MAX98357A select SND_SOC_WCD938X_SDW diff --git a/sound/soc/qcom/Makefile b/sound/soc/qcom/Makefile index 8b97172cf990..254350d9dc06 100644 --- a/sound/soc/qcom/Makefile +++ b/sound/soc/qcom/Makefile @@ -28,6 +28,7 @@ snd-soc-sdm845-objs := sdm845.o snd-soc-sm8250-objs := sm8250.o snd-soc-sc8280xp-objs := sc8280xp.o snd-soc-qcom-common-objs := common.o +snd-soc-qcom-sdw-objs := sdw.o obj-$(CONFIG_SND_SOC_STORM) += snd-soc-storm.o obj-$(CONFIG_SND_SOC_APQ8016_SBC) += snd-soc-apq8016-sbc.o @@ -38,6 +39,7 @@ obj-$(CONFIG_SND_SOC_SC8280XP) += snd-soc-sc8280xp.o obj-$(CONFIG_SND_SOC_SDM845) += snd-soc-sdm845.o obj-$(CONFIG_SND_SOC_SM8250) += snd-soc-sm8250.o obj-$(CONFIG_SND_SOC_QCOM_COMMON) += snd-soc-qcom-common.o +obj-$(CONFIG_SND_SOC_QCOM_SDW) += snd-soc-qcom-sdw.o #DSP lib obj-$(CONFIG_SND_SOC_QDSP6) += qdsp6/ diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 49c74c1662a3..96fe80241fb4 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -180,120 +180,6 @@ err_put_np: } EXPORT_SYMBOL_GPL(qcom_snd_parse_of); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - int ret; - - if (!sruntime) - return 0; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - break; - default: - return 0; - } - - if (*stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - - ret = sdw_prepare_stream(sruntime); - if (ret) - return ret; - - /** - * NOTE: there is a strict hw requirement about the ordering of port - * enables and actual WSA881x PA enable. PA enable should only happen - * after soundwire ports are enabled if not DC on the line is - * accumulated resulting in Click/Pop Noise - * PA enable/mute are handled as part of codec DAPM and digital mute. - */ - - ret = sdw_enable_stream(sruntime); - if (ret) { - sdw_deprepare_stream(sruntime); - return ret; - } - *stream_prepared = true; - - return ret; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); - -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - struct sdw_stream_runtime *sruntime; - int i; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - for_each_rtd_codec_dais(rtd, i, codec_dai) { - sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); - if (sruntime != ERR_PTR(-ENOTSUPP)) - *psruntime = sruntime; - } - break; - } - - return 0; - -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); - -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - if (sruntime && *stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - break; - default: - break; - } - - return 0; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); - int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup) { diff --git a/sound/soc/qcom/common.h b/sound/soc/qcom/common.h index 3ef5bb6d12df..d7f80ee5ae26 100644 --- a/sound/soc/qcom/common.h +++ b/sound/soc/qcom/common.h @@ -5,19 +5,9 @@ #define __QCOM_SND_COMMON_H__ #include <sound/soc.h> -#include <linux/soundwire/sdw.h> int qcom_snd_parse_of(struct snd_soc_card *card); int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *runtime, - bool *stream_prepared); -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime); -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared); #endif diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 54353842dc07..dbdaaa85ce48 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -1037,10 +1037,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, struct lpass_data *data) { struct device_node *node; - int ret, id; + int ret, i, id; /* Allow all channels by default for backwards compatibility */ - for (id = 0; id < data->variant->num_dai; id++) { + for (i = 0; i < data->variant->num_dai; i++) { + id = data->variant->dai_driver[i].id; data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; } diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index ade44ad7c585..14d9fea33d16 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -12,6 +12,7 @@ #include <linux/input-event-codes.h> #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sc8280xp" diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c new file mode 100644 index 000000000000..10249519a39e --- /dev/null +++ b/sound/soc/qcom/sdw.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018, Linaro Limited. +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#include <linux/module.h> +#include <sound/soc.h> +#include "qdsp6/q6afe.h" +#include "sdw.h" + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + int ret; + + if (!sruntime) + return 0; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + break; + default: + return 0; + } + + if (*stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + + ret = sdw_prepare_stream(sruntime); + if (ret) + return ret; + + /** + * NOTE: there is a strict hw requirement about the ordering of port + * enables and actual WSA881x PA enable. PA enable should only happen + * after soundwire ports are enabled if not DC on the line is + * accumulated resulting in Click/Pop Noise + * PA enable/mute are handled as part of codec DAPM and digital mute. + */ + + ret = sdw_enable_stream(sruntime); + if (ret) { + sdw_deprepare_stream(sruntime); + return ret; + } + *stream_prepared = true; + + return ret; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); + +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + struct sdw_stream_runtime *sruntime; + int i; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + for_each_rtd_codec_dais(rtd, i, codec_dai) { + sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); + if (sruntime != ERR_PTR(-ENOTSUPP)) + *psruntime = sruntime; + } + break; + } + + return 0; + +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); + +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + if (sruntime && *stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/qcom/sdw.h b/sound/soc/qcom/sdw.h new file mode 100644 index 000000000000..d74cbb84da13 --- /dev/null +++ b/sound/soc/qcom/sdw.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#ifndef __QCOM_SND_SDW_H__ +#define __QCOM_SND_SDW_H__ + +#include <linux/soundwire/sdw.h> + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *runtime, + bool *stream_prepared); +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime); +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared); +#endif diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c index 8dbe9ef41b1c..9626a9ef78c2 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c @@ -12,6 +12,7 @@ #include <linux/input-event-codes.h> #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sm8250" #define MI2S_BCLK_RATE 1536000 diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index d9a3ce7b69e1..ade0507328af 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -353,7 +353,9 @@ int snd_sof_dbg_init(struct snd_sof_dev *sdev) return err; } - return 0; + return snd_sof_debugfs_buf_item(sdev, &sdev->fw_state, + sizeof(sdev->fw_state), + "fw_state", 0444); } EXPORT_SYMBOL_GPL(snd_sof_dbg_init); diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c index df740be645e8..8722bbd7fd3d 100644 --- a/sound/soc/sof/pm.c +++ b/sound/soc/sof/pm.c @@ -182,7 +182,7 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm; const struct sof_ipc_tplg_ops *tplg_ops = sdev->ipc->ops->tplg; pm_message_t pm_state; - u32 target_state = 0; + u32 target_state = snd_sof_dsp_power_target(sdev); int ret; /* do nothing if dsp suspend callback is not set */ @@ -192,6 +192,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) if (runtime_suspend && !sof_ops(sdev)->runtime_suspend) return 0; + if (tplg_ops && tplg_ops->tear_down_all_pipelines) + tplg_ops->tear_down_all_pipelines(sdev, false); + if (sdev->fw_state != SOF_FW_BOOT_COMPLETE) goto suspend; @@ -206,7 +209,6 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) } } - target_state = snd_sof_dsp_power_target(sdev); pm_state.event = target_state; /* Skip to platform-specific suspend if DSP is entering D0 */ @@ -217,9 +219,6 @@ static int sof_suspend(struct device *dev, bool runtime_suspend) goto suspend; } - if (tplg_ops->tear_down_all_pipelines) - tplg_ops->tear_down_all_pipelines(sdev, false); - /* suspend DMA trace */ sof_fw_trace_suspend(sdev, pm_state); diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 41ac7185b42b..4727043fd745 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -471,7 +471,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, subs = find_matching_substream(chip, stream, target->sync_ep, target->fmt_type); if (!subs) - return sync_fmt; + goto end; high_score = 0; list_for_each_entry(fp, &subs->fmt_list, list) { @@ -485,6 +485,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, } } + end: if (fixed_rate) *fixed_rate = snd_usb_pcm_has_fixed_rate(subs); return sync_fmt; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 99a66d0ef5b2..d959da7a1afb 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -160,9 +160,12 @@ find_substream_format(struct snd_usb_substream *subs, bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) { const struct audioformat *fp; - struct snd_usb_audio *chip = subs->stream->chip; + struct snd_usb_audio *chip; int rate = -1; + if (!subs) + return false; + chip = subs->stream->chip; if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) return false; list_for_each_entry(fp, &subs->fmt_list, list) { @@ -525,6 +528,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, if (snd_usb_endpoint_compatible(chip, subs->data_endpoint, fmt, hw_params)) goto unlock; + if (stop_endpoints(subs, false)) + sync_pending_stops(subs); close_endpoints(chip, subs); } @@ -787,11 +792,27 @@ static int apply_hw_params_minmax(struct snd_interval *it, unsigned int rmin, return changed; } +/* get the specified endpoint object that is being used by other streams + * (i.e. the parameter is locked) + */ +static const struct snd_usb_endpoint * +get_endpoint_in_use(struct snd_usb_audio *chip, int endpoint, + const struct snd_usb_endpoint *ref_ep) +{ + const struct snd_usb_endpoint *ep; + + ep = snd_usb_get_endpoint(chip, endpoint); + if (ep && ep->cur_audiofmt && (ep != ref_ep || ep->opened > 1)) + return ep; + return NULL; +} + static int hw_rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_usb_substream *subs = rule->private; struct snd_usb_audio *chip = subs->stream->chip; + const struct snd_usb_endpoint *ep; const struct audioformat *fp; struct snd_interval *it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); unsigned int rmin, rmax, r; @@ -803,6 +824,29 @@ static int hw_rule_rate(struct snd_pcm_hw_params *params, list_for_each_entry(fp, &subs->fmt_list, list) { if (!hw_check_valid_format(subs, params, fp)) continue; + + ep = get_endpoint_in_use(chip, fp->endpoint, + subs->data_endpoint); + if (ep) { + hwc_debug("rate limit %d for ep#%x\n", + ep->cur_rate, fp->endpoint); + rmin = min(rmin, ep->cur_rate); + rmax = max(rmax, ep->cur_rate); + continue; + } + + if (fp->implicit_fb) { + ep = get_endpoint_in_use(chip, fp->sync_ep, + subs->sync_endpoint); + if (ep) { + hwc_debug("rate limit %d for sync_ep#%x\n", + ep->cur_rate, fp->sync_ep); + rmin = min(rmin, ep->cur_rate); + rmax = max(rmax, ep->cur_rate); + continue; + } + } + r = snd_usb_endpoint_get_clock_rate(chip, fp->clock); if (r > 0) { if (!snd_interval_test(it, r)) @@ -872,6 +916,8 @@ static int hw_rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_usb_substream *subs = rule->private; + struct snd_usb_audio *chip = subs->stream->chip; + const struct snd_usb_endpoint *ep; const struct audioformat *fp; struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); u64 fbits; @@ -881,6 +927,27 @@ static int hw_rule_format(struct snd_pcm_hw_params *params, list_for_each_entry(fp, &subs->fmt_list, list) { if (!hw_check_valid_format(subs, params, fp)) continue; + + ep = get_endpoint_in_use(chip, fp->endpoint, + subs->data_endpoint); + if (ep) { + hwc_debug("format limit %d for ep#%x\n", + ep->cur_format, fp->endpoint); + fbits |= pcm_format_to_bits(ep->cur_format); + continue; + } + + if (fp->implicit_fb) { + ep = get_endpoint_in_use(chip, fp->sync_ep, + subs->sync_endpoint); + if (ep) { + hwc_debug("format limit %d for sync_ep#%x\n", + ep->cur_format, fp->sync_ep); + fbits |= pcm_format_to_bits(ep->cur_format); + continue; + } + } + fbits |= fp->formats; } return apply_hw_params_format_bits(fmt, fbits); @@ -913,98 +980,95 @@ static int hw_rule_period_time(struct snd_pcm_hw_params *params, return apply_hw_params_minmax(it, pmin, UINT_MAX); } -/* get the EP or the sync EP for implicit fb when it's already set up */ -static const struct snd_usb_endpoint * -get_sync_ep_from_substream(struct snd_usb_substream *subs) -{ - struct snd_usb_audio *chip = subs->stream->chip; - const struct audioformat *fp; - const struct snd_usb_endpoint *ep; - - list_for_each_entry(fp, &subs->fmt_list, list) { - ep = snd_usb_get_endpoint(chip, fp->endpoint); - if (ep && ep->cur_audiofmt) { - /* if EP is already opened solely for this substream, - * we still allow us to change the parameter; otherwise - * this substream has to follow the existing parameter - */ - if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1) - return ep; - } - if (!fp->implicit_fb) - continue; - /* for the implicit fb, check the sync ep as well */ - ep = snd_usb_get_endpoint(chip, fp->sync_ep); - if (ep && ep->cur_audiofmt) - return ep; - } - return NULL; -} - /* additional hw constraints for implicit feedback mode */ -static int hw_rule_format_implicit_fb(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_usb_substream *subs = rule->private; - const struct snd_usb_endpoint *ep; - struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); - - ep = get_sync_ep_from_substream(subs); - if (!ep) - return 0; - - hwc_debug("applying %s\n", __func__); - return apply_hw_params_format_bits(fmt, pcm_format_to_bits(ep->cur_format)); -} - -static int hw_rule_rate_implicit_fb(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) -{ - struct snd_usb_substream *subs = rule->private; - const struct snd_usb_endpoint *ep; - struct snd_interval *it; - - ep = get_sync_ep_from_substream(subs); - if (!ep) - return 0; - - hwc_debug("applying %s\n", __func__); - it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); - return apply_hw_params_minmax(it, ep->cur_rate, ep->cur_rate); -} - static int hw_rule_period_size_implicit_fb(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_usb_substream *subs = rule->private; + struct snd_usb_audio *chip = subs->stream->chip; + const struct audioformat *fp; const struct snd_usb_endpoint *ep; struct snd_interval *it; + unsigned int rmin, rmax; - ep = get_sync_ep_from_substream(subs); - if (!ep) - return 0; - - hwc_debug("applying %s\n", __func__); it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); - return apply_hw_params_minmax(it, ep->cur_period_frames, - ep->cur_period_frames); + hwc_debug("hw_rule_period_size: (%u,%u)\n", it->min, it->max); + rmin = UINT_MAX; + rmax = 0; + list_for_each_entry(fp, &subs->fmt_list, list) { + if (!hw_check_valid_format(subs, params, fp)) + continue; + ep = get_endpoint_in_use(chip, fp->endpoint, + subs->data_endpoint); + if (ep) { + hwc_debug("period size limit %d for ep#%x\n", + ep->cur_period_frames, fp->endpoint); + rmin = min(rmin, ep->cur_period_frames); + rmax = max(rmax, ep->cur_period_frames); + continue; + } + + if (fp->implicit_fb) { + ep = get_endpoint_in_use(chip, fp->sync_ep, + subs->sync_endpoint); + if (ep) { + hwc_debug("period size limit %d for sync_ep#%x\n", + ep->cur_period_frames, fp->sync_ep); + rmin = min(rmin, ep->cur_period_frames); + rmax = max(rmax, ep->cur_period_frames); + continue; + } + } + } + + if (!rmax) + return 0; /* no limit by implicit fb */ + return apply_hw_params_minmax(it, rmin, rmax); } static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_usb_substream *subs = rule->private; + struct snd_usb_audio *chip = subs->stream->chip; + const struct audioformat *fp; const struct snd_usb_endpoint *ep; struct snd_interval *it; + unsigned int rmin, rmax; - ep = get_sync_ep_from_substream(subs); - if (!ep) - return 0; - - hwc_debug("applying %s\n", __func__); it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIODS); - return apply_hw_params_minmax(it, ep->cur_buffer_periods, - ep->cur_buffer_periods); + hwc_debug("hw_rule_periods: (%u,%u)\n", it->min, it->max); + rmin = UINT_MAX; + rmax = 0; + list_for_each_entry(fp, &subs->fmt_list, list) { + if (!hw_check_valid_format(subs, params, fp)) + continue; + ep = get_endpoint_in_use(chip, fp->endpoint, + subs->data_endpoint); + if (ep) { + hwc_debug("periods limit %d for ep#%x\n", + ep->cur_buffer_periods, fp->endpoint); + rmin = min(rmin, ep->cur_buffer_periods); + rmax = max(rmax, ep->cur_buffer_periods); + continue; + } + + if (fp->implicit_fb) { + ep = get_endpoint_in_use(chip, fp->sync_ep, + subs->sync_endpoint); + if (ep) { + hwc_debug("periods limit %d for sync_ep#%x\n", + ep->cur_buffer_periods, fp->sync_ep); + rmin = min(rmin, ep->cur_buffer_periods); + rmax = max(rmax, ep->cur_buffer_periods); + continue; + } + } + } + + if (!rmax) + return 0; /* no limit by implicit fb */ + return apply_hw_params_minmax(it, rmin, rmax); } /* @@ -1113,16 +1177,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; /* additional hw constraints for implicit fb */ - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, - hw_rule_format_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_FORMAT, -1); - if (err < 0) - return err; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - hw_rule_rate_implicit_fb, subs, - SNDRV_PCM_HW_PARAM_RATE, -1); - if (err < 0) - return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, hw_rule_period_size_implicit_fb, subs, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); diff --git a/sound/usb/stream.c b/sound/usb/stream.c index f75601ca2d52..f10f4e6d3fb8 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1222,6 +1222,12 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, if (err < 0) return err; } + + /* try to set the interface... */ + usb_set_interface(chip->dev, iface_no, 0); + snd_usb_init_pitch(chip, fp); + snd_usb_init_sample_rate(chip, fp, fp->rate_max); + usb_set_interface(chip->dev, iface_no, altno); } return 0; } diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c index 4041748c12e5..b66e037710d0 100644 --- a/sound/xen/xen_snd_front.c +++ b/sound/xen/xen_snd_front.c @@ -311,7 +311,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, return xenbus_switch_state(xb_dev, XenbusStateInitialising); } -static int xen_drv_remove(struct xenbus_device *dev) +static void xen_drv_remove(struct xenbus_device *dev) { struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev); int to = 100; @@ -345,7 +345,6 @@ static int xen_drv_remove(struct xenbus_device *dev) xen_snd_drv_fini(front_info); xenbus_frontend_closed(dev); - return 0; } static const struct xenbus_device_id xen_drv_ids[] = { diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff..7380093ba9e7 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a3..a3bdd9803f8c 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index e3000b2992d7..6f90706d0644 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -96,4 +96,7 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 06893d6dfb7a..9dc4919c769b 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -24,4 +24,7 @@ static int errno; */ #define MAX_ERRNO 4095 +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index ef47e71e2be3..137552216e46 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -19,4 +19,7 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a3cebc4bc3ac..96ac8afc5aee 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -303,4 +303,7 @@ void perror(const char *msg) fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 92378c4b9660..a24000d1e822 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -419,4 +419,7 @@ char *u64toa(uint64_t in) return itoa_buffer; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ad97c0d522b8..fffdaf6ff467 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -88,8 +88,11 @@ void *memset(void *dst, int b, size_t len) { char *p = dst; - while (len--) + while (len--) { + /* prevent gcc from recognizing memset() here */ + asm volatile(""); *(p++) = b; + } return dst; } @@ -285,4 +288,7 @@ char *strrchr(const char *s, int c) return (char *)ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index ce3ee03aa679..78473d34e27c 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -1243,5 +1243,7 @@ ssize_t write(int fd, const void *buf, size_t count) return ret; } +/* make sure to include all global symbols */ +#include "nolibc.h" #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d18b7661fdd7..84655361b9ad 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -25,4 +25,7 @@ time_t time(time_t *tptr) return tv.tv_sec; } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 959997034e55..fbbc0e68c001 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -26,13 +26,13 @@ #define S_IFSOCK 0140000 #define S_IFMT 0170000 -#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) -#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) -#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) -#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) -#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) -#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) -#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) +#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFMT) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFMT) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFMT) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK) /* dirent types */ #define DT_UNKNOWN 0x0 @@ -89,39 +89,46 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + /* for select() */ typedef struct { - uint32_t fd32[(FD_SETSIZE + 31) / 32]; + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; } fd_set; -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] &= ~(1U << (__fd & 31)); \ +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FX_SETBITMASK)); \ } while (0) -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fd32[__fd / 32] |= 1U << (__fd & 31); \ +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ } while (0) -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fd32[__fd / 32] & 1U << (__fd & 31)); \ - __r; \ +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SET_BITMASK)); \ + __r; \ }) -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - for (__idx = 0; __idx < (FD_SETSIZE+31) / 32; __idx ++) \ - __set->fd32[__idx] = 0; \ +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ } while (0) /* for poll() */ @@ -202,4 +209,7 @@ struct stat { }) #endif +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 1c25e20ee360..1cfcd52106a4 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -51,4 +51,7 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } +/* make sure to include all global symbols */ +#include "nolibc.h" + #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4350be739f4f..4b7c8b33069e 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -427,6 +427,15 @@ static int decode_instructions(struct objtool_file *file) if (func->type != STT_NOTYPE && func->type != STT_FUNC) continue; + if (func->offset == sec->sh.sh_size) { + /* Heuristic: likely an "end" symbol */ + if (func->type == STT_NOTYPE) + continue; + WARN("%s(): STT_FUNC at end of section", + func->name); + return -1; + } + if (func->return_thunk || func->alias != func) continue; diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c2504c39bdcb..5b8784675903 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -589,6 +589,8 @@ ifndef NO_LIBELF $(call feature_check,libbpf-bpf_program__set_insns) ifeq ($(feature-libbpf-bpf_program__set_insns), 1) CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS + else + dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources); endif $(call feature_check,libbpf-btf__raw_data) ifeq ($(feature-libbpf-btf__raw_data), 1) @@ -602,6 +604,8 @@ ifndef NO_LIBELF dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else + # Libbpf will be built as a static library from tools/lib/bpf. + LIBBPF_STATIC := 1 CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM @@ -1314,14 +1318,6 @@ tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) export perfexec_instdir_SQ -# If we install to $(HOME) we keep the traceevent default: -# $(HOME)/.traceevent/plugins -# Otherwise we install plugins into the global $(libdir). -ifdef DESTDIR -plugindir=$(libdir)/traceevent/plugins -plugindir_SQ= $(subst ','\'',$(plugindir)) -endif - print_var = $(eval $(print_var_code)) $(info $(MSG)) define print_var_code MSG = $(shell printf '...%40s: %s' $(1) $($(1))) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1e32c93b8042..b7d9c4206230 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -303,10 +303,12 @@ ifneq ($(OUTPUT),) else LIBBPF_OUTPUT = $(CURDIR)/libbpf endif -LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include -LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a -CFLAGS += -I$(LIBBPF_OUTPUT)/include +ifdef LIBBPF_STATIC + LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) + LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include + LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a + CFLAGS += -I$(LIBBPF_OUTPUT)/include +endif ifneq ($(OUTPUT),) LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd @@ -393,10 +395,8 @@ endif export PERL_PATH PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) -ifndef NO_LIBBPF - ifndef LIBBPF_DYNAMIC - PERFLIBS += $(LIBBPF) - endif +ifdef LIBBPF_STATIC + PERFLIBS += $(LIBBPF) endif # We choose to avoid "if .. else if .. else .. endif endif" @@ -756,12 +756,15 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(arch_errno_name_array) \ $(sync_file_range_arrays) \ $(LIBAPI) \ - $(LIBBPF) \ $(LIBPERF) \ $(LIBSUBCMD) \ $(LIBSYMBOL) \ bpf-skel +ifdef LIBBPF_STATIC +prepare: $(LIBBPF) +endif + $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index e20656c431a4..8ae0a1535293 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include <linux/kernel.h> +#include <linux/numa.h> #include <linux/rbtree.h> #include <linux/string.h> #include <linux/zalloc.h> @@ -185,22 +186,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. + */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1369,8 +1381,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ @@ -1824,6 +1836,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1831,22 +1856,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1861,6 +1892,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 86e06f136f40..d21fe0f32a6d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,7 +16,9 @@ #include "util/record.h" #include <api/fs/tracing_path.h> +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c index bd83d364cf30..91778b5c6125 100644 --- a/tools/perf/tests/bpf-script-test-prologue.c +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -20,6 +20,8 @@ # undef if #endif +typedef unsigned int __bitwise fmode_t; + #define FMODE_READ 0x1 #define FMODE_WRITE 0x2 diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 05e818a8bbad..009d6efb673c 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -222,19 +222,7 @@ installed_files_bin := bin/perf installed_files_bin += etc/bash_completion.d/perf installed_files_bin += libexec/perf-core/perf-archive -installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so -installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so - installed_files_all := $(installed_files_bin) -installed_files_all += $(installed_files_plugins) test_make_install := $(call test_dest_files,$(installed_files_all)) test_make_install_O := $(call test_dest_files,$(installed_files_all)) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 265d20cc126b..c2e323cd7d49 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2611,7 +2611,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b69..c6d21c07b14c 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include <linux/list.h> #include <sys/resource.h> + +#ifdef HAVE_LIBBPF_SUPPORT #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..85973e55489e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 9f255bc5f31c..93333a90bf3a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -50,6 +50,78 @@ static void sme_sigill(void) asm volatile(".inst 0x04bf5800" : : : "x0"); } +static void sme2_sigill(void) +{ + /* SMSTART ZA */ + asm volatile("msr S0_3_C4_C5_3, xzr" : : : ); + + /* ZERO ZT0 */ + asm volatile(".inst 0xc0480001" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme2p1_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */ + asm volatile(".inst 0xc120C000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smei16i32_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */ + asm volatile(".inst 0xa0800000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smebi32i32_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */ + asm volatile(".inst 0x80800008" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smeb16b16_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ + asm volatile(".inst 0xC1E41C00" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smef16f16_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */ + asm volatile(".inst 0xc1a41C00" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + static void sve_sigill(void) { /* RDVL x0, #0 */ @@ -159,6 +231,49 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "SME2", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME2, + .cpuinfo = "sme2", + .sigill_fn = sme2_sigill, + .sigill_reliable = true, + }, + { + .name = "SME 2.1", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME2P1, + .cpuinfo = "sme2p1", + .sigill_fn = sme2p1_sigill, + }, + { + .name = "SME I16I32", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_I16I32, + .cpuinfo = "smei16i32", + .sigill_fn = smei16i32_sigill, + }, + { + .name = "SME BI32I32", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_BI32I32, + .cpuinfo = "smebi32i32", + .sigill_fn = smebi32i32_sigill, + }, + { + .name = "SME B16B16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_B16B16, + .cpuinfo = "smeb16b16", + .sigill_fn = smeb16b16_sigill, + }, + { + .name = "SME F16F16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SME_F16F16, + .cpuinfo = "smef16f16", + .sigill_fn = smef16f16_sigill, + }, + { .name = "SVE", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_SVE, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index acd5e9f3bc0b..6ddf392329c9 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -23,6 +23,9 @@ .arch_extension sve +#define ID_AA64SMFR0_EL1_SMEver_SHIFT 56 +#define ID_AA64SMFR0_EL1_SMEver_WIDTH 4 + /* * LDR (vector to ZA array): * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] @@ -45,6 +48,26 @@ | ((\offset) & 7) .endm +/* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + .inst 0xe11f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + .inst 0xe13f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + .globl do_syscall do_syscall: // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 @@ -64,7 +87,7 @@ do_syscall: msr S3_3_C4_C2_2, x2 1: - // Load ZA if it's enabled - uses x12 as scratch due to SME LDR + // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR tbz x2, #SVCR_ZA_SHIFT, 1f mov w12, #0 ldr x2, =za_in @@ -73,6 +96,15 @@ do_syscall: add x12, x12, #1 cmp x1, x12 bne 2b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH + cbz x2, 1f + adrp x2, zt_in + add x2, x2, :lo12:zt_in + _ldr_zt 2 1: // Load GPRs x8-x28, and save our SP/FP for later comparison @@ -235,6 +267,15 @@ do_syscall: add x12, x12, #1 cmp x1, x12 bne 2b + + // ZT0 + mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 + ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ + #ID_AA64SMFR0_EL1_SMEver_WIDTH + cbz x2, 1f + adrp x2, zt_out + add x2, x2, :lo12:zt_out + _str_zt 2 1: // Save the SVE state if we have some diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index dd7ebe536d05..9800f9dc6b35 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -311,6 +311,35 @@ static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } +uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16))); +uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16))); + +static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + fill_random(zt_in, sizeof(zt_in)); + memset(zt_out, 0, sizeof(zt_out)); +} + +static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + int errors = 0; + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) + return 0; + + if (!(svcr & SVCR_ZA_MASK)) + return 0; + + if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) { + ksft_print_msg("SME VL %d ZT does not match\n", sme_vl); + errors++; + } + + return errors; +} + typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr); typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, @@ -334,6 +363,7 @@ static struct { { setup_ffr, check_ffr }, { setup_svcr, check_svcr }, { setup_za, check_za }, + { setup_zt, check_zt }, }; static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, @@ -474,6 +504,7 @@ int main(void) { int i; int tests = 1; /* FPSIMD */ + int sme_ver; srandom(getpid()); @@ -482,10 +513,15 @@ int main(void) tests += (sve_count_vls() * sme_count_vls()) * 3; ksft_set_plan(ARRAY_SIZE(syscalls) * tests); + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) + sme_ver = 2; + else + sme_ver = 1; + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) - ksft_print_msg("SME with FA64\n"); + ksft_print_msg("SME%d with FA64\n", sme_ver); else if (getauxval(AT_HWCAP2) & HWCAP2_SME) - ksft_print_msg("SME without FA64\n"); + ksft_print_msg("SME%d without FA64\n", sme_ver); for (i = 0; i < ARRAY_SIZE(syscalls); i++) test_one_syscall(&syscalls[i]); diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index df79d29664a1..ebc86757bdd8 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -12,3 +12,5 @@ vlset za-fork za-ptrace za-test +zt-ptrace +zt-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 36db61358ed5..50a70220ba6c 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -14,6 +14,8 @@ TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ sve-test \ ssve-test \ za-test \ + zt-ptrace \ + zt-test \ vlset TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress @@ -41,5 +43,8 @@ $(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o $(OUTPUT)/za-ptrace: za-ptrace.c $(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/zt-ptrace: zt-ptrace.c +$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index f8b2f41aac36..520385fcfede 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -370,6 +370,19 @@ static void start_za(struct child_data *child, int vl, int cpu) ksft_print_msg("Started %s\n", child->name); } +static void start_zt(struct child_data *child, int cpu) +{ + int ret; + + ret = asprintf(&child->name, "ZT-%d", cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + child_start(child, "./zt-test"); + + ksft_print_msg("Started %s\n", child->name); +} + static void probe_vls(int vls[], int *vl_count, int set_vl) { unsigned int vq; @@ -426,6 +439,7 @@ int main(int argc, char **argv) bool all_children_started = false; int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; + bool have_sme2; struct sigaction sa; while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { @@ -458,6 +472,13 @@ int main(int argc, char **argv) sme_vl_count = 0; } + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) { + tests += cpus; + have_sme2 = true; + } else { + have_sme2 = false; + } + /* Force context switching if we only have FPSIMD */ if (!sve_vl_count && !sme_vl_count) fpsimd_per_cpu = 2; @@ -468,8 +489,9 @@ int main(int argc, char **argv) ksft_print_header(); ksft_set_plan(tests); - ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n", - cpus, sve_vl_count, sme_vl_count); + ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n", + cpus, sve_vl_count, sme_vl_count, + have_sme2 ? "present" : "absent"); if (timeout > 0) ksft_print_msg("Will run for %ds\n", timeout); @@ -527,6 +549,9 @@ int main(int argc, char **argv) start_ssve(&children[num_children++], sme_vls[j], i); start_za(&children[num_children++], sme_vls[j], i); } + + if (have_sme2) + start_zt(&children[num_children++], i); } /* diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h index 7191e53ca1c0..9292bba5400b 100644 --- a/tools/testing/selftests/arm64/fp/sme-inst.h +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -48,4 +48,24 @@ | ((\offset) & 7) .endm +/* + * LDR (ZT0) + * + * LDR ZT0, nx + */ +.macro _ldr_zt nx + .inst 0xe11f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + +/* + * STR (ZT0) + * + * STR ZT0, nx + */ +.macro _str_zt nx + .inst 0xe13f8000 \ + | (((\nx) & 0x1f) << 5) +.endm + #endif diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c new file mode 100644 index 000000000000..996d9614a131 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_ZA +#define NT_ARM_ZA 0x40c +#endif +#ifndef NT_ARM_ZT +#define NT_ARM_ZT 0x40d +#endif + +#define EXPECTED_TESTS 3 + +static int sme_vl; + +static void fill_buf(char *buf, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + buf[i] = random(); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size) +{ + struct user_za_header *za; + void *p; + size_t sz = sizeof(*za); + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov)) + goto error; + + za = *buf; + if (za->size <= sz) + break; + + sz = za->size; + } + + return za; + +error: + return NULL; +} + +static int set_za(pid_t pid, const struct user_za_header *za) +{ + struct iovec iov; + + iov.iov_base = (void *)za; + iov.iov_len = za->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov); +} + +static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES]) +{ + struct iovec iov; + + iov.iov_base = zt; + iov.iov_len = ZT_SIG_REG_BYTES; + return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov); +} + + +static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES]) +{ + struct iovec iov; + + iov.iov_base = (void *)zt; + iov.iov_len = ZT_SIG_REG_BYTES; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov); +} + +/* Reading with ZA disabled returns all zeros */ +static void ptrace_za_disabled_read_zt(pid_t child) +{ + struct user_za_header za; + char zt[ZT_SIG_REG_BYTES]; + int ret, i; + bool fail = false; + + /* Disable PSTATE.ZA using the ZA interface */ + memset(&za, 0, sizeof(za)); + za.vl = sme_vl; + za.size = sizeof(za); + + ret = set_za(child, &za); + if (ret != 0) { + ksft_print_msg("Failed to disable ZA\n"); + fail = true; + } + + /* Read back ZT */ + ret = get_zt(child, zt); + if (ret != 0) { + ksft_print_msg("Failed to read ZT\n"); + fail = true; + } + + for (i = 0; i < ARRAY_SIZE(zt); i++) { + if (zt[i]) { + ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]); + fail = true; + } + } + + ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n"); +} + +/* Writing then reading ZT should return the data written */ +static void ptrace_set_get_zt(pid_t child) +{ + char zt_in[ZT_SIG_REG_BYTES]; + char zt_out[ZT_SIG_REG_BYTES]; + int ret, i; + bool fail = false; + + fill_buf(zt_in, sizeof(zt_in)); + + ret = set_zt(child, zt_in); + if (ret != 0) { + ksft_print_msg("Failed to set ZT\n"); + fail = true; + } + + ret = get_zt(child, zt_out); + if (ret != 0) { + ksft_print_msg("Failed to read ZT\n"); + fail = true; + } + + for (i = 0; i < ARRAY_SIZE(zt_in); i++) { + if (zt_in[i] != zt_out[i]) { + ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i, + zt_in[i], zt_out[i]); + fail = true; + } + } + + ksft_test_result(!fail, "ptrace_set_get_zt\n"); +} + +/* Writing ZT should set PSTATE.ZA */ +static void ptrace_enable_za_via_zt(pid_t child) +{ + struct user_za_header za_in; + struct user_za_header *za_out; + char zt[ZT_SIG_REG_BYTES]; + char *za_data; + size_t za_out_size; + int ret, i, vq; + bool fail = false; + + /* Disable PSTATE.ZA using the ZA interface */ + memset(&za_in, 0, sizeof(za_in)); + za_in.vl = sme_vl; + za_in.size = sizeof(za_in); + + ret = set_za(child, &za_in); + if (ret != 0) { + ksft_print_msg("Failed to disable ZA\n"); + fail = true; + } + + /* Write ZT */ + fill_buf(zt, sizeof(zt)); + ret = set_zt(child, zt); + if (ret != 0) { + ksft_print_msg("Failed to set ZT\n"); + fail = true; + } + + /* Read back ZA and check for register data */ + za_out = NULL; + za_out_size = 0; + if (get_za(child, (void **)&za_out, &za_out_size)) { + /* Should have an unchanged VL */ + if (za_out->vl != sme_vl) { + ksft_print_msg("VL changed from %d to %d\n", + sme_vl, za_out->vl); + fail = true; + } + vq = __sve_vq_from_vl(za_out->vl); + za_data = (char *)za_out + ZA_PT_ZA_OFFSET; + + /* Should have register data */ + if (za_out->size < ZA_PT_SIZE(vq)) { + ksft_print_msg("ZA data less than expected: %u < %u\n", + za_out->size, ZA_PT_SIZE(vq)); + fail = true; + vq = 0; + } + + /* That register data should be non-zero */ + for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) { + if (za_data[i]) { + ksft_print_msg("ZA byte %d is %x\n", + i, za_data[i]); + fail = true; + } + } + } else { + ksft_print_msg("Failed to read ZA\n"); + fail = true; + } + + ksft_test_result(!fail, "ptrace_enable_za_via_zt\n"); +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + ptrace_za_disabled_read_zt(child); + ptrace_set_get_zt(child); + ptrace_enable_za_via_zt(child); + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) { + ksft_set_plan(1); + ksft_exit_skip("SME2 not available\n"); + } + + /* We need a valid SME VL to enable/disable ZA */ + sme_vl = prctl(PR_SME_GET_VL); + if (sme_vl == -1) { + ksft_set_plan(1); + ksft_exit_skip("Failed to read SME VL: %d (%s)\n", + errno, strerror(errno)); + } + + ksft_set_plan(EXPECTED_TESTS); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S new file mode 100644 index 000000000000..d63286397638 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021-2 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZT context switch test +// Repeatedly writes unique test patterns into ZT0 +// and reads them back to verify integrity. + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" +#include "sme-inst.h" + +.arch_extension sve + +#define ZT_SZ 512 +#define ZT_B (ZT_SZ / 8) + +// Declare some storage space to shadow ZT register contents and a +// scratch buffer. +.pushsection .text +.data +.align 4 +ztref: + .space ZT_B +scratch: + .space ZT_B +.popsection + + +// Generate a test pattern for storage in ZT +// x0: pid +// x1: generation + +// These values are used to construct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:24 generation number (increments once per test_loop) +// bits 23: 8 pid +// bits 7: 0 32-bit lane index + +function pattern + mov w3, wzr + bfi w3, w0, #8, #16 // PID + bfi w3, w1, #24, #8 // Generation + + ldr x0, =scratch + mov w1, #ZT_B / 4 + +0: str w3, [x0], #4 + add w3, w3, #1 // Lane + subs w1, w1, #1 + b.ne 0b + + ret +endfunction + +// Set up test pattern in a ZT horizontal vector +// x0: pid +// x1: generation +function setup_zt + mov x4, x30 + + bl pattern // Get pattern in scratch buffer + ldr x0, =ztref + ldr x1, =scratch + mov x2, #ZT_B + bl memcpy + + ldr x0, =ztref + _ldr_zt 0 // load zt0 from pointer x0 + + ret x4 +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZT vector matches its shadow in memory, else abort +// Clobbers x0-x3 +function check_zt + mov x3, x30 + + ldr x0, =scratch // Poison scratch + mov x1, #ZT_B + bl memfill_ae + + ldr x0, =scratch + _str_zt 0 + + ldr x0, =ztref + ldr x1, =scratch + mov x2, #ZT_B + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZT data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + smstart_za + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + mov x0, x20 + mov x1, x22 + bl setup_zt + + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0 + and x1, x0, #3 + cmp x1, #2 + b.ne svcr_barf + + bl check_zt + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + smstop + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index e8d2b57f73ec..b7fbb65183e8 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -5,4 +5,5 @@ sme_* ssve_* sve_* za_* +zt_* !*.[ch] diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index 0c645834ddc3..1e6273d81575 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -34,6 +34,7 @@ enum { FSVE_BIT, FSME_BIT, FSME_FA64_BIT, + FSME2_BIT, FMAX_END }; @@ -41,6 +42,7 @@ enum { #define FEAT_SVE (1UL << FSVE_BIT) #define FEAT_SME (1UL << FSME_BIT) #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) +#define FEAT_SME2 (1UL << FSME2_BIT) /* * A descriptor used to describe and configure a test case. diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 308e229e58ab..07f518f0e58d 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -29,6 +29,7 @@ static char const *const feats_names[FMAX_END] = { " SVE ", " SME ", " FA64 ", + " SME2 ", }; #define MAX_FEATS_SZ 128 @@ -323,6 +324,8 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SME; if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) td->feats_supported |= FEAT_SME_FA64; + if (getauxval(AT_HWCAP2) & HWCAP2_SME2) + td->feats_supported |= FEAT_SME2; if (feats_ok(td)) { if (td->feats_required & td->feats_supported) fprintf(stderr, diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index d2eda7b5de26..27d495fa52f8 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -108,6 +108,26 @@ bool validate_za_context(struct za_context *za, char **err) return true; } +bool validate_zt_context(struct zt_context *zt, char **err) +{ + if (!zt || !err) + return false; + + /* If the context is present there should be at least one register */ + if (zt->nregs == 0) { + *err = "no registers"; + return false; + } + + /* Size should agree with the number of registers */ + if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) { + *err = "register count does not match size"; + return false; + } + + return true; +} + bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) { bool terminated = false; @@ -117,6 +137,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) struct extra_context *extra = NULL; struct sve_context *sve = NULL; struct za_context *za = NULL; + struct zt_context *zt = NULL; struct _aarch64_ctx *head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; void *extra_data = NULL; @@ -177,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) za = (struct za_context *)head; new_flags |= ZA_CTX; break; + case ZT_MAGIC: + if (flags & ZT_CTX) + *err = "Multiple ZT_MAGIC"; + /* Size is validated in validate_za_context() */ + zt = (struct zt_context *)head; + new_flags |= ZT_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; @@ -234,6 +262,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (new_flags & ZA_CTX) if (!validate_za_context(za, err)) return false; + if (new_flags & ZT_CTX) + if (!validate_zt_context(zt, err)) + return false; flags |= new_flags; @@ -245,6 +276,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return false; } + if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) { + *err = "ZT context but no ZA context"; + return false; + } + return true; } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 040afded0b76..a08ab0d6207a 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -18,6 +18,7 @@ #define SVE_CTX (1 << 1) #define ZA_CTX (1 << 2) #define EXTRA_CTX (1 << 3) +#define ZT_CTX (1 << 4) #define KSFT_BAD_MAGIC 0xdeadbeef diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c new file mode 100644 index 000000000000..34f69bcf821e --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using an instruction not supported in streaming mode + * traps when in streaming mode. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + + /* + * Get a signal context which should not have a ZT frame and + * registers in it. + */ + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset); + if (head) { + fprintf(stderr, "Got unexpected ZT context\n"); + return 1; + } + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZT register data not present", + .descr = "Validate that ZT is not present when ZA is disabled", + .feats_required = FEAT_SME2, + .timeout = 3, + .sanity_disabled = true, + .run = zt_no_regs_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c new file mode 100644 index 000000000000..e1eb4d5c027a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Limited + * + * Verify that using an instruction not supported in streaming mode + * traps when in streaming mode. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +static void enable_za(void) +{ + /* smstart za; real data is TODO */ + asm volatile(".inst 0xd503457f" : : : ); +} + +int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct zt_context *zt; + char *zeros; + + /* + * Get a signal context which should have a ZT frame and registers + * in it. + */ + enable_za(); + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset); + if (!head) { + fprintf(stderr, "No ZT context\n"); + return 1; + } + + zt = (struct zt_context *)head; + if (zt->nregs == 0) { + fprintf(stderr, "Got context with no registers\n"); + return 1; + } + + fprintf(stderr, "Got expected size %u for %d registers\n", + head->size, zt->nregs); + + /* We didn't load any data into ZT so it should be all zeros */ + zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs)); + if (!zeros) { + fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs); + return 1; + } + memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs)); + + if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET, + ZT_SIG_REGS_SIZE(zt->nregs)) != 0) { + fprintf(stderr, "ZT data invalid\n"); + return 1; + } + + free(zeros); + + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "ZT register data", + .descr = "Validate that ZT is present and has data when ZA is enabled", + .feats_required = FEAT_SME2, + .timeout = 3, + .sanity_disabled = true, + .run = zt_regs_run, +}; diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 9218bb5f44bf..28eb99046475 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -85,61 +85,108 @@ #define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF /* HYPERV_CPUID_FEATURES.EAX */ -#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0) -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -#define HV_MSR_SYNIC_AVAILABLE BIT(2) -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4) -#define HV_MSR_HYPERCALL_AVAILABLE BIT(5) -#define HV_MSR_VP_INDEX_AVAILABLE BIT(6) -#define HV_MSR_RESET_AVAILABLE BIT(7) -#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8) -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10) -#define HV_ACCESS_FREQUENCY_MSRS BIT(11) -#define HV_ACCESS_REENLIGHTENMENT BIT(13) -#define HV_ACCESS_TSC_INVARIANT BIT(15) +#define HV_MSR_VP_RUNTIME_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0) +#define HV_MSR_TIME_REF_COUNT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1) +#define HV_MSR_SYNIC_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2) +#define HV_MSR_SYNTIMER_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3) +#define HV_MSR_APIC_ACCESS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4) +#define HV_MSR_HYPERCALL_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5) +#define HV_MSR_VP_INDEX_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6) +#define HV_MSR_RESET_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7) +#define HV_MSR_STAT_PAGES_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8) +#define HV_MSR_REFERENCE_TSC_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9) +#define HV_MSR_GUEST_IDLE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10) +#define HV_ACCESS_FREQUENCY_MSRS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11) +#define HV_ACCESS_REENLIGHTENMENT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13) +#define HV_ACCESS_TSC_INVARIANT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15) /* HYPERV_CPUID_FEATURES.EBX */ -#define HV_CREATE_PARTITIONS BIT(0) -#define HV_ACCESS_PARTITION_ID BIT(1) -#define HV_ACCESS_MEMORY_POOL BIT(2) -#define HV_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_POST_MESSAGES BIT(4) -#define HV_SIGNAL_EVENTS BIT(5) -#define HV_CREATE_PORT BIT(6) -#define HV_CONNECT_PORT BIT(7) -#define HV_ACCESS_STATS BIT(8) -#define HV_DEBUGGING BIT(11) -#define HV_CPU_MANAGEMENT BIT(12) -#define HV_ISOLATION BIT(22) +#define HV_CREATE_PARTITIONS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0) +#define HV_ACCESS_PARTITION_ID \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1) +#define HV_ACCESS_MEMORY_POOL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2) +#define HV_ADJUST_MESSAGE_BUFFERS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3) +#define HV_POST_MESSAGES \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4) +#define HV_SIGNAL_EVENTS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5) +#define HV_CREATE_PORT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6) +#define HV_CONNECT_PORT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7) +#define HV_ACCESS_STATS \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8) +#define HV_DEBUGGING \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11) +#define HV_CPU_MANAGEMENT \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12) +#define HV_ISOLATION \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22) /* HYPERV_CPUID_FEATURES.EDX */ -#define HV_X64_MWAIT_AVAILABLE BIT(0) -#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) -#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) -#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) -#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) -#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) -#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) -#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) +#define HV_X64_MWAIT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0) +#define HV_X64_GUEST_DEBUGGING_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1) +#define HV_X64_PERF_MONITOR_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2) +#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4) +#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5) +#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8) +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10) +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11) +#define HV_STIMER_DIRECT_MODE_AVAILABLE \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19) /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ -#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0) -#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1) -#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2) -#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3) -#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4) -#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5) -#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9) -#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10) -#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) -#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +#define HV_X64_AS_SWITCH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0) +#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1) +#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2) +#define HV_X64_APIC_ACCESS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3) +#define HV_X64_SYSTEM_RESET_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4) +#define HV_X64_RELAXED_TIMING_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5) +#define HV_DEPRECATING_AEOI_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9) +#define HV_X64_CLUSTER_IPI_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10) +#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11) +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ -#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1) +#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) /* Hypercalls */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 @@ -288,4 +335,7 @@ struct hyperv_test_pages { struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, vm_vaddr_t *p_hv_pages_gva); +/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ +#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index b1a31de7108a..2a5f47d51388 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -137,6 +137,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) #define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29) +#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8) #define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4) #define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12) #define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0) diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index 0b45ac593387..894417c96f70 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -19,6 +19,7 @@ #include "kvm_util.h" #include "asm/kvm.h" #include "linux/kvm.h" +#include "kselftest.h" static void stats_test(int stats_fd) { @@ -51,7 +52,7 @@ static void stats_test(int stats_fd) /* Sanity check for other fields in header */ if (header.num_desc == 0) { - printf("No KVM stats defined!"); + ksft_print_msg("No KVM stats defined!\n"); return; } /* @@ -224,9 +225,13 @@ int main(int argc, char *argv[]) max_vcpu = DEFAULT_NUM_VCPU; } + ksft_print_header(); + /* Check the extension for binary stats */ TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD)); + ksft_set_plan(max_vm); + /* Create VMs and VCPUs */ vms = malloc(sizeof(vms[0]) * max_vm); TEST_ASSERT(vms, "Allocate memory for storing VM pointers"); @@ -245,10 +250,12 @@ int main(int argc, char *argv[]) vm_stats_test(vms[i]); for (j = 0; j < max_vcpu; ++j) vcpu_stats_test(vcpus[i * max_vcpu + j]); + ksft_test_result_pass("vm%i\n", i); } for (i = 0; i < max_vm; ++i) kvm_vm_free(vms[i]); free(vms); - return 0; + + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 3163c3e8db0a..258267df8e2a 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,9 +13,17 @@ #include "processor.h" #include "hyperv.h" +/* + * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf + * but to activate the feature it is sufficient to set it to a non-zero + * value. Use BIT(0) for that. + */ +#define HV_PV_SPINLOCKS_TEST \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0) + struct msr_data { uint32_t idx; - bool available; + bool fault_expected; bool write; u64 write_val; }; @@ -26,22 +34,46 @@ struct hcall_data { bool ud_expected; }; +static bool is_write_only_msr(uint32_t msr) +{ + return msr == HV_X64_MSR_EOI; +} + static void guest_msr(struct msr_data *msr) { - uint64_t ignored; - uint8_t vector; + uint8_t vector = 0; + uint64_t msr_val = 0; GUEST_ASSERT(msr->idx); - if (!msr->write) - vector = rdmsr_safe(msr->idx, &ignored); - else + if (msr->write) vector = wrmsr_safe(msr->idx, msr->write_val); - if (msr->available) - GUEST_ASSERT_2(!vector, msr->idx, vector); + if (!vector && (!msr->write || !is_write_only_msr(msr->idx))) + vector = rdmsr_safe(msr->idx, &msr_val); + + if (msr->fault_expected) + GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR); else - GUEST_ASSERT_2(vector == GP_VECTOR, msr->idx, vector); + GUEST_ASSERT_3(!vector, msr->idx, vector, 0); + + if (vector || is_write_only_msr(msr->idx)) + goto done; + + if (msr->write) + GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx, + msr_val, msr->write_val); + + /* Invariant TSC bit appears when TSC invariant control MSR is written to */ + if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { + if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT)) + GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC)); + else + GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) == + !!(msr_val & HV_INVARIANT_TSC_EXPOSED)); + } + +done: GUEST_DONE(); } @@ -89,7 +121,6 @@ static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu) static void guest_test_msrs_access(void) { struct kvm_cpuid2 *prev_cpuid = NULL; - struct kvm_cpuid_entry2 *feat, *dbg; struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; @@ -97,6 +128,7 @@ static void guest_test_msrs_access(void) int stage = 0; vm_vaddr_t msr_gva; struct msr_data *msr; + bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC); while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_msr); @@ -116,9 +148,6 @@ static void guest_test_msrs_access(void) vcpu_init_cpuid(vcpu, prev_cpuid); } - feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); - dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); - vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vcpu); @@ -134,133 +163,139 @@ static void guest_test_msrs_access(void) * Only available when Hyper-V identification is set */ msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 1: msr->idx = HV_X64_MSR_HYPERCALL; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 2: - feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); /* * HV_X64_MSR_GUEST_OS_ID has to be written first to make * HV_X64_MSR_HYPERCALL available. */ msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = 1; + msr->write = true; msr->write_val = HYPERV_LINUX_OS_ID; - msr->available = 1; + msr->fault_expected = false; break; case 3: msr->idx = HV_X64_MSR_GUEST_OS_ID; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 4: msr->idx = HV_X64_MSR_HYPERCALL; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 5: msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 6: - feat->eax |= HV_MSR_VP_RUNTIME_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE); msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 7: /* Read only */ msr->idx = HV_X64_MSR_VP_RUNTIME; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 0; + msr->fault_expected = true; break; case 8: msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 9: - feat->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE); msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 10: /* Read only */ msr->idx = HV_X64_MSR_TIME_REF_COUNT; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 0; + msr->fault_expected = true; break; case 11: msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 12: - feat->eax |= HV_MSR_VP_INDEX_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE); msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 13: /* Read only */ msr->idx = HV_X64_MSR_VP_INDEX; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 0; + msr->fault_expected = true; break; case 14: msr->idx = HV_X64_MSR_RESET; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 15: - feat->eax |= HV_MSR_RESET_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE); msr->idx = HV_X64_MSR_RESET; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 16: msr->idx = HV_X64_MSR_RESET; - msr->write = 1; + msr->write = true; + /* + * TODO: the test only writes '0' to HV_X64_MSR_RESET + * at the moment, writing some other value there will + * trigger real vCPU reset and the code is not prepared + * to handle it yet. + */ msr->write_val = 0; - msr->available = 1; + msr->fault_expected = false; break; case 17: msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 18: - feat->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE); msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 19: msr->idx = HV_X64_MSR_REFERENCE_TSC; - msr->write = 1; + msr->write = true; msr->write_val = 0; - msr->available = 1; + msr->fault_expected = false; break; case 20: msr->idx = HV_X64_MSR_EOM; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 21: /* @@ -268,149 +303,185 @@ static void guest_test_msrs_access(void) * capability enabled and guest visible CPUID bit unset. */ msr->idx = HV_X64_MSR_EOM; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 22: - feat->eax |= HV_MSR_SYNIC_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE); msr->idx = HV_X64_MSR_EOM; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 23: msr->idx = HV_X64_MSR_EOM; - msr->write = 1; + msr->write = true; msr->write_val = 0; - msr->available = 1; + msr->fault_expected = false; break; case 24: msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 25: - feat->eax |= HV_MSR_SYNTIMER_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE); msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 26: msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = 1; + msr->write = true; msr->write_val = 0; - msr->available = 1; + msr->fault_expected = false; break; case 27: /* Direct mode test */ msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = 1; + msr->write = true; msr->write_val = 1 << 12; - msr->available = 0; + msr->fault_expected = true; break; case 28: - feat->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE); msr->idx = HV_X64_MSR_STIMER0_CONFIG; - msr->write = 1; + msr->write = true; msr->write_val = 1 << 12; - msr->available = 1; + msr->fault_expected = false; break; case 29: msr->idx = HV_X64_MSR_EOI; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 30: - feat->eax |= HV_MSR_APIC_ACCESS_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE); msr->idx = HV_X64_MSR_EOI; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 1; + msr->fault_expected = false; break; case 31: msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 32: - feat->eax |= HV_ACCESS_FREQUENCY_MSRS; + vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS); msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 33: /* Read only */ msr->idx = HV_X64_MSR_TSC_FREQUENCY; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 0; + msr->fault_expected = true; break; case 34: msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 35: - feat->eax |= HV_ACCESS_REENLIGHTENMENT; + vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT); msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 36: msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 1; + msr->fault_expected = false; break; case 37: /* Can only write '0' */ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 0; + msr->fault_expected = true; break; case 38: msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 39: - feat->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE); msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 40: msr->idx = HV_X64_MSR_CRASH_P0; - msr->write = 1; + msr->write = true; msr->write_val = 1; - msr->available = 1; + msr->fault_expected = false; break; case 41: msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = 0; - msr->available = 0; + msr->write = false; + msr->fault_expected = true; break; case 42: - feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; - dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE); + vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING); msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = 0; - msr->available = 1; + msr->write = false; + msr->fault_expected = false; break; case 43: msr->idx = HV_X64_MSR_SYNDBG_STATUS; - msr->write = 1; + msr->write = true; msr->write_val = 0; - msr->available = 1; + msr->fault_expected = false; break; case 44: + /* MSR is not available when CPUID feature bit is unset */ + if (!has_invtsc) + continue; + msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; + msr->write = false; + msr->fault_expected = true; + break; + case 45: + /* MSR is vailable when CPUID feature bit is set */ + if (!has_invtsc) + continue; + vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); + msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; + msr->write = false; + msr->fault_expected = false; + break; + case 46: + /* Writing bits other than 0 is forbidden */ + if (!has_invtsc) + continue; + msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; + msr->write = true; + msr->write_val = 0xdeadbeef; + msr->fault_expected = true; + break; + case 47: + /* Setting bit 0 enables the feature */ + if (!has_invtsc) + continue; + msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; + msr->write = true; + msr->write_val = 1; + msr->fault_expected = false; + break; + + default: kvm_vm_free(vm); return; } @@ -429,7 +500,7 @@ static void guest_test_msrs_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "MSR = %lx, vector = %lx"); + REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx"); return; case UCALL_DONE: break; @@ -445,7 +516,6 @@ static void guest_test_msrs_access(void) static void guest_test_hcalls_access(void) { - struct kvm_cpuid_entry2 *feat, *recomm, *dbg; struct kvm_cpuid2 *prev_cpuid = NULL; struct kvm_vcpu *vcpu; struct kvm_run *run; @@ -480,15 +550,11 @@ static void guest_test_hcalls_access(void) vcpu_init_cpuid(vcpu, prev_cpuid); } - feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); - recomm = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); - dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); - run = vcpu->run; switch (stage) { case 0: - feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE); hcall->control = 0xbeef; hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -498,7 +564,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 2: - feat->ebx |= HV_POST_MESSAGES; + vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES); hcall->control = HVCALL_POST_MESSAGE; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; @@ -508,7 +574,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 4: - feat->ebx |= HV_SIGNAL_EVENTS; + vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS); hcall->control = HVCALL_SIGNAL_EVENT; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; @@ -518,12 +584,12 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; break; case 6: - dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING); hcall->control = HVCALL_RESET_DEBUG_SESSION; hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 7: - feat->ebx |= HV_DEBUGGING; + vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING); hcall->control = HVCALL_RESET_DEBUG_SESSION; hcall->expect = HV_STATUS_OPERATION_DENIED; break; @@ -533,7 +599,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 9: - recomm->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED); hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; hcall->expect = HV_STATUS_SUCCESS; break; @@ -542,7 +608,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 11: - recomm->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; + vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED); hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; hcall->expect = HV_STATUS_SUCCESS; break; @@ -552,7 +618,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 13: - recomm->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED); hcall->control = HVCALL_SEND_IPI; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; @@ -567,7 +633,7 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 16: - recomm->ebx = 0xfff; + vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST); hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; hcall->expect = HV_STATUS_SUCCESS; break; @@ -577,7 +643,7 @@ static void guest_test_hcalls_access(void) hcall->ud_expected = true; break; case 18: - feat->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE); hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; hcall->ud_expected = false; hcall->expect = HV_STATUS_SUCCESS; diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index 22d366c697f7..c9f67702f657 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -72,11 +72,16 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: - TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); + if (!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage + 1) + ksft_test_result_pass("stage %d passed\n", stage + 1); + else + ksft_test_result_fail( + "stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); return; case UCALL_DONE: + ksft_test_result_pass("stage %d passed\n", stage + 1); return; case UCALL_ABORT: REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); @@ -92,6 +97,9 @@ int main(void) struct kvm_vm *vm; uint64_t val; + ksft_print_header(); + ksft_set_plan(5); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); val = 0; @@ -149,5 +157,5 @@ int main(void) kvm_vm_free(vm); - return 0; + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,19 +12,27 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +69,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +101,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +135,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +208,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +232,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +249,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +270,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +295,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +312,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,15 +333,17 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -351,11 +379,35 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -385,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 9fb1ff6f19e5..b74916de5183 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -92,3 +92,6 @@ config KVM_XFER_TO_GUEST_WORK config HAVE_KVM_PM_NOTIFIER bool + +config KVM_GENERIC_HARDWARE_ENABLING + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 13e88297f999..d255964ec331 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -100,13 +100,8 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); */ DEFINE_MUTEX(kvm_lock); -static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); -static cpumask_var_t cpus_hardware_enabled; -static int kvm_usage_count; -static atomic_t hardware_enable_failed; - static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; @@ -148,9 +143,6 @@ static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -__visible bool kvm_rebooting; -EXPORT_SYMBOL_GPL(kvm_rebooting); - #define KVM_EVENT_CREATE_VM 0 #define KVM_EVENT_DESTROY_VM 1 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); @@ -3954,6 +3946,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } mutex_lock(&kvm->lock); + +#ifdef CONFIG_LOCKDEP + /* Ensure that lockdep knows vcpu->mutex is taken *inside* kvm->lock */ + mutex_lock(&vcpu->mutex); + mutex_unlock(&vcpu->mutex); +#endif + if (kvm_get_vcpu_by_id(kvm, id)) { r = -EEXIST; goto unlock_vcpu_destroy; @@ -5095,50 +5094,70 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -static void hardware_enable_nolock(void *junk) -{ - int cpu = raw_smp_processor_id(); - int r; - - if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) - return; +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +__visible bool kvm_rebooting; +EXPORT_SYMBOL_GPL(kvm_rebooting); - cpumask_set_cpu(cpu, cpus_hardware_enabled); +static DEFINE_PER_CPU(bool, hardware_enabled); +static int kvm_usage_count; - r = kvm_arch_hardware_enable(); +static int __hardware_enable_nolock(void) +{ + if (__this_cpu_read(hardware_enabled)) + return 0; - if (r) { - cpumask_clear_cpu(cpu, cpus_hardware_enabled); - atomic_inc(&hardware_enable_failed); - pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu); + if (kvm_arch_hardware_enable()) { + pr_info("kvm: enabling virtualization on CPU%d failed\n", + raw_smp_processor_id()); + return -EIO; } + + __this_cpu_write(hardware_enabled, true); + return 0; } -static int kvm_starting_cpu(unsigned int cpu) +static void hardware_enable_nolock(void *failed) { - raw_spin_lock(&kvm_count_lock); + if (__hardware_enable_nolock()) + atomic_inc(failed); +} + +static int kvm_online_cpu(unsigned int cpu) +{ + int ret = 0; + + /* + * Abort the CPU online process if hardware virtualization cannot + * be enabled. Otherwise running VMs would encounter unrecoverable + * errors when scheduled to this CPU. + */ + mutex_lock(&kvm_lock); if (kvm_usage_count) - hardware_enable_nolock(NULL); - raw_spin_unlock(&kvm_count_lock); - return 0; + ret = __hardware_enable_nolock(); + mutex_unlock(&kvm_lock); + return ret; } static void hardware_disable_nolock(void *junk) { - int cpu = raw_smp_processor_id(); - - if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) + /* + * Note, hardware_disable_all_nolock() tells all online CPUs to disable + * hardware, not just CPUs that successfully enabled hardware! + */ + if (!__this_cpu_read(hardware_enabled)) return; - cpumask_clear_cpu(cpu, cpus_hardware_enabled); + kvm_arch_hardware_disable(); + + __this_cpu_write(hardware_enabled, false); } -static int kvm_dying_cpu(unsigned int cpu) +static int kvm_offline_cpu(unsigned int cpu) { - raw_spin_lock(&kvm_count_lock); + mutex_lock(&kvm_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - raw_spin_unlock(&kvm_count_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -5153,29 +5172,41 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { - raw_spin_lock(&kvm_count_lock); + cpus_read_lock(); + mutex_lock(&kvm_lock); hardware_disable_all_nolock(); - raw_spin_unlock(&kvm_count_lock); + mutex_unlock(&kvm_lock); + cpus_read_unlock(); } static int hardware_enable_all(void) { + atomic_t failed = ATOMIC_INIT(0); int r = 0; - raw_spin_lock(&kvm_count_lock); + /* + * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() + * is called, and so on_each_cpu() between them includes the CPU that + * is being onlined. As a result, hardware_enable_nolock() may get + * invoked before kvm_online_cpu(), which also enables hardware if the + * usage count is non-zero. Disable CPU hotplug to avoid attempting to + * enable hardware multiple times. + */ + cpus_read_lock(); + mutex_lock(&kvm_lock); kvm_usage_count++; if (kvm_usage_count == 1) { - atomic_set(&hardware_enable_failed, 0); - on_each_cpu(hardware_enable_nolock, NULL, 1); + on_each_cpu(hardware_enable_nolock, &failed, 1); - if (atomic_read(&hardware_enable_failed)) { + if (atomic_read(&failed)) { hardware_disable_all_nolock(); r = -EBUSY; } } - raw_spin_unlock(&kvm_count_lock); + mutex_unlock(&kvm_lock); + cpus_read_unlock(); return r; } @@ -5200,6 +5231,49 @@ static struct notifier_block kvm_reboot_notifier = { .priority = 0, }; +static int kvm_suspend(void) +{ + /* + * Secondary CPUs and CPU hotplug are disabled across the suspend/resume + * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count + * is stable. Assert that kvm_lock is not held to ensure the system + * isn't suspended while KVM is enabling hardware. Hardware enabling + * can be preempted, but the task cannot be frozen until it has dropped + * all locks (userspace tasks are frozen via a fake signal). + */ + lockdep_assert_not_held(&kvm_lock); + lockdep_assert_irqs_disabled(); + + if (kvm_usage_count) + hardware_disable_nolock(NULL); + return 0; +} + +static void kvm_resume(void) +{ + lockdep_assert_not_held(&kvm_lock); + lockdep_assert_irqs_disabled(); + + if (kvm_usage_count) + WARN_ON_ONCE(__hardware_enable_nolock()); +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; +#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ +static int hardware_enable_all(void) +{ + return 0; +} + +static void hardware_disable_all(void) +{ + +} +#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ + static void kvm_io_bus_destroy(struct kvm_io_bus *bus) { int i; @@ -5778,26 +5852,6 @@ static void kvm_init_debug(void) } } -static int kvm_suspend(void) -{ - if (kvm_usage_count) - hardware_disable_nolock(NULL); - return 0; -} - -static void kvm_resume(void) -{ - if (kvm_usage_count) { - lockdep_assert_not_held(&kvm_count_lock); - hardware_enable_nolock(NULL); - } -} - -static struct syscore_ops kvm_syscore_ops = { - .suspend = kvm_suspend, - .resume = kvm_resume, -}; - static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) { @@ -5902,62 +5956,20 @@ void kvm_unregister_perf_callbacks(void) } #endif -struct kvm_cpu_compat_check { - void *opaque; - int *ret; -}; - -static void check_processor_compat(void *data) +int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) { - struct kvm_cpu_compat_check *c = data; - - *c->ret = kvm_arch_check_processor_compat(c->opaque); -} - -int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - struct module *module) -{ - struct kvm_cpu_compat_check c; int r; int cpu; - r = kvm_arch_init(opaque); - if (r) - goto out_fail; - - /* - * kvm_arch_init makes sure there's at most one caller - * for architectures that support multiple implementations, - * like intel and amd on x86. - * kvm_arch_init must be called before kvm_irqfd_init to avoid creating - * conflicts in case kvm is already setup for another implementation. - */ - r = kvm_irqfd_init(); +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING + r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online", + kvm_online_cpu, kvm_offline_cpu); if (r) - goto out_irqfd; - - if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { - r = -ENOMEM; - goto out_free_0; - } - - r = kvm_arch_hardware_setup(opaque); - if (r < 0) - goto out_free_1; - - c.ret = &r; - c.opaque = opaque; - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, check_processor_compat, &c, 1); - if (r < 0) - goto out_free_2; - } + return r; - r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting", - kvm_starting_cpu, kvm_dying_cpu); - if (r) - goto out_free_2; register_reboot_notifier(&kvm_reboot_notifier); + register_syscore_ops(&kvm_syscore_ops); +#endif /* A kmem cache lets us meet the alignment requirements of fx_save. */ if (!vcpu_align) @@ -5971,59 +5983,65 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; - goto out_free_3; + goto err_vcpu_cache; } for_each_possible_cpu(cpu) { if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu), GFP_KERNEL, cpu_to_node(cpu))) { r = -ENOMEM; - goto out_free_4; + goto err_cpu_kick_mask; } } + r = kvm_irqfd_init(); + if (r) + goto err_irqfd; + r = kvm_async_pf_init(); if (r) - goto out_free_4; + goto err_async_pf; kvm_chardev_ops.owner = module; - r = misc_register(&kvm_dev); - if (r) { - pr_err("kvm: misc device register failed\n"); - goto out_unreg; - } - - register_syscore_ops(&kvm_syscore_ops); - kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; kvm_init_debug(); r = kvm_vfio_ops_init(); - WARN_ON(r); + if (WARN_ON_ONCE(r)) + goto err_vfio; + + /* + * Registration _must_ be the very last thing done, as this exposes + * /dev/kvm to userspace, i.e. all infrastructure must be setup! + */ + r = misc_register(&kvm_dev); + if (r) { + pr_err("kvm: misc device register failed\n"); + goto err_register; + } return 0; -out_unreg: +err_register: + kvm_vfio_ops_exit(); +err_vfio: kvm_async_pf_deinit(); -out_free_4: +err_async_pf: + kvm_irqfd_exit(); +err_irqfd: +err_cpu_kick_mask: for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); -out_free_3: +err_vcpu_cache: +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING + unregister_syscore_ops(&kvm_syscore_ops); unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); -out_free_2: - kvm_arch_hardware_unsetup(); -out_free_1: - free_cpumask_var(cpus_hardware_enabled); -out_free_0: - kvm_irqfd_exit(); -out_irqfd: - kvm_arch_exit(); -out_fail: + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); +#endif return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -6032,21 +6050,25 @@ void kvm_exit(void) { int cpu; - debugfs_remove_recursive(kvm_debugfs_dir); + /* + * Note, unregistering /dev/kvm doesn't strictly need to come first, + * fops_get(), a.k.a. try_module_get(), prevents acquiring references + * to KVM while the module is being stopped. + */ misc_deregister(&kvm_dev); + + debugfs_remove_recursive(kvm_debugfs_dir); for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); kmem_cache_destroy(kvm_vcpu_cache); + kvm_vfio_ops_exit(); kvm_async_pf_deinit(); +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING unregister_syscore_ops(&kvm_syscore_ops); unregister_reboot_notifier(&kvm_reboot_notifier); - cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING); - on_each_cpu(hardware_disable_nolock, NULL, 1); - kvm_arch_hardware_unsetup(); - kvm_arch_exit(); + cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); +#endif kvm_irqfd_exit(); - free_cpumask_var(cpus_hardware_enabled); - kvm_vfio_ops_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); |