summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore4
-rw-r--r--CREDITS5
-rw-r--r--Documentation/admin-guide/bootconfig.rst34
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--Documentation/dev-tools/kunit/usage.rst1
-rw-r--r--Documentation/kbuild/makefiles.rst5
-rw-r--r--Documentation/networking/phy.rst5
-rw-r--r--Documentation/power/index.rst1
-rw-r--r--Documentation/sphinx/parallel-wrapper.sh2
-rw-r--r--Documentation/virt/kvm/api.rst33
-rw-r--r--Documentation/x86/index.rst1
-rw-r--r--MAINTAINERS8
-rw-r--r--Makefile6
-rw-r--r--arch/arm/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/arch_gicv3.h2
-rw-r--r--arch/arm64/include/asm/cache.h2
-rw-r--r--arch/arm64/include/asm/cacheflush.h2
-rw-r--r--arch/arm64/include/asm/cpufeature.h10
-rw-r--r--arch/arm64/include/asm/io.h4
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h48
-rw-r--r--arch/arm64/include/asm/kvm_host.h32
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h7
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h3
-rw-r--r--arch/arm64/include/asm/virt.h2
-rw-r--r--arch/arm64/kvm/hyp/switch.c39
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c4
-rw-r--r--arch/mips/boot/dts/ingenic/jz4740.dtsi17
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi17
-rw-r--r--arch/mips/boot/dts/ingenic/x1000.dtsi6
-rw-r--r--arch/mips/include/asm/sync.h4
-rw-r--r--arch/mips/kernel/vpe.c2
-rw-r--r--arch/mips/vdso/Makefile28
-rw-r--r--arch/riscv/boot/.gitignore2
-rw-r--r--arch/riscv/include/asm/csr.h12
-rw-r--r--arch/riscv/kernel/head.S6
-rw-r--r--arch/riscv/kernel/traps.c4
-rw-r--r--arch/riscv/mm/kasan_init.c53
-rw-r--r--arch/x86/include/asm/io_bitmap.h9
-rw-r--r--arch/x86/include/asm/kvm_emulate.h13
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/paravirt_types.h4
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/asm/vmxfeatures.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/kvm.c65
-rw-r--r--arch/x86/kernel/paravirt.c5
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kvm/Kconfig13
-rw-r--r--arch/x86/kvm/Makefile1
-rw-r--r--arch/x86/kvm/emulate.c36
-rw-r--r--arch/x86/kvm/irq_comm.c2
-rw-r--r--arch/x86/kvm/lapic.c9
-rw-r--r--arch/x86/kvm/mmutrace.h2
-rw-r--r--arch/x86/kvm/svm.c70
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/nested.c89
-rw-r--r--arch/x86/kvm/vmx/nested.h10
-rw-r--r--arch/x86/kvm/vmx/vmx.c147
-rw-r--r--arch/x86/kvm/vmx/vmx.h3
-rw-r--r--arch/x86/kvm/x86.c16
-rw-r--r--arch/x86/mm/dump_pagetables.c7
-rw-r--r--arch/x86/platform/efi/efi_64.c151
-rw-r--r--arch/x86/xen/enlighten_pv.c25
-rw-r--r--block/blk-flush.c2
-rw-r--r--block/blk-mq-sched.c22
-rw-r--r--block/blk-mq-tag.c4
-rw-r--r--block/blk-mq-tag.h4
-rw-r--r--block/blk-mq.c28
-rw-r--r--block/blk-mq.h5
-rw-r--r--drivers/acpi/acpi_watchdog.c15
-rw-r--r--drivers/block/floppy.c7
-rw-r--r--drivers/block/null_blk.h3
-rw-r--r--drivers/block/null_blk_main.c2
-rw-r--r--drivers/block/paride/pcd.c2
-rw-r--r--drivers/cdrom/gdrom.c2
-rw-r--r--drivers/cpufreq/cpufreq.c12
-rw-r--r--drivers/devfreq/devfreq.c4
-rw-r--r--drivers/firmware/efi/efi.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c37
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c6
-rw-r--r--drivers/gpu/drm/i915/Makefile2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c2
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c4
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c59
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h11
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c43
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c6
-rw-r--r--drivers/hid/hid-alps.c2
-rw-r--r--drivers/hid/hid-apple.c3
-rw-r--r--drivers/hid/hid-bigbenff.c31
-rw-r--r--drivers/hid/hid-core.c4
-rw-r--r--drivers/hid/hid-ite.c5
-rw-r--r--drivers/hid/hid-logitech-hidpp.c43
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c8
-rw-r--r--drivers/hid/usbhid/hiddev.c2
-rw-r--r--drivers/i2c/busses/i2c-altera.c2
-rw-r--r--drivers/i2c/busses/i2c-jz4780.c36
-rw-r--r--drivers/ide/ide-gd.c2
-rw-r--r--drivers/macintosh/therm_windtunnel.c52
-rw-r--r--drivers/net/dsa/bcm_sf2.c3
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c48
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c10
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_if.h1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h1
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c3
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_rx.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h2
-rw-r--r--drivers/net/ethernet/micrel/ks8851_mll.c14
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c186
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h3
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c7
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h1
-rw-r--r--drivers/net/ethernet/sfc/ptp.c38
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c13
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac.h4
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c209
-rw-r--r--drivers/net/hyperv/netvsc.c2
-rw-r--r--drivers/net/hyperv/netvsc_drv.c3
-rw-r--r--drivers/net/phy/marvell.c5
-rw-r--r--drivers/net/phy/mscc.c4
-rw-r--r--drivers/net/phy/phy-c45.c6
-rw-r--r--drivers/net/phy/phy_device.c11
-rw-r--r--drivers/net/slip/slip.c3
-rw-r--r--drivers/net/usb/qmi_wwan.c3
-rw-r--r--drivers/nvme/host/pci.c2
-rw-r--r--drivers/pci/controller/pcie-brcmstb.c2
-rw-r--r--drivers/platform/chrome/wilco_ec/properties.c2
-rw-r--r--drivers/s390/scsi/zfcp_fsf.h2
-rw-r--r--drivers/s390/scsi/zfcp_sysfs.c2
-rw-r--r--drivers/scsi/libfc/fc_disc.c2
-rw-r--r--drivers/scsi/sd_zbc.c7
-rw-r--r--drivers/scsi/sr.c2
-rw-r--r--drivers/tee/amdtee/Kconfig2
-rw-r--r--drivers/vhost/net.c10
-rw-r--r--drivers/watchdog/wdat_wdt.c25
-rw-r--r--fs/ext4/super.c6
-rw-r--r--fs/io-wq.c19
-rw-r--r--fs/io-wq.h14
-rw-r--r--fs/io_uring.c132
-rw-r--r--fs/jbd2/transaction.c8
-rw-r--r--fs/zonefs/Kconfig1
-rw-r--r--fs/zonefs/super.c8
-rw-r--r--include/acpi/actypes.h3
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/blktrace_api.h18
-rw-r--r--include/linux/bootconfig.h3
-rw-r--r--include/linux/hid.h2
-rw-r--r--include/linux/icmpv6.h16
-rw-r--r--include/linux/kvm_host.h4
-rw-r--r--include/linux/netfilter/ipset/ip_set.h11
-rw-r--r--init/Kconfig5
-rw-r--r--init/main.c38
-rw-r--r--kernel/audit.c40
-rw-r--r--kernel/auditfilter.c71
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/signal.c23
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/blktrace.c114
-rw-r--r--kernel/trace/synth_event_gen_test.c44
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/trace/trace_events_hist.c112
-rw-r--r--lib/bootconfig.c36
-rw-r--r--lib/crypto/chacha20poly1305.c3
-rw-r--r--mm/shmem.c2
-rw-r--r--net/bridge/br_device.c6
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/devlink.c38
-rw-r--r--net/ethtool/bitset.c3
-rw-r--r--net/ethtool/bitset.h2
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/tcp_input.c6
-rw-r--r--net/ipv6/ipv6_sockglue.c10
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mptcp/protocol.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c34
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h635
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netfilter/xt_hashlimit.c16
-rw-r--r--net/netlink/genetlink.c5
-rw-r--r--net/sched/act_api.c1
-rw-r--r--net/smc/af_smc.c25
-rw-r--r--net/smc/smc_core.c12
-rw-r--r--net/smc/smc_core.h2
-rw-r--r--net/smc/smc_ib.c2
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/vmw_vsock/af_vsock.c20
-rw-r--r--net/vmw_vsock/hyperv_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c2
-rw-r--r--net/wireless/nl80211.c5
-rw-r--r--net/wireless/reg.c2
-rw-r--r--scripts/Makefile.lib6
-rw-r--r--tools/arch/x86/include/asm/msr-index.h2
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/bootconfig/include/linux/printk.h5
-rw-r--r--tools/bootconfig/main.c51
-rw-r--r--tools/bootconfig/samples/bad-mixed-kv1.bconf3
-rw-r--r--tools/bootconfig/samples/bad-mixed-kv2.bconf3
-rw-r--r--tools/bootconfig/samples/bad-samekey.bconf6
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh18
-rw-r--r--tools/perf/Documentation/perf-config.txt74
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c18
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c17
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c17
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c17
-rw-r--r--tools/perf/builtin-annotate.c4
-rw-r--r--tools/perf/builtin-probe.c6
-rw-r--r--tools/perf/builtin-report.c2
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/include/bpf/pid_filter.h2
-rw-r--r--tools/perf/include/bpf/stdio.h2
-rw-r--r--tools/perf/include/bpf/unistd.h2
-rw-r--r--tools/perf/tests/shell/lib/probe_vfs_getname.sh2
-rw-r--r--tools/perf/ui/browsers/annotate.c19
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/util/annotate.c194
-rw-r--r--tools/perf/util/annotate.h9
-rw-r--r--tools/perf/util/auxtrace.c22
-rw-r--r--tools/perf/util/auxtrace.h6
-rw-r--r--tools/perf/util/config.c12
-rw-r--r--tools/perf/util/config.h1
-rw-r--r--tools/perf/util/probe-file.c28
-rwxr-xr-xtools/testing/kunit/kunit.py12
-rw-r--r--tools/testing/kunit/kunit_kernel.py28
-rw-r--r--tools/testing/selftests/ftrace/Makefile2
-rw-r--r--tools/testing/selftests/livepatch/Makefile2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_concat_range.sh55
-rw-r--r--tools/testing/selftests/rseq/Makefile4
-rw-r--r--tools/testing/selftests/rtc/Makefile2
-rw-r--r--virt/kvm/arm/arm.c2
-rw-r--r--virt/kvm/arm/trace.h1
254 files changed, 2920 insertions, 1652 deletions
diff --git a/.gitignore b/.gitignore
index 72ef86a5570d..2763fce8766c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,6 +100,10 @@ modules.order
/include/ksym/
/arch/*/include/generated/
+# Generated lkdtm tests
+/tools/testing/selftests/lkdtm/*.sh
+!/tools/testing/selftests/lkdtm/run.sh
+
# stgit generated dirs
patches-*
diff --git a/CREDITS b/CREDITS
index a97d3280a627..032b5994f476 100644
--- a/CREDITS
+++ b/CREDITS
@@ -567,6 +567,11 @@ D: Original author of Amiga FFS filesystem
S: Orlando, Florida
S: USA
+N: Paul Burton
+E: paulburton@kernel.org
+W: https://pburton.com
+D: MIPS maintainer 2018-2020
+
N: Lennert Buytenhek
E: kernel@wantstofly.org
D: Original (2.4) rewrite of the ethernet bridging code
diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst
index b342a6796392..cf2edcd09183 100644
--- a/Documentation/admin-guide/bootconfig.rst
+++ b/Documentation/admin-guide/bootconfig.rst
@@ -62,6 +62,30 @@ Or more shorter, written as following::
In both styles, same key words are automatically merged when parsing it
at boot time. So you can append similar trees or key-values.
+Same-key Values
+---------------
+
+It is prohibited that two or more values or arrays share a same-key.
+For example,::
+
+ foo = bar, baz
+ foo = qux # !ERROR! we can not re-define same key
+
+If you want to append the value to existing key as an array member,
+you can use ``+=`` operator. For example::
+
+ foo = bar, baz
+ foo += qux
+
+In this case, the key ``foo`` has ``bar``, ``baz`` and ``qux``.
+
+However, a sub-key and a value can not co-exist under a parent key.
+For example, following config is NOT allowed.::
+
+ foo = value1
+ foo.bar = value2 # !ERROR! subkey "bar" and value "value1" can NOT co-exist
+
+
Comments
--------
@@ -102,9 +126,13 @@ Boot Kernel With a Boot Config
==============================
Since the boot configuration file is loaded with initrd, it will be added
-to the end of the initrd (initramfs) image file. The Linux kernel decodes
-the last part of the initrd image in memory to get the boot configuration
-data.
+to the end of the initrd (initramfs) image file with size, checksum and
+12-byte magic word as below.
+
+[initrd][bootconfig][size(u32)][checksum(u32)][#BOOTCONFIG\n]
+
+The Linux kernel decodes the last part of the initrd image in memory to
+get the boot configuration data.
Because of this "piggyback" method, there is no need to change or
update the boot loader and the kernel image itself.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index dbc22d684627..c07815d230bc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -136,6 +136,10 @@
dynamic table installation which will install SSDT
tables to /sys/firmware/acpi/tables/dynamic.
+ acpi_no_watchdog [HW,ACPI,WDT]
+ Ignore the ACPI-based watchdog interface (WDAT) and let
+ a native driver control the watchdog device instead.
+
acpi_rsdp= [ACPI,EFI,KEXEC]
Pass the RSDP address to the kernel, mostly used
on machines running EFI runtime service to boot the
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 7cd56a1993b1..607758a66a99 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -551,6 +551,7 @@ options to your ``.config``:
Once the kernel is built and installed, a simple
.. code-block:: bash
+
modprobe example-test
...will run the tests.
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index 0e0eb2c8da7d..6bc126a14b3d 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -765,7 +765,7 @@ is not sufficient this sometimes needs to be explicit.
Example::
#arch/x86/boot/Makefile
- subdir- := compressed/
+ subdir- := compressed
The above assignment instructs kbuild to descend down in the
directory compressed/ when "make clean" is executed.
@@ -1379,9 +1379,6 @@ See subsequent chapter for the syntax of the Kbuild file.
in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
a wrapper of the asm-generic one.
- The convention is to list one subdir per line and
- preferably in alphabetic order.
-
8 Kbuild Variables
==================
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 1e4735cc0553..256106054c8c 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -487,8 +487,9 @@ phy_register_fixup_for_id()::
The stubs set one of the two matching criteria, and set the other one to
match anything.
-When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module,
-unregister fixup and free allocate memory are required.
+When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module load
+time, the module needs to unregister the fixup and free allocated memory when
+it's unloaded.
Call one of following function before unloading module::
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
index 002e42745263..ced8a8007434 100644
--- a/Documentation/power/index.rst
+++ b/Documentation/power/index.rst
@@ -13,7 +13,6 @@ Power Management
drivers-testing
energy-model
freezing-of-tasks
- interface
opp
pci
pm_qos_interface
diff --git a/Documentation/sphinx/parallel-wrapper.sh b/Documentation/sphinx/parallel-wrapper.sh
index 7daf5133bdd3..e54c44ce117d 100644
--- a/Documentation/sphinx/parallel-wrapper.sh
+++ b/Documentation/sphinx/parallel-wrapper.sh
@@ -30,4 +30,4 @@ if [ -n "$parallel" ] ; then
parallel="-j$parallel"
fi
-exec "$sphinx" "$parallel" "$@"
+exec "$sphinx" $parallel "$@"
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 97a72a53fa4b..ebd383fba939 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4611,35 +4611,38 @@ unpins the VPA pages and releases all the device pages that are used to
track the secure pages by hypervisor.
4.122 KVM_S390_NORMAL_RESET
+---------------------------
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the cpu reset definition in the POP (Principles Of Operation).
4.123 KVM_S390_INITIAL_RESET
+----------------------------
-Capability: none
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: none
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the initial cpu reset definition in the POP. However, the cpu is not
put into ESA mode. This reset is a superset of the normal reset.
4.124 KVM_S390_CLEAR_RESET
+--------------------------
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
This ioctl resets VCPU registers and control structures according to
the clear cpu reset definition in the POP. However, the cpu is not put
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index a8de2fbc1caa..265d9e9a093b 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -19,7 +19,6 @@ x86-specific Documentation
tlb
mtrr
pat
- intel_mpx
intel-iommu
intel_txt
amd-memory-encryption
diff --git a/MAINTAINERS b/MAINTAINERS
index fcd79fc38928..6158a143a13e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11115,14 +11115,12 @@ S: Maintained
F: drivers/usb/image/microtek.*
MIPS
-M: Ralf Baechle <ralf@linux-mips.org>
-M: Paul Burton <paulburton@kernel.org>
+M: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
L: linux-mips@vger.kernel.org
W: http://www.linux-mips.org/
-T: git git://git.linux-mips.org/pub/scm/ralf/linux.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
Q: http://patchwork.linux-mips.org/project/linux-mips/list/
-S: Supported
+S: Maintained
F: Documentation/devicetree/bindings/mips/
F: Documentation/mips/
F: arch/mips/
@@ -12741,7 +12739,7 @@ M: Tom Joseph <tjoseph@cadence.com>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/cdns,*.txt
-F: drivers/pci/controller/pcie-cadence*
+F: drivers/pci/controller/cadence/
PCI DRIVER FOR FREESCALE LAYERSCAPE
M: Minghuan Lian <minghuan.Lian@nxp.com>
diff --git a/Makefile b/Makefile
index 0914049d2929..86035d866f2c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 6
SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -68,6 +68,7 @@ unexport GREP_OPTIONS
#
# If KBUILD_VERBOSE equals 0 then the above command will be hidden.
# If KBUILD_VERBOSE equals 1 then the above command is displayed.
+# If KBUILD_VERBOSE equals 2 then give the reason why each target is rebuilt.
#
# To put more focus on warnings, be less verbose as default
# Use 'make V=1' to see the full commands
@@ -1238,7 +1239,7 @@ ifneq ($(dtstree),)
%.dtb: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
-PHONY += dtbs dtbs_install dt_binding_check
+PHONY += dtbs dtbs_install dtbs_check
dtbs dtbs_check: include/config/kernel.release scripts_dtc
$(Q)$(MAKE) $(build)=$(dtstree)
@@ -1258,6 +1259,7 @@ PHONY += scripts_dtc
scripts_dtc: scripts_basic
$(Q)$(MAKE) $(build)=scripts/dtc
+PHONY += dt_binding_check
dt_binding_check: scripts_dtc
$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c3314b286a61..a827b4d60d38 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -392,9 +392,6 @@ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arm_vhe_guest_enter(void) {}
-static inline void kvm_arm_vhe_guest_exit(void) {}
-
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 25fec4bde43a..a358e97572c1 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -32,7 +32,7 @@ static inline void gic_write_eoir(u32 irq)
isb();
}
-static inline void gic_write_dir(u32 irq)
+static __always_inline void gic_write_dir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
isb();
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 806e9dc2a852..a4d1b5f771f6 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -69,7 +69,7 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static inline int icache_is_vpipt(void)
+static __always_inline int icache_is_vpipt(void)
{
return test_bit(ICACHEF_VPIPT, &__icache_flags);
}
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 665c78e0665a..e6cca3d4acf7 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -145,7 +145,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
-static inline void __flush_icache_all(void)
+static __always_inline void __flush_icache_all(void)
{
if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
return;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 92ef9539874a..2a746b99e937 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -435,13 +435,13 @@ cpuid_feature_extract_signed_field(u64 features, int field)
return cpuid_feature_extract_signed_field_width(features, field, 4);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
{
return (u64)(features << (64 - width - field)) >> (64 - width);
}
-static inline unsigned int __attribute_const__
+static __always_inline unsigned int __attribute_const__
cpuid_feature_extract_unsigned_field(u64 features, int field)
{
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
@@ -564,7 +564,7 @@ static inline bool system_supports_mixed_endian(void)
return val == 0x1;
}
-static inline bool system_supports_fpsimd(void)
+static __always_inline bool system_supports_fpsimd(void)
{
return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
}
@@ -575,13 +575,13 @@ static inline bool system_uses_ttbr0_pan(void)
!cpus_have_const_cap(ARM64_HAS_PAN);
}
-static inline bool system_supports_sve(void)
+static __always_inline bool system_supports_sve(void)
{
return IS_ENABLED(CONFIG_ARM64_SVE) &&
cpus_have_const_cap(ARM64_SVE);
}
-static inline bool system_supports_cnp(void)
+static __always_inline bool system_supports_cnp(void)
{
return IS_ENABLED(CONFIG_ARM64_CNP) &&
cpus_have_const_cap(ARM64_HAS_CNP);
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4e531f57147d..6facd1308e7c 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -34,7 +34,7 @@ static inline void __raw_writew(u16 val, volatile void __iomem *addr)
}
#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
{
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
}
@@ -69,7 +69,7 @@ static inline u16 __raw_readw(const volatile void __iomem *addr)
}
#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
+static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
{
u32 val;
asm volatile(ALTERNATIVE("ldr %w0, [%1]",
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 688c63412cc2..f658dda12364 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -36,7 +36,7 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.hcr_el2 & HCR_RW);
}
@@ -127,7 +127,7 @@ static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
vcpu->arch.vsesr_el2 = vsesr;
}
-static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
}
@@ -153,17 +153,17 @@ static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long
*__vcpu_elr_el1(vcpu) = v;
}
-static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
}
-static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
{
return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
}
-static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return kvm_condition_valid32(vcpu);
@@ -181,13 +181,13 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
* coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
* AArch32 with banked registers.
*/
-static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+static __always_inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
u8 reg_num)
{
return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
unsigned long val)
{
if (reg_num != 31)
@@ -264,12 +264,12 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
return mode != PSR_MODE_EL0t;
}
-static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+static __always_inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.esr_el2;
}
-static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
@@ -279,12 +279,12 @@ static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
return -1;
}
-static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.far_el2;
}
-static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
}
@@ -299,7 +299,7 @@ static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
}
-static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
}
@@ -319,17 +319,17 @@ static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF);
}
-static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
{
return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
}
-static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
}
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
@@ -340,18 +340,18 @@ static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
}
-static inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+static __always_inline unsigned int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
{
return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
}
/* This one is not specific to Data Abort */
-static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL);
}
-static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
{
return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
}
@@ -361,17 +361,17 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW;
}
-static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC;
}
-static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
}
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+static __always_inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
case FSC_SEA:
@@ -390,7 +390,7 @@ static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
}
}
-static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
+static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
{
u32 esr = kvm_vcpu_get_hsr(vcpu);
return ESR_ELx_SYS64_ISS_RT(esr);
@@ -504,7 +504,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
return data; /* Leave LE untouched */
}
-static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
{
if (vcpu_mode_is_32bit(vcpu))
kvm_skip_instr32(vcpu, is_wide_instr);
@@ -519,7 +519,7 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
* Skip an instruction which has been emulated at hyp while most guest sysregs
* are live.
*/
-static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d87aa609d2b6..57fd46acd058 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -626,38 +626,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
#endif
-static inline void kvm_arm_vhe_guest_enter(void)
-{
- local_daif_mask();
-
- /*
- * Having IRQs masked via PMR when entering the guest means the GIC
- * will not signal the CPU of interrupts of lower priority, and the
- * only way to get out will be via guest exceptions.
- * Naturally, we want to avoid this.
- *
- * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
- * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
- */
- pmr_sync();
-}
-
-static inline void kvm_arm_vhe_guest_exit(void)
-{
- /*
- * local_daif_restore() takes care to properly restore PSTATE.DAIF
- * and the GIC PMR if the host is using IRQ priorities.
- */
- local_daif_restore(DAIF_PROCCTX_NOIRQ);
-
- /*
- * When we exit from the guest we change a number of CPU configuration
- * parameters, such as traps. Make sure these changes take effect
- * before running the host or additional guests.
- */
- isb();
-}
-
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a3a6a2ba9a63..fe57f60f06a8 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -47,6 +47,13 @@
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
+/*
+ * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
+ * static inline can allow the compiler to out-of-line this. KVM always wants
+ * the macro version as its always inlined.
+ */
+#define __kvm_swab32(x) ___constant_swab32(x)
+
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 53d846f1bfe7..785762860c63 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -93,7 +93,7 @@ void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
-static inline unsigned long __kern_hyp_va(unsigned long v)
+static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
"ror %0, %0, #1\n"
@@ -473,6 +473,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
extern void *__kvm_bp_vect_base;
extern int __kvm_harden_el2_vector_slot;
+/* This is only called on a VHE system */
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 0958ed6191aa..61fd26752adc 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -83,7 +83,7 @@ static inline bool is_kernel_in_hyp_mode(void)
return read_sysreg(CurrentEL) == CurrentEL_EL2;
}
-static inline bool has_vhe(void)
+static __always_inline bool has_vhe(void)
{
if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
return true;
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index dfe8dd172512..925086b46136 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -625,7 +625,7 @@ static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
}
/* Switch to the guest for VHE systems running in EL2 */
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
@@ -678,7 +678,42 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
return exit_code;
}
-NOKPROBE_SYMBOL(kvm_vcpu_run_vhe);
+NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
+
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ local_daif_mask();
+
+ /*
+ * Having IRQs masked via PMR when entering the guest means the GIC
+ * will not signal the CPU of interrupts of lower priority, and the
+ * only way to get out will be via guest exceptions.
+ * Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
+ */
+ pmr_sync();
+
+ ret = __kvm_vcpu_run_vhe(vcpu);
+
+ /*
+ * local_daif_restore() takes care to properly restore PSTATE.DAIF
+ * and the GIC PMR if the host is using IRQ priorities.
+ */
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
+
+ return ret;
+}
/* Switch to the guest for legacy non-VHE systems */
int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 29ee1feba4eb..4f3a087e36d5 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -69,14 +69,14 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
u32 data = vcpu_get_reg(vcpu, rd);
if (__is_be(vcpu)) {
/* guest pre-swabbed data, undo this for writel() */
- data = swab32(data);
+ data = __kvm_swab32(data);
}
writel_relaxed(data, addr);
} else {
u32 data = readl_relaxed(addr);
if (__is_be(vcpu)) {
/* guest expects swabbed data */
- data = swab32(data);
+ data = __kvm_swab32(data);
}
vcpu_set_reg(vcpu, rd, data);
}
diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi
index 5accda2767be..a3301bab9231 100644
--- a/arch/mips/boot/dts/ingenic/jz4740.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
/ {
#address-cells = <1>;
@@ -45,14 +46,6 @@
#clock-cells = <1>;
};
- watchdog: watchdog@10002000 {
- compatible = "ingenic,jz4740-watchdog";
- reg = <0x10002000 0x10>;
-
- clocks = <&cgu JZ4740_CLK_RTC>;
- clock-names = "rtc";
- };
-
tcu: timer@10002000 {
compatible = "ingenic,jz4740-tcu", "simple-mfd";
reg = <0x10002000 0x1000>;
@@ -73,6 +66,14 @@
interrupt-parent = <&intc>;
interrupts = <23 22 21>;
+
+ watchdog: watchdog@0 {
+ compatible = "ingenic,jz4740-watchdog";
+ reg = <0x0 0xc>;
+
+ clocks = <&tcu TCU_CLK_WDT>;
+ clock-names = "wdt";
+ };
};
rtc_dev: rtc@10003000 {
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index f928329b034b..bb89653d16a3 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/dma/jz4780-dma.h>
/ {
@@ -67,6 +68,14 @@
interrupt-parent = <&intc>;
interrupts = <27 26 25>;
+
+ watchdog: watchdog@0 {
+ compatible = "ingenic,jz4780-watchdog";
+ reg = <0x0 0xc>;
+
+ clocks = <&tcu TCU_CLK_WDT>;
+ clock-names = "wdt";
+ };
};
rtc_dev: rtc@10003000 {
@@ -348,14 +357,6 @@
status = "disabled";
};
- watchdog: watchdog@10002000 {
- compatible = "ingenic,jz4780-watchdog";
- reg = <0x10002000 0x10>;
-
- clocks = <&cgu JZ4780_CLK_RTCLK>;
- clock-names = "rtc";
- };
-
nemc: nemc@13410000 {
compatible = "ingenic,jz4780-nemc";
reg = <0x13410000 0x10000>;
diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi
index 4994c695a1a7..147f7d5c243a 100644
--- a/arch/mips/boot/dts/ingenic/x1000.dtsi
+++ b/arch/mips/boot/dts/ingenic/x1000.dtsi
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <dt-bindings/clock/ingenic,tcu.h>
#include <dt-bindings/clock/x1000-cgu.h>
#include <dt-bindings/dma/x1000-dma.h>
@@ -72,7 +73,7 @@
compatible = "ingenic,x1000-watchdog", "ingenic,jz4780-watchdog";
reg = <0x0 0x10>;
- clocks = <&cgu X1000_CLK_RTCLK>;
+ clocks = <&tcu TCU_CLK_WDT>;
clock-names = "wdt";
};
};
@@ -158,7 +159,6 @@
i2c0: i2c-controller@10050000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10050000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
@@ -173,7 +173,6 @@
i2c1: i2c-controller@10051000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10051000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
@@ -188,7 +187,6 @@
i2c2: i2c-controller@10052000 {
compatible = "ingenic,x1000-i2c";
reg = <0x10052000 0x1000>;
-
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h
index 7c6a1095f556..aabd097933fe 100644
--- a/arch/mips/include/asm/sync.h
+++ b/arch/mips/include/asm/sync.h
@@ -155,9 +155,11 @@
* effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
* optimized memory barrier primitives."). Here we specify that the affected
* sync instructions should be emitted twice.
+ * Note that this expression is evaluated by the assembler (not the compiler),
+ * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
*/
#ifdef CONFIG_CPU_CAVIUM_OCTEON
-# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb))
+# define __SYNC_rpt(type) (1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type) 1
#endif
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6176b9acba95..d0d832ab3d3b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -134,7 +134,7 @@ void release_vpe(struct vpe *v)
{
list_del(&v->list);
if (v->load_addr)
- release_progmem(v);
+ release_progmem(v->load_addr);
kfree(v);
}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index aa89a41dc5dd..d7fe8408603e 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -33,6 +33,7 @@ endif
cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
+ -mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
$(call cc-option, -fno-asynchronous-unwind-tables) \
$(call cc-option, -fno-stack-protector)
@@ -51,6 +52,8 @@ endif
CFLAGS_REMOVE_vgettimeofday.o = -pg
+DISABLE_VDSO := n
+
#
# For the pre-R6 code in arch/mips/vdso/vdso.h for locating
# the base address of VDSO, the linker will emit a R_MIPS_PC32
@@ -64,11 +67,24 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg
ifndef CONFIG_CPU_MIPSR6
ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
$(warning MIPS VDSO requires binutils >= 2.25)
- obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
- ccflags-vdso += -DDISABLE_MIPS_VDSO
+ DISABLE_VDSO := y
endif
endif
+#
+# GCC (at least up to version 9.2) appears to emit function calls that make use
+# of the GOT when targeting microMIPS, which we can't use in the VDSO due to
+# the lack of relocations. As such, we disable the VDSO for microMIPS builds.
+#
+ifdef CONFIG_CPU_MICROMIPS
+ DISABLE_VDSO := y
+endif
+
+ifeq ($(DISABLE_VDSO),y)
+ obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y))
+ ccflags-vdso += -DDISABLE_MIPS_VDSO
+endif
+
# VDSO linker flags.
VDSO_LDFLAGS := \
-Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \
@@ -81,12 +97,18 @@ GCOV_PROFILE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# Check that we don't have PIC 'jalr t9' calls left
+quiet_cmd_vdso_mips_check = VDSOCHK $@
+ cmd_vdso_mips_check = if $(OBJDUMP) --disassemble $@ | egrep -h "jalr.*t9" > /dev/null; \
+ then (echo >&2 "$@: PIC 'jalr t9' calls are not supported"; \
+ rm -f $@; /bin/false); fi
+
#
# Shared build commands.
#
quiet_cmd_vdsold_and_vdso_check = LD $@
- cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check)
quiet_cmd_vdsold = VDSO $@
cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \
diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore
index 8dab0bb6ae66..8a45a37d2af4 100644
--- a/arch/riscv/boot/.gitignore
+++ b/arch/riscv/boot/.gitignore
@@ -1,2 +1,4 @@
Image
Image.gz
+loader
+loader.lds
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 435b65532e29..8e18d2c64399 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -72,6 +72,16 @@
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
+/* PMP configuration */
+#define PMP_R 0x01
+#define PMP_W 0x02
+#define PMP_X 0x04
+#define PMP_A 0x18
+#define PMP_A_TOR 0x08
+#define PMP_A_NA4 0x10
+#define PMP_A_NAPOT 0x18
+#define PMP_L 0x80
+
/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
@@ -100,6 +110,8 @@
#define CSR_MCAUSE 0x342
#define CSR_MTVAL 0x343
#define CSR_MIP 0x344
+#define CSR_PMPCFG0 0x3a0
+#define CSR_PMPADDR0 0x3b0
#define CSR_MHARTID 0xf14
#ifdef CONFIG_RISCV_M_MODE
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 271860fc2c3f..85f2073e7fe4 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -58,6 +58,12 @@ _start_kernel:
/* Reset all registers except ra, a0, a1 */
call reset_regs
+ /* Setup a PMP to permit access to all of memory. */
+ li a0, -1
+ csrw CSR_PMPADDR0, a0
+ li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
+ csrw CSR_PMPCFG0, a0
+
/*
* The hartid in a0 is expected later on, and we have no firmware
* to hand it to us.
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f4cad5163bf2..ffb3d94bf0cc 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -156,6 +156,6 @@ void __init trap_init(void)
csr_write(CSR_SCRATCH, 0);
/* Set the exception vector address */
csr_write(CSR_TVEC, &handle_exception);
- /* Enable all interrupts */
- csr_write(CSR_IE, -1);
+ /* Enable interrupts */
+ csr_write(CSR_IE, IE_SIE | IE_EIE);
}
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index f0cc86040587..ec0ca90dd900 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -19,18 +19,20 @@ asmlinkage void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
+ PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN(__pa((uintptr_t)kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t) kasan_early_shadow_pte)),
+ __pgprot(_PAGE_TABLE)));
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
- pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
/* init for swapper_pg_dir */
pgd = pgd_offset_k(KASAN_SHADOW_START);
@@ -38,37 +40,43 @@ asmlinkage void __init kasan_early_init(void)
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
- pfn_pgd(PFN_DOWN(__pa(((uintptr_t)kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
flush_tlb_all();
}
static void __init populate(void *start, void *end)
{
- unsigned long i;
+ unsigned long i, offset;
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
+ unsigned long n_ptes =
+ ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
unsigned long n_pmds =
- (n_pages % PTRS_PER_PTE) ? n_pages / PTRS_PER_PTE + 1 :
- n_pages / PTRS_PER_PTE;
+ ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
+
+ pte_t *pte =
+ memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ pmd_t *pmd =
+ memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
pgd_t *pgd = pgd_offset_k(vaddr);
- pmd_t *pmd = memblock_alloc(n_pmds * sizeof(pmd_t), PAGE_SIZE);
- pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
for (i = 0; i < n_pages; i++) {
phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
-
- set_pte(pte + i, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
+ set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
}
- for (i = 0; i < n_pmds; ++pgd, i += PTRS_PER_PMD)
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(((uintptr_t)(pmd + i)))),
+ for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
+ set_pmd(&pmd[i],
+ pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
__pgprot(_PAGE_TABLE)));
- for (i = 0; i < n_pages; ++pmd, i += PTRS_PER_PTE)
- set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa((uintptr_t)(pte + i))),
+ for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
+ set_pgd(&pgd[i],
+ pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
__pgprot(_PAGE_TABLE)));
flush_tlb_all();
@@ -81,7 +89,8 @@ void __init kasan_init(void)
unsigned long i;
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
- (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ (void *)kasan_mem_to_shadow((void *)
+ VMALLOC_END));
for_each_memblock(memory, reg) {
void *start = (void *)__va(reg->base);
@@ -90,14 +99,14 @@ void __init kasan_init(void)
if (start >= end)
break;
- populate(kasan_mem_to_shadow(start),
- kasan_mem_to_shadow(end));
+ populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
};
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_early_shadow_pte[i],
mk_pte(virt_to_page(kasan_early_shadow_page),
- __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_ACCESSED)));
+ __pgprot(_PAGE_PRESENT | _PAGE_READ |
+ _PAGE_ACCESSED)));
memset(kasan_early_shadow_page, 0, PAGE_SIZE);
init_task.kasan_depth = 0;
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8f7667..07344d82e88e 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
void io_bitmap_share(struct task_struct *tsk);
void io_bitmap_exit(void);
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
#else
static inline void io_bitmap_share(struct task_struct *tsk) { }
static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 03946eb3e2b9..2a8f2bd2e5cf 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -292,6 +292,14 @@ enum x86emul_mode {
#define X86EMUL_SMM_MASK (1 << 6)
#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
+/*
+ * fastop functions are declared as taking a never-defined fastop parameter,
+ * so they can't be called from C directly.
+ */
+struct fastop;
+
+typedef void (*fastop_t)(struct fastop *);
+
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@@ -324,7 +332,10 @@ struct x86_emulate_ctxt {
struct operand src;
struct operand src2;
struct operand dst;
- int (*execute)(struct x86_emulate_ctxt *ctxt);
+ union {
+ int (*execute)(struct x86_emulate_ctxt *ctxt);
+ fastop_t fop;
+ };
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
/*
* The following six fields are cleared together,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40a0c0fd95ca..98959e8cd448 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1122,6 +1122,7 @@ struct kvm_x86_ops {
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
@@ -1146,7 +1147,7 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
- void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317eb31f..694d8daf4983 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+ PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 84812964d3dd..732f62e04ddb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
+#ifdef CONFIG_X86_IOPL_IOPERM
+ void (*update_io_bitmap)(void);
+#endif
+
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2a85287b3685..8521af3fef27 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,7 +72,7 @@
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
-#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE VMCS_CONTROL_BIT(USR_WAIT_PAUSE)
#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index a50e4a0de315..9915990fd8cf 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -81,6 +81,7 @@
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 503d3f42da16..3f3f780c8c65 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52c9bfbbdb2a..4cdb123ff66a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
- get_cpu_cap(c);
+ set_cpu_cap(c, X86_FEATURE_OSPKE);
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d817f255aed8..6efe0410fb72 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -425,7 +425,29 @@ static void __init sev_map_percpu_data(void)
}
}
+static bool pv_tlb_flush_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
+static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+
#ifdef CONFIG_SMP
+
+static bool pv_ipi_supported(void)
+{
+ return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+}
+
+static bool pv_sched_yield_supported(void)
+{
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+}
+
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
static void __send_ipi_mask(const struct cpumask *mask, int vector)
@@ -490,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
unsigned int this_cpu = smp_processor_id();
- struct cpumask new_mask;
+ struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
const struct cpumask *local_mask;
- cpumask_copy(&new_mask, mask);
- cpumask_clear_cpu(this_cpu, &new_mask);
- local_mask = &new_mask;
+ cpumask_copy(new_mask, mask);
+ cpumask_clear_cpu(this_cpu, new_mask);
+ local_mask = new_mask;
__send_ipi_mask(local_mask, vector);
}
@@ -575,7 +597,6 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
-static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask);
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
@@ -583,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
u8 state;
int cpu;
struct kvm_steal_time *src;
- struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask);
+ struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
cpumask_copy(flushmask, cpumask);
/*
@@ -619,11 +640,10 @@ static void __init kvm_guest_init(void)
pv_ops.time.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported()) {
pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+ pr_info("KVM setup pv remote TLB flush\n");
}
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -632,9 +652,7 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_sched_yield_supported()) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
}
@@ -700,7 +718,7 @@ static uint32_t __init kvm_detect(void)
static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
- if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+ if (pv_ipi_supported())
kvm_setup_pv_ipi();
#endif
}
@@ -732,26 +750,31 @@ static __init int activate_jump_labels(void)
}
arch_initcall(activate_jump_labels);
-static __init int kvm_setup_pv_tlb_flush(void)
+static __init int kvm_alloc_cpumask(void)
{
int cpu;
+ bool alloc = false;
if (!kvm_para_available() || nopv)
return 0;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ if (pv_tlb_flush_supported())
+ alloc = true;
+
+#if defined(CONFIG_SMP)
+ if (pv_ipi_supported())
+ alloc = true;
+#endif
+
+ if (alloc)
for_each_possible_cpu(cpu) {
- zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
+ zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
}
- pr_info("KVM setup pv remote TLB flush\n");
- }
return 0;
}
-arch_initcall(kvm_setup_pv_tlb_flush);
+arch_initcall(kvm_alloc_cpumask);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4f89de..c131ba4e70ef 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
+#include <asm/io_bitmap.h>
/*
* nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .cpu.update_io_bitmap = native_tss_update_io_bitmap,
+#endif
+
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 839b5244e3b7..3053c85e0e42 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
/**
* tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
*/
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
struct thread_struct *t = &current->thread;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 991019d5eee1..1bb4927030af 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -59,6 +59,19 @@ config KVM
If unsure, say N.
+config KVM_WERROR
+ bool "Compile KVM with -Werror"
+ # KASAN may cause the build to fail due to larger frames
+ default y if X86_64 && !KASAN
+ # We use the dependency on !COMPILE_TEST to not be enabled
+ # blindly in allmodconfig or allyesconfig configurations
+ depends on (X86_64 && !KASAN) || !COMPILE_TEST
+ depends on EXPERT
+ help
+ Add -Werror to the build flags for (and only for) i915.ko.
+
+ If in doubt, say "N".
+
config KVM_INTEL
tristate "KVM for Intel (and compatible) processors support"
depends on KVM && IA32_FEAT_CTL
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b19ef421084d..e553f0fdd87d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
ccflags-y += -Iarch/x86/kvm
+ccflags-$(CONFIG_KVM_WERROR) += -Werror
KVM := ../../../virt/kvm
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ddbc61984227..dd19fb3539e0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -191,25 +191,6 @@
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
-/*
- * fastop functions have a special calling convention:
- *
- * dst: rax (in/out)
- * src: rdx (in/out)
- * src2: rcx (in)
- * flags: rflags (in/out)
- * ex: rsi (in:fastop pointer, out:zero if exception)
- *
- * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
- * different operand sizes can be reached by calculation, rather than a jump
- * table (which would be bigger than the code).
- *
- * fastop functions are declared as taking a never-defined fastop parameter,
- * so they can't be called from C directly.
- */
-
-struct fastop;
-
struct opcode {
u64 flags : 56;
u64 intercept : 8;
@@ -311,8 +292,19 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
#define ON64(x)
#endif
-typedef void (*fastop_t)(struct fastop *);
-
+/*
+ * fastop functions have a special calling convention:
+ *
+ * dst: rax (in/out)
+ * src: rdx (in/out)
+ * src2: rcx (in)
+ * flags: rflags (in/out)
+ * ex: rsi (in:fastop pointer, out:zero if exception)
+ *
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
+ */
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
#define __FOP_FUNC(name) \
@@ -5683,7 +5675,7 @@ special_insn:
if (ctxt->execute) {
if (ctxt->d & Fastop)
- rc = fastop(ctxt, (fastop_t)ctxt->execute);
+ rc = fastop(ctxt, ctxt->fop);
else
rc = ctxt->execute(ctxt);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 79afa0bb5f41..c47d2acec529 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -417,7 +417,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
- if (irq.level &&
+ if (irq.trig_mode &&
kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
irq.dest_id, irq.dest_mode))
__set_bit(irq.vector, ioapic_handled_vectors);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afcd30d44cbb..e3099c642fec 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -627,9 +627,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
- if (pv_eoi_get_user(vcpu, &val) < 0)
+ if (pv_eoi_get_user(vcpu, &val) < 0) {
printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return false;
+ }
return val & 0x1;
}
@@ -1046,11 +1048,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (vcpu->arch.apicv_active)
- kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
+ if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
kvm_lapic_set_irr(vector, apic);
-
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 3c6522b84ff1..ffcd96fc02d0 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -339,7 +339,7 @@ TRACE_EVENT(
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
- __field(u8, u)
+ __field(signed char, u)
),
TP_fast_assign(
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bef0ba35f121..24c0b2ba8fb9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -57,11 +57,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id svm_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_SVM),
{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
+#endif
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
@@ -1005,33 +1007,32 @@ static void svm_cpu_uninit(int cpu)
static int svm_cpu_init(int cpu)
{
struct svm_cpu_data *sd;
- int r;
sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
if (!sd)
return -ENOMEM;
sd->cpu = cpu;
- r = -ENOMEM;
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
- goto err_1;
+ goto free_cpu_data;
if (svm_sev_enabled()) {
- r = -ENOMEM;
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
sizeof(void *),
GFP_KERNEL);
if (!sd->sev_vmcbs)
- goto err_1;
+ goto free_save_area;
}
per_cpu(svm_data, cpu) = sd;
return 0;
-err_1:
+free_save_area:
+ __free_page(sd->save_area);
+free_cpu_data:
kfree(sd);
- return r;
+ return -ENOMEM;
}
@@ -1350,6 +1351,24 @@ static __init void svm_adjust_mmio_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
}
+static void svm_hardware_teardown(void)
+{
+ int cpu;
+
+ if (svm_sev_enabled()) {
+ bitmap_free(sev_asid_bitmap);
+ bitmap_free(sev_reclaim_asid_bitmap);
+
+ sev_flush_asids();
+ }
+
+ for_each_possible_cpu(cpu)
+ svm_cpu_uninit(cpu);
+
+ __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
+ iopm_base = 0;
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1463,29 +1482,10 @@ static __init int svm_hardware_setup(void)
return 0;
err:
- __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
- iopm_base = 0;
+ svm_hardware_teardown();
return r;
}
-static __exit void svm_hardware_unsetup(void)
-{
- int cpu;
-
- if (svm_sev_enabled()) {
- bitmap_free(sev_asid_bitmap);
- bitmap_free(sev_reclaim_asid_bitmap);
-
- sev_flush_asids();
- }
-
- for_each_possible_cpu(cpu)
- svm_cpu_uninit(cpu);
-
- __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
- iopm_base = 0;
-}
-
static void init_seg(struct vmcb_seg *seg)
{
seg->selector = 0;
@@ -2196,8 +2196,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static int avic_init_vcpu(struct vcpu_svm *svm)
{
int ret;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
- if (!kvm_vcpu_apicv_active(&svm->vcpu))
+ if (!avic || !irqchip_in_kernel(vcpu->kvm))
return 0;
ret = avic_init_backing_page(&svm->vcpu);
@@ -5232,6 +5233,9 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
bool activated = kvm_vcpu_apicv_active(vcpu);
+ if (!avic)
+ return;
+
if (activated) {
/**
* During AVIC temporary deactivation, guest could update
@@ -5255,8 +5259,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
+ if (!vcpu->arch.apicv_active)
+ return -1;
+
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
@@ -5268,6 +5275,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
+
+ return 0;
}
static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
@@ -7378,7 +7387,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
- .hardware_unsetup = svm_hardware_unsetup,
+ .hardware_unsetup = svm_hardware_teardown,
.check_processor_compatibility = svm_check_processor_compat,
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
@@ -7433,6 +7442,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.run = svm_vcpu_run,
.handle_exit = handle_exit,
.skip_emulated_instruction = skip_emulated_instruction,
+ .update_emulated_instruction = NULL,
.set_interrupt_shadow = svm_set_interrupt_shadow,
.get_interrupt_shadow = svm_get_interrupt_shadow,
.patch_hypercall = svm_patch_hypercall,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 283bdb7071af..f486e2606247 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
extern bool __read_mostly enable_pml;
+extern bool __read_mostly enable_apicv;
extern int __read_mostly pt_mode;
#define PT_MODE_SYSTEM 0
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 3589cd3c0fcc..e920d7834d73 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3161,10 +3161,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
*
* Returns:
- * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
- * NVMX_ENTRY_VMFAIL: Consistency check VMFail
- * NVMX_ENTRY_VMEXIT: Consistency check VMExit
- * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
+ * NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_VMENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_VMENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
bool from_vmentry)
@@ -3609,8 +3609,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
unsigned long exit_qual;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
+ bool mtf_pending = vmx->nested.mtf_pending;
struct kvm_lapic *apic = vcpu->arch.apic;
+ /*
+ * Clear the MTF state. If a higher priority VM-exit is delivered first,
+ * this state is discarded.
+ */
+ vmx->nested.mtf_pending = false;
+
if (lapic_in_kernel(vcpu) &&
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
if (block_nested_events)
@@ -3621,8 +3628,28 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
+ /*
+ * Process any exceptions that are not debug traps before MTF.
+ */
+ if (vcpu->arch.exception.pending &&
+ !vmx_pending_dbg_trap(vcpu) &&
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
+ return 0;
+ }
+
+ if (mtf_pending) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_vmx_update_pending_dbg(vcpu);
+ nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
+ return 0;
+ }
+
if (vcpu->arch.exception.pending &&
- nested_vmx_check_exception(vcpu, &exit_qual)) {
+ nested_vmx_check_exception(vcpu, &exit_qual)) {
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
@@ -5285,24 +5312,17 @@ fail:
return 1;
}
-
-static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+/*
+ * Return true if an IO instruction with the specified port and size should cause
+ * a VM-exit into L1.
+ */
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size)
{
- unsigned long exit_qualification;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
gpa_t bitmap, last_bitmap;
- unsigned int port;
- int size;
u8 b;
- if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
- return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
-
- exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-
- port = exit_qualification >> 16;
- size = (exit_qualification & 7) + 1;
-
last_bitmap = (gpa_t)-1;
b = -1;
@@ -5329,8 +5349,26 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
return false;
}
+static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ unsigned long exit_qualification;
+ unsigned short port;
+ int size;
+
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ port = exit_qualification >> 16;
+ size = (exit_qualification & 7) + 1;
+
+ return nested_vmx_check_io_bitmaps(vcpu, port, size);
+}
+
/*
- * Return 1 if we should exit from L2 to L1 to handle an MSR access access,
+ * Return 1 if we should exit from L2 to L1 to handle an MSR access,
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
* disinterest in the current event (read or write a specific MSR) by using an
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
@@ -5712,6 +5750,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.nested_run_pending)
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+
+ if (vmx->nested.mtf_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
}
}
@@ -5892,6 +5933,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmx->nested.nested_run_pending =
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+ vmx->nested.mtf_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
+
ret = -EINVAL;
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull) {
@@ -5949,8 +5993,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv)
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
{
/*
* Note that as a general rule, the high half of the MSRs (bits in
@@ -5977,7 +6020,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
PIN_BASED_VIRTUAL_NMIS |
- (apicv ? PIN_BASED_POSTED_INTR : 0);
+ (enable_apicv ? PIN_BASED_POSTED_INTR : 0);
msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index fc874d4ead0f..9aeda46f473e 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -17,8 +17,7 @@ enum nvmx_vmentry_status {
};
void vmx_leave_nested(struct kvm_vcpu *vcpu);
-void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
- bool apicv);
+void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps);
void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_set_vmcs_shadowing_bitmap(void);
@@ -34,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu);
+bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
+ int size);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
@@ -175,6 +176,11 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline int nested_cpu_has_mtf(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3be25ecae145..40b1e6138cd5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -64,11 +64,13 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_VMX),
{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
+#endif
bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
@@ -95,7 +97,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
-static bool __read_mostly enable_apicv = 1;
+bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);
/*
@@ -1175,6 +1177,10 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
vmx->guest_msrs[i].mask);
}
+
+ if (vmx->nested.need_vmcs12_to_shadow_sync)
+ nested_sync_vmcs12_to_shadow(vcpu);
+
if (vmx->guest_state_loaded)
return;
@@ -1599,6 +1605,40 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
return 1;
}
+
+/*
+ * Recognizes a pending MTF VM-exit and records the nested state for later
+ * delivery.
+ */
+static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!is_guest_mode(vcpu))
+ return;
+
+ /*
+ * Per the SDM, MTF takes priority over debug-trap exceptions besides
+ * T-bit traps. As instruction emulation is completed (i.e. at the
+ * instruction boundary), any #DB exception pending delivery must be a
+ * debug-trap. Record the pending MTF state to be delivered in
+ * vmx_check_nested_events().
+ */
+ if (nested_cpu_has_mtf(vmcs12) &&
+ (!vcpu->arch.exception.pending ||
+ vcpu->arch.exception.nr == DB_VECTOR))
+ vmx->nested.mtf_pending = true;
+ else
+ vmx->nested.mtf_pending = false;
+}
+
+static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ vmx_update_emulated_instruction(vcpu);
+ return skip_emulated_instruction(vcpu);
+}
+
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
/*
@@ -3818,24 +3858,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
* 2. If target vcpu isn't running(root mode), kick it to pick up the
* interrupt from PIR in next vmentry.
*/
-static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
+static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int r;
r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
if (!r)
- return;
+ return 0;
+
+ if (!vcpu->arch.apicv_active)
+ return -1;
if (pi_test_and_set_pir(vector, &vmx->pi_desc))
- return;
+ return 0;
/* If a previous notification has sent the IPI, nothing to do. */
if (pi_test_and_set_on(&vmx->pi_desc))
- return;
+ return 0;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
+
+ return 0;
}
/*
@@ -6482,8 +6527,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
- if (vmx->nested.need_vmcs12_to_shadow_sync)
- nested_sync_vmcs12_to_shadow(vcpu);
+ /*
+ * We did this in prepare_switch_to_guest, because it needs to
+ * be within srcu_read_lock.
+ */
+ WARN_ON_ONCE(vmx->nested.need_vmcs12_to_shadow_sync);
if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@@ -6757,8 +6805,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
- vmx_capability.ept,
- kvm_vcpu_apicv_active(vcpu));
+ vmx_capability.ept);
else
memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
@@ -6839,8 +6886,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
- enable_apicv);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -7101,6 +7147,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
to_vmx(vcpu)->req_immediate_exit = true;
}
+static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
+ struct x86_instruction_info *info)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ unsigned short port;
+ bool intercept;
+ int size;
+
+ if (info->intercept == x86_intercept_in ||
+ info->intercept == x86_intercept_ins) {
+ port = info->src_val;
+ size = info->dst_bytes;
+ } else {
+ port = info->dst_val;
+ size = info->src_bytes;
+ }
+
+ /*
+ * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
+ * VM-exits depend on the 'unconditional IO exiting' VM-execution
+ * control.
+ *
+ * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
+ */
+ if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+ intercept = nested_cpu_has(vmcs12,
+ CPU_BASED_UNCOND_IO_EXITING);
+ else
+ intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
+}
+
static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -7108,19 +7188,45 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+ switch (info->intercept) {
/*
* RDPID causes #UD if disabled through secondary execution controls.
* Because it is marked as EmulateOnUD, we need to intercept it here.
*/
- if (info->intercept == x86_intercept_rdtscp &&
- !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
- ctxt->exception.vector = UD_VECTOR;
- ctxt->exception.error_code_valid = false;
- return X86EMUL_PROPAGATE_FAULT;
- }
+ case x86_intercept_rdtscp:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
+ ctxt->exception.vector = UD_VECTOR;
+ ctxt->exception.error_code_valid = false;
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ break;
+
+ case x86_intercept_in:
+ case x86_intercept_ins:
+ case x86_intercept_out:
+ case x86_intercept_outs:
+ return vmx_check_intercept_io(vcpu, info);
+
+ case x86_intercept_lgdt:
+ case x86_intercept_lidt:
+ case x86_intercept_lldt:
+ case x86_intercept_ltr:
+ case x86_intercept_sgdt:
+ case x86_intercept_sidt:
+ case x86_intercept_sldt:
+ case x86_intercept_str:
+ if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
+ return X86EMUL_CONTINUE;
+
+ /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */
+ break;
/* TODO: check more intercepts... */
- return X86EMUL_CONTINUE;
+ default:
+ break;
+ }
+
+ return X86EMUL_UNHANDLEABLE;
}
#ifdef CONFIG_X86_64
@@ -7702,7 +7808,7 @@ static __init int hardware_setup(void)
if (nested) {
nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept, enable_apicv);
+ vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)
@@ -7786,7 +7892,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
- .skip_emulated_instruction = skip_emulated_instruction,
+ .skip_emulated_instruction = vmx_skip_emulated_instruction,
+ .update_emulated_instruction = vmx_update_emulated_instruction,
.set_interrupt_shadow = vmx_set_interrupt_shadow,
.get_interrupt_shadow = vmx_get_interrupt_shadow,
.patch_hypercall = vmx_patch_hypercall,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7f42cf3dcd70..e64da06c7009 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -150,6 +150,9 @@ struct nested_vmx {
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
+ /* Pending MTF VM-exit into L1. */
+ bool mtf_pending;
+
struct loaded_vmcs vmcs02;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb5d64ebc35d..5de200663f51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6891,6 +6891,8 @@ restart:
kvm_rip_write(vcpu, ctxt->eip);
if (r && ctxt->tf)
r = kvm_vcpu_do_singlestep(vcpu);
+ if (kvm_x86_ops->update_emulated_instruction)
+ kvm_x86_ops->update_emulated_instruction(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -7188,15 +7190,15 @@ static void kvm_timer_init(void)
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
- struct cpufreq_policy policy;
+ struct cpufreq_policy *policy;
int cpu;
- memset(&policy, 0, sizeof(policy));
cpu = get_cpu();
- cpufreq_get_policy(&policy, cpu);
- if (policy.cpuinfo.max_freq)
- max_tsc_khz = policy.cpuinfo.max_freq;
+ policy = cpufreq_cpu_get(cpu);
+ if (policy && policy->cpuinfo.max_freq)
+ max_tsc_khz = policy->cpuinfo.max_freq;
put_cpu();
+ cpufreq_cpu_put(policy);
#endif
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
@@ -7306,12 +7308,12 @@ int kvm_arch_init(void *opaque)
}
if (!ops->cpu_has_kvm_support()) {
- printk(KERN_ERR "kvm: no hardware support\n");
+ pr_err_ratelimited("kvm: no hardware support\n");
r = -EOPNOTSUPP;
goto out;
}
if (ops->disabled_by_bios()) {
- printk(KERN_ERR "kvm: disabled by bios\n");
+ pr_err_ratelimited("kvm: disabled by bios\n");
r = -EOPNOTSUPP;
goto out;
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 64229dad7eab..69309cd56fdf 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -363,13 +363,8 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
{
const struct ptdump_range ptdump_ranges[] = {
#ifdef CONFIG_X86_64
-
-#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1))
-#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \
- normalize_addr_shift)
-
{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
- {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL},
+ {GUARD_HOLE_END_ADDR, ~0UL},
#else
{0, ~0UL},
#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index fa8506e76bbe..d19a2edd63cb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -180,7 +180,7 @@ void efi_sync_low_kernel_mappings(void)
static inline phys_addr_t
virt_to_phys_or_null_size(void *va, unsigned long size)
{
- bool bad_size;
+ phys_addr_t pa;
if (!va)
return 0;
@@ -188,16 +188,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size)
if (virt_addr_valid(va))
return virt_to_phys(va);
- /*
- * A fully aligned variable on the stack is guaranteed not to
- * cross a page bounary. Try to catch strings on the stack by
- * checking that 'size' is a power of two.
- */
- bad_size = size > PAGE_SIZE || !is_power_of_2(size);
+ pa = slow_virt_to_phys(va);
- WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size);
+ /* check if the object crosses a page boundary */
+ if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK))
+ return 0;
- return slow_virt_to_phys(va);
+ return pa;
}
#define virt_to_phys_or_null(addr) \
@@ -568,85 +565,25 @@ efi_thunk_set_virtual_address_map(unsigned long memory_map_size,
static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
- efi_status_t status;
- u32 phys_tm, phys_tc;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
- phys_tc = virt_to_phys_or_null(tc);
-
- status = efi_thunk(get_time, phys_tm, phys_tc);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_time, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_enabled, phys_pending, phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_enabled = virt_to_phys_or_null(enabled);
- phys_pending = virt_to_phys_or_null(pending);
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(get_wakeup_time, phys_enabled,
- phys_pending, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static efi_status_t
efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
- efi_status_t status;
- u32 phys_tm;
- unsigned long flags;
-
- spin_lock(&rtc_lock);
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_tm = virt_to_phys_or_null(tm);
-
- status = efi_thunk(set_wakeup_time, enabled, phys_tm);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
- spin_unlock(&rtc_lock);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static unsigned long efi_name_size(efi_char16_t *name)
@@ -658,6 +595,8 @@ static efi_status_t
efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 *attr, unsigned long *data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
@@ -665,14 +604,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_data_size = virt_to_phys_or_null(data_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_attr = virt_to_phys_or_null(attr);
phys_data = virt_to_phys_or_null_size(data, *data_size);
- status = efi_thunk(get_variable, phys_name, phys_vendor,
- phys_attr, phys_data_size, phys_data);
+ if (!phys_name || (data && !phys_data))
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_variable, phys_name, phys_vendor,
+ phys_attr, phys_data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -683,19 +627,25 @@ static efi_status_t
efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size, void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -707,6 +657,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
u32 attr, unsigned long data_size,
void *data)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
unsigned long flags;
@@ -714,13 +666,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
return EFI_NOT_READY;
+ *vnd = *vendor;
+
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_data = virt_to_phys_or_null_size(data, data_size);
- /* If data_size is > sizeof(u32) we've got problems */
- status = efi_thunk(set_variable, phys_name, phys_vendor,
- attr, data_size, phys_data);
+ if (!phys_name || !phys_data)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
@@ -732,39 +688,36 @@ efi_thunk_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
+ u8 buf[24] __aligned(8);
+ efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd));
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
unsigned long flags;
spin_lock_irqsave(&efi_runtime_lock, flags);
+ *vnd = *vendor;
+
phys_name_size = virt_to_phys_or_null(name_size);
- phys_vendor = virt_to_phys_or_null(vendor);
+ phys_vendor = virt_to_phys_or_null(vnd);
phys_name = virt_to_phys_or_null_size(name, *name_size);
- status = efi_thunk(get_next_variable, phys_name_size,
- phys_name, phys_vendor);
+ if (!phys_name)
+ status = EFI_INVALID_PARAMETER;
+ else
+ status = efi_thunk(get_next_variable, phys_name_size,
+ phys_name, phys_vendor);
spin_unlock_irqrestore(&efi_runtime_lock, flags);
+ *vendor = *vnd;
return status;
}
static efi_status_t
efi_thunk_get_next_high_mono_count(u32 *count)
{
- efi_status_t status;
- u32 phys_count;
- unsigned long flags;
-
- spin_lock_irqsave(&efi_runtime_lock, flags);
-
- phys_count = virt_to_phys_or_null(count);
- status = efi_thunk(get_next_high_mono_count, phys_count);
-
- spin_unlock_irqrestore(&efi_runtime_lock, flags);
-
- return status;
+ return EFI_UNSUPPORTED;
}
static void
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 79409120a603..507f4fb88fa7 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,9 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+#include <asm/io_bitmap.h>
+#endif
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
+#ifdef CONFIG_X86_IOPL_IOPERM
+static void xen_update_io_bitmap(void)
+{
+ struct physdev_set_iobitmap iobitmap;
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
+ native_tss_update_io_bitmap();
+
+ iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
+ tss->x86_tss.io_bitmap_base;
+ if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
+ iobitmap.nr_ports = 0;
+ else
+ iobitmap.nr_ports = IO_BITMAP_BITS;
+
+ HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
+}
+#endif
+
static void xen_io_delay(void)
{
}
@@ -1047,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
+#ifdef CONFIG_X86_IOPL_IOPERM
+ .update_io_bitmap = xen_update_io_bitmap,
+#endif
.io_delay = xen_io_delay,
/* Xen takes care of %gs when switching to usermode for us */
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 3f977c517960..5cc775bdb06a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
- blk_mq_request_bypass_insert(rq, false);
+ blk_mq_request_bypass_insert(rq, false, false);
return;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index ca22afd47b3d..856356b1619e 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
bool has_sched,
struct request *rq)
{
- /* dispatch flush rq directly */
- if (rq->rq_flags & RQF_FLUSH_SEQ) {
- spin_lock(&hctx->lock);
- list_add(&rq->queuelist, &hctx->dispatch);
- spin_unlock(&hctx->lock);
+ /*
+ * dispatch flush and passthrough rq directly
+ *
+ * passthrough request has to be added to hctx->dispatch directly.
+ * For some reason, device may be in one situation which can't
+ * handle FS request, so STS_RESOURCE is always returned and the
+ * FS request will be added to hctx->dispatch. However passthrough
+ * request may be required at that time for fixing the problem. If
+ * passthrough request is added to scheduler queue, there isn't any
+ * chance to dispatch it given we prioritize requests in hctx->dispatch.
+ */
+ if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
- }
if (has_sched)
rq->rq_flags |= RQF_SORTED;
@@ -391,8 +397,10 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != -1));
- if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
+ if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ blk_mq_request_bypass_insert(rq, at_head, false);
goto run;
+ }
if (e && e->type->ops.insert_requests) {
LIST_HEAD(list);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index fbacde454718..586c9d6e904a 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -183,8 +183,8 @@ found_tag:
return tag + tag_offset;
}
-void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
- struct blk_mq_ctx *ctx, unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
+ unsigned int tag)
{
if (!blk_mq_tag_is_reserved(tags, tag)) {
const int real_tag = tag - tags->nr_reserved_tags;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 15bc74acb57e..2b8321efb682 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
-extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
- struct blk_mq_ctx *ctx, unsigned int tag);
+extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
+ unsigned int tag);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags,
unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a12b1763508d..d92088dec6c3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq)
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
if (rq->tag != -1)
- blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+ blk_mq_put_tag(hctx->tags, ctx, rq->tag);
if (sched_tag != -1)
- blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+ blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
blk_queue_exit(q);
}
@@ -735,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work)
* merge.
*/
if (rq->rq_flags & RQF_DONTPREP)
- blk_mq_request_bypass_insert(rq, false);
+ blk_mq_request_bypass_insert(rq, false, false);
else
blk_mq_sched_insert_request(rq, true, false, false);
}
@@ -1286,7 +1286,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
q->mq_ops->commit_rqs(hctx);
spin_lock(&hctx->lock);
- list_splice_init(list, &hctx->dispatch);
+ list_splice_tail_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock);
/*
@@ -1677,12 +1677,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue)
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+ bool run_queue)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
spin_lock(&hctx->lock);
- list_add_tail(&rq->queuelist, &hctx->dispatch);
+ if (at_head)
+ list_add(&rq->queuelist, &hctx->dispatch);
+ else
+ list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
if (run_queue)
@@ -1849,7 +1853,7 @@ insert:
if (bypass_insert)
return BLK_STS_RESOURCE;
- blk_mq_request_bypass_insert(rq, run_queue);
+ blk_mq_request_bypass_insert(rq, false, run_queue);
return BLK_STS_OK;
}
@@ -1876,7 +1880,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
- blk_mq_request_bypass_insert(rq, true);
+ blk_mq_request_bypass_insert(rq, false, true);
else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);
@@ -1910,7 +1914,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
if (ret != BLK_STS_OK) {
if (ret == BLK_STS_RESOURCE ||
ret == BLK_STS_DEV_RESOURCE) {
- blk_mq_request_bypass_insert(rq,
+ blk_mq_request_bypass_insert(rq, false,
list_empty(list));
break;
}
@@ -3398,7 +3402,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
}
static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
unsigned long ret = 0;
@@ -3431,7 +3434,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
}
static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
- struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
struct hrtimer_sleeper hs;
@@ -3451,7 +3453,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
if (q->poll_nsec > 0)
nsecs = q->poll_nsec;
else
- nsecs = blk_mq_poll_nsecs(q, hctx, rq);
+ nsecs = blk_mq_poll_nsecs(q, rq);
if (!nsecs)
return false;
@@ -3506,7 +3508,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
return false;
}
- return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+ return blk_mq_poll_hybrid_sleep(q, rq);
}
/**
diff --git a/block/blk-mq.h b/block/blk-mq.h
index eaaca8fc1c28..10bfdfb494fa 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
-void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
+void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
+ bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
@@ -199,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx)
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
+ blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
rq->tag = -1;
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index b5516b04ffc0..6e9ec6e3fe47 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -55,12 +55,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat)
}
#endif
+static bool acpi_no_watchdog;
+
static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void)
{
const struct acpi_table_wdat *wdat = NULL;
acpi_status status;
- if (acpi_disabled)
+ if (acpi_disabled || acpi_no_watchdog)
return NULL;
status = acpi_get_table(ACPI_SIG_WDAT, 0,
@@ -88,6 +90,14 @@ bool acpi_has_watchdog(void)
}
EXPORT_SYMBOL_GPL(acpi_has_watchdog);
+/* ACPI watchdog can be disabled on boot command line */
+static int __init disable_acpi_watchdog(char *str)
+{
+ acpi_no_watchdog = true;
+ return 1;
+}
+__setup("acpi_no_watchdog", disable_acpi_watchdog);
+
void __init acpi_watchdog_init(void)
{
const struct acpi_wdat_entry *entries;
@@ -126,12 +136,11 @@ void __init acpi_watchdog_init(void)
gas = &entries[i].register_region;
res.start = gas->address;
+ res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
res.flags = IORESOURCE_MEM;
- res.end = res.start + ALIGN(gas->access_width, 4) - 1;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
res.flags = IORESOURCE_IO;
- res.end = res.start + gas->access_width - 1;
} else {
pr_warn("Unsupported address space: %u\n",
gas->space_id);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cd3612e4e2e1..8ef65c085640 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -853,14 +853,17 @@ static void reset_fdc_info(int mode)
/* selects the fdc and drive, and enables the fdc's input/dma. */
static void set_fdc(int drive)
{
+ unsigned int new_fdc = fdc;
+
if (drive >= 0 && drive < N_DRIVE) {
- fdc = FDC(drive);
+ new_fdc = FDC(drive);
current_drive = drive;
}
- if (fdc != 1 && fdc != 0) {
+ if (new_fdc >= N_FDC) {
pr_info("bad fdc value\n");
return;
}
+ fdc = new_fdc;
set_dor(fdc, ~0, 8);
#if N_FDC > 1
set_dor(1 - fdc, ~8, 0);
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index bc837862b767..62b660821dbc 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -14,9 +14,6 @@
#include <linux/fault-inject.h>
struct nullb_cmd {
- struct list_head list;
- struct llist_node ll_list;
- struct __call_single_data csd;
struct request *rq;
struct bio *bio;
unsigned int tag;
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 16510795e377..133060431dbd 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1518,8 +1518,6 @@ static int setup_commands(struct nullb_queue *nq)
for (i = 0; i < nq->queue_depth; i++) {
cmd = &nq->cmds[i];
- INIT_LIST_HEAD(&cmd->list);
- cmd->ll_list.next = NULL;
cmd->tag = -1U;
}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 117cfc8cd05a..cda5cf917e9a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -276,7 +276,7 @@ static const struct block_device_operations pcd_bdops = {
.release = pcd_block_release,
.ioctl = pcd_block_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = blkdev_compat_ptr_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
#endif
.check_events = pcd_block_check_events,
};
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 886b2638c730..c51292c2a131 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -519,7 +519,7 @@ static const struct block_device_operations gdrom_bdops = {
.check_events = gdrom_bdops_check_events,
.ioctl = gdrom_bdops_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = blkdev_compat_ptr_ioctl,
+ .compat_ioctl = blkdev_compat_ptr_ioctl,
#endif
};
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index cbe6c94bf158..808874bccf4a 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1076,9 +1076,17 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)
pol = policy->last_policy;
} else if (def_gov) {
pol = cpufreq_parse_policy(def_gov->name);
- } else {
- return -ENODATA;
+ /*
+ * In case the default governor is neiter "performance"
+ * nor "powersave", fall back to the initial policy
+ * value set by the driver.
+ */
+ if (pol == CPUFREQ_POLICY_UNKNOWN)
+ pol = policy->policy;
}
+ if (pol != CPUFREQ_POLICY_PERFORMANCE &&
+ pol != CPUFREQ_POLICY_POWERSAVE)
+ return -ENODATA;
}
return cpufreq_set_policy(policy, gov, pol);
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index cceee8bc3c2f..7dcf2093e531 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -738,7 +738,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
{
struct devfreq *devfreq;
struct devfreq_governor *governor;
- static atomic_t devfreq_no = ATOMIC_INIT(-1);
int err = 0;
if (!dev || !profile || !governor_name) {
@@ -800,8 +799,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
atomic_set(&devfreq->suspend_count, 0);
- dev_set_name(&devfreq->dev, "devfreq%d",
- atomic_inc_return(&devfreq_no));
+ dev_set_name(&devfreq->dev, "%s", dev_name(dev));
err = device_register(&devfreq->dev);
if (err) {
mutex_unlock(&devfreq->lock);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 621220ab3d0e..21ea99f65113 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -552,7 +552,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
seed = early_memremap(efi.rng_seed, sizeof(*seed));
if (seed != NULL) {
- size = seed->size;
+ size = READ_ONCE(seed->size);
early_memunmap(seed, sizeof(*seed));
} else {
pr_err("Could not map UEFI random seed!\n");
@@ -562,7 +562,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
sizeof(*seed) + size);
if (seed != NULL) {
pr_notice("seeding entropy pool\n");
- add_bootloader_randomness(seed->bits, seed->size);
+ add_bootloader_randomness(seed->bits, size);
early_memunmap(seed, sizeof(*seed) + size);
} else {
pr_err("Could not map UEFI random seed!\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 94e2fd758e01..42f4febe24c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1389,7 +1389,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
- DRIVER_USE_AGP | DRIVER_ATOMIC |
+ DRIVER_ATOMIC |
DRIVER_GEM |
DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
DRIVER_SYNCOBJ_TIMELINE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index d3c27a3c43f6..7546da0cc70c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -195,6 +195,7 @@ struct amdgpu_gmc {
uint32_t srbm_soft_reset;
bool prt_warning;
uint64_t stolen_size;
+ uint32_t sdpif_register;
/* apertures */
u64 shared_aperture_start;
u64 shared_aperture_end;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..cc0c273a86f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1272,6 +1272,19 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
}
/**
+ * gmc_v9_0_restore_registers - restores regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This restores register values, saved at suspend.
+ */
+static void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_RAVEN)
+ WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
+}
+
+/**
* gmc_v9_0_gart_enable - gart enable
*
* @adev: amdgpu_device pointer
@@ -1377,6 +1390,20 @@ static int gmc_v9_0_hw_init(void *handle)
}
/**
+ * gmc_v9_0_save_registers - saves regs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This saves potential register values that should be
+ * restored upon resume
+ */
+static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
+{
+ if (adev->asic_type == CHIP_RAVEN)
+ adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
+}
+
+/**
* gmc_v9_0_gart_disable - gart disable
*
* @adev: amdgpu_device pointer
@@ -1412,9 +1439,16 @@ static int gmc_v9_0_hw_fini(void *handle)
static int gmc_v9_0_suspend(void *handle)
{
+ int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- return gmc_v9_0_hw_fini(adev);
+ r = gmc_v9_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ gmc_v9_0_save_registers(adev);
+
+ return 0;
}
static int gmc_v9_0_resume(void *handle)
@@ -1422,6 +1456,7 @@ static int gmc_v9_0_resume(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ gmc_v9_0_restore_registers(adev);
r = gmc_v9_0_hw_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
index b6f74bf4af02..27bb8c1ab858 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
@@ -7376,6 +7376,8 @@
#define mmCRTC4_CRTC_DRR_CONTROL 0x0f3e
#define mmCRTC4_CRTC_DRR_CONTROL_BASE_IDX 2
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x395d
+#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2
// addressBlock: dce_dc_fmt4_dispdec
// base address: 0x2000
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index b06c057a9002..c9e5ce135fd4 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -978,8 +978,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu)
struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks;
int ret = 0;
- max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks),
+ if (!smu->smu_table.max_sustainable_clocks)
+ max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks),
GFP_KERNEL);
+ else
+ max_sustainable_clocks = smu->smu_table.max_sustainable_clocks;
+
smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks;
max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100;
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b8c5f8934dbd..a1f2411aa21b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -294,7 +294,7 @@ extra-$(CONFIG_DRM_I915_WERROR) += \
$(shell cd $(srctree)/$(src) && find * -name '*.h')))
quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
- cmd_hdrtest = $(CC) $(c_flags) -S -o /dev/null -x c /dev/null -include $<; touch $@
+ cmd_hdrtest = $(CC) $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
$(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index f7e4b39c734f..59b387ade49c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -256,8 +256,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
freed = i915_gem_shrink(i915, -1UL, NULL,
I915_SHRINK_BOUND |
- I915_SHRINK_UNBOUND |
- I915_SHRINK_ACTIVE);
+ I915_SHRINK_UNBOUND);
}
return freed;
@@ -336,7 +335,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
freed_pages = 0;
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
freed_pages += i915_gem_shrink(i915, -1UL, NULL,
- I915_SHRINK_ACTIVE |
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_WRITEBACK);
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 2477a1e5a166..ae139f0877ae 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -151,12 +151,12 @@ static void dmabuf_gem_object_free(struct kref *kref)
dmabuf_obj = container_of(pos,
struct intel_vgpu_dmabuf_obj, list);
if (dmabuf_obj == obj) {
+ list_del(pos);
intel_gvt_hypervisor_put_vfio_device(vgpu);
idr_remove(&vgpu->object_idr,
dmabuf_obj->dmabuf_id);
kfree(dmabuf_obj->info);
kfree(dmabuf_obj);
- list_del(pos);
break;
}
}
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 85bd9bf4f6ee..487af6ea9972 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -560,9 +560,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
intel_vgpu_reset_mmio(vgpu, dmlr);
populate_pvinfo_page(vgpu);
- intel_vgpu_reset_display(vgpu);
if (dmlr) {
+ intel_vgpu_reset_display(vgpu);
intel_vgpu_reset_cfg_space(vgpu);
/* only reset the failsafe mode when dmlr reset */
vgpu->failsafe = false;
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 83f01401b8b5..f631f6d21127 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -437,7 +437,7 @@ static const struct intel_device_info snb_m_gt2_info = {
.has_rc6 = 1, \
.has_rc6p = 1, \
.has_rps = true, \
- .ppgtt_type = INTEL_PPGTT_FULL, \
+ .ppgtt_type = INTEL_PPGTT_ALIASING, \
.ppgtt_size = 31, \
IVB_PIPE_OFFSETS, \
IVB_CURSOR_OFFSETS, \
@@ -494,7 +494,7 @@ static const struct intel_device_info vlv_info = {
.has_rps = true,
.display.has_gmch = 1,
.display.has_hotplug = 1,
- .ppgtt_type = INTEL_PPGTT_FULL,
+ .ppgtt_type = INTEL_PPGTT_ALIASING,
.ppgtt_size = 31,
.has_snoop = true,
.has_coherent_ggtt = false,
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ec0299490dd4..aa729d04abe2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -822,11 +822,6 @@ static ssize_t i915_pmu_event_show(struct device *dev,
return sprintf(buf, "config=0x%lx\n", eattr->val);
}
-static struct attribute_group i915_pmu_events_attr_group = {
- .name = "events",
- /* Patch in attrs at runtime. */
-};
-
static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
@@ -846,13 +841,6 @@ static const struct attribute_group i915_pmu_cpumask_attr_group = {
.attrs = i915_cpumask_attrs,
};
-static const struct attribute_group *i915_pmu_attr_groups[] = {
- &i915_pmu_format_attr_group,
- &i915_pmu_events_attr_group,
- &i915_pmu_cpumask_attr_group,
- NULL
-};
-
#define __event(__config, __name, __unit) \
{ \
.config = (__config), \
@@ -1026,23 +1014,23 @@ err_alloc:
static void free_event_attributes(struct i915_pmu *pmu)
{
- struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;
+ struct attribute **attr_iter = pmu->events_attr_group.attrs;
for (; *attr_iter; attr_iter++)
kfree((*attr_iter)->name);
- kfree(i915_pmu_events_attr_group.attrs);
+ kfree(pmu->events_attr_group.attrs);
kfree(pmu->i915_attr);
kfree(pmu->pmu_attr);
- i915_pmu_events_attr_group.attrs = NULL;
+ pmu->events_attr_group.attrs = NULL;
pmu->i915_attr = NULL;
pmu->pmu_attr = NULL;
}
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
GEM_BUG_ON(!pmu->base.event_init);
@@ -1055,7 +1043,7 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
- struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
+ struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
unsigned int target;
GEM_BUG_ON(!pmu->base.event_init);
@@ -1072,8 +1060,6 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
return 0;
}
-static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;
-
static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
{
enum cpuhp_state slot;
@@ -1087,21 +1073,22 @@ static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu)
return ret;
slot = ret;
- ret = cpuhp_state_add_instance(slot, &pmu->node);
+ ret = cpuhp_state_add_instance(slot, &pmu->cpuhp.node);
if (ret) {
cpuhp_remove_multi_state(slot);
return ret;
}
- cpuhp_slot = slot;
+ pmu->cpuhp.slot = slot;
return 0;
}
static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu)
{
- WARN_ON(cpuhp_slot == CPUHP_INVALID);
- WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &pmu->node));
- cpuhp_remove_multi_state(cpuhp_slot);
+ WARN_ON(pmu->cpuhp.slot == CPUHP_INVALID);
+ WARN_ON(cpuhp_state_remove_instance(pmu->cpuhp.slot, &pmu->cpuhp.node));
+ cpuhp_remove_multi_state(pmu->cpuhp.slot);
+ pmu->cpuhp.slot = CPUHP_INVALID;
}
static bool is_igp(struct drm_i915_private *i915)
@@ -1118,6 +1105,13 @@ static bool is_igp(struct drm_i915_private *i915)
void i915_pmu_register(struct drm_i915_private *i915)
{
struct i915_pmu *pmu = &i915->pmu;
+ const struct attribute_group *attr_groups[] = {
+ &i915_pmu_format_attr_group,
+ &pmu->events_attr_group,
+ &i915_pmu_cpumask_attr_group,
+ NULL
+ };
+
int ret = -ENOMEM;
if (INTEL_GEN(i915) <= 2) {
@@ -1128,6 +1122,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
spin_lock_init(&pmu->lock);
hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
pmu->timer.function = i915_sample;
+ pmu->cpuhp.slot = CPUHP_INVALID;
if (!is_igp(i915)) {
pmu->name = kasprintf(GFP_KERNEL,
@@ -1143,11 +1138,16 @@ void i915_pmu_register(struct drm_i915_private *i915)
if (!pmu->name)
goto err;
- i915_pmu_events_attr_group.attrs = create_event_attributes(pmu);
- if (!i915_pmu_events_attr_group.attrs)
+ pmu->events_attr_group.name = "events";
+ pmu->events_attr_group.attrs = create_event_attributes(pmu);
+ if (!pmu->events_attr_group.attrs)
goto err_name;
- pmu->base.attr_groups = i915_pmu_attr_groups;
+ pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
+ GFP_KERNEL);
+ if (!pmu->base.attr_groups)
+ goto err_attr;
+
pmu->base.task_ctx_nr = perf_invalid_context;
pmu->base.event_init = i915_pmu_event_init;
pmu->base.add = i915_pmu_event_add;
@@ -1159,7 +1159,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
ret = perf_pmu_register(&pmu->base, pmu->name, -1);
if (ret)
- goto err_attr;
+ goto err_groups;
ret = i915_pmu_register_cpuhp_state(pmu);
if (ret)
@@ -1169,6 +1169,8 @@ void i915_pmu_register(struct drm_i915_private *i915)
err_unreg:
perf_pmu_unregister(&pmu->base);
+err_groups:
+ kfree(pmu->base.attr_groups);
err_attr:
pmu->base.event_init = NULL;
free_event_attributes(pmu);
@@ -1194,6 +1196,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
perf_pmu_unregister(&pmu->base);
pmu->base.event_init = NULL;
+ kfree(pmu->base.attr_groups);
if (!is_igp(i915))
kfree(pmu->name);
free_event_attributes(pmu);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 6c1647c5daf2..f1d6cad0d7d5 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -39,9 +39,12 @@ struct i915_pmu_sample {
struct i915_pmu {
/**
- * @node: List node for CPU hotplug handling.
+ * @cpuhp: Struct used for CPU hotplug handling.
*/
- struct hlist_node node;
+ struct {
+ struct hlist_node node;
+ enum cpuhp_state slot;
+ } cpuhp;
/**
* @base: PMU base.
*/
@@ -105,6 +108,10 @@ struct i915_pmu {
*/
ktime_t sleep_last;
/**
+ * @events_attr_group: Device events attribute group.
+ */
+ struct attribute_group events_attr_group;
+ /**
* @i915_attr: Memory block holding device attributes.
*/
void *i915_attr;
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index fd74e2611185..8696af1ee14d 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -37,6 +37,7 @@
#include <linux/vga_switcheroo.h>
#include <linux/mmu_notifier.h>
+#include <drm/drm_agpsupport.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_drv.h>
#include <drm/drm_fb_helper.h>
@@ -325,6 +326,7 @@ static int radeon_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
unsigned long flags = 0;
+ struct drm_device *dev;
int ret;
if (!ent)
@@ -365,7 +367,44 @@ static int radeon_pci_probe(struct pci_dev *pdev,
if (ret)
return ret;
- return drm_get_pci_dev(pdev, ent, &kms_driver);
+ dev = drm_dev_alloc(&kms_driver, &pdev->dev);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
+ ret = pci_enable_device(pdev);
+ if (ret)
+ goto err_free;
+
+ dev->pdev = pdev;
+#ifdef __alpha__
+ dev->hose = pdev->sysdata;
+#endif
+
+ pci_set_drvdata(pdev, dev);
+
+ if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP))
+ dev->agp = drm_agp_init(dev);
+ if (dev->agp) {
+ dev->agp->agp_mtrr = arch_phys_wc_add(
+ dev->agp->agp_info.aper_base,
+ dev->agp->agp_info.aper_size *
+ 1024 * 1024);
+ }
+
+ ret = drm_dev_register(dev, ent->driver_data);
+ if (ret)
+ goto err_agp;
+
+ return 0;
+
+err_agp:
+ if (dev->agp)
+ arch_phys_wc_del(dev->agp->agp_mtrr);
+ kfree(dev->agp);
+ pci_disable_device(pdev);
+err_free:
+ drm_dev_put(dev);
+ return ret;
}
static void
@@ -575,7 +614,7 @@ radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
- DRIVER_USE_AGP | DRIVER_GEM | DRIVER_RENDER,
+ DRIVER_GEM | DRIVER_RENDER,
.load = radeon_driver_load_kms,
.open = radeon_driver_open_kms,
.postclose = radeon_driver_postclose_kms,
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index d24f23a81656..dd2f19b8022b 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/vga_switcheroo.h>
+#include <drm/drm_agpsupport.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_file.h>
#include <drm/drm_ioctl.h>
@@ -77,6 +78,11 @@ void radeon_driver_unload_kms(struct drm_device *dev)
radeon_modeset_fini(rdev);
radeon_device_fini(rdev);
+ if (dev->agp)
+ arch_phys_wc_del(dev->agp->agp_mtrr);
+ kfree(dev->agp);
+ dev->agp = NULL;
+
done_free:
kfree(rdev);
dev->dev_private = NULL;
diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c
index ae79a7c66737..fa704153cb00 100644
--- a/drivers/hid/hid-alps.c
+++ b/drivers/hid/hid-alps.c
@@ -730,7 +730,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi)
if (data->has_sp) {
input2 = input_allocate_device();
if (!input2) {
- input_free_device(input2);
+ ret = -ENOMEM;
goto exit;
}
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c
index 6ac8becc2372..d732d1d10caf 100644
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -340,7 +340,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi,
unsigned long **bit, int *max)
{
if (usage->hid == (HID_UP_CUSTOM | 0x0003) ||
- usage->hid == (HID_UP_MSVENDOR | 0x0003)) {
+ usage->hid == (HID_UP_MSVENDOR | 0x0003) ||
+ usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) {
/* The fn key on Apple USB keyboards */
set_bit(EV_REP, hi->input->evbit);
hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN);
diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c
index 3f6abd190df4..db6da21ade06 100644
--- a/drivers/hid/hid-bigbenff.c
+++ b/drivers/hid/hid-bigbenff.c
@@ -174,6 +174,7 @@ static __u8 pid0902_rdesc_fixed[] = {
struct bigben_device {
struct hid_device *hid;
struct hid_report *report;
+ bool removed;
u8 led_state; /* LED1 = 1 .. LED4 = 8 */
u8 right_motor_on; /* right motor off/on 0/1 */
u8 left_motor_force; /* left motor force 0-255 */
@@ -190,6 +191,9 @@ static void bigben_worker(struct work_struct *work)
struct bigben_device, worker);
struct hid_field *report_field = bigben->report->field[0];
+ if (bigben->removed)
+ return;
+
if (bigben->work_led) {
bigben->work_led = false;
report_field->value[0] = 0x01; /* 1 = led message */
@@ -220,10 +224,16 @@ static void bigben_worker(struct work_struct *work)
static int hid_bigben_play_effect(struct input_dev *dev, void *data,
struct ff_effect *effect)
{
- struct bigben_device *bigben = data;
+ struct hid_device *hid = input_get_drvdata(dev);
+ struct bigben_device *bigben = hid_get_drvdata(hid);
u8 right_motor_on;
u8 left_motor_force;
+ if (!bigben) {
+ hid_err(hid, "no device data\n");
+ return 0;
+ }
+
if (effect->type != FF_RUMBLE)
return 0;
@@ -298,8 +308,8 @@ static void bigben_remove(struct hid_device *hid)
{
struct bigben_device *bigben = hid_get_drvdata(hid);
+ bigben->removed = true;
cancel_work_sync(&bigben->worker);
- hid_hw_close(hid);
hid_hw_stop(hid);
}
@@ -319,6 +329,7 @@ static int bigben_probe(struct hid_device *hid,
return -ENOMEM;
hid_set_drvdata(hid, bigben);
bigben->hid = hid;
+ bigben->removed = false;
error = hid_parse(hid);
if (error) {
@@ -341,10 +352,10 @@ static int bigben_probe(struct hid_device *hid,
INIT_WORK(&bigben->worker, bigben_worker);
- error = input_ff_create_memless(hidinput->input, bigben,
+ error = input_ff_create_memless(hidinput->input, NULL,
hid_bigben_play_effect);
if (error)
- return error;
+ goto error_hw_stop;
name_sz = strlen(dev_name(&hid->dev)) + strlen(":red:bigben#") + 1;
@@ -354,8 +365,10 @@ static int bigben_probe(struct hid_device *hid,
sizeof(struct led_classdev) + name_sz,
GFP_KERNEL
);
- if (!led)
- return -ENOMEM;
+ if (!led) {
+ error = -ENOMEM;
+ goto error_hw_stop;
+ }
name = (void *)(&led[1]);
snprintf(name, name_sz,
"%s:red:bigben%d",
@@ -369,7 +382,7 @@ static int bigben_probe(struct hid_device *hid,
bigben->leds[n] = led;
error = devm_led_classdev_register(&hid->dev, led);
if (error)
- return error;
+ goto error_hw_stop;
}
/* initial state: LED1 is on, no rumble effect */
@@ -383,6 +396,10 @@ static int bigben_probe(struct hid_device *hid,
hid_info(hid, "LED and force feedback support for BigBen gamepad\n");
return 0;
+
+error_hw_stop:
+ hid_hw_stop(hid);
+ return error;
}
static __u8 *bigben_report_fixup(struct hid_device *hid, __u8 *rdesc,
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 851fe54ea59e..359616e3efbb 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1741,7 +1741,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
rsize = ((report->size - 1) >> 3) + 1;
- if (rsize > HID_MAX_BUFFER_SIZE)
+ if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
+ rsize = HID_MAX_BUFFER_SIZE - 1;
+ else if (rsize > HID_MAX_BUFFER_SIZE)
rsize = HID_MAX_BUFFER_SIZE;
if (csize < rsize) {
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index c436e12feb23..6c55682c5974 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -41,8 +41,9 @@ static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
/* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
- { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
- USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 70e1cb928bf0..094f4f1b6555 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1256,36 +1256,35 @@ static int hidpp20_battery_map_status_voltage(u8 data[3], int *voltage,
{
int status;
- long charge_sts = (long)data[2];
+ long flags = (long) data[2];
- *level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
- switch (data[2] & 0xe0) {
- case 0x00:
- status = POWER_SUPPLY_STATUS_CHARGING;
- break;
- case 0x20:
- status = POWER_SUPPLY_STATUS_FULL;
- *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
- break;
- case 0x40:
+ if (flags & 0x80)
+ switch (flags & 0x07) {
+ case 0:
+ status = POWER_SUPPLY_STATUS_CHARGING;
+ break;
+ case 1:
+ status = POWER_SUPPLY_STATUS_FULL;
+ *level = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
+ break;
+ case 2:
+ status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ break;
+ default:
+ status = POWER_SUPPLY_STATUS_UNKNOWN;
+ break;
+ }
+ else
status = POWER_SUPPLY_STATUS_DISCHARGING;
- break;
- case 0xe0:
- status = POWER_SUPPLY_STATUS_NOT_CHARGING;
- break;
- default:
- status = POWER_SUPPLY_STATUS_UNKNOWN;
- }
*charge_type = POWER_SUPPLY_CHARGE_TYPE_STANDARD;
- if (test_bit(3, &charge_sts)) {
+ if (test_bit(3, &flags)) {
*charge_type = POWER_SUPPLY_CHARGE_TYPE_FAST;
}
- if (test_bit(4, &charge_sts)) {
+ if (test_bit(4, &flags)) {
*charge_type = POWER_SUPPLY_CHARGE_TYPE_TRICKLE;
}
-
- if (test_bit(5, &charge_sts)) {
+ if (test_bit(5, &flags)) {
*level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;
}
diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
index d31ea82b84c1..a66f08041a1a 100644
--- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
@@ -342,6 +342,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
.driver_data = (void *)&sipodev_desc
},
{
+ .ident = "Trekstor SURFBOOK E11B",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"),
+ },
+ .driver_data = (void *)&sipodev_desc
+ },
+ {
.ident = "Direkt-Tek DTLAPY116-2",
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"),
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index a970b809d778..4140dea693e9 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -932,9 +932,9 @@ void hiddev_disconnect(struct hid_device *hid)
hiddev->exist = 0;
if (hiddev->open) {
- mutex_unlock(&hiddev->existancelock);
hid_hw_close(hiddev->hid);
wake_up_interruptible(&hiddev->wait);
+ mutex_unlock(&hiddev->existancelock);
} else {
mutex_unlock(&hiddev->existancelock);
kfree(hiddev);
diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c
index 5255d3755411..1de23b4f3809 100644
--- a/drivers/i2c/busses/i2c-altera.c
+++ b/drivers/i2c/busses/i2c-altera.c
@@ -171,7 +171,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev)
/* SCL Low Time */
writel(t_low, idev->base + ALTR_I2C_SCL_LOW);
/* SDA Hold Time, 300ns */
- writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD);
+ writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD);
/* Mask all master interrupt bits */
altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false);
diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c
index 16a67a64284a..b426fc956938 100644
--- a/drivers/i2c/busses/i2c-jz4780.c
+++ b/drivers/i2c/busses/i2c-jz4780.c
@@ -78,25 +78,6 @@
#define X1000_I2C_DC_STOP BIT(9)
-static const char * const jz4780_i2c_abrt_src[] = {
- "ABRT_7B_ADDR_NOACK",
- "ABRT_10ADDR1_NOACK",
- "ABRT_10ADDR2_NOACK",
- "ABRT_XDATA_NOACK",
- "ABRT_GCALL_NOACK",
- "ABRT_GCALL_READ",
- "ABRT_HS_ACKD",
- "SBYTE_ACKDET",
- "ABRT_HS_NORSTRT",
- "SBYTE_NORSTRT",
- "ABRT_10B_RD_NORSTRT",
- "ABRT_MASTER_DIS",
- "ARB_LOST",
- "SLVFLUSH_TXFIFO",
- "SLV_ARBLOST",
- "SLVRD_INTX",
-};
-
#define JZ4780_I2C_INTST_IGC BIT(11)
#define JZ4780_I2C_INTST_ISTT BIT(10)
#define JZ4780_I2C_INTST_ISTP BIT(9)
@@ -576,21 +557,8 @@ done:
static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src)
{
- int i;
-
- dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src);
- dev_err(&i2c->adap.dev, "device addr=%x\n",
- jz4780_i2c_readw(i2c, JZ4780_I2C_TAR));
- dev_err(&i2c->adap.dev, "send cmd count:%d %d\n",
- i2c->cmd, i2c->cmd_buf[i2c->cmd]);
- dev_err(&i2c->adap.dev, "receive data count:%d %d\n",
- i2c->cmd, i2c->data_buf[i2c->cmd]);
-
- for (i = 0; i < 16; i++) {
- if (src & BIT(i))
- dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n",
- i, jz4780_i2c_abrt_src[i]);
- }
+ dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n",
+ src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]);
}
static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 1bb99b556393..05c26986637b 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -361,7 +361,7 @@ static const struct block_device_operations ide_gd_ops = {
.release = ide_gd_release,
.ioctl = ide_gd_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = ide_gd_compat_ioctl,
+ .compat_ioctl = ide_gd_compat_ioctl,
#endif
.getgeo = ide_gd_getgeo,
.check_events = ide_gd_check_events,
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index 8c744578122a..a0d87ed9da69 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -300,9 +300,11 @@ static int control_loop(void *dummy)
/* i2c probing and setup */
/************************************************************************/
-static int
-do_attach( struct i2c_adapter *adapter )
+static void do_attach(struct i2c_adapter *adapter)
{
+ struct i2c_board_info info = { };
+ struct device_node *np;
+
/* scan 0x48-0x4f (DS1775) and 0x2c-2x2f (ADM1030) */
static const unsigned short scan_ds1775[] = {
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
@@ -313,25 +315,24 @@ do_attach( struct i2c_adapter *adapter )
I2C_CLIENT_END
};
- if( strncmp(adapter->name, "uni-n", 5) )
- return 0;
-
- if( !x.running ) {
- struct i2c_board_info info;
+ if (x.running || strncmp(adapter->name, "uni-n", 5))
+ return;
- memset(&info, 0, sizeof(struct i2c_board_info));
- strlcpy(info.type, "therm_ds1775", I2C_NAME_SIZE);
+ np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,ds1775");
+ if (np) {
+ of_node_put(np);
+ } else {
+ strlcpy(info.type, "MAC,ds1775", I2C_NAME_SIZE);
i2c_new_probed_device(adapter, &info, scan_ds1775, NULL);
+ }
- strlcpy(info.type, "therm_adm1030", I2C_NAME_SIZE);
+ np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,adm1030");
+ if (np) {
+ of_node_put(np);
+ } else {
+ strlcpy(info.type, "MAC,adm1030", I2C_NAME_SIZE);
i2c_new_probed_device(adapter, &info, scan_adm1030, NULL);
-
- if( x.thermostat && x.fan ) {
- x.running = 1;
- x.poll_task = kthread_run(control_loop, NULL, "g4fand");
- }
}
- return 0;
}
static int
@@ -404,8 +405,8 @@ out:
enum chip { ds1775, adm1030 };
static const struct i2c_device_id therm_windtunnel_id[] = {
- { "therm_ds1775", ds1775 },
- { "therm_adm1030", adm1030 },
+ { "MAC,ds1775", ds1775 },
+ { "MAC,adm1030", adm1030 },
{ }
};
MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id);
@@ -414,6 +415,7 @@ static int
do_probe(struct i2c_client *cl, const struct i2c_device_id *id)
{
struct i2c_adapter *adapter = cl->adapter;
+ int ret = 0;
if( !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA
| I2C_FUNC_SMBUS_WRITE_BYTE) )
@@ -421,11 +423,19 @@ do_probe(struct i2c_client *cl, const struct i2c_device_id *id)
switch (id->driver_data) {
case adm1030:
- return attach_fan( cl );
+ ret = attach_fan(cl);
+ break;
case ds1775:
- return attach_thermostat(cl);
+ ret = attach_thermostat(cl);
+ break;
}
- return 0;
+
+ if (!x.running && x.thermostat && x.fan) {
+ x.running = 1;
+ x.poll_task = kthread_run(control_loop, NULL, "g4fand");
+ }
+
+ return ret;
}
static struct i2c_driver g4fan_driver = {
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index d1955543acd1..b0f5280a83cb 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -69,8 +69,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
/* Force link status for IMP port */
reg = core_readl(priv, offset);
reg |= (MII_SW_OR | LINK_STS);
- if (priv->type == BCM7278_DEVICE_ID)
- reg |= GMII_SPEED_UP_2G;
+ reg &= ~GMII_SPEED_UP_2G;
core_writel(priv, reg, offset);
/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index b016cc205f81..ca3a7a7a73c3 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -278,13 +278,13 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
switch (direction) {
case MV88E6XXX_EGRESS_DIR_INGRESS:
dest_port_chip = &chip->ingress_dest_port;
- reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
+ reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
reg |= port <<
__bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK);
break;
case MV88E6XXX_EGRESS_DIR_EGRESS:
dest_port_chip = &chip->egress_dest_port;
- reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
+ reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
reg |= port <<
__bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK);
break;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index fd6e0e48cd51..f9a8151f092c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11252,7 +11252,7 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp)
}
}
if (test_and_clear_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event))
- netdev_info(bp->dev, "Receive PF driver unload event!");
+ netdev_info(bp->dev, "Receive PF driver unload event!\n");
}
#else
@@ -11759,7 +11759,7 @@ static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
u32 dw;
if (!pos) {
- netdev_info(bp->dev, "Unable do read adapter's DSN");
+ netdev_info(bp->dev, "Unable do read adapter's DSN\n");
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index eec0168330b7..d3c93ccee86a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -641,14 +641,14 @@ static int bnxt_dl_params_register(struct bnxt *bp)
rc = devlink_params_register(bp->dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
if (rc) {
- netdev_warn(bp->dev, "devlink_params_register failed. rc=%d",
+ netdev_warn(bp->dev, "devlink_params_register failed. rc=%d\n",
rc);
return rc;
}
rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params,
ARRAY_SIZE(bnxt_dl_port_params));
if (rc) {
- netdev_err(bp->dev, "devlink_port_params_register failed");
+ netdev_err(bp->dev, "devlink_port_params_register failed\n");
devlink_params_unregister(bp->dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
return rc;
@@ -679,7 +679,7 @@ int bnxt_dl_register(struct bnxt *bp)
else
dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
if (!dl) {
- netdev_warn(bp->dev, "devlink_alloc failed");
+ netdev_warn(bp->dev, "devlink_alloc failed\n");
return -ENOMEM;
}
@@ -692,7 +692,7 @@ int bnxt_dl_register(struct bnxt *bp)
rc = devlink_register(dl, &bp->pdev->dev);
if (rc) {
- netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+ netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
goto err_dl_free;
}
@@ -704,7 +704,7 @@ int bnxt_dl_register(struct bnxt *bp)
sizeof(bp->dsn));
rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id);
if (rc) {
- netdev_err(bp->dev, "devlink_port_register failed");
+ netdev_err(bp->dev, "devlink_port_register failed\n");
goto err_dl_unreg;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 6171fa8b3677..e8fc1671c581 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2028,7 +2028,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename,
}
if (fw->size > item_len) {
- netdev_err(dev, "PKG insufficient update area in nvram: %lu",
+ netdev_err(dev, "PKG insufficient update area in nvram: %lu\n",
(unsigned long)fw->size);
rc = -EFBIG;
} else {
@@ -3338,7 +3338,7 @@ err:
kfree(coredump.data);
*dump_len += sizeof(struct bnxt_coredump_record);
if (rc == -ENOBUFS)
- netdev_err(bp->dev, "Firmware returned large coredump buffer");
+ netdev_err(bp->dev, "Firmware returned large coredump buffer\n");
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 0cc6ec51f45f..9bec256b0934 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -50,7 +50,7 @@ static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
/* check if dev belongs to the same switch */
if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
- netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
+ netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
dev->ifindex);
return BNXT_FID_INVALID;
}
@@ -70,7 +70,7 @@ static int bnxt_tc_parse_redir(struct bnxt *bp,
struct net_device *dev = act->dev;
if (!dev) {
- netdev_info(bp->dev, "no dev in mirred action");
+ netdev_info(bp->dev, "no dev in mirred action\n");
return -EINVAL;
}
@@ -106,7 +106,7 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
const struct ip_tunnel_key *tun_key = &tun_info->key;
if (ip_tunnel_info_af(tun_info) != AF_INET) {
- netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
+ netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
return -EOPNOTSUPP;
}
@@ -295,7 +295,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
int i, rc;
if (!flow_action_has_entries(flow_action)) {
- netdev_info(bp->dev, "no actions");
+ netdev_info(bp->dev, "no actions\n");
return -EINVAL;
}
@@ -370,7 +370,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
(dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
- netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
+ netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
dissector->used_keys);
return -EOPNOTSUPP;
}
@@ -508,7 +508,7 @@ static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -841,7 +841,7 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
resp = bnxt_get_hwrm_resp_addr(bp, &req);
*decap_filter_handle = resp->decap_filter_id;
} else {
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -859,7 +859,7 @@ static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -906,7 +906,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
resp = bnxt_get_hwrm_resp_addr(bp, &req);
*encap_record_handle = resp->encap_record_id;
} else {
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -924,7 +924,7 @@ static int hwrm_cfa_encap_record_free(struct bnxt *bp,
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
return rc;
}
@@ -943,7 +943,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp,
tc_info->l2_ht_params);
if (rc)
netdev_err(bp->dev,
- "Error: %s: rhashtable_remove_fast: %d",
+ "Error: %s: rhashtable_remove_fast: %d\n",
__func__, rc);
kfree_rcu(l2_node, rcu);
}
@@ -972,7 +972,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
if (rc) {
kfree_rcu(l2_node, rcu);
netdev_err(bp->dev,
- "Error: %s: rhashtable_insert_fast: %d",
+ "Error: %s: rhashtable_insert_fast: %d\n",
__func__, rc);
return NULL;
}
@@ -1031,7 +1031,7 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
(flow->l4_key.ip_proto != IPPROTO_TCP &&
flow->l4_key.ip_proto != IPPROTO_UDP)) {
- netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
+ netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
flow->l4_key.ip_proto);
return false;
}
@@ -1088,7 +1088,7 @@ static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
*ht_params);
if (rc) {
- netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+ netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
rc = -1;
}
kfree_rcu(tunnel_node, rcu);
@@ -1129,7 +1129,7 @@ bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
tunnel_node->refcount++;
return tunnel_node;
err:
- netdev_info(bp->dev, "error rc=%d", rc);
+ netdev_info(bp->dev, "error rc=%d\n", rc);
return NULL;
}
@@ -1187,7 +1187,7 @@ static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
&decap_l2_node->node,
tc_info->decap_l2_ht_params);
if (rc)
- netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
+ netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
kfree_rcu(decap_l2_node, rcu);
}
}
@@ -1227,7 +1227,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
if (IS_ERR(rt)) {
- netdev_info(bp->dev, "no route to %pI4b", &flow.daddr);
+ netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
return -EOPNOTSUPP;
}
@@ -1241,7 +1241,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
if (vlan->real_dev != real_dst_dev) {
netdev_info(bp->dev,
- "dst_dev(%s) doesn't use PF-if(%s)",
+ "dst_dev(%s) doesn't use PF-if(%s)\n",
netdev_name(dst_dev),
netdev_name(real_dst_dev));
rc = -EOPNOTSUPP;
@@ -1253,7 +1253,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
#endif
} else if (dst_dev != real_dst_dev) {
netdev_info(bp->dev,
- "dst_dev(%s) for %pI4b is not PF-if(%s)",
+ "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
netdev_name(dst_dev), &flow.daddr,
netdev_name(real_dst_dev));
rc = -EOPNOTSUPP;
@@ -1262,7 +1262,7 @@ static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
if (!nbr) {
- netdev_info(bp->dev, "can't lookup neighbor for %pI4b",
+ netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
&flow.daddr);
rc = -EOPNOTSUPP;
goto put_rt;
@@ -1472,7 +1472,7 @@ static int __bnxt_tc_del_flow(struct bnxt *bp,
rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
tc_info->flow_ht_params);
if (rc)
- netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
+ netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
__func__, rc);
kfree_rcu(flow_node, rcu);
@@ -1587,7 +1587,7 @@ unlock:
free_node:
kfree_rcu(new_node, rcu);
done:
- netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
+ netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
__func__, tc_flow_cmd->cookie, rc);
return rc;
}
@@ -1700,7 +1700,7 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
le64_to_cpu(resp_bytes[i]);
}
} else {
- netdev_info(bp->dev, "error rc=%d", rc);
+ netdev_info(bp->dev, "error rc=%d\n", rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -1970,7 +1970,7 @@ static int bnxt_tc_indr_block_event(struct notifier_block *nb,
bp);
if (rc)
netdev_info(bp->dev,
- "Failed to register indirect blk: dev: %s",
+ "Failed to register indirect blk: dev: %s\n",
netdev->name);
break;
case NETDEV_UNREGISTER:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index b010b34cdaf8..6f2faf81c1ae 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -43,7 +43,7 @@ static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
*tx_cfa_action, *rx_cfa_code);
} else {
- netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
}
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -60,7 +60,7 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
- netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
return rc;
}
@@ -465,7 +465,7 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
return 0;
err:
- netdev_info(bp->dev, "%s error=%d", __func__, rc);
+ netdev_info(bp->dev, "%s error=%d\n", __func__, rc);
kfree(cfa_code_map);
__bnxt_vf_reps_destroy(bp);
return rc;
@@ -488,7 +488,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
mutex_lock(&bp->sriov_lock);
if (bp->eswitch_mode == mode) {
- netdev_info(bp->dev, "already in %s eswitch mode",
+ netdev_info(bp->dev, "already in %s eswitch mode\n",
mode == DEVLINK_ESWITCH_MODE_LEGACY ?
"legacy" : "switchdev");
rc = -EINVAL;
@@ -508,7 +508,7 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
}
if (pci_num_vf(bp->pdev) == 0) {
- netdev_info(bp->dev, "Enable VFs before setting switchdev mode");
+ netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n");
rc = -EPERM;
goto done;
}
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 6392a2530183..10244941a7a6 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -294,6 +294,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
*/
if (priv->ext_phy) {
reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
+ reg &= ~ID_MODE_DIS;
reg |= id_mode_dis;
if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
reg |= RGMII_MODE_EN_V123;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 6f2cf569a283..79b3d53f2fbf 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -297,6 +297,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth,
}
hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif);
hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT;
hw_ioctxt.cmdq_depth = 0;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index b069045de416..66fd2340d447 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -151,8 +151,8 @@ struct hinic_cmd_hw_ioctxt {
u8 lro_en;
u8 rsvd3;
+ u8 ppf_idx;
u8 rsvd4;
- u8 rsvd5;
u16 rq_depth;
u16 rx_buf_sz_idx;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 517794509eb2..c7bb9ceca72c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -137,6 +137,7 @@
#define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx)
#define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx)
#define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx)
+#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx)
#define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type)
#define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index f4a339b10b10..79091e131418 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -94,6 +94,7 @@ struct hinic_rq {
struct hinic_wq *wq;
+ struct cpumask affinity_mask;
u32 irq;
u16 msix_entry;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 02a14f5e7fe3..13560975c103 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -356,7 +356,8 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev)
if (!num_cpus)
num_cpus = num_online_cpus();
- nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus);
+ nic_dev->num_qps = hinic_hwdev_num_qps(hwdev);
+ nic_dev->num_qps = min_t(u16, nic_dev->num_qps, num_cpus);
nic_dev->rss_limit = nic_dev->num_qps;
nic_dev->num_rss = nic_dev->num_qps;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 56ea6d692f1c..2695ad69fca6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -475,7 +475,6 @@ static int rx_request_irq(struct hinic_rxq *rxq)
struct hinic_hwdev *hwdev = nic_dev->hwdev;
struct hinic_rq *rq = rxq->rq;
struct hinic_qp *qp;
- struct cpumask mask;
int err;
rx_add_napi(rxq);
@@ -492,8 +491,8 @@ static int rx_request_irq(struct hinic_rxq *rxq)
}
qp = container_of(rq, struct hinic_qp, rq);
- cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
- return irq_set_affinity_hint(rq->irq, &mask);
+ cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask);
+ return irq_set_affinity_hint(rq->irq, &rq->affinity_mask);
}
static void rx_free_irq(struct hinic_rxq *rxq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 966983674663..21de4764d4c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5147,7 +5147,6 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
static void mlx5e_nic_disable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5168,7 +5167,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
mlx5e_monitor_counter_cleanup(priv);
mlx5e_disable_async_events(priv);
- mlx5_lag_remove(mdev, netdev);
+ mlx5_lag_remove(mdev);
}
int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7b48ccacebe2..6ed307d7f191 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1861,7 +1861,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
- struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1870,7 +1869,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
- mlx5_lag_remove(mdev, netdev);
+ mlx5_lag_remove(mdev);
}
static MLX5E_DEFINE_STATS_GRP(sw_rep, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index b91eabc09fbc..8e19f6ab8393 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -464,9 +464,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
struct mlx5_lag *ldev;
int changed = 0;
- if (!net_eq(dev_net(ndev), &init_net))
- return NOTIFY_DONE;
-
if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
return NOTIFY_DONE;
@@ -586,8 +583,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
if (!ldev->nb.notifier_call) {
ldev->nb.notifier_call = mlx5_lag_netdev_event;
- if (register_netdevice_notifier_dev_net(netdev, &ldev->nb,
- &ldev->nn)) {
+ if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
ldev->nb.notifier_call = NULL;
mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
}
@@ -600,7 +596,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
}
/* Must be called with intf_mutex held */
-void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev)
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
int i;
@@ -620,8 +616,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev)
if (i == MLX5_MAX_PORTS) {
if (ldev->nb.notifier_call)
- unregister_netdevice_notifier_dev_net(netdev, &ldev->nb,
- &ldev->nn);
+ unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
mlx5_lag_dev_free(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 316ab09e2664..f1068aac6406 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -44,7 +44,6 @@ struct mlx5_lag {
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct notifier_block nb;
- struct netdev_net_notifier nn;
struct lag_mp lag_mp;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index fcce9e0fc82c..da67b28d6e23 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -157,7 +157,7 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
u8 feature_group, u8 access_reg_group);
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
-void mlx5_lag_remove(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index e0d7d2d9a0c8..43fa8c85b5d9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -28,7 +28,7 @@
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
-#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 1c9e70c8cc30..58579baf3f7a 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -513,14 +513,17 @@ static irqreturn_t ks_irq(int irq, void *pw)
{
struct net_device *netdev = pw;
struct ks_net *ks = netdev_priv(netdev);
+ unsigned long flags;
u16 status;
+ spin_lock_irqsave(&ks->statelock, flags);
/*this should be the first in IRQ handler */
ks_save_cmd_reg(ks);
status = ks_rdreg16(ks, KS_ISR);
if (unlikely(!status)) {
ks_restore_cmd_reg(ks);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return IRQ_NONE;
}
@@ -546,6 +549,7 @@ static irqreturn_t ks_irq(int irq, void *pw)
ks->netdev->stats.rx_over_errors++;
/* this should be the last in IRQ handler*/
ks_restore_cmd_reg(ks);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return IRQ_HANDLED;
}
@@ -615,6 +619,7 @@ static int ks_net_stop(struct net_device *netdev)
/* shutdown RX/TX QMU */
ks_disable_qmu(ks);
+ ks_disable_int(ks);
/* set powermode to soft power down to save power */
ks_set_powermode(ks, PMECR_PM_SOFTDOWN);
@@ -671,10 +676,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
{
netdev_tx_t retv = NETDEV_TX_OK;
struct ks_net *ks = netdev_priv(netdev);
+ unsigned long flags;
- disable_irq(netdev->irq);
- ks_disable_int(ks);
- spin_lock(&ks->statelock);
+ spin_lock_irqsave(&ks->statelock, flags);
/* Extra space are required:
* 4 byte for alignment, 4 for status/length, 4 for CRC
@@ -688,9 +692,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
dev_kfree_skb(skb);
} else
retv = NETDEV_TX_BUSY;
- spin_unlock(&ks->statelock);
- ks_enable_int(ks);
- enable_irq(netdev->irq);
+ spin_unlock_irqrestore(&ks->statelock, flags);
return retv;
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 06de59521fc4..fbf4cbcf1a65 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -13,25 +13,6 @@
#include "rmnet_vnd.h"
#include "rmnet_private.h"
-/* Locking scheme -
- * The shared resource which needs to be protected is realdev->rx_handler_data.
- * For the writer path, this is using rtnl_lock(). The writer paths are
- * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These
- * paths are already called with rtnl_lock() acquired in. There is also an
- * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For
- * dereference here, we will need to use rtnl_dereference(). Dev list writing
- * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link().
- * For the reader path, the real_dev->rx_handler_data is called in the TX / RX
- * path. We only need rcu_read_lock() for these scenarios. In these cases,
- * the rcu_read_lock() is held in __dev_queue_xmit() and
- * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl()
- * to get the relevant information. For dev list reading, we again acquire
- * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu().
- * We also use unregister_netdevice_many() to free all rmnet devices in
- * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in
- * same context.
- */
-
/* Local Definitions and Declarations */
static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = {
@@ -51,9 +32,10 @@ rmnet_get_port_rtnl(const struct net_device *real_dev)
return rtnl_dereference(real_dev->rx_handler_data);
}
-static int rmnet_unregister_real_device(struct net_device *real_dev,
- struct rmnet_port *port)
+static int rmnet_unregister_real_device(struct net_device *real_dev)
{
+ struct rmnet_port *port = rmnet_get_port_rtnl(real_dev);
+
if (port->nr_rmnet_devs)
return -EINVAL;
@@ -61,9 +43,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
kfree(port);
- /* release reference on real_dev */
- dev_put(real_dev);
-
netdev_dbg(real_dev, "Removed from rmnet\n");
return 0;
}
@@ -89,9 +68,6 @@ static int rmnet_register_real_device(struct net_device *real_dev)
return -EBUSY;
}
- /* hold on to real dev for MAP data */
- dev_hold(real_dev);
-
for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
INIT_HLIST_HEAD(&port->muxed_ep[entry]);
@@ -99,28 +75,33 @@ static int rmnet_register_real_device(struct net_device *real_dev)
return 0;
}
-static void rmnet_unregister_bridge(struct net_device *dev,
- struct rmnet_port *port)
+static void rmnet_unregister_bridge(struct rmnet_port *port)
{
- struct rmnet_port *bridge_port;
- struct net_device *bridge_dev;
+ struct net_device *bridge_dev, *real_dev, *rmnet_dev;
+ struct rmnet_port *real_port;
if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
return;
- /* bridge slave handling */
+ rmnet_dev = port->rmnet_dev;
if (!port->nr_rmnet_devs) {
- bridge_dev = port->bridge_ep;
+ /* bridge device */
+ real_dev = port->bridge_ep;
+ bridge_dev = port->dev;
- bridge_port = rmnet_get_port_rtnl(bridge_dev);
- bridge_port->bridge_ep = NULL;
- bridge_port->rmnet_mode = RMNET_EPMODE_VND;
+ real_port = rmnet_get_port_rtnl(real_dev);
+ real_port->bridge_ep = NULL;
+ real_port->rmnet_mode = RMNET_EPMODE_VND;
} else {
+ /* real device */
bridge_dev = port->bridge_ep;
- bridge_port = rmnet_get_port_rtnl(bridge_dev);
- rmnet_unregister_real_device(bridge_dev, bridge_port);
+ port->bridge_ep = NULL;
+ port->rmnet_mode = RMNET_EPMODE_VND;
}
+
+ netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
+ rmnet_unregister_real_device(bridge_dev);
}
static int rmnet_newlink(struct net *src_net, struct net_device *dev,
@@ -135,6 +116,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
int err = 0;
u16 mux_id;
+ if (!tb[IFLA_LINK]) {
+ NL_SET_ERR_MSG_MOD(extack, "link not specified");
+ return -EINVAL;
+ }
+
real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev || !dev)
return -ENODEV;
@@ -157,7 +143,12 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto err1;
+ err = netdev_upper_dev_link(real_dev, dev, extack);
+ if (err < 0)
+ goto err2;
+
port->rmnet_mode = mode;
+ port->rmnet_dev = dev;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -173,8 +164,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
return 0;
+err2:
+ unregister_netdevice(dev);
+ rmnet_vnd_dellink(mux_id, port, ep);
err1:
- rmnet_unregister_real_device(real_dev, port);
+ rmnet_unregister_real_device(real_dev);
err0:
kfree(ep);
return err;
@@ -183,77 +177,74 @@ err0:
static void rmnet_dellink(struct net_device *dev, struct list_head *head)
{
struct rmnet_priv *priv = netdev_priv(dev);
- struct net_device *real_dev;
+ struct net_device *real_dev, *bridge_dev;
+ struct rmnet_port *real_port, *bridge_port;
struct rmnet_endpoint *ep;
- struct rmnet_port *port;
- u8 mux_id;
+ u8 mux_id = priv->mux_id;
real_dev = priv->real_dev;
- if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
+ if (!rmnet_is_real_dev_registered(real_dev))
return;
- port = rmnet_get_port_rtnl(real_dev);
-
- mux_id = rmnet_vnd_get_mux(dev);
+ real_port = rmnet_get_port_rtnl(real_dev);
+ bridge_dev = real_port->bridge_ep;
+ if (bridge_dev) {
+ bridge_port = rmnet_get_port_rtnl(bridge_dev);
+ rmnet_unregister_bridge(bridge_port);
+ }
- ep = rmnet_get_endpoint(port, mux_id);
+ ep = rmnet_get_endpoint(real_port, mux_id);
if (ep) {
hlist_del_init_rcu(&ep->hlnode);
- rmnet_unregister_bridge(dev, port);
- rmnet_vnd_dellink(mux_id, port, ep);
+ rmnet_vnd_dellink(mux_id, real_port, ep);
kfree(ep);
}
- rmnet_unregister_real_device(real_dev, port);
+ netdev_upper_dev_unlink(real_dev, dev);
+ rmnet_unregister_real_device(real_dev);
unregister_netdevice_queue(dev, head);
}
-static void rmnet_force_unassociate_device(struct net_device *dev)
+static void rmnet_force_unassociate_device(struct net_device *real_dev)
{
- struct net_device *real_dev = dev;
struct hlist_node *tmp_ep;
struct rmnet_endpoint *ep;
struct rmnet_port *port;
unsigned long bkt_ep;
LIST_HEAD(list);
- if (!rmnet_is_real_dev_registered(real_dev))
- return;
-
- ASSERT_RTNL();
-
- port = rmnet_get_port_rtnl(dev);
-
- rcu_read_lock();
- rmnet_unregister_bridge(dev, port);
-
- hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
- unregister_netdevice_queue(ep->egress_dev, &list);
- rmnet_vnd_dellink(ep->mux_id, port, ep);
+ port = rmnet_get_port_rtnl(real_dev);
- hlist_del_init_rcu(&ep->hlnode);
- kfree(ep);
+ if (port->nr_rmnet_devs) {
+ /* real device */
+ rmnet_unregister_bridge(port);
+ hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) {
+ unregister_netdevice_queue(ep->egress_dev, &list);
+ netdev_upper_dev_unlink(real_dev, ep->egress_dev);
+ rmnet_vnd_dellink(ep->mux_id, port, ep);
+ hlist_del_init_rcu(&ep->hlnode);
+ kfree(ep);
+ }
+ rmnet_unregister_real_device(real_dev);
+ unregister_netdevice_many(&list);
+ } else {
+ rmnet_unregister_bridge(port);
}
-
- rcu_read_unlock();
- unregister_netdevice_many(&list);
-
- rmnet_unregister_real_device(real_dev, port);
}
static int rmnet_config_notify_cb(struct notifier_block *nb,
unsigned long event, void *data)
{
- struct net_device *dev = netdev_notifier_info_to_dev(data);
+ struct net_device *real_dev = netdev_notifier_info_to_dev(data);
- if (!dev)
+ if (!rmnet_is_real_dev_registered(real_dev))
return NOTIFY_DONE;
switch (event) {
case NETDEV_UNREGISTER:
- netdev_dbg(dev, "Kernel unregister\n");
- rmnet_force_unassociate_device(dev);
+ netdev_dbg(real_dev, "Kernel unregister\n");
+ rmnet_force_unassociate_device(real_dev);
break;
default:
@@ -295,16 +286,18 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
if (!dev)
return -ENODEV;
- real_dev = __dev_get_by_index(dev_net(dev),
- nla_get_u32(tb[IFLA_LINK]));
-
- if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
+ real_dev = priv->real_dev;
+ if (!rmnet_is_real_dev_registered(real_dev))
return -ENODEV;
port = rmnet_get_port_rtnl(real_dev);
if (data[IFLA_RMNET_MUX_ID]) {
mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]);
+ if (rmnet_get_endpoint(port, mux_id)) {
+ NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists");
+ return -EINVAL;
+ }
ep = rmnet_get_endpoint(port, priv->mux_id);
if (!ep)
return -ENODEV;
@@ -379,11 +372,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = {
.fill_info = rmnet_fill_info,
};
-/* Needs either rcu_read_lock() or rtnl lock */
-struct rmnet_port *rmnet_get_port(struct net_device *real_dev)
+struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev)
{
if (rmnet_is_real_dev_registered(real_dev))
- return rcu_dereference_rtnl(real_dev->rx_handler_data);
+ return rcu_dereference_bh(real_dev->rx_handler_data);
else
return NULL;
}
@@ -409,7 +401,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
struct rmnet_port *port, *slave_port;
int err;
- port = rmnet_get_port(real_dev);
+ port = rmnet_get_port_rtnl(real_dev);
/* If there is more than one rmnet dev attached, its probably being
* used for muxing. Skip the briding in that case
@@ -417,6 +409,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
if (port->nr_rmnet_devs > 1)
return -EINVAL;
+ if (port->rmnet_mode != RMNET_EPMODE_VND)
+ return -EINVAL;
+
if (rmnet_is_real_dev_registered(slave_dev))
return -EBUSY;
@@ -424,9 +419,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
if (err)
return -EBUSY;
- slave_port = rmnet_get_port(slave_dev);
+ err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
+ extack);
+ if (err) {
+ rmnet_unregister_real_device(slave_dev);
+ return err;
+ }
+
+ slave_port = rmnet_get_port_rtnl(slave_dev);
slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
slave_port->bridge_ep = real_dev;
+ slave_port->rmnet_dev = rmnet_dev;
port->rmnet_mode = RMNET_EPMODE_BRIDGE;
port->bridge_ep = slave_dev;
@@ -438,16 +441,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev,
int rmnet_del_bridge(struct net_device *rmnet_dev,
struct net_device *slave_dev)
{
- struct rmnet_priv *priv = netdev_priv(rmnet_dev);
- struct net_device *real_dev = priv->real_dev;
- struct rmnet_port *port, *slave_port;
+ struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev);
- port = rmnet_get_port(real_dev);
- port->rmnet_mode = RMNET_EPMODE_VND;
- port->bridge_ep = NULL;
-
- slave_port = rmnet_get_port(slave_dev);
- rmnet_unregister_real_device(slave_dev, slave_port);
+ rmnet_unregister_bridge(port);
netdev_dbg(slave_dev, "removed from rmnet as slave\n");
return 0;
@@ -473,8 +469,8 @@ static int __init rmnet_init(void)
static void __exit rmnet_exit(void)
{
- unregister_netdevice_notifier(&rmnet_dev_notifier);
rtnl_link_unregister(&rmnet_link_ops);
+ unregister_netdevice_notifier(&rmnet_dev_notifier);
}
module_init(rmnet_init)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index cd0a6bcbe74a..be515982d628 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -28,6 +28,7 @@ struct rmnet_port {
u8 rmnet_mode;
struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
struct net_device *bridge_ep;
+ struct net_device *rmnet_dev;
};
extern struct rtnl_link_ops rmnet_link_ops;
@@ -65,7 +66,7 @@ struct rmnet_priv {
struct rmnet_priv_stats stats;
};
-struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
+struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev);
struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id);
int rmnet_add_bridge(struct net_device *rmnet_dev,
struct net_device *slave_dev,
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 1b74bc160402..29a7bfa2584d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -159,6 +159,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
static void
rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev)
{
+ if (skb_mac_header_was_set(skb))
+ skb_push(skb, skb->mac_len);
+
if (bridge_dev) {
skb->dev = bridge_dev;
dev_queue_xmit(skb);
@@ -184,7 +187,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
return RX_HANDLER_PASS;
dev = skb->dev;
- port = rmnet_get_port(dev);
+ port = rmnet_get_port_rcu(dev);
switch (port->rmnet_mode) {
case RMNET_EPMODE_VND:
@@ -217,7 +220,7 @@ void rmnet_egress_handler(struct sk_buff *skb)
skb->dev = priv->real_dev;
mux_id = priv->mux_id;
- port = rmnet_get_port(skb->dev);
+ port = rmnet_get_port_rcu(skb->dev);
if (!port)
goto drop;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 509dfc895a33..26ad40f19c64 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -266,14 +266,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
return 0;
}
-u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
-{
- struct rmnet_priv *priv;
-
- priv = netdev_priv(rmnet_dev);
- return priv->mux_id;
-}
-
int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
{
netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 54cbaf3c3bc4..14d77c709d4a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -16,6 +16,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
struct rmnet_endpoint *ep);
void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
-u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
void rmnet_vnd_setup(struct net_device *dev);
#endif /* _RMNET_VND_H_ */
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index af15a737c675..59b4f16896a8 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -560,13 +560,45 @@ efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx,
u32 nic_major, u32 nic_minor,
s32 correction)
{
+ u32 sync_timestamp;
ktime_t kt = { 0 };
+ s16 delta;
if (!(nic_major & 0x80000000)) {
WARN_ON_ONCE(nic_major >> 16);
- /* Use the top bits from the latest sync event. */
- nic_major &= 0xffff;
- nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000);
+
+ /* Medford provides 48 bits of timestamp, so we must get the top
+ * 16 bits from the timesync event state.
+ *
+ * We only have the lower 16 bits of the time now, but we do
+ * have a full resolution timestamp at some point in past. As
+ * long as the difference between the (real) now and the sync
+ * is less than 2^15, then we can reconstruct the difference
+ * between those two numbers using only the lower 16 bits of
+ * each.
+ *
+ * Put another way
+ *
+ * a - b = ((a mod k) - b) mod k
+ *
+ * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know
+ * (a mod k) and b, so can calculate the delta, a - b.
+ *
+ */
+ sync_timestamp = last_sync_timestamp_major(efx);
+
+ /* Because delta is s16 this does an implicit mask down to
+ * 16 bits which is what we need, assuming
+ * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that
+ * we can deal with the (unlikely) case of sync timestamps
+ * arriving from the future.
+ */
+ delta = nic_major - sync_timestamp;
+
+ /* Recover the fully specified time now, by applying the offset
+ * to the (fully specified) sync time.
+ */
+ nic_major = sync_timestamp + delta;
kt = ptp->nic_to_kernel_time(nic_major, nic_minor,
correction);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5836b21edd7e..7da18c9afa01 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4405,6 +4405,8 @@ static void stmmac_init_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ rtnl_lock();
+
/* Create per netdev entries */
priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
@@ -4416,14 +4418,13 @@ static void stmmac_init_fs(struct net_device *dev)
debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev,
&stmmac_dma_cap_fops);
- register_netdevice_notifier(&stmmac_notifier);
+ rtnl_unlock();
}
static void stmmac_exit_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
- unregister_netdevice_notifier(&stmmac_notifier);
debugfs_remove_recursive(priv->dbgfs_dir);
}
#endif /* CONFIG_DEBUG_FS */
@@ -4940,14 +4941,14 @@ int stmmac_dvr_remove(struct device *dev)
netdev_info(priv->dev, "%s: removing driver", __func__);
-#ifdef CONFIG_DEBUG_FS
- stmmac_exit_fs(ndev);
-#endif
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
netif_carrier_off(ndev);
unregister_netdev(ndev);
+#ifdef CONFIG_DEBUG_FS
+ stmmac_exit_fs(ndev);
+#endif
phylink_destroy(priv->phylink);
if (priv->plat->stmmac_rst)
reset_control_assert(priv->plat->stmmac_rst);
@@ -5166,6 +5167,7 @@ static int __init stmmac_init(void)
/* Create debugfs main directory if it doesn't exist yet */
if (!stmmac_fs_dir)
stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+ register_netdevice_notifier(&stmmac_notifier);
#endif
return 0;
@@ -5174,6 +5176,7 @@ static int __init stmmac_init(void)
static void __exit stmmac_exit(void)
{
#ifdef CONFIG_DEBUG_FS
+ unregister_netdevice_notifier(&stmmac_notifier);
debugfs_remove_recursive(stmmac_fs_dir);
#endif
}
diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 276292bca334..53fb8141f1a6 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -375,10 +375,14 @@ struct temac_local {
int tx_bd_next;
int tx_bd_tail;
int rx_bd_ci;
+ int rx_bd_tail;
/* DMA channel control setup */
u32 tx_chnl_ctrl;
u32 rx_chnl_ctrl;
+ u8 coalesce_count_rx;
+
+ struct delayed_work restart_work;
};
/* Wrappers for temac_ior()/temac_iow() function pointers above */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 6f11f52c9a9e..9461acec6f70 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -51,6 +51,7 @@
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <linux/processor.h>
#include <linux/platform_data/xilinx-ll-temac.h>
@@ -367,6 +368,8 @@ static int temac_dma_bd_init(struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr))
+ goto out;
lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr);
lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
@@ -387,12 +390,13 @@ static int temac_dma_bd_init(struct net_device *ndev)
lp->tx_bd_next = 0;
lp->tx_bd_tail = 0;
lp->rx_bd_ci = 0;
+ lp->rx_bd_tail = RX_BD_NUM - 1;
/* Enable RX DMA transfers */
wmb();
lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p);
lp->dma_out(lp, RX_TAILDESC_PTR,
- lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+ lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail));
/* Prepare for TX DMA transfer */
lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
@@ -788,6 +792,9 @@ static void temac_start_xmit_done(struct net_device *ndev)
stat = be32_to_cpu(cur_p->app0);
}
+ /* Matches barrier in temac_start_xmit */
+ smp_mb();
+
netif_wake_queue(ndev);
}
@@ -830,9 +837,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
if (temac_check_tx_bd_space(lp, num_frag + 1)) {
- if (!netif_queue_stopped(ndev))
- netif_stop_queue(ndev);
- return NETDEV_TX_BUSY;
+ if (netif_queue_stopped(ndev))
+ return NETDEV_TX_BUSY;
+
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in temac_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (temac_check_tx_bd_space(lp, num_frag))
+ return NETDEV_TX_BUSY;
+
+ netif_wake_queue(ndev);
}
cur_p->app0 = 0;
@@ -850,12 +867,16 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
cur_p->len = cpu_to_be32(skb_headlen(skb));
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
ptr_to_txbd((void *)skb, cur_p);
for (ii = 0; ii < num_frag; ii++) {
- lp->tx_bd_tail++;
- if (lp->tx_bd_tail >= TX_BD_NUM)
+ if (++lp->tx_bd_tail >= TX_BD_NUM)
lp->tx_bd_tail = 0;
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
@@ -863,6 +884,27 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_frag_address(frag),
skb_frag_size(frag),
DMA_TO_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) {
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ while (--ii >= 0) {
+ --frag;
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ }
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_headlen(skb), DMA_TO_DEVICE);
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
cur_p->len = cpu_to_be32(skb_frag_size(frag));
cur_p->app0 = 0;
@@ -884,31 +926,56 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}
+static int ll_temac_recv_buffers_available(struct temac_local *lp)
+{
+ int available;
+
+ if (!lp->rx_skb[lp->rx_bd_ci])
+ return 0;
+ available = 1 + lp->rx_bd_tail - lp->rx_bd_ci;
+ if (available <= 0)
+ available += RX_BD_NUM;
+ return available;
+}
static void ll_temac_recv(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
- struct sk_buff *skb, *new_skb;
- unsigned int bdstat;
- struct cdmac_bd *cur_p;
- dma_addr_t tail_p, skb_dma_addr;
- int length;
unsigned long flags;
+ int rx_bd;
+ bool update_tail = false;
spin_lock_irqsave(&lp->rx_lock, flags);
- tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
-
- bdstat = be32_to_cpu(cur_p->app0);
- while ((bdstat & STS_CTRL_APP0_CMPLT)) {
+ /* Process all received buffers, passing them on network
+ * stack. After this, the buffer descriptors will be in an
+ * un-allocated stage, where no skb is allocated for it, and
+ * they are therefore not available for TEMAC/DMA.
+ */
+ do {
+ struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci];
+ struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci];
+ unsigned int bdstat = be32_to_cpu(bd->app0);
+ int length;
+
+ /* While this should not normally happen, we can end
+ * here when GFP_ATOMIC allocations fail, and we
+ * therefore have un-allocated buffers.
+ */
+ if (!skb)
+ break;
- skb = lp->rx_skb[lp->rx_bd_ci];
- length = be32_to_cpu(cur_p->app4) & 0x3FFF;
+ /* Loop over all completed buffer descriptors */
+ if (!(bdstat & STS_CTRL_APP0_CMPLT))
+ break;
- dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+ dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys),
XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
+ /* The buffer is not valid for DMA anymore */
+ bd->phys = 0;
+ bd->len = 0;
+ length = be32_to_cpu(bd->app4) & 0x3FFF;
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, ndev);
skb_checksum_none_assert(skb);
@@ -923,43 +990,102 @@ static void ll_temac_recv(struct net_device *ndev)
* (back) for proper IP checksum byte order
* (be16).
*/
- skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF);
+ skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF);
skb->ip_summed = CHECKSUM_COMPLETE;
}
if (!skb_defer_rx_timestamp(skb))
netif_rx(skb);
+ /* The skb buffer is now owned by network stack above */
+ lp->rx_skb[lp->rx_bd_ci] = NULL;
ndev->stats.rx_packets++;
ndev->stats.rx_bytes += length;
- new_skb = netdev_alloc_skb_ip_align(ndev,
- XTE_MAX_JUMBO_FRAME_SIZE);
- if (!new_skb) {
- spin_unlock_irqrestore(&lp->rx_lock, flags);
- return;
+ rx_bd = lp->rx_bd_ci;
+ if (++lp->rx_bd_ci >= RX_BD_NUM)
+ lp->rx_bd_ci = 0;
+ } while (rx_bd != lp->rx_bd_tail);
+
+ /* DMA operations will halt when the last buffer descriptor is
+ * processed (ie. the one pointed to by RX_TAILDESC_PTR).
+ * When that happens, no more interrupt events will be
+ * generated. No IRQ_COAL or IRQ_DLY, and not even an
+ * IRQ_ERR. To avoid stalling, we schedule a delayed work
+ * when there is a potential risk of that happening. The work
+ * will call this function, and thus re-schedule itself until
+ * enough buffers are available again.
+ */
+ if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx)
+ schedule_delayed_work(&lp->restart_work, HZ / 1000);
+
+ /* Allocate new buffers for those buffer descriptors that were
+ * passed to network stack. Note that GFP_ATOMIC allocations
+ * can fail (e.g. when a larger burst of GFP_ATOMIC
+ * allocations occurs), so while we try to allocate all
+ * buffers in the same interrupt where they were processed, we
+ * continue with what we could get in case of allocation
+ * failure. Allocation of remaining buffers will be retried
+ * in following calls.
+ */
+ while (1) {
+ struct sk_buff *skb;
+ struct cdmac_bd *bd;
+ dma_addr_t skb_dma_addr;
+
+ rx_bd = lp->rx_bd_tail + 1;
+ if (rx_bd >= RX_BD_NUM)
+ rx_bd = 0;
+ bd = &lp->rx_bd_v[rx_bd];
+
+ if (bd->phys)
+ break; /* All skb's allocated */
+
+ skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE);
+ if (!skb) {
+ dev_warn(&ndev->dev, "skb alloc failed\n");
+ break;
}
- cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
- skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data,
+ skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
- cur_p->phys = cpu_to_be32(skb_dma_addr);
- cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
- lp->rx_skb[lp->rx_bd_ci] = new_skb;
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent,
+ skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ break;
+ }
- lp->rx_bd_ci++;
- if (lp->rx_bd_ci >= RX_BD_NUM)
- lp->rx_bd_ci = 0;
+ bd->phys = cpu_to_be32(skb_dma_addr);
+ bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
+ bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
+ lp->rx_skb[rx_bd] = skb;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
- bdstat = be32_to_cpu(cur_p->app0);
+ lp->rx_bd_tail = rx_bd;
+ update_tail = true;
+ }
+
+ /* Move tail pointer when buffers have been allocated */
+ if (update_tail) {
+ lp->dma_out(lp, RX_TAILDESC_PTR,
+ lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail);
}
- lp->dma_out(lp, RX_TAILDESC_PTR, tail_p);
spin_unlock_irqrestore(&lp->rx_lock, flags);
}
+/* Function scheduled to ensure a restart in case of DMA halt
+ * condition caused by running out of buffer descriptors.
+ */
+static void ll_temac_restart_work_func(struct work_struct *work)
+{
+ struct temac_local *lp = container_of(work, struct temac_local,
+ restart_work.work);
+ struct net_device *ndev = lp->ndev;
+
+ ll_temac_recv(ndev);
+}
+
static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev)
{
struct net_device *ndev = _ndev;
@@ -1052,6 +1178,8 @@ static int temac_stop(struct net_device *ndev)
dev_dbg(&ndev->dev, "temac_close()\n");
+ cancel_delayed_work_sync(&lp->restart_work);
+
free_irq(lp->tx_irq, ndev);
free_irq(lp->rx_irq, ndev);
@@ -1173,6 +1301,7 @@ static int temac_probe(struct platform_device *pdev)
lp->dev = &pdev->dev;
lp->options = XTE_OPTION_DEFAULTS;
spin_lock_init(&lp->rx_lock);
+ INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func);
/* Setup mutex for synchronization of indirect register access */
if (pdata) {
@@ -1279,6 +1408,7 @@ static int temac_probe(struct platform_device *pdev)
*/
lp->tx_chnl_ctrl = 0x10220000;
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
/* Finished with the DMA node; drop the reference */
of_node_put(dma_np);
@@ -1310,11 +1440,14 @@ static int temac_probe(struct platform_device *pdev)
(pdata->tx_irq_count << 16);
else
lp->tx_chnl_ctrl = 0x10220000;
- if (pdata->rx_irq_timeout || pdata->rx_irq_count)
+ if (pdata->rx_irq_timeout || pdata->rx_irq_count) {
lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) |
(pdata->rx_irq_count << 16);
- else
+ lp->coalesce_count_rx = pdata->rx_irq_count;
+ } else {
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
+ }
}
/* Error handle returned DMA RX and TX interrupts */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index ae3f3084c2ed..1b320bcf150a 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -99,7 +99,7 @@ static struct netvsc_device *alloc_net_device(void)
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
- net_device->tx_disable = false;
+ net_device->tx_disable = true;
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 65e12cb07f45..2c0a24c606fc 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1068,6 +1068,7 @@ static int netvsc_attach(struct net_device *ndev,
}
/* In any case device is now ready */
+ nvdev->tx_disable = false;
netif_device_attach(ndev);
/* Note: enable and attach happen when sub-channels setup */
@@ -2476,6 +2477,8 @@ static int netvsc_probe(struct hv_device *dev,
else
net->max_mtu = ETH_DATA_LEN;
+ nvdev->tx_disable = false;
+
ret = register_netdevice(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 28e33ece4ce1..9a8badafea8a 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1306,6 +1306,9 @@ static int marvell_read_status_page_an(struct phy_device *phydev,
}
}
+ if (!(status & MII_M1011_PHY_STATUS_RESOLVED))
+ return 0;
+
if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
phydev->duplex = DUPLEX_FULL;
else
@@ -1365,6 +1368,8 @@ static int marvell_read_status_page(struct phy_device *phydev, int page)
linkmode_zero(phydev->lp_advertising);
phydev->pause = 0;
phydev->asym_pause = 0;
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
if (phydev->autoneg == AUTONEG_ENABLE)
err = marvell_read_status_page_an(phydev, fiber, status);
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index 937ac7da2789..f686f40f6bdc 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -345,11 +345,11 @@ enum macsec_bank {
BIT(VSC8531_FORCE_LED_OFF) | \
BIT(VSC8531_FORCE_LED_ON))
-#define MSCC_VSC8584_REVB_INT8051_FW "mscc_vsc8584_revb_int8051_fb48.bin"
+#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin"
#define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800
#define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48
-#define MSCC_VSC8574_REVB_INT8051_FW "mscc_vsc8574_revb_int8051_29e8.bin"
+#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin"
#define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000
#define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index a1caeee12236..dd2e23fb67c0 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -167,7 +167,7 @@ EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg);
*/
int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart)
{
- int ret = 0;
+ int ret;
if (!restart) {
/* Configure and restart aneg if it wasn't set before */
@@ -180,9 +180,9 @@ int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart)
}
if (restart)
- ret = genphy_c45_restart_aneg(phydev);
+ return genphy_c45_restart_aneg(phydev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 6a5056e0ae77..c8b0c34030d3 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -247,7 +247,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
* MDIO bus driver and clock gated at this point.
*/
if (!netdev)
- return !phydev->suspended;
+ goto out;
if (netdev->wol_enabled)
return false;
@@ -267,7 +267,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
if (device_may_wakeup(&netdev->dev))
return false;
- return true;
+out:
+ return !phydev->suspended;
}
static int mdio_bus_phy_suspend(struct device *dev)
@@ -1792,7 +1793,7 @@ EXPORT_SYMBOL(genphy_restart_aneg);
*/
int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart)
{
- int ret = 0;
+ int ret;
if (!restart) {
/* Advertisement hasn't changed, but maybe aneg was never on to
@@ -1807,9 +1808,9 @@ int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart)
}
if (restart)
- ret = genphy_restart_aneg(phydev);
+ return genphy_restart_aneg(phydev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(genphy_check_and_restart_aneg);
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 6f4d7ba8b109..babb01888b78 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -863,7 +863,10 @@ err_free_chan:
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
sl_free_netdev(sl->dev);
+ /* do not call free_netdev before rtnl_unlock */
+ rtnl_unlock();
free_netdev(sl->dev);
+ return err;
err_exit:
rtnl_unlock();
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3b7a3b8a5e06..5754bb6ca0ee 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -337,6 +337,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net)
netdev_dbg(net, "mode: raw IP\n");
} else if (!net->header_ops) { /* don't bother if already set */
ether_setup(net);
+ /* Restoring min/max mtu values set originally by usbnet */
+ net->min_mtu = 0;
+ net->max_mtu = ETH_MAX_MTU;
clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
netdev_dbg(net, "mode: Ethernet\n");
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ace4dd9e953c..d3f23d6254e4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1078,9 +1078,9 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx)
spin_lock(&nvmeq->cq_poll_lock);
found = nvme_process_cq(nvmeq, &start, &end, -1);
+ nvme_complete_cqes(nvmeq, start, end);
spin_unlock(&nvmeq->cq_poll_lock);
- nvme_complete_cqes(nvmeq, start, end);
return found;
}
diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index d20aabc26273..3a10e678c7f4 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -670,7 +670,7 @@ static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
* outbound memory @ 3GB). So instead it will start at the 1x
* multiple of its size
*/
- if (!*rc_bar2_size || *rc_bar2_offset % *rc_bar2_size ||
+ if (!*rc_bar2_size || (*rc_bar2_offset & (*rc_bar2_size - 1)) ||
(*rc_bar2_offset < SZ_4G && *rc_bar2_offset > SZ_2G)) {
dev_err(dev, "Invalid rc_bar2_offset/size: size 0x%llx, off 0x%llx\n",
*rc_bar2_size, *rc_bar2_offset);
diff --git a/drivers/platform/chrome/wilco_ec/properties.c b/drivers/platform/chrome/wilco_ec/properties.c
index e69682c95ea2..62f27610dd33 100644
--- a/drivers/platform/chrome/wilco_ec/properties.c
+++ b/drivers/platform/chrome/wilco_ec/properties.c
@@ -5,7 +5,7 @@
#include <linux/platform_data/wilco-ec.h>
#include <linux/string.h>
-#include <linux/unaligned/le_memmove.h>
+#include <asm/unaligned.h>
/* Operation code; what the EC should do with the property */
enum ec_property_op {
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index 2b1e4da1944f..4bfb79f20588 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -410,7 +410,7 @@ struct fsf_qtcb_bottom_port {
u8 cb_util;
u8 a_util;
u8 res2;
- u16 temperature;
+ s16 temperature;
u16 vcc;
u16 tx_bias;
u16 tx_power;
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c
index 494b9fe9cc94..a711a0d15100 100644
--- a/drivers/s390/scsi/zfcp_sysfs.c
+++ b/drivers/s390/scsi/zfcp_sysfs.c
@@ -800,7 +800,7 @@ static ZFCP_DEV_ATTR(adapter_diag, b2b_credit, 0400,
static ZFCP_DEV_ATTR(adapter_diag_sfp, _name, 0400, \
zfcp_sysfs_adapter_diag_sfp_##_name##_show, NULL)
-ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 5, "%hu");
+ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 6, "%hd");
ZFCP_DEFINE_DIAG_SFP_ATTR(vcc, vcc, 5, "%hu");
ZFCP_DEFINE_DIAG_SFP_ATTR(tx_bias, tx_bias, 5, "%hu");
ZFCP_DEFINE_DIAG_SFP_ATTR(tx_power, tx_power, 5, "%hu");
diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index 9c5f7c9178c6..2b865c6423e2 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -628,6 +628,8 @@ redisc:
}
out:
kref_put(&rdata->kref, fc_rport_destroy);
+ if (!IS_ERR(fp))
+ fc_frame_free(fp);
}
/**
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index e4282bce5834..f45c22b09726 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -161,6 +161,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct scsi_disk *sdkp = scsi_disk(disk);
+ sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity);
unsigned int nr, i;
unsigned char *buf;
size_t offset, buflen = 0;
@@ -171,11 +172,15 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
/* Not a zoned device */
return -EOPNOTSUPP;
+ if (!capacity)
+ /* Device gone or invalid */
+ return -ENODEV;
+
buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
if (!buf)
return -ENOMEM;
- while (zone_idx < nr_zones && sector < get_capacity(disk)) {
+ while (zone_idx < nr_zones && sector < capacity) {
ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
sectors_to_logical(sdkp->device, sector), true);
if (ret)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0fbb8fe6e521..e4240e4ae8bb 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -688,7 +688,7 @@ static const struct block_device_operations sr_bdops =
.release = sr_block_release,
.ioctl = sr_block_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = sr_block_compat_ioctl,
+ .compat_ioctl = sr_block_compat_ioctl,
#endif
.check_events = sr_block_check_events,
.revalidate_disk = sr_block_revalidate_disk,
diff --git a/drivers/tee/amdtee/Kconfig b/drivers/tee/amdtee/Kconfig
index 4e32b6413b41..191f9715fa9a 100644
--- a/drivers/tee/amdtee/Kconfig
+++ b/drivers/tee/amdtee/Kconfig
@@ -3,6 +3,6 @@
config AMDTEE
tristate "AMD-TEE"
default m
- depends on CRYPTO_DEV_SP_PSP
+ depends on CRYPTO_DEV_SP_PSP && CRYPTO_DEV_CCP_DD
help
This implements AMD's Trusted Execution Environment (TEE) driver.
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e158159671fa..18e205eeb9af 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1414,10 +1414,6 @@ static int vhost_net_release(struct inode *inode, struct file *f)
static struct socket *get_raw_socket(int fd)
{
- struct {
- struct sockaddr_ll sa;
- char buf[MAX_ADDR_LEN];
- } uaddr;
int r;
struct socket *sock = sockfd_lookup(fd, &r);
@@ -1430,11 +1426,7 @@ static struct socket *get_raw_socket(int fd)
goto err;
}
- r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0);
- if (r < 0)
- goto err;
-
- if (uaddr.sa.sll_family != AF_PACKET) {
+ if (sock->sk->sk_family != AF_PACKET) {
r = -EPFNOSUPPORT;
goto err;
}
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index b069349b52f5..3065dd670a18 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -54,6 +54,13 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+#define WDAT_DEFAULT_TIMEOUT 30
+
+static int timeout = WDAT_DEFAULT_TIMEOUT;
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+ __MODULE_STRING(WDAT_DEFAULT_TIMEOUT) ")");
+
static int wdat_wdt_read(struct wdat_wdt *wdat,
const struct wdat_instruction *instr, u32 *value)
{
@@ -389,7 +396,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)
memset(&r, 0, sizeof(r));
r.start = gas->address;
- r.end = r.start + gas->access_width - 1;
+ r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1;
if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
r.flags = IORESOURCE_MEM;
} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
@@ -438,6 +445,22 @@ static int wdat_wdt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, wdat);
+ /*
+ * Set initial timeout so that userspace has time to configure the
+ * watchdog properly after it has opened the device. In some cases
+ * the BIOS default is too short and causes immediate reboot.
+ */
+ if (timeout * 1000 < wdat->wdd.min_hw_heartbeat_ms ||
+ timeout * 1000 > wdat->wdd.max_hw_heartbeat_ms) {
+ dev_warn(dev, "Invalid timeout %d given, using %d\n",
+ timeout, WDAT_DEFAULT_TIMEOUT);
+ timeout = WDAT_DEFAULT_TIMEOUT;
+ }
+
+ ret = wdat_wdt_set_timeout(&wdat->wdd, timeout);
+ if (ret)
+ return ret;
+
watchdog_set_nowayout(&wdat->wdd, nowayout);
return devm_watchdog_register_device(dev, &wdat->wdd);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ff1b764b0c0e..0c7c4adb664e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2391,7 +2391,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct flex_groups **old_groups, **new_groups;
- int size, i;
+ int size, i, j;
if (!sbi->s_log_groups_per_flex)
return 0;
@@ -2412,8 +2412,8 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
sizeof(struct flex_groups)),
GFP_KERNEL);
if (!new_groups[i]) {
- for (i--; i >= sbi->s_flex_groups_allocated; i--)
- kvfree(new_groups[i]);
+ for (j = sbi->s_flex_groups_allocated; j < i; j++)
+ kvfree(new_groups[j]);
kvfree(new_groups);
ext4_msg(sb, KERN_ERR,
"not enough memory for %d flex groups", size);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 0a5ab1a8f69a..bf8ed1b0b90a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -535,42 +535,23 @@ next:
} while (1);
}
-static inline void io_worker_spin_for_work(struct io_wqe *wqe)
-{
- int i = 0;
-
- while (++i < 1000) {
- if (io_wqe_run_queue(wqe))
- break;
- if (need_resched())
- break;
- cpu_relax();
- }
-}
-
static int io_wqe_worker(void *data)
{
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
- bool did_work;
io_worker_start(wqe, worker);
- did_work = false;
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
- if (did_work)
- io_worker_spin_for_work(wqe);
spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
- did_work = true;
goto loop;
}
- did_work = false;
/* drops the lock on success, retry */
if (__io_worker_idle(wqe, worker)) {
__release(&wqe->lock);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index ccc7d84af57d..33baba4370c5 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -79,16 +79,10 @@ struct io_wq_work {
pid_t task_pid;
};
-#define INIT_IO_WORK(work, _func) \
- do { \
- (work)->list.next = NULL; \
- (work)->func = _func; \
- (work)->files = NULL; \
- (work)->mm = NULL; \
- (work)->creds = NULL; \
- (work)->fs = NULL; \
- (work)->flags = 0; \
- } while (0) \
+#define INIT_IO_WORK(work, _func) \
+ do { \
+ *(work) = (struct io_wq_work){ .func = _func }; \
+ } while (0) \
typedef void (get_work_fn)(struct io_wq_work *);
typedef void (put_work_fn)(struct io_wq_work *);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index de650df9ac53..6a595c13e108 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -183,17 +183,12 @@ struct fixed_file_table {
struct file **files;
};
-enum {
- FFD_F_ATOMIC,
-};
-
struct fixed_file_data {
struct fixed_file_table *table;
struct io_ring_ctx *ctx;
struct percpu_ref refs;
struct llist_head put_llist;
- unsigned long state;
struct work_struct ref_work;
struct completion done;
};
@@ -1483,10 +1478,10 @@ static void io_free_req(struct io_kiocb *req)
__attribute__((nonnull))
static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
- io_req_find_next(req, nxtptr);
-
- if (refcount_dec_and_test(&req->refs))
+ if (refcount_dec_and_test(&req->refs)) {
+ io_req_find_next(req, nxtptr);
__io_free_req(req);
+ }
}
static void io_put_req(struct io_kiocb *req)
@@ -1821,6 +1816,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_add(&req->list, &ctx->poll_list);
else
list_add_tail(&req->list, &ctx->poll_list);
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+ wq_has_sleeper(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
}
static void io_file_put(struct io_submit_state *state)
@@ -2071,7 +2070,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
ssize_t ret;
ret = import_single_range(rw, buf, sqe_len, *iovec, iter);
*iovec = NULL;
- return ret;
+ return ret < 0 ? ret : sqe_len;
}
if (req->io) {
@@ -3002,6 +3001,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+
if (!io || req->opcode == IORING_OP_SEND)
return 0;
/* iovec is already imported */
@@ -3154,6 +3158,11 @@ static int io_recvmsg_prep(struct io_kiocb *req,
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
+#ifdef CONFIG_COMPAT
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+
if (!io || req->opcode == IORING_OP_RECV)
return 0;
/* iovec is already imported */
@@ -4705,11 +4714,21 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt = NULL;
+ const struct cred *old_creds = NULL;
int ret;
again:
linked_timeout = io_prep_linked_timeout(req);
+ if (req->work.creds && req->work.creds != current_cred()) {
+ if (old_creds)
+ revert_creds(old_creds);
+ if (old_creds == req->work.creds)
+ old_creds = NULL; /* restored original creds */
+ else
+ old_creds = override_creds(req->work.creds);
+ }
+
ret = io_issue_sqe(req, sqe, &nxt, true);
/*
@@ -4735,7 +4754,7 @@ punt:
err:
/* drop submission reference */
- io_put_req(req);
+ io_put_req_find_next(req, &nxt);
if (linked_timeout) {
if (!ret)
@@ -4759,6 +4778,8 @@ done_req:
goto punt;
goto again;
}
+ if (old_creds)
+ revert_creds(old_creds);
}
static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -4803,7 +4824,6 @@ static inline void io_queue_link_head(struct io_kiocb *req)
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
{
- const struct cred *old_creds = NULL;
struct io_ring_ctx *ctx = req->ctx;
unsigned int sqe_flags;
int ret, id;
@@ -4818,14 +4838,12 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
id = READ_ONCE(sqe->personality);
if (id) {
- const struct cred *personality_creds;
-
- personality_creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!personality_creds)) {
+ req->work.creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!req->work.creds)) {
ret = -EINVAL;
goto err_req;
}
- old_creds = override_creds(personality_creds);
+ get_cred(req->work.creds);
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
@@ -4837,8 +4855,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
err_req:
io_cqring_add_event(req, ret);
io_double_put_req(req);
- if (old_creds)
- revert_creds(old_creds);
return false;
}
@@ -4899,8 +4915,6 @@ err_req:
}
}
- if (old_creds)
- revert_creds(old_creds);
return true;
}
@@ -5081,9 +5095,8 @@ static int io_sq_thread(void *data)
const struct cred *old_cred;
mm_segment_t old_fs;
DEFINE_WAIT(wait);
- unsigned inflight;
unsigned long timeout;
- int ret;
+ int ret = 0;
complete(&ctx->completions[1]);
@@ -5091,39 +5104,19 @@ static int io_sq_thread(void *data)
set_fs(USER_DS);
old_cred = override_creds(ctx->creds);
- ret = timeout = inflight = 0;
+ timeout = jiffies + ctx->sq_thread_idle;
while (!kthread_should_park()) {
unsigned int to_submit;
- if (inflight) {
+ if (!list_empty(&ctx->poll_list)) {
unsigned nr_events = 0;
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- /*
- * inflight is the count of the maximum possible
- * entries we submitted, but it can be smaller
- * if we dropped some of them. If we don't have
- * poll entries available, then we know that we
- * have nothing left to poll for. Reset the
- * inflight count to zero in that case.
- */
- mutex_lock(&ctx->uring_lock);
- if (!list_empty(&ctx->poll_list))
- io_iopoll_getevents(ctx, &nr_events, 0);
- else
- inflight = 0;
- mutex_unlock(&ctx->uring_lock);
- } else {
- /*
- * Normal IO, just pretend everything completed.
- * We don't have to poll completions for that.
- */
- nr_events = inflight;
- }
-
- inflight -= nr_events;
- if (!inflight)
+ mutex_lock(&ctx->uring_lock);
+ if (!list_empty(&ctx->poll_list))
+ io_iopoll_getevents(ctx, &nr_events, 0);
+ else
timeout = jiffies + ctx->sq_thread_idle;
+ mutex_unlock(&ctx->uring_lock);
}
to_submit = io_sqring_entries(ctx);
@@ -5152,7 +5145,7 @@ static int io_sq_thread(void *data)
* more IO, we should wait for the application to
* reap events and wake us up.
*/
- if (inflight ||
+ if (!list_empty(&ctx->poll_list) ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
cond_resched();
@@ -5162,6 +5155,19 @@ static int io_sq_thread(void *data)
prepare_to_wait(&ctx->sqo_wait, &wait,
TASK_INTERRUPTIBLE);
+ /*
+ * While doing polled IO, before going to sleep, we need
+ * to check if there are new reqs added to poll_list, it
+ * is because reqs may have been punted to io worker and
+ * will be added to poll_list later, hence check the
+ * poll_list again.
+ */
+ if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+ !list_empty_careful(&ctx->poll_list)) {
+ finish_wait(&ctx->sqo_wait, &wait);
+ continue;
+ }
+
/* Tell userspace we may need a wakeup call */
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
/* make sure to read SQ tail after writing flags */
@@ -5189,8 +5195,7 @@ static int io_sq_thread(void *data)
mutex_lock(&ctx->uring_lock);
ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
mutex_unlock(&ctx->uring_lock);
- if (ret > 0)
- inflight += ret;
+ timeout = jiffies + ctx->sq_thread_idle;
}
set_fs(old_fs);
@@ -5595,7 +5600,6 @@ static void io_ring_file_ref_switch(struct work_struct *work)
data = container_of(work, struct fixed_file_data, ref_work);
io_ring_file_ref_flush(data);
- percpu_ref_get(&data->refs);
percpu_ref_switch_to_percpu(&data->refs);
}
@@ -5771,8 +5775,13 @@ static void io_atomic_switch(struct percpu_ref *ref)
{
struct fixed_file_data *data;
+ /*
+ * Juggle reference to ensure we hit zero, if needed, so we can
+ * switch back to percpu mode
+ */
data = container_of(ref, struct fixed_file_data, refs);
- clear_bit(FFD_F_ATOMIC, &data->state);
+ percpu_ref_put(&data->refs);
+ percpu_ref_get(&data->refs);
}
static bool io_queue_file_removal(struct fixed_file_data *data,
@@ -5795,11 +5804,7 @@ static bool io_queue_file_removal(struct fixed_file_data *data,
llist_add(&pfile->llist, &data->put_llist);
if (pfile == &pfile_stack) {
- if (!test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
- percpu_ref_put(&data->refs);
- percpu_ref_switch_to_atomic(&data->refs,
- io_atomic_switch);
- }
+ percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
wait_for_completion(&done);
flush_work(&data->ref_work);
return false;
@@ -5873,10 +5878,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
up->offset++;
}
- if (ref_switch && !test_and_set_bit(FFD_F_ATOMIC, &data->state)) {
- percpu_ref_put(&data->refs);
+ if (ref_switch)
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
- }
return done ? done : err;
}
@@ -6334,6 +6337,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_sqe_buffer_unregister(ctx);
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
+ idr_destroy(&ctx->personality_idr);
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
@@ -6647,6 +6651,7 @@ out_fput:
return submitted ? submitted : ret;
}
+#ifdef CONFIG_PROC_FS
static int io_uring_show_cred(int id, void *p, void *data)
{
const struct cred *cred = p;
@@ -6720,6 +6725,7 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
percpu_ref_put(&ctx->refs);
}
}
+#endif
static const struct file_operations io_uring_fops = {
.release = io_uring_release,
@@ -6731,7 +6737,9 @@ static const struct file_operations io_uring_fops = {
#endif
.poll = io_uring_poll,
.fasync = io_uring_fasync,
+#ifdef CONFIG_PROC_FS
.show_fdinfo = io_uring_show_fdinfo,
+#endif
};
static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d181948c0390..3dccc23cf010 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1150,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
/* For undo access buffer must have data copied */
if (undo && !jh->b_committed_data)
goto out;
- if (jh->b_transaction != handle->h_transaction &&
- jh->b_next_transaction != handle->h_transaction)
+ if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
+ READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
goto out;
/*
* There are two reasons for the barrier here:
@@ -2569,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
* our jh reference and thus __jbd2_journal_file_buffer() must not
* take a new one.
*/
- jh->b_transaction = jh->b_next_transaction;
- jh->b_next_transaction = NULL;
+ WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
+ WRITE_ONCE(jh->b_next_transaction, NULL);
if (buffer_freed(bh))
jlist = BJ_Forget;
else if (jh->b_modified)
diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig
index fb87ad372e29..ef2697b78820 100644
--- a/fs/zonefs/Kconfig
+++ b/fs/zonefs/Kconfig
@@ -2,6 +2,7 @@ config ZONEFS_FS
tristate "zonefs filesystem support"
depends on BLOCK
depends on BLK_DEV_ZONED
+ select FS_IOMAP
help
zonefs is a simple file system which exposes zones of a zoned block
device (e.g. host-managed or host-aware SMR disk drives) as files.
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index 8bc6ef82d693..69aee3dfb660 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -601,13 +601,13 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
/*
- * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT
+ * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
* as this can cause write reordering (e.g. the first aio gets EAGAIN
* on the inode lock but the second goes through but is now unaligned).
*/
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb)
- && (iocb->ki_flags & IOCB_NOWAIT))
- iocb->ki_flags &= ~IOCB_NOWAIT;
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
+ (iocb->ki_flags & IOCB_NOWAIT))
+ return -EOPNOTSUPP;
if (iocb->ki_flags & IOCB_NOWAIT) {
if (!inode_trylock(inode))
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index a2583c2bc054..4defed58ea33 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -532,11 +532,12 @@ typedef u64 acpi_integer;
strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE)
/*
- * Algorithm to obtain access bit width.
+ * Algorithm to obtain access bit or byte width.
* Can be used with access_width of struct acpi_generic_address and access_size of
* struct acpi_resource_generic_register.
*/
#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2))
+#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1))
/*******************************************************************************
*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 053ea4b51988..10455b2bbbb4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -524,7 +524,7 @@ struct request_queue {
unsigned int sg_reserved_size;
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
- struct blk_trace *blk_trace;
+ struct blk_trace __rcu *blk_trace;
struct mutex blk_trace_mutex;
#endif
/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7bb2d8de9f30..3b6ff5902edc 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
**/
#define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \
do { \
- struct blk_trace *bt = (q)->blk_trace; \
+ struct blk_trace *bt; \
+ \
+ rcu_read_lock(); \
+ bt = rcu_dereference((q)->blk_trace); \
if (unlikely(bt)) \
__trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\
+ rcu_read_unlock(); \
} while (0)
#define blk_add_trace_msg(q, fmt, ...) \
blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__)
@@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f
static inline bool blk_trace_note_message_enabled(struct request_queue *q)
{
- struct blk_trace *bt = q->blk_trace;
- if (likely(!bt))
- return false;
- return bt->act_mask & BLK_TC_NOTIFY;
+ struct blk_trace *bt;
+ bool ret;
+
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ ret = bt && (bt->act_mask & BLK_TC_NOTIFY);
+ rcu_read_unlock();
+ return ret;
}
extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 7e18c939663e..d11e183fcb54 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,9 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#define BOOTCONFIG_MAGIC "#BOOTCONFIG\n"
+#define BOOTCONFIG_MAGIC_LEN 12
+
/* XBC tree node */
struct xbc_node {
u16 next;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index cd41f209043f..875f71132b14 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -492,7 +492,7 @@ struct hid_report_enum {
};
#define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */
-#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */
+#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */
#define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */
#define HID_OUTPUT_FIFO_SIZE 64
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 93338fd54af8..33d379602314 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -22,19 +22,23 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
unsigned int data_len);
+#if IS_ENABLED(CONFIG_NF_NAT)
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
+#else
+#define icmpv6_ndo_send icmpv6_send
+#endif
+
#else
static inline void icmpv6_send(struct sk_buff *skb,
u8 type, u8 code, __u32 info)
{
-
}
-#endif
-#if IS_ENABLED(CONFIG_NF_NAT)
-void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
-#else
-#define icmpv6_ndo_send icmpv6_send
+static inline void icmpv6_ndo_send(struct sk_buff *skb,
+ u8 type, u8 code, __u32 info)
+{
+}
#endif
extern int icmpv6_init(void);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e89eb67356cb..bcb9b2ac0791 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -889,6 +889,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
+int kvm_arch_post_init_vm(struct kvm *kvm);
+void kvm_arch_pre_destroy_vm(struct kvm *kvm);
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
/*
@@ -1342,7 +1344,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
struct kvm_vcpu *kvm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
bool kvm_arch_has_irq_bypass(void);
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 908d38dbcb91..5448c8b443db 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -121,6 +121,7 @@ struct ip_set_ext {
u32 timeout;
u8 packets_op;
u8 bytes_op;
+ bool target;
};
struct ip_set;
@@ -187,6 +188,14 @@ struct ip_set_type_variant {
/* Return true if "b" set is the same as "a"
* according to the create set parameters */
bool (*same_set)(const struct ip_set *a, const struct ip_set *b);
+ /* Region-locking is used */
+ bool region_lock;
+};
+
+struct ip_set_region {
+ spinlock_t lock; /* Region lock */
+ size_t ext_size; /* Size of the dynamic extensions */
+ u32 elements; /* Number of elements vs timeout */
};
/* The core set type structure */
@@ -501,7 +510,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
}
#define IP_SET_INIT_KEXT(skb, opt, set) \
- { .bytes = (skb)->len, .packets = 1, \
+ { .bytes = (skb)->len, .packets = 1, .target = true,\
.timeout = ip_set_adt_opt_timeout(opt, set) }
#define IP_SET_INIT_UEXT(set) \
diff --git a/init/Kconfig b/init/Kconfig
index 452bc1835cd4..20a6ac33761c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1226,13 +1226,12 @@ endif
config BOOT_CONFIG
bool "Boot config support"
- depends on BLK_DEV_INITRD
- default y
+ select BLK_DEV_INITRD
help
Extra boot config allows system admin to pass a config file as
complemental extension of kernel cmdline when booting.
The boot config file must be attached at the end of initramfs
- with checksum and size.
+ with checksum, size and magic word.
See <file:Documentation/admin-guide/bootconfig.rst> for details.
If unsure, say Y.
diff --git a/init/main.c b/init/main.c
index f95b014a5479..ee4947af823f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -268,7 +268,6 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
{
struct xbc_node *knode, *vnode;
char *end = buf + size;
- char c = '\"';
const char *val;
int ret;
@@ -279,25 +278,20 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
return ret;
vnode = xbc_node_get_child(knode);
- ret = snprintf(buf, rest(buf, end), "%s%c", xbc_namebuf,
- vnode ? '=' : ' ');
- if (ret < 0)
- return ret;
- buf += ret;
- if (!vnode)
+ if (!vnode) {
+ ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
+ if (ret < 0)
+ return ret;
+ buf += ret;
continue;
-
- c = '\"';
+ }
xbc_array_for_each_value(vnode, val) {
- ret = snprintf(buf, rest(buf, end), "%c%s", c, val);
+ ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
+ xbc_namebuf, val);
if (ret < 0)
return ret;
buf += ret;
- c = ',';
}
- if (rest(buf, end) > 2)
- strcpy(buf, "\" ");
- buf += 2;
}
return buf - (end - size);
@@ -335,7 +329,7 @@ static char * __init xbc_make_cmdline(const char *key)
return new_cmdline;
}
-u32 boot_config_checksum(unsigned char *p, u32 size)
+static u32 boot_config_checksum(unsigned char *p, u32 size)
{
u32 ret = 0;
@@ -374,7 +368,11 @@ static void __init setup_boot_config(const char *cmdline)
if (!initrd_end)
goto not_found;
- hdr = (u32 *)(initrd_end - 8);
+ data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+ if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+ goto not_found;
+
+ hdr = (u32 *)(data - 8);
size = hdr[0];
csum = hdr[1];
@@ -418,6 +416,14 @@ not_found:
}
#else
#define setup_boot_config(cmdline) do { } while (0)
+
+static int __init warn_bootconfig(char *str)
+{
+ pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOTCONFIG is not set.\n");
+ return 0;
+}
+early_param("bootconfig", warn_bootconfig);
+
#endif
/* Change NUL term back to "=", to make "param" the whole string. */
diff --git a/kernel/audit.c b/kernel/audit.c
index 17b0d523afb3..9ddfe2aa6671 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1101,13 +1101,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_log_end(ab);
}
-static int audit_set_feature(struct sk_buff *skb)
+static int audit_set_feature(struct audit_features *uaf)
{
- struct audit_features *uaf;
int i;
BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names));
- uaf = nlmsg_data(nlmsg_hdr(skb));
/* if there is ever a version 2 we should handle that here */
@@ -1175,6 +1173,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
u32 seq;
void *data;
+ int data_len;
int err;
struct audit_buffer *ab;
u16 msg_type = nlh->nlmsg_type;
@@ -1188,6 +1187,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
+ data_len = nlmsg_len(nlh);
switch (msg_type) {
case AUDIT_GET: {
@@ -1211,7 +1211,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_status s;
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
+ memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
if (s.mask & AUDIT_STATUS_ENABLED) {
err = audit_set_enabled(s.enabled);
if (err < 0)
@@ -1315,7 +1315,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return err;
break;
case AUDIT_SET_FEATURE:
- err = audit_set_feature(skb);
+ if (data_len < sizeof(struct audit_features))
+ return -EINVAL;
+ err = audit_set_feature(data);
if (err)
return err;
break;
@@ -1327,6 +1329,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
+ char *str = data;
+
err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
@@ -1334,26 +1338,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
break;
}
audit_log_user_recv_msg(&ab, msg_type);
- if (msg_type != AUDIT_USER_TTY)
+ if (msg_type != AUDIT_USER_TTY) {
+ /* ensure NULL termination */
+ str[data_len - 1] = '\0';
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
- (char *)data);
- else {
- int size;
-
+ str);
+ } else {
audit_log_format(ab, " data=");
- size = nlmsg_len(nlh);
- if (size > 0 &&
- ((unsigned char *)data)[size - 1] == '\0')
- size--;
- audit_log_n_untrustedstring(ab, data, size);
+ if (data_len > 0 && str[data_len - 1] == '\0')
+ data_len--;
+ audit_log_n_untrustedstring(ab, str, data_len);
}
audit_log_end(ab);
}
break;
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
- if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
+ if (data_len < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
audit_log_common_recv_msg(audit_context(), &ab,
@@ -1365,7 +1367,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
return -EPERM;
}
- err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
+ err = audit_rule_change(msg_type, seq, data, data_len);
break;
case AUDIT_LIST_RULES:
err = audit_list_rules_send(skb, seq);
@@ -1380,7 +1382,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
- size_t msglen = nlmsg_len(nlh);
+ size_t msglen = data_len;
char *old, *new;
err = -EINVAL;
@@ -1456,7 +1458,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
- memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh)));
+ memcpy(&s, data, min_t(size_t, sizeof(s), data_len));
/* check if new data is valid */
if ((s.enabled != 0 && s.enabled != 1) ||
(s.log_passwd != 0 && s.log_passwd != 1))
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b0126e9c0743..026e34da4ace 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
bufp = data->buf;
for (i = 0; i < data->field_count; i++) {
struct audit_field *f = &entry->rule.fields[i];
+ u32 f_val;
err = -EINVAL;
@@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
goto exit_free;
f->type = data->fields[i];
- f->val = data->values[i];
+ f_val = data->values[i];
/* Support legacy tests for a valid loginuid */
- if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
+ if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) {
f->type = AUDIT_LOGINUID_SET;
- f->val = 0;
+ f_val = 0;
entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
}
@@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SUID:
case AUDIT_FSUID:
case AUDIT_OBJ_UID:
- f->uid = make_kuid(current_user_ns(), f->val);
+ f->uid = make_kuid(current_user_ns(), f_val);
if (!uid_valid(f->uid))
goto exit_free;
break;
@@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_SGID:
case AUDIT_FSGID:
case AUDIT_OBJ_GID:
- f->gid = make_kgid(current_user_ns(), f->val);
+ f->gid = make_kgid(current_user_ns(), f_val);
if (!gid_valid(f->gid))
goto exit_free;
break;
case AUDIT_ARCH:
+ f->val = f_val;
entry->rule.arch_f = f;
break;
case AUDIT_SUBJ_USER:
@@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_OBJ_TYPE:
case AUDIT_OBJ_LEV_LOW:
case AUDIT_OBJ_LEV_HIGH:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
+ }
+ entry->rule.buflen += f_val;
+ f->lsm_str = str;
err = security_audit_rule_init(f->type, f->op, str,
(void **)&f->lsm_rule);
/* Keep currently invalid fields around in case they
@@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
pr_warn("audit rule for LSM \'%s\' is invalid\n",
str);
err = 0;
- }
- if (err) {
- kfree(str);
+ } else if (err)
goto exit_free;
- } else
- f->lsm_str = str;
break;
case AUDIT_WATCH:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
- err = audit_to_watch(&entry->rule, str, f->val, f->op);
+ }
+ err = audit_to_watch(&entry->rule, str, f_val, f->op);
if (err) {
kfree(str);
goto exit_free;
}
+ entry->rule.buflen += f_val;
break;
case AUDIT_DIR:
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
-
+ }
err = audit_make_tree(&entry->rule, str, f->op);
kfree(str);
if (err)
goto exit_free;
+ entry->rule.buflen += f_val;
break;
case AUDIT_INODE:
+ f->val = f_val;
err = audit_to_inode(&entry->rule, f);
if (err)
goto exit_free;
break;
case AUDIT_FILTERKEY:
- if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
+ if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN)
goto exit_free;
- str = audit_unpack_string(&bufp, &remain, f->val);
- if (IS_ERR(str))
+ str = audit_unpack_string(&bufp, &remain, f_val);
+ if (IS_ERR(str)) {
+ err = PTR_ERR(str);
goto exit_free;
- entry->rule.buflen += f->val;
+ }
+ entry->rule.buflen += f_val;
entry->rule.filterkey = str;
break;
case AUDIT_EXE:
- if (entry->rule.exe || f->val > PATH_MAX)
+ if (entry->rule.exe || f_val > PATH_MAX)
goto exit_free;
- str = audit_unpack_string(&bufp, &remain, f->val);
+ str = audit_unpack_string(&bufp, &remain, f_val);
if (IS_ERR(str)) {
err = PTR_ERR(str);
goto exit_free;
}
- entry->rule.buflen += f->val;
-
- audit_mark = audit_alloc_mark(&entry->rule, str, f->val);
+ audit_mark = audit_alloc_mark(&entry->rule, str, f_val);
if (IS_ERR(audit_mark)) {
kfree(str);
err = PTR_ERR(audit_mark);
goto exit_free;
}
+ entry->rule.buflen += f_val;
entry->rule.exe = audit_mark;
break;
+ default:
+ f->val = f_val;
+ break;
}
}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index ddade80ad276..d82b7b88d616 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
* hibernation for allocations made while saving the image and for device
* drivers, in case they need to allocate memory from their hibernation
* callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
- * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
+ * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
* /sys/power/reserved_size, respectively). To make this happen, we compute the
* total number of available page frames and allocate at least
*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..c1217bfe5e81 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8337,6 +8337,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
sgs->group_capacity = group->sgc->capacity;
+ sgs->group_weight = group->group_weight;
+
sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 9ad8dea93dbb..5b2396350dd1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
{
struct sigqueue *q = NULL;
struct user_struct *user;
+ int sigpending;
/*
* Protect access to @t credentials. This can go away when all
* callers hold rcu read lock.
+ *
+ * NOTE! A pending signal will hold on to the user refcount,
+ * and we get/put the refcount only when the sigpending count
+ * changes from/to zero.
*/
rcu_read_lock();
- user = get_uid(__task_cred(t)->user);
- atomic_inc(&user->sigpending);
+ user = __task_cred(t)->user;
+ sigpending = atomic_inc_return(&user->sigpending);
+ if (sigpending == 1)
+ get_uid(user);
rcu_read_unlock();
- if (override_rlimit ||
- atomic_read(&user->sigpending) <=
- task_rlimit(t, RLIMIT_SIGPENDING)) {
+ if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}
if (unlikely(q == NULL)) {
- atomic_dec(&user->sigpending);
- free_uid(user);
+ if (atomic_dec_and_test(&user->sigpending))
+ free_uid(user);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
@@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
return;
- atomic_dec(&q->user->sigpending);
- free_uid(q->user);
+ if (atomic_dec_and_test(&q->user->sigpending))
+ free_uid(q->user);
kmem_cache_free(sigqueue_cachep, q);
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 91e885194dbc..402eef84c859 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -143,8 +143,8 @@ if FTRACE
config BOOTTIME_TRACING
bool "Boot-time Tracing support"
- depends on BOOT_CONFIG && TRACING
- default y
+ depends on TRACING
+ select BOOT_CONFIG
help
Enable developer to setup ftrace subsystem via supplemental
kernel cmdline at boot time for debugging (tracing) driver
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 0735ae8545d8..4560878f0bac 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
static void blk_trace_cleanup(struct blk_trace *bt)
{
+ synchronize_rcu();
blk_trace_free(bt);
put_probe_ref();
}
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
int ret;
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (bt == NULL)
return -EINVAL;
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
void blk_trace_shutdown(struct request_queue *q)
{
mutex_lock(&q->blk_trace_mutex);
-
- if (q->blk_trace) {
+ if (rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex))) {
__blk_trace_startstop(q, 0);
__blk_trace_remove(q);
}
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
#ifdef CONFIG_BLK_CGROUP
static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ /* We don't use the 'bt' value here except as an optimization... */
+ bt = rcu_dereference_protected(q->blk_trace, 1);
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return 0;
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
static void blk_add_trace_rq(struct request *rq, int error,
unsigned int nr_bytes, u32 what, u64 cgid)
{
- struct blk_trace *bt = rq->q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(rq->q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
if (blk_rq_is_passthrough(rq))
what |= BLK_TC_ACT(BLK_TC_PC);
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
rq->cmd_flags, what, error, 0, NULL, cgid);
+ rcu_read_unlock();
}
static void blk_add_trace_rq_insert(void *ignore,
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
u32 what, int error)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
bio_op(bio), bio->bi_opf, what, error, 0, NULL,
blk_trace_bio_get_cgid(q, bio));
+ rcu_read_unlock();
}
static void blk_add_trace_bio_bounce(void *ignore,
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
NULL, 0);
+ rcu_read_unlock();
}
}
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
else {
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
0, 0, NULL, 0);
+ rcu_read_unlock();
}
}
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
+ rcu_read_unlock();
}
static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
unsigned int depth, bool explicit)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(depth);
u32 what;
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
}
+ rcu_read_unlock();
}
static void blk_add_trace_split(void *ignore,
struct request_queue *q, struct bio *bio,
unsigned int pdu)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(pdu);
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
&rpdu, blk_trace_bio_get_cgid(q, bio));
}
+ rcu_read_unlock();
}
/**
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
struct request_queue *q, struct bio *bio,
dev_t dev, sector_t from)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
struct blk_io_trace_remap r;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
r.device_from = cpu_to_be32(dev);
r.device_to = cpu_to_be32(bio_dev(bio));
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
+ rcu_read_unlock();
}
/**
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
struct request *rq, dev_t dev,
sector_t from)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
struct blk_io_trace_remap r;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
r.device_from = cpu_to_be32(dev);
r.device_to = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+ rcu_read_unlock();
}
/**
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
struct request *rq,
void *data, size_t len)
{
- struct blk_trace *bt = q->blk_trace;
+ struct blk_trace *bt;
- if (likely(!bt))
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (likely(!bt)) {
+ rcu_read_unlock();
return;
+ }
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
BLK_TA_DRV_DATA, 0, len, data,
blk_trace_request_get_cgid(q, rq));
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
return -EINVAL;
put_probe_ref();
+ synchronize_rcu();
blk_trace_free(bt);
return 0;
}
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q;
struct block_device *bdev;
+ struct blk_trace *bt;
ssize_t ret = -ENXIO;
bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (attr == &dev_attr_enable) {
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ ret = sprintf(buf, "%u\n", !!bt);
goto out_unlock_bdev;
}
- if (q->blk_trace == NULL)
+ if (bt == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
- ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
+ ret = blk_trace_mask2str(buf, bt->act_mask);
else if (attr == &dev_attr_pid)
- ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+ ret = sprintf(buf, "%u\n", bt->pid);
else if (attr == &dev_attr_start_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+ ret = sprintf(buf, "%llu\n", bt->start_lba);
else if (attr == &dev_attr_end_lba)
- ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+ ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev:
mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
struct block_device *bdev;
struct request_queue *q;
struct hd_struct *p;
+ struct blk_trace *bt;
u64 value;
ssize_t ret = -EINVAL;
@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
+ bt = rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex));
if (attr == &dev_attr_enable) {
- if (!!value == !!q->blk_trace) {
+ if (!!value == !!bt) {
ret = 0;
goto out_unlock_bdev;
}
@@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
}
ret = 0;
- if (q->blk_trace == NULL)
+ if (bt == NULL)
ret = blk_trace_setup_queue(q, bdev);
if (ret == 0) {
if (attr == &dev_attr_act_mask)
- q->blk_trace->act_mask = value;
+ bt->act_mask = value;
else if (attr == &dev_attr_pid)
- q->blk_trace->pid = value;
+ bt->pid = value;
else if (attr == &dev_attr_start_lba)
- q->blk_trace->start_lba = value;
+ bt->start_lba = value;
else if (attr == &dev_attr_end_lba)
- q->blk_trace->end_lba = value;
+ bt->end_lba = value;
}
out_unlock_bdev:
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 4aefe003cb7c..7d56d621ffea 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -111,11 +111,11 @@ static int __init test_gen_synth_cmd(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"hula hoops"; /* next_comm_field */
+ vals[1] = (u64)(long)"hula hoops"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed"; /* my_string_field */
vals[6] = 598; /* my_int_field */
/* Now generate a gen_synth_test event */
@@ -218,11 +218,11 @@ static int __init test_empty_synth_event(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
+ vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed_2.0"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed_2.0"; /* my_string_field */
vals[6] = 399; /* my_int_field */
/* Now trace an empty_synth_test event */
@@ -290,11 +290,11 @@ static int __init test_create_synth_event(void)
/* Create some bogus values just for testing */
vals[0] = 777; /* next_pid_field */
- vals[1] = (u64)"tiddlywinks"; /* next_comm_field */
+ vals[1] = (u64)(long)"tiddlywinks"; /* next_comm_field */
vals[2] = 1000000; /* ts_ns */
vals[3] = 1000; /* ts_ms */
- vals[4] = smp_processor_id(); /* cpu */
- vals[5] = (u64)"thneed"; /* my_string_field */
+ vals[4] = raw_smp_processor_id(); /* cpu */
+ vals[5] = (u64)(long)"thneed"; /* my_string_field */
vals[6] = 398; /* my_int_field */
/* Now generate a create_synth_test event */
@@ -330,7 +330,7 @@ static int __init test_add_next_synth_val(void)
goto out;
/* next_comm_field */
- ret = synth_event_add_next_val((u64)"slinky", &trace_state);
+ ret = synth_event_add_next_val((u64)(long)"slinky", &trace_state);
if (ret)
goto out;
@@ -345,12 +345,12 @@ static int __init test_add_next_synth_val(void)
goto out;
/* cpu */
- ret = synth_event_add_next_val(smp_processor_id(), &trace_state);
+ ret = synth_event_add_next_val(raw_smp_processor_id(), &trace_state);
if (ret)
goto out;
/* my_string_field */
- ret = synth_event_add_next_val((u64)"thneed_2.01", &trace_state);
+ ret = synth_event_add_next_val((u64)(long)"thneed_2.01", &trace_state);
if (ret)
goto out;
@@ -388,7 +388,7 @@ static int __init test_add_synth_val(void)
if (ret)
goto out;
- ret = synth_event_add_val("cpu", smp_processor_id(), &trace_state);
+ ret = synth_event_add_val("cpu", raw_smp_processor_id(), &trace_state);
if (ret)
goto out;
@@ -396,12 +396,12 @@ static int __init test_add_synth_val(void)
if (ret)
goto out;
- ret = synth_event_add_val("next_comm_field", (u64)"silly putty",
+ ret = synth_event_add_val("next_comm_field", (u64)(long)"silly putty",
&trace_state);
if (ret)
goto out;
- ret = synth_event_add_val("my_string_field", (u64)"thneed_9",
+ ret = synth_event_add_val("my_string_field", (u64)(long)"thneed_9",
&trace_state);
if (ret)
goto out;
@@ -423,13 +423,13 @@ static int __init test_trace_synth_event(void)
/* Trace some bogus values just for testing */
ret = synth_event_trace(create_synth_test, 7, /* number of values */
- 444, /* next_pid_field */
- (u64)"clackers", /* next_comm_field */
- 1000000, /* ts_ns */
- 1000, /* ts_ms */
- smp_processor_id(), /* cpu */
- (u64)"Thneed", /* my_string_field */
- 999); /* my_int_field */
+ (u64)444, /* next_pid_field */
+ (u64)(long)"clackers", /* next_comm_field */
+ (u64)1000000, /* ts_ns */
+ (u64)1000, /* ts_ms */
+ (u64)raw_smp_processor_id(), /* cpu */
+ (u64)(long)"Thneed", /* my_string_field */
+ (u64)999); /* my_int_field */
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c797a15a1fc7..6b11e4e2150c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1837,6 +1837,7 @@ static __init int init_trace_selftests(void)
pr_info("Running postponed tracer tests:\n");
+ tracing_selftest_running = true;
list_for_each_entry_safe(p, n, &postponed_selftests, list) {
/* This loop can take minutes when sanitizers are enabled, so
* lets make sure we allow RCU processing.
@@ -1859,6 +1860,7 @@ static __init int init_trace_selftests(void)
list_del(&p->list);
kfree(p);
}
+ tracing_selftest_running = false;
out:
mutex_unlock(&trace_types_lock);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 483b3fd1094f..5f6834a2bf41 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -821,6 +821,29 @@ static const char *synth_field_fmt(char *type)
return fmt;
}
+static void print_synth_event_num_val(struct trace_seq *s,
+ char *print_fmt, char *name,
+ int size, u64 val, char *space)
+{
+ switch (size) {
+ case 1:
+ trace_seq_printf(s, print_fmt, name, (u8)val, space);
+ break;
+
+ case 2:
+ trace_seq_printf(s, print_fmt, name, (u16)val, space);
+ break;
+
+ case 4:
+ trace_seq_printf(s, print_fmt, name, (u32)val, space);
+ break;
+
+ default:
+ trace_seq_printf(s, print_fmt, name, val, space);
+ break;
+ }
+}
+
static enum print_line_t print_synth_event(struct trace_iterator *iter,
int flags,
struct trace_event *event)
@@ -859,10 +882,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
+ char *space = (i == se->n_fields - 1 ? "" : " ");
- trace_seq_printf(s, print_fmt, se->fields[i]->name,
- entry->fields[n_u64],
- i == se->n_fields - 1 ? "" : " ");
+ print_synth_event_num_val(s, print_fmt,
+ se->fields[i]->name,
+ se->fields[i]->size,
+ entry->fields[n_u64],
+ space);
if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
trace_seq_puts(s, " (");
@@ -1805,6 +1831,8 @@ __synth_event_trace_start(struct trace_event_file *file,
int entry_size, fields_size = 0;
int ret = 0;
+ memset(trace_state, '\0', sizeof(*trace_state));
+
/*
* Normal event tracing doesn't get called at all unless the
* ENABLED bit is set (which attaches the probe thus allowing
@@ -1885,6 +1913,11 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
return ret;
}
+ if (n_vals != state.event->n_fields) {
+ ret = -EINVAL;
+ goto out;
+ }
+
va_start(args, n_vals);
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
u64 val;
@@ -1898,12 +1931,30 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
- state.entry->fields[n_u64] = val;
+ struct synth_field *field = state.event->fields[i];
+
+ switch (field->size) {
+ case 1:
+ *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ break;
+
+ default:
+ state.entry->fields[n_u64] = val;
+ break;
+ }
n_u64++;
}
}
va_end(args);
-
+out:
__synth_event_trace_end(&state);
return ret;
@@ -1942,6 +1993,11 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
return ret;
}
+ if (n_vals != state.event->n_fields) {
+ ret = -EINVAL;
+ goto out;
+ }
+
for (i = 0, n_u64 = 0; i < state.event->n_fields; i++) {
if (state.event->fields[i]->is_string) {
char *str_val = (char *)(long)vals[i];
@@ -1950,11 +2006,30 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
- state.entry->fields[n_u64] = vals[i];
+ struct synth_field *field = state.event->fields[i];
+ u64 val = vals[i];
+
+ switch (field->size) {
+ case 1:
+ *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ break;
+
+ default:
+ state.entry->fields[n_u64] = val;
+ break;
+ }
n_u64++;
}
}
-
+out:
__synth_event_trace_end(&state);
return ret;
@@ -1997,8 +2072,6 @@ int synth_event_trace_start(struct trace_event_file *file,
if (!trace_state)
return -EINVAL;
- memset(trace_state, '\0', sizeof(*trace_state));
-
ret = __synth_event_trace_start(file, trace_state);
if (ret == -ENOENT)
ret = 0; /* just disabled, not really an error */
@@ -2069,8 +2142,25 @@ static int __synth_event_add_val(const char *field_name, u64 val,
str_field = (char *)&entry->fields[field->offset];
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
- } else
- entry->fields[field->offset] = val;
+ } else {
+ switch (field->size) {
+ case 1:
+ *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+ break;
+
+ default:
+ trace_state->entry->fields[field->offset] = val;
+ break;
+ }
+ }
out:
return ret;
}
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 3ea601a2eba5..ec3ce7fd299f 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -533,7 +533,7 @@ struct xbc_node *find_match_node(struct xbc_node *node, char *k)
static int __init __xbc_add_key(char *k)
{
- struct xbc_node *node;
+ struct xbc_node *node, *child;
if (!xbc_valid_keyword(k))
return xbc_parse_error("Invalid keyword", k);
@@ -543,8 +543,12 @@ static int __init __xbc_add_key(char *k)
if (!last_parent) /* the first level */
node = find_match_node(xbc_nodes, k);
- else
- node = find_match_node(xbc_node_get_child(last_parent), k);
+ else {
+ child = xbc_node_get_child(last_parent);
+ if (child && xbc_node_is_value(child))
+ return xbc_parse_error("Subkey is mixed with value", k);
+ node = find_match_node(child, k);
+ }
if (node)
last_parent = node;
@@ -574,10 +578,10 @@ static int __init __xbc_parse_keys(char *k)
return __xbc_add_key(k);
}
-static int __init xbc_parse_kv(char **k, char *v)
+static int __init xbc_parse_kv(char **k, char *v, int op)
{
struct xbc_node *prev_parent = last_parent;
- struct xbc_node *node;
+ struct xbc_node *child;
char *next;
int c, ret;
@@ -585,12 +589,19 @@ static int __init xbc_parse_kv(char **k, char *v)
if (ret)
return ret;
+ child = xbc_node_get_child(last_parent);
+ if (child) {
+ if (xbc_node_is_key(child))
+ return xbc_parse_error("Value is mixed with subkey", v);
+ else if (op == '=')
+ return xbc_parse_error("Value is redefined", v);
+ }
+
c = __xbc_parse_value(&v, &next);
if (c < 0)
return c;
- node = xbc_add_sibling(v, XBC_VALUE);
- if (!node)
+ if (!xbc_add_sibling(v, XBC_VALUE))
return -ENOMEM;
if (c == ',') { /* Array */
@@ -763,7 +774,7 @@ int __init xbc_init(char *buf)
p = buf;
do {
- q = strpbrk(p, "{}=;\n#");
+ q = strpbrk(p, "{}=+;\n#");
if (!q) {
p = skip_spaces(p);
if (*p != '\0')
@@ -774,8 +785,15 @@ int __init xbc_init(char *buf)
c = *q;
*q++ = '\0';
switch (c) {
+ case '+':
+ if (*q++ != '=') {
+ ret = xbc_parse_error("Wrong '+' operator",
+ q - 2);
+ break;
+ }
+ /* Fall through */
case '=':
- ret = xbc_parse_kv(&p, q);
+ ret = xbc_parse_kv(&p, q, c);
break;
case '{':
ret = xbc_open_brace(&p, q);
diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 6d83cafebc69..ad0699ce702f 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
__le64 lens[2];
} b __aligned(16);
+ if (WARN_ON(src_len > INT_MAX))
+ return false;
+
chacha_load_key(b.k, key);
b.iv[0] = 0;
diff --git a/mm/shmem.c b/mm/shmem.c
index c8f7540ef048..aad3ba74b0e9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3386,8 +3386,6 @@ static const struct constant_table shmem_param_enums_huge[] = {
{"always", SHMEM_HUGE_ALWAYS },
{"within_size", SHMEM_HUGE_WITHIN_SIZE },
{"advise", SHMEM_HUGE_ADVISE },
- {"deny", SHMEM_HUGE_DENY },
- {"force", SHMEM_HUGE_FORCE },
{}
};
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index dc3d2c1dd9d5..0e3dbc5f3c34 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -34,7 +34,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -54,15 +53,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
diff --git a/net/core/dev.c b/net/core/dev.c
index e10bd680dc03..c6c985fe7b1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3076,6 +3076,8 @@ static u16 skb_tx_hash(const struct net_device *dev,
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
+ if (hash >= qoffset)
+ hash -= qoffset;
while (unlikely(hash >= qcount))
hash -= qcount;
return hash + qoffset;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 549ee56b7a21..5e220809844c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2103,11 +2103,11 @@ err_action_values_put:
static struct devlink_dpipe_table *
devlink_dpipe_table_find(struct list_head *dpipe_tables,
- const char *table_name)
+ const char *table_name, struct devlink *devlink)
{
struct devlink_dpipe_table *table;
-
- list_for_each_entry_rcu(table, dpipe_tables, list) {
+ list_for_each_entry_rcu(table, dpipe_tables, list,
+ lockdep_is_held(&devlink->lock)) {
if (!strcmp(table->name, table_name))
return table;
}
@@ -2226,7 +2226,7 @@ static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -2382,7 +2382,7 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink,
struct devlink_dpipe_table *table;
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
return -EINVAL;
@@ -6854,7 +6854,7 @@ bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
rcu_read_lock();
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
enabled = false;
if (table)
enabled = table->counters_enabled;
@@ -6878,26 +6878,34 @@ int devlink_dpipe_table_register(struct devlink *devlink,
void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
-
- if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
- return -EEXIST;
+ int err = 0;
if (WARN_ON(!table_ops->size_get))
return -EINVAL;
+ mutex_lock(&devlink->lock);
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name,
+ devlink)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ if (!table) {
+ err = -ENOMEM;
+ goto unlock;
+ }
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
table->counter_control_extern = counter_control_extern;
- mutex_lock(&devlink->lock);
list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+unlock:
mutex_unlock(&devlink->lock);
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
@@ -6914,7 +6922,7 @@ void devlink_dpipe_table_unregister(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table)
goto unlock;
list_del_rcu(&table->list);
@@ -7071,7 +7079,7 @@ int devlink_dpipe_table_resource_set(struct devlink *devlink,
mutex_lock(&devlink->lock);
table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
- table_name);
+ table_name, devlink);
if (!table) {
err = -EINVAL;
goto out;
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index 8977fe1f3946..ef9197541cb3 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -305,7 +305,8 @@ nla_put_failure:
static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
[ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT },
[ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG },
- [ETHTOOL_A_BITSET_SIZE] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32,
+ ETHNL_MAX_BITSET_SIZE),
[ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED },
[ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY },
[ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY },
diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h
index b8247e34109d..b849f9d19676 100644
--- a/net/ethtool/bitset.h
+++ b/net/ethtool/bitset.h
@@ -3,6 +3,8 @@
#ifndef _NET_ETHTOOL_BITSET_H
#define _NET_ETHTOOL_BITSET_H
+#define ETHNL_MAX_BITSET_SIZE S16_MAX
+
typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN];
int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 376882215919..0bd10a1f477f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
+ int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
return;
@@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options));
opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
- if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
return;
if (gateway)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 316ebdf8151d..6b6b57000dad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6124,7 +6124,11 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
struct request_sock *req;
- tcp_try_undo_loss(sk, false);
+ /* If we are still handling the SYNACK RTO, see if timestamp ECR allows
+ * undo. If peer SACKs triggered fast recovery, we can't undo here.
+ */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
+ tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_sk(sk)->retrans_stamp = 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 79fc012dd2ca..debdaeba5d8c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -183,9 +183,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = -EBUSY;
break;
}
- } else if (sk->sk_protocol != IPPROTO_TCP)
+ } else if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_prot != &tcpv6_prot) {
+ retv = -EBUSY;
+ break;
+ }
break;
-
+ } else {
+ break;
+ }
if (sk->sk_state != TCP_ESTABLISHED) {
retv = -ENOTCONN;
break;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e041af2f021a..88d7a692a965 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 2 &&
ifmgd->auth_data->expected_transaction == 2)) {
if (!ieee80211_mark_sta_auth(sdata, bssid))
- goto out_err;
+ return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
@@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
- return;
- out_err:
- mutex_unlock(&sdata->local->sta_mtx);
- /* ignore frame -- wait for timeout */
}
#define case_WLAN(type) \
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0e05ff037672..0ba98ad9bc85 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->sta_mtx);
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry(sta, &local->sta_list, list) {
if (sdata != sta->sdata &&
(!sta->sdata->bss || sta->sdata->bss != sdata->bss))
continue;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e9aa6807b5be..3c19a8efdcea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -543,6 +543,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
}
+static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
+{
+ return 0;
+}
+
static int __mptcp_init_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -551,6 +556,7 @@ static int __mptcp_init_sock(struct sock *sk)
__set_bit(MPTCP_SEND_SPACE, &msk->flags);
msk->first = NULL;
+ inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69c107f9ba8d..8dd17589217d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -723,6 +723,20 @@ ip_set_rcu_get(struct net *net, ip_set_id_t index)
return set;
}
+static inline void
+ip_set_lock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_lock_bh(&set->lock);
+}
+
+static inline void
+ip_set_unlock(struct ip_set *set)
+{
+ if (!set->variant->region_lock)
+ spin_unlock_bh(&set->lock);
+}
+
int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
@@ -744,9 +758,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
if (ret == -EAGAIN) {
/* Type requests element to be completed */
pr_debug("element must be completed, ADD is triggered\n");
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
ret = 1;
} else {
/* --return-nomatch: invert matched element */
@@ -775,9 +789,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -797,9 +811,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return -IPSET_ERR_TYPE_MISMATCH;
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
return ret;
}
@@ -1264,9 +1278,9 @@ ip_set_flush_set(struct ip_set *set)
{
pr_debug("set: %s\n", set->name);
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
set->variant->flush(set);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
}
static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
@@ -1713,9 +1727,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
- spin_lock_bh(&set->lock);
+ ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
- spin_unlock_bh(&set->lock);
+ ip_set_unlock(set);
retried = true;
} while (ret == -EAGAIN &&
set->variant->resize &&
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7480ce55b5c8..e52d7b7597a0 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -7,13 +7,21 @@
#include <linux/rcupdate.h>
#include <linux/jhash.h>
#include <linux/types.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>
-#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c)
-#define ipset_dereference_protected(p, set) \
- __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock))
-
-#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1)
+#define __ipset_dereference(p) \
+ rcu_dereference_protected(p, 1)
+#define ipset_dereference_nfnl(p) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+#define ipset_dereference_set(p, set) \
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&(set)->lock))
+#define ipset_dereference_bh_nfnl(p) \
+ rcu_dereference_bh_check(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* Hashing which uses arrays to resolve clashing. The hash table is resized
* (doubled) when searching becomes too long.
@@ -72,11 +80,35 @@ struct hbucket {
__aligned(__alignof__(u64));
};
+/* Region size for locking == 2^HTABLE_REGION_BITS */
+#define HTABLE_REGION_BITS 10
+#define ahash_numof_locks(htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 1 \
+ : jhash_size((htable_bits) - HTABLE_REGION_BITS))
+#define ahash_sizeof_regions(htable_bits) \
+ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
+#define ahash_region(n, htable_bits) \
+ ((n) % ahash_numof_locks(htable_bits))
+#define ahash_bucket_start(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? 0 \
+ : (h) * jhash_size(HTABLE_REGION_BITS))
+#define ahash_bucket_end(h, htable_bits) \
+ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \
+ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS))
+
+struct htable_gc {
+ struct delayed_work dwork;
+ struct ip_set *set; /* Set the gc belongs to */
+ u32 region; /* Last gc run position */
+};
+
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
atomic_t ref; /* References for resizing */
- atomic_t uref; /* References for dumping */
+ atomic_t uref; /* References for dumping and gc */
u8 htable_bits; /* size of hash table == 2^htable_bits */
+ u32 maxelem; /* Maxelem per region */
+ struct ip_set_region *hregion; /* Region locks and ext sizes */
struct hbucket __rcu *bucket[0]; /* hashtable buckets */
};
@@ -162,6 +194,10 @@ htable_bits(u32 hashsize)
#define NLEN 0
#endif /* IP_SET_HASH_WITH_NETS */
+#define SET_ELEM_EXPIRED(set, d) \
+ (SET_WITH_TIMEOUT(set) && \
+ ip_set_timeout_expired(ext_timeout(d, set)))
+
#endif /* _IP_SET_HASH_GEN_H */
#ifndef MTYPE
@@ -205,10 +241,12 @@ htable_bits(u32 hashsize)
#undef mtype_test_cidrs
#undef mtype_test
#undef mtype_uref
-#undef mtype_expire
#undef mtype_resize
+#undef mtype_ext_size
+#undef mtype_resize_ad
#undef mtype_head
#undef mtype_list
+#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
#undef mtype_variant
@@ -247,10 +285,12 @@ htable_bits(u32 hashsize)
#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs)
#define mtype_test IPSET_TOKEN(MTYPE, _test)
#define mtype_uref IPSET_TOKEN(MTYPE, _uref)
-#define mtype_expire IPSET_TOKEN(MTYPE, _expire)
#define mtype_resize IPSET_TOKEN(MTYPE, _resize)
+#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size)
+#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
+#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
@@ -275,8 +315,7 @@ htable_bits(u32 hashsize)
/* The generic hash structure */
struct htype {
struct htable __rcu *table; /* the hash table */
- struct timer_list gc; /* garbage collection when timeout enabled */
- struct ip_set *set; /* attached to this ip_set */
+ struct htable_gc gc; /* gc workqueue */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -288,21 +327,33 @@ struct htype {
#ifdef IP_SET_HASH_WITH_NETMASK
u8 netmask; /* netmask value for subnets to store */
#endif
+ struct list_head ad; /* Resize add|del backlist */
struct mtype_elem next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_NETS
struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
#endif
};
+/* ADD|DEL entries saved during resize */
+struct mtype_resize_ad {
+ struct list_head list;
+ enum ipset_adt ad; /* ADD|DEL element */
+ struct mtype_elem d; /* Element value */
+ struct ip_set_ext ext; /* Extensions for ADD */
+ struct ip_set_ext mext; /* Target extensions for ADD */
+ u32 flags; /* Flags for ADD */
+};
+
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
* sized networks. cidr == real cidr + 1 to support /0.
*/
static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
int i, j;
+ spin_lock_bh(&set->lock);
/* Add in increasing prefix order, so larger cidr first */
for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
if (j != -1) {
@@ -311,7 +362,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
j = i;
} else if (h->nets[i].cidr[n] == cidr) {
h->nets[CIDR_POS(cidr)].nets[n]++;
- return;
+ goto unlock;
}
}
if (j != -1) {
@@ -320,24 +371,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
}
h->nets[i].cidr[n] = cidr;
h->nets[CIDR_POS(cidr)].nets[n] = 1;
+unlock:
+ spin_unlock_bh(&set->lock);
}
static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
+mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n)
{
u8 i, j, net_end = NLEN - 1;
+ spin_lock_bh(&set->lock);
for (i = 0; i < NLEN; i++) {
if (h->nets[i].cidr[n] != cidr)
continue;
h->nets[CIDR_POS(cidr)].nets[n]--;
if (h->nets[CIDR_POS(cidr)].nets[n] > 0)
- return;
+ goto unlock;
for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
h->nets[j].cidr[n] = 0;
- return;
+ goto unlock;
}
+unlock:
+ spin_unlock_bh(&set->lock);
}
#endif
@@ -345,7 +401,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
static size_t
mtype_ahash_memsize(const struct htype *h, const struct htable *t)
{
- return sizeof(*h) + sizeof(*t);
+ return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits);
}
/* Get the ith element from the array block n */
@@ -369,24 +425,29 @@ mtype_flush(struct ip_set *set)
struct htype *h = set->data;
struct htable *t;
struct hbucket *n;
- u32 i;
-
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
- if (!n)
- continue;
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set, n);
- /* FIXME: use slab cache */
- rcu_assign_pointer(hbucket(t, i), NULL);
- kfree_rcu(n, rcu);
+ u32 r, i;
+
+ t = ipset_dereference_nfnl(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
+ if (!n)
+ continue;
+ if (set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set, n);
+ /* FIXME: use slab cache */
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ }
+ t->hregion[r].ext_size = 0;
+ t->hregion[r].elements = 0;
+ spin_unlock_bh(&t->hregion[r].lock);
}
#ifdef IP_SET_HASH_WITH_NETS
memset(h->nets, 0, sizeof(h->nets));
#endif
- set->elements = 0;
- set->ext_size = 0;
}
/* Destroy the hashtable part of the set */
@@ -397,7 +458,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -406,6 +467,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
kfree(n);
}
+ ip_set_free(t->hregion);
ip_set_free(t);
}
@@ -414,28 +476,21 @@ static void
mtype_destroy(struct ip_set *set)
{
struct htype *h = set->data;
+ struct list_head *l, *lt;
if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&h->gc);
+ cancel_delayed_work_sync(&h->gc.dwork);
- mtype_ahash_destroy(set,
- __ipset_dereference_protected(h->table, 1), true);
+ mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ list_for_each_safe(l, lt, &h->ad) {
+ list_del(l);
+ kfree(l);
+ }
kfree(h);
set->data = NULL;
}
-static void
-mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
-{
- struct htype *h = set->data;
-
- timer_setup(&h->gc, gc, 0);
- mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
- pr_debug("gc initialized, run in every %u\n",
- IPSET_GC_PERIOD(set->timeout));
-}
-
static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
@@ -454,11 +509,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
-/* Delete expired elements from the hashtable */
static void
-mtype_expire(struct ip_set *set, struct htype *h)
+mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r)
{
- struct htable *t;
struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
@@ -466,10 +519,12 @@ mtype_expire(struct ip_set *set, struct htype *h)
#ifdef IP_SET_HASH_WITH_NETS
u8 k;
#endif
+ u8 htable_bits = t->htable_bits;
- t = ipset_dereference_protected(h->table, set);
- for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(t, i), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ for (i = ahash_bucket_start(r, htable_bits);
+ i < ahash_bucket_end(r, htable_bits); i++) {
+ n = __ipset_dereference(hbucket(t, i));
if (!n)
continue;
for (j = 0, d = 0; j < n->pos; j++) {
@@ -485,58 +540,100 @@ mtype_expire(struct ip_set *set, struct htype *h)
smp_mb__after_atomic();
#ifdef IP_SET_HASH_WITH_NETS
for (k = 0; k < IPSET_NET_COUNT; k++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, k)),
k);
#endif
+ t->hregion[r].elements--;
ip_set_ext_destroy(set, data);
- set->elements--;
d++;
}
if (d >= AHASH_INIT_SIZE) {
if (d >= n->size) {
+ t->hregion[r].ext_size -=
+ ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, i), NULL);
kfree_rcu(n, rcu);
continue;
}
tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
- /* Still try to delete expired elements */
+ /* Still try to delete expired elements. */
continue;
tmp->size = n->size - AHASH_INIT_SIZE;
for (j = 0, d = 0; j < n->pos; j++) {
if (!test_bit(j, n->used))
continue;
data = ahash_data(n, j, dsize);
- memcpy(tmp->value + d * dsize, data, dsize);
+ memcpy(tmp->value + d * dsize,
+ data, dsize);
set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, i), tmp);
kfree_rcu(n, rcu);
}
}
+ spin_unlock_bh(&t->hregion[r].lock);
}
static void
-mtype_gc(struct timer_list *t)
+mtype_gc(struct work_struct *work)
{
- struct htype *h = from_timer(h, t, gc);
- struct ip_set *set = h->set;
+ struct htable_gc *gc;
+ struct ip_set *set;
+ struct htype *h;
+ struct htable *t;
+ u32 r, numof_locks;
+ unsigned int next_run;
+
+ gc = container_of(work, struct htable_gc, dwork.work);
+ set = gc->set;
+ h = set->data;
- pr_debug("called\n");
spin_lock_bh(&set->lock);
- mtype_expire(set, h);
+ t = ipset_dereference_set(h->table, set);
+ atomic_inc(&t->uref);
+ numof_locks = ahash_numof_locks(t->htable_bits);
+ r = gc->region++;
+ if (r >= numof_locks) {
+ r = gc->region = 0;
+ }
+ next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks;
+ if (next_run < HZ/10)
+ next_run = HZ/10;
spin_unlock_bh(&set->lock);
- h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
- add_timer(&h->gc);
+ mtype_gc_do(set, h, t, r);
+
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by expire: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run);
+
+}
+
+static void
+mtype_gc_init(struct htable_gc *gc)
+{
+ INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc);
+ queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+static int
+mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+ struct ip_set_ext *mext, u32 flags);
+
/* Resize a hash: create a new hash table with doubling the hashsize
* and inserting the elements to it. Repeat until we succeed or
* fail due to memory pressures.
@@ -547,7 +644,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t extsize, dsize = set->dsize;
+ size_t dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -555,7 +652,9 @@ mtype_resize(struct ip_set *set, bool retried)
struct mtype_elem *data;
struct mtype_elem *d;
struct hbucket *n, *m;
- u32 i, j, key;
+ struct list_head *l, *lt;
+ struct mtype_resize_ad *x;
+ u32 i, j, r, nr, key;
int ret;
#ifdef IP_SET_HASH_WITH_NETS
@@ -563,10 +662,8 @@ mtype_resize(struct ip_set *set, bool retried)
if (!tmp)
return -ENOMEM;
#endif
- rcu_read_lock_bh();
- orig = rcu_dereference_bh_nfnl(h->table);
+ orig = ipset_dereference_bh_nfnl(h->table);
htable_bits = orig->htable_bits;
- rcu_read_unlock_bh();
retry:
ret = 0;
@@ -583,88 +680,124 @@ retry:
ret = -ENOMEM;
goto out;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits));
+ if (!t->hregion) {
+ kfree(t);
+ ret = -ENOMEM;
+ goto out;
+ }
t->htable_bits = htable_bits;
+ t->maxelem = h->maxelem / ahash_numof_locks(htable_bits);
+ for (i = 0; i < ahash_numof_locks(htable_bits); i++)
+ spin_lock_init(&t->hregion[i].lock);
- spin_lock_bh(&set->lock);
- orig = __ipset_dereference_protected(h->table, 1);
- /* There can't be another parallel resizing, but dumping is possible */
+ /* There can't be another parallel resizing,
+ * but dumping, gc, kernel side add/del are possible
+ */
+ orig = ipset_dereference_bh_nfnl(h->table);
atomic_set(&orig->ref, 1);
atomic_inc(&orig->uref);
- extsize = 0;
pr_debug("attempt to resize set %s from %u to %u, t %p\n",
set->name, orig->htable_bits, htable_bits, orig);
- for (i = 0; i < jhash_size(orig->htable_bits); i++) {
- n = __ipset_dereference_protected(hbucket(orig, i), 1);
- if (!n)
- continue;
- for (j = 0; j < n->pos; j++) {
- if (!test_bit(j, n->used))
+ for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) {
+ /* Expire may replace a hbucket with another one */
+ rcu_read_lock_bh();
+ for (i = ahash_bucket_start(r, orig->htable_bits);
+ i < ahash_bucket_end(r, orig->htable_bits); i++) {
+ n = __ipset_dereference(hbucket(orig, i));
+ if (!n)
continue;
- data = ahash_data(n, j, dsize);
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, dsize);
+ if (SET_ELEM_EXPIRED(set, data))
+ continue;
#ifdef IP_SET_HASH_WITH_NETS
- /* We have readers running parallel with us,
- * so the live data cannot be modified.
- */
- flags = 0;
- memcpy(tmp, data, dsize);
- data = tmp;
- mtype_data_reset_flags(data, &flags);
+ /* We have readers running parallel with us,
+ * so the live data cannot be modified.
+ */
+ flags = 0;
+ memcpy(tmp, data, dsize);
+ data = tmp;
+ mtype_data_reset_flags(data, &flags);
#endif
- key = HKEY(data, h->initval, htable_bits);
- m = __ipset_dereference_protected(hbucket(t, key), 1);
- if (!m) {
- m = kzalloc(sizeof(*m) +
+ key = HKEY(data, h->initval, htable_bits);
+ m = __ipset_dereference(hbucket(t, key));
+ nr = ahash_region(key, htable_bits);
+ if (!m) {
+ m = kzalloc(sizeof(*m) +
AHASH_INIT_SIZE * dsize,
GFP_ATOMIC);
- if (!m) {
- ret = -ENOMEM;
- goto cleanup;
- }
- m->size = AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- RCU_INIT_POINTER(hbucket(t, key), m);
- } else if (m->pos >= m->size) {
- struct hbucket *ht;
-
- if (m->size >= AHASH_MAX(h)) {
- ret = -EAGAIN;
- } else {
- ht = kzalloc(sizeof(*ht) +
+ if (!m) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+ m->size = AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ RCU_INIT_POINTER(hbucket(t, key), m);
+ } else if (m->pos >= m->size) {
+ struct hbucket *ht;
+
+ if (m->size >= AHASH_MAX(h)) {
+ ret = -EAGAIN;
+ } else {
+ ht = kzalloc(sizeof(*ht) +
(m->size + AHASH_INIT_SIZE)
* dsize,
GFP_ATOMIC);
- if (!ht)
- ret = -ENOMEM;
+ if (!ht)
+ ret = -ENOMEM;
+ }
+ if (ret < 0)
+ goto cleanup;
+ memcpy(ht, m, sizeof(struct hbucket) +
+ m->size * dsize);
+ ht->size = m->size + AHASH_INIT_SIZE;
+ t->hregion[nr].ext_size +=
+ ext_size(AHASH_INIT_SIZE,
+ dsize);
+ kfree(m);
+ m = ht;
+ RCU_INIT_POINTER(hbucket(t, key), ht);
}
- if (ret < 0)
- goto cleanup;
- memcpy(ht, m, sizeof(struct hbucket) +
- m->size * dsize);
- ht->size = m->size + AHASH_INIT_SIZE;
- extsize += ext_size(AHASH_INIT_SIZE, dsize);
- kfree(m);
- m = ht;
- RCU_INIT_POINTER(hbucket(t, key), ht);
- }
- d = ahash_data(m, m->pos, dsize);
- memcpy(d, data, dsize);
- set_bit(m->pos++, m->used);
+ d = ahash_data(m, m->pos, dsize);
+ memcpy(d, data, dsize);
+ set_bit(m->pos++, m->used);
+ t->hregion[nr].elements++;
#ifdef IP_SET_HASH_WITH_NETS
- mtype_data_reset_flags(d, &flags);
+ mtype_data_reset_flags(d, &flags);
#endif
+ }
}
+ rcu_read_unlock_bh();
}
- rcu_assign_pointer(h->table, t);
- set->ext_size = extsize;
- spin_unlock_bh(&set->lock);
+ /* There can't be any other writer. */
+ rcu_assign_pointer(h->table, t);
/* Give time to other readers of the set */
synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
- /* If there's nobody else dumping the table, destroy it */
+ /* Add/delete elements processed by the SET target during resize.
+ * Kernel-side add cannot trigger a resize and userspace actions
+ * are serialized by the mutex.
+ */
+ list_for_each_safe(l, lt, &h->ad) {
+ x = list_entry(l, struct mtype_resize_ad, list);
+ if (x->ad == IPSET_ADD) {
+ mtype_add(set, &x->d, &x->ext, &x->mext, x->flags);
+ } else {
+ mtype_del(set, &x->d, NULL, NULL, 0);
+ }
+ list_del(l);
+ kfree(l);
+ }
+ /* If there's nobody else using the table, destroy it */
if (atomic_dec_and_test(&orig->uref)) {
pr_debug("Table destroy by resize %p\n", orig);
mtype_ahash_destroy(set, orig, false);
@@ -677,15 +810,44 @@ out:
return ret;
cleanup:
+ rcu_read_unlock_bh();
atomic_set(&orig->ref, 0);
atomic_dec(&orig->uref);
- spin_unlock_bh(&set->lock);
mtype_ahash_destroy(set, t, false);
if (ret == -EAGAIN)
goto retry;
goto out;
}
+/* Get the current number of elements and ext_size in the set */
+static void
+mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
+{
+ struct htype *h = set->data;
+ const struct htable *t;
+ u32 i, j, r;
+ struct hbucket *n;
+ struct mtype_elem *data;
+
+ t = rcu_dereference_bh(h->table);
+ for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) {
+ for (i = ahash_bucket_start(r, t->htable_bits);
+ i < ahash_bucket_end(r, t->htable_bits); i++) {
+ n = rcu_dereference_bh(hbucket(t, i));
+ if (!n)
+ continue;
+ for (j = 0; j < n->pos; j++) {
+ if (!test_bit(j, n->used))
+ continue;
+ data = ahash_data(n, j, set->dsize);
+ if (!SET_ELEM_EXPIRED(set, data))
+ (*elements)++;
+ }
+ }
+ *ext_size += t->hregion[r].ext_size;
+ }
+}
+
/* Add an element to a hash and update the internal counters when succeeded,
* otherwise report the proper error code.
*/
@@ -698,32 +860,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n, *old = ERR_PTR(-ENOENT);
- int i, j = -1;
+ int i, j = -1, ret;
bool flag_exist = flags & IPSET_FLAG_EXIST;
bool deleted = false, forceadd = false, reuse = false;
- u32 key, multi = 0;
+ u32 r, key, multi = 0, elements, maxelem;
- if (set->elements >= h->maxelem) {
- if (SET_WITH_TIMEOUT(set))
- /* FIXME: when set is full, we slow down here */
- mtype_expire(set, h);
- if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
+ key = HKEY(value, h->initval, t->htable_bits);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ elements = t->hregion[r].elements;
+ maxelem = t->maxelem;
+ if (elements >= maxelem) {
+ u32 e;
+ if (SET_WITH_TIMEOUT(set)) {
+ rcu_read_unlock_bh();
+ mtype_gc_do(set, h, t, r);
+ rcu_read_lock_bh();
+ }
+ maxelem = h->maxelem;
+ elements = 0;
+ for (e = 0; e < ahash_numof_locks(t->htable_bits); e++)
+ elements += t->hregion[e].elements;
+ if (elements >= maxelem && SET_WITH_FORCEADD(set))
forceadd = true;
}
+ rcu_read_unlock_bh();
- t = ipset_dereference_protected(h->table, set);
- key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n) {
- if (forceadd || set->elements >= h->maxelem)
+ if (forceadd || elements >= maxelem)
goto set_full;
old = NULL;
n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
n->size = AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
goto copy_elem;
}
for (i = 0; i < n->pos; i++) {
@@ -737,38 +916,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
data = ahash_data(n, i, set->dsize);
if (mtype_data_equal(data, d, &multi)) {
- if (flag_exist ||
- (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))) {
+ if (flag_exist || SET_ELEM_EXPIRED(set, data)) {
/* Just the extensions could be overwritten */
j = i;
goto overwrite_extensions;
}
- return -IPSET_ERR_EXIST;
+ ret = -IPSET_ERR_EXIST;
+ goto unlock;
}
/* Reuse first timed out entry */
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)) &&
- j == -1) {
+ if (SET_ELEM_EXPIRED(set, data) && j == -1) {
j = i;
reuse = true;
}
}
if (reuse || forceadd) {
+ if (j == -1)
+ j = 0;
data = ahash_data(n, j, set->dsize);
if (!deleted) {
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_del_cidr(h,
+ mtype_del_cidr(set, h,
NCIDR_PUT(DCIDR_GET(data->cidr, i)),
i);
#endif
ip_set_ext_destroy(set, data);
- set->elements--;
+ t->hregion[r].elements--;
}
goto copy_data;
}
- if (set->elements >= h->maxelem)
+ if (elements >= maxelem)
goto set_full;
/* Create a new slot */
if (n->pos >= n->size) {
@@ -776,28 +954,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (n->size >= AHASH_MAX(h)) {
/* Trigger rehashing */
mtype_data_next(&h->next, d);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto resize;
}
old = n;
n = kzalloc(sizeof(*n) +
(old->size + AHASH_INIT_SIZE) * set->dsize,
GFP_ATOMIC);
- if (!n)
- return -ENOMEM;
+ if (!n) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
memcpy(n, old, sizeof(struct hbucket) +
old->size * set->dsize);
n->size = old->size + AHASH_INIT_SIZE;
- set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
+ t->hregion[r].ext_size +=
+ ext_size(AHASH_INIT_SIZE, set->dsize);
}
copy_elem:
j = n->pos++;
data = ahash_data(n, j, set->dsize);
copy_data:
- set->elements++;
+ t->hregion[r].elements++;
#ifdef IP_SET_HASH_WITH_NETS
for (i = 0; i < IPSET_NET_COUNT; i++)
- mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
+ mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
#endif
memcpy(data, d, sizeof(struct mtype_elem));
overwrite_extensions:
@@ -820,13 +1002,41 @@ overwrite_extensions:
if (old)
kfree_rcu(old, rcu);
}
+ ret = 0;
+resize:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side add, save values */
+ struct mtype_resize_ad *x;
+
+ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC);
+ if (!x)
+ /* Don't bother */
+ goto out;
+ x->ad = IPSET_ADD;
+ memcpy(&x->d, value, sizeof(struct mtype_elem));
+ memcpy(&x->ext, ext, sizeof(struct ip_set_ext));
+ memcpy(&x->mext, mext, sizeof(struct ip_set_ext));
+ x->flags = flags;
+ spin_lock_bh(&set->lock);
+ list_add_tail(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ goto out;
- return 0;
set_full:
if (net_ratelimit())
pr_warn("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
- return -IPSET_ERR_HASH_FULL;
+ set->name, maxelem);
+ ret = -IPSET_ERR_HASH_FULL;
+unlock:
+ spin_unlock_bh(&t->hregion[r].lock);
+out:
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by add: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
+ return ret;
}
/* Delete an element from the hash and free up space if possible.
@@ -840,13 +1050,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
const struct mtype_elem *d = value;
struct mtype_elem *data;
struct hbucket *n;
- int i, j, k, ret = -IPSET_ERR_EXIST;
+ struct mtype_resize_ad *x = NULL;
+ int i, j, k, r, ret = -IPSET_ERR_EXIST;
u32 key, multi = 0;
size_t dsize = set->dsize;
- t = ipset_dereference_protected(h->table, set);
+ /* Userspace add and resize is excluded by the mutex.
+ * Kernespace add does not trigger resize.
+ */
+ rcu_read_lock_bh();
+ t = rcu_dereference_bh(h->table);
key = HKEY(value, h->initval, t->htable_bits);
- n = __ipset_dereference_protected(hbucket(t, key), 1);
+ r = ahash_region(key, t->htable_bits);
+ atomic_inc(&t->uref);
+ rcu_read_unlock_bh();
+
+ spin_lock_bh(&t->hregion[r].lock);
+ n = rcu_dereference_bh(hbucket(t, key));
if (!n)
goto out;
for (i = 0, k = 0; i < n->pos; i++) {
@@ -857,8 +1077,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
data = ahash_data(n, i, dsize);
if (!mtype_data_equal(data, d, &multi))
continue;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(data, set)))
+ if (SET_ELEM_EXPIRED(set, data))
goto out;
ret = 0;
@@ -866,20 +1085,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
smp_mb__after_atomic();
if (i + 1 == n->pos)
n->pos--;
- set->elements--;
+ t->hregion[r].elements--;
#ifdef IP_SET_HASH_WITH_NETS
for (j = 0; j < IPSET_NET_COUNT; j++)
- mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
- j);
+ mtype_del_cidr(set, h,
+ NCIDR_PUT(DCIDR_GET(d->cidr, j)), j);
#endif
ip_set_ext_destroy(set, data);
+ if (atomic_read(&t->ref) && ext->target) {
+ /* Resize is in process and kernel side del,
+ * save values
+ */
+ x = kzalloc(sizeof(struct mtype_resize_ad),
+ GFP_ATOMIC);
+ if (x) {
+ x->ad = IPSET_DEL;
+ memcpy(&x->d, value,
+ sizeof(struct mtype_elem));
+ x->flags = flags;
+ }
+ }
for (; i < n->pos; i++) {
if (!test_bit(i, n->used))
k++;
}
if (n->pos == 0 && k == 0) {
- set->ext_size -= ext_size(n->size, dsize);
+ t->hregion[r].ext_size -= ext_size(n->size, dsize);
rcu_assign_pointer(hbucket(t, key), NULL);
kfree_rcu(n, rcu);
} else if (k >= AHASH_INIT_SIZE) {
@@ -898,7 +1130,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
k++;
}
tmp->pos = k;
- set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
+ t->hregion[r].ext_size -=
+ ext_size(AHASH_INIT_SIZE, dsize);
rcu_assign_pointer(hbucket(t, key), tmp);
kfree_rcu(n, rcu);
}
@@ -906,6 +1139,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
out:
+ spin_unlock_bh(&t->hregion[r].lock);
+ if (x) {
+ spin_lock_bh(&set->lock);
+ list_add(&x->list, &h->ad);
+ spin_unlock_bh(&set->lock);
+ }
+ if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
+ pr_debug("Table destroy after resize by del: %p\n", t);
+ mtype_ahash_destroy(set, t, false);
+ }
return ret;
}
@@ -991,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
int i, ret = 0;
u32 key, multi = 0;
+ rcu_read_lock_bh();
t = rcu_dereference_bh(h->table);
#ifdef IP_SET_HASH_WITH_NETS
/* If we test an IP address and not a network address,
@@ -1022,6 +1266,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
goto out;
}
out:
+ rcu_read_unlock_bh();
return ret;
}
@@ -1033,23 +1278,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
const struct htable *t;
struct nlattr *nested;
size_t memsize;
+ u32 elements = 0;
+ size_t ext_size = 0;
u8 htable_bits;
- /* If any members have expired, set->elements will be wrong
- * mytype_expire function will update it with the right count.
- * we do not hold set->lock here, so grab it first.
- * set->elements can still be incorrect in the case of a huge set,
- * because elements might time out during the listing.
- */
- if (SET_WITH_TIMEOUT(set)) {
- spin_lock_bh(&set->lock);
- mtype_expire(set, h);
- spin_unlock_bh(&set->lock);
- }
-
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
- memsize = mtype_ahash_memsize(h, t) + set->ext_size;
+ t = rcu_dereference_bh(h->table);
+ mtype_ext_size(set, &elements, &ext_size);
+ memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
htable_bits = t->htable_bits;
rcu_read_unlock_bh();
@@ -1071,7 +1307,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
#endif
if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
- nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+ nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
@@ -1091,15 +1327,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
if (start) {
rcu_read_lock_bh();
- t = rcu_dereference_bh_nfnl(h->table);
+ t = ipset_dereference_bh_nfnl(h->table);
atomic_inc(&t->uref);
cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
rcu_read_unlock_bh();
} else if (cb->args[IPSET_CB_PRIVATE]) {
t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
- /* Resizing didn't destroy the hash table */
- pr_debug("Table destroy by dump: %p\n", t);
+ pr_debug("Table destroy after resize "
+ " by dump: %p\n", t);
mtype_ahash_destroy(set, t, false);
}
cb->args[IPSET_CB_PRIVATE] = 0;
@@ -1141,8 +1377,7 @@ mtype_list(const struct ip_set *set,
if (!test_bit(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ if (SET_ELEM_EXPIRED(set, e))
continue;
pr_debug("list hash %lu hbucket %p i %u, data %p\n",
cb->args[IPSET_CB_ARG0], n, i, e);
@@ -1208,6 +1443,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .region_lock = true,
};
#ifdef IP_SET_EMIT_CREATE
@@ -1226,6 +1462,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
size_t hsize;
struct htype *h;
struct htable *t;
+ u32 i;
pr_debug("Create set %s with family %s\n",
set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
@@ -1294,6 +1531,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
kfree(h);
return -ENOMEM;
}
+ t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits));
+ if (!t->hregion) {
+ kfree(t);
+ kfree(h);
+ return -ENOMEM;
+ }
+ h->gc.set = set;
+ for (i = 0; i < ahash_numof_locks(hbits); i++)
+ spin_lock_init(&t->hregion[i].lock);
h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
h->netmask = netmask;
@@ -1304,9 +1550,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
get_random_bytes(&h->initval, sizeof(h->initval));
t->htable_bits = hbits;
+ t->maxelem = h->maxelem / ahash_numof_locks(hbits);
RCU_INIT_POINTER(h->table, t);
- h->set = set;
+ INIT_LIST_HEAD(&h->ad);
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
@@ -1329,12 +1576,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
#endif
- IPSET_TOKEN(HTYPE, 4_gc_init)(set,
- IPSET_TOKEN(HTYPE, 4_gc));
+ IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc);
#ifndef IP_SET_PROTO_UNDEF
else
- IPSET_TOKEN(HTYPE, 6_gc_init)(set,
- IPSET_TOKEN(HTYPE, 6_gc));
+ IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc);
#endif
}
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index feac8553f6d9..4fc0c924ed5d 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1766,11 +1766,13 @@ static bool pipapo_match_field(struct nft_pipapo_field *f,
static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
- const u8 *data = (const u8 *)elem->key.val.data;
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m = priv->clone;
+ struct nft_pipapo_elem *e = elem->priv;
int rules_f0, first_rule = 0;
- struct nft_pipapo_elem *e;
+ const u8 *data;
+
+ data = (const u8 *)nft_set_ext_key(&e->ext);
e = pipapo_get(net, set, data, 0);
if (IS_ERR(e))
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 7a2c4b8408c4..8c835ad63729 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -402,15 +402,6 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
remove_proc_entry(hinfo->name, parent);
}
-static void htable_destroy(struct xt_hashlimit_htable *hinfo)
-{
- cancel_delayed_work_sync(&hinfo->gc_work);
- htable_remove_proc_entry(hinfo);
- htable_selective_cleanup(hinfo, true);
- kfree(hinfo->name);
- vfree(hinfo);
-}
-
static struct xt_hashlimit_htable *htable_find_get(struct net *net,
const char *name,
u_int8_t family)
@@ -432,8 +423,13 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
{
if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
hlist_del(&hinfo->node);
+ htable_remove_proc_entry(hinfo);
mutex_unlock(&hashlimit_mutex);
- htable_destroy(hinfo);
+
+ cancel_delayed_work_sync(&hinfo->gc_work);
+ htable_selective_cleanup(hinfo, true);
+ kfree(hinfo->name);
+ vfree(hinfo);
}
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 0522b2b1fd95..9f357aa22b94 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -497,8 +497,9 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
- if (err && parallel) {
- kfree(attrbuf);
+ if (err) {
+ if (parallel)
+ kfree(attrbuf);
return ERR_PTR(err);
}
return attrbuf;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 90a31b15585f..8c466a712cda 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -186,6 +186,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
+ cookie_len /* TCA_ACT_COOKIE */
+ nla_total_size(0) /* TCA_ACT_STATS nested */
+ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */
/* TCA_STATS_BASIC */
+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
/* TCA_STATS_PKT64 */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 90988a511cd5..6fd44bdb0fc3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -512,15 +512,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
int local_contact)
{
+ bool is_smcd = smc->conn.lgr->is_smcd;
+
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(smc->conn.lgr);
- if (smc->conn.lgr->is_smcd)
+ smc_lgr_cleanup_early(&smc->conn);
+ else
+ smc_conn_free(&smc->conn);
+ if (is_smcd)
/* there is only one lgr role for SMC-D; use server lock */
mutex_unlock(&smc_server_lgr_pending);
else
mutex_unlock(&smc_client_lgr_pending);
- smc_conn_free(&smc->conn);
smc->connect_nonblock = 0;
return reason_code;
}
@@ -1091,7 +1094,6 @@ static void smc_listen_out_err(struct smc_sock *new_smc)
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
- smc_conn_free(&new_smc->conn);
smc_listen_out(new_smc);
}
@@ -1102,12 +1104,13 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
{
/* RDMA setup failed, switch back to TCP */
if (local_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
if (reason_code < 0) { /* error, no fallback possible */
smc_listen_out_err(new_smc);
return;
}
- smc_conn_free(&new_smc->conn);
smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
@@ -1170,16 +1173,18 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
new_smc->conn.lgr->vlan_id,
new_smc->conn.lgr->smcd)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_SMCDNOTALK;
}
/* Create send and receive buffers */
if (smc_buf_create(new_smc, true)) {
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
- smc_lgr_forget(new_smc->conn.lgr);
- smc_conn_free(&new_smc->conn);
+ smc_lgr_cleanup_early(&new_smc->conn);
+ else
+ smc_conn_free(&new_smc->conn);
return SMC_CLC_DECL_MEM;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2249de5379ee..5b085efa3bce 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -162,6 +162,18 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
conn->lgr = NULL;
}
+void smc_lgr_cleanup_early(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ if (!lgr)
+ return;
+
+ smc_conn_free(conn);
+ smc_lgr_forget(lgr);
+ smc_lgr_schedule_free_work_fast(lgr);
+}
+
/* Send delete link, either as client to request the initiation
* of the DELETE LINK sequence from server; or as server to
* initiate the delete processing. See smc_llc_rx_delete_link().
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c472e12951d1..234ae25f0025 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -296,6 +296,7 @@ struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
+void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
@@ -316,7 +317,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smcd_conn_free(struct smc_connection *conn);
void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 548632621f4b..d6ba186f67e2 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -573,6 +573,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
struct smc_ib_device *smcibdev;
smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
+ if (!smcibdev || smcibdev->ibdev != ibdev)
+ return;
ib_set_client_data(ibdev, &smc_ib_client, NULL);
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 62c12cb5763e..68debcb28fa4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -682,6 +682,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
return 0;
}
+#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -692,6 +693,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
}
}
+#else
+#define unix_show_fdinfo NULL
+#endif
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9c5b2a91baad..a5f28708e0e7 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,6 +451,12 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (vsk->transport == new_transport)
return 0;
+ /* transport->release() must be called with sock lock acquired.
+ * This path can only be taken during vsock_stream_connect(),
+ * where we have already held the sock lock.
+ * In the other cases, this function is called on a new socket
+ * which is not assigned to any transport.
+ */
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
}
@@ -753,20 +759,18 @@ static void __vsock_release(struct sock *sk, int level)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- /* The release call is supposed to use lock_sock_nested()
- * rather than lock_sock(), if a sock lock should be acquired.
- */
- if (vsk->transport)
- vsk->transport->release(vsk);
- else if (sk->sk_type == SOCK_STREAM)
- vsock_remove_sock(vsk);
-
/* When "level" is SINGLE_DEPTH_NESTING, use the nested
* version to avoid the warning "possible recursive locking
* detected". When "level" is 0, lock_sock_nested(sk, level)
* is the same as lock_sock(sk).
*/
lock_sock_nested(sk, level);
+
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sk->sk_type == SOCK_STREAM)
+ vsock_remove_sock(vsk);
+
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 3492c021925f..630b851f8150 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -526,12 +526,9 @@ static bool hvs_close_lock_held(struct vsock_sock *vsk)
static void hvs_release(struct vsock_sock *vsk)
{
- struct sock *sk = sk_vsock(vsk);
bool remove_sock;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
remove_sock = hvs_close_lock_held(vsk);
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index d9f0c9c5425a..f3c4bab2f737 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -829,7 +829,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
struct sock *sk = &vsk->sk;
bool remove_sock = true;
- lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
@@ -837,7 +836,6 @@ void virtio_transport_release(struct vsock_sock *vsk)
list_del(&pkt->list);
virtio_transport_free_pkt(pkt);
}
- release_sock(sk);
if (remove_sock)
vsock_remove_sock(vsk);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cedf17d4933f..5b19e9fac4aa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -20,6 +20,7 @@
#include <linux/netlink.h>
#include <linux/nospec.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <net/net_namespace.h>
#include <net/genetlink.h>
#include <net/cfg80211.h>
@@ -4800,8 +4801,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
err = nl80211_parse_he_obss_pd(
info->attrs[NL80211_ATTR_HE_OBSS_PD],
&params.he_obss_pd);
- if (err)
- return err;
+ goto out;
}
nl80211_calculate_ap_params(&params);
@@ -4823,6 +4823,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
}
wdev_unlock(wdev);
+out:
kfree(params.acl);
return err;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index fff9a74891fc..1a8218f1bbe0 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
break;
}
- if (IS_ERR(reg_rule)) {
+ if (IS_ERR_OR_NULL(reg_rule)) {
pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
chan->center_freq);
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index bae62549e3d2..752ff0a225a9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -300,15 +300,15 @@ DT_BINDING_DIR := Documentation/devicetree/bindings
DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.yaml
quiet_cmd_dtb_check = CHECK $@
- cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ ;
+ cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@
-define rule_dtc_dt_yaml
+define rule_dtc
$(call cmd_and_fixdep,dtc,yaml)
$(call cmd,dtb_check)
endef
$(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE
- $(call if_changed_rule,dtc_dt_yaml)
+ $(call if_changed_rule,dtc)
dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ebe1685e92dd..d5e517d1c3dd 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -512,6 +512,8 @@
#define MSR_K7_HWCR 0xc0010015
#define MSR_K7_HWCR_SMMLOCK_BIT 0
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT 30
+#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 503d3f42da16..3f3f780c8c65 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -390,6 +390,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h
index e978a63d3222..036e667596eb 100644
--- a/tools/bootconfig/include/linux/printk.h
+++ b/tools/bootconfig/include/linux/printk.h
@@ -4,10 +4,7 @@
#include <stdio.h>
-/* controllable printf */
-extern int pr_output;
-#define printk(fmt, ...) \
- (pr_output ? printf(fmt, ##__VA_ARGS__) : 0)
+#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__)
#define pr_err printk
#define pr_warn printk
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index e18eeb070562..a9b97814d1a9 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -14,8 +14,6 @@
#include <linux/kernel.h>
#include <linux/bootconfig.h>
-int pr_output = 1;
-
static int xbc_show_array(struct xbc_node *node)
{
const char *val;
@@ -131,15 +129,26 @@ int load_xbc_from_initrd(int fd, char **buf)
struct stat stat;
int ret;
u32 size = 0, csum = 0, rcsum;
+ char magic[BOOTCONFIG_MAGIC_LEN];
ret = fstat(fd, &stat);
if (ret < 0)
return -errno;
- if (stat.st_size < 8)
+ if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN)
+ return 0;
+
+ if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) {
+ pr_err("Failed to lseek: %d\n", -errno);
+ return -errno;
+ }
+ if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
+ return -errno;
+ /* Check the bootconfig magic bytes */
+ if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
return 0;
- if (lseek(fd, -8, SEEK_END) < 0) {
+ if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) {
pr_err("Failed to lseek: %d\n", -errno);
return -errno;
}
@@ -150,11 +159,14 @@ int load_xbc_from_initrd(int fd, char **buf)
if (read(fd, &csum, sizeof(u32)) < 0)
return -errno;
- /* Wrong size, maybe no boot config here */
- if (stat.st_size < size + 8)
- return 0;
+ /* Wrong size error */
+ if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
+ pr_err("bootconfig size is too big\n");
+ return -E2BIG;
+ }
- if (lseek(fd, stat.st_size - 8 - size, SEEK_SET) < 0) {
+ if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN),
+ SEEK_SET) < 0) {
pr_err("Failed to lseek: %d\n", -errno);
return -errno;
}
@@ -163,17 +175,17 @@ int load_xbc_from_initrd(int fd, char **buf)
if (ret < 0)
return ret;
- /* Wrong Checksum, maybe no boot config here */
+ /* Wrong Checksum */
rcsum = checksum((unsigned char *)*buf, size);
if (csum != rcsum) {
pr_err("checksum error: %d != %d\n", csum, rcsum);
- return 0;
+ return -EINVAL;
}
ret = xbc_init(*buf);
- /* Wrong data, maybe no boot config here */
+ /* Wrong data */
if (ret < 0)
- return 0;
+ return ret;
return size;
}
@@ -213,20 +225,15 @@ int delete_xbc(const char *path)
return -errno;
}
- /*
- * Suppress error messages in xbc_init() because it can be just a
- * data which concidentally matches the size and checksum footer.
- */
- pr_output = 0;
size = load_xbc_from_initrd(fd, &buf);
- pr_output = 1;
if (size < 0) {
ret = size;
pr_err("Failed to load a boot config from initrd: %d\n", ret);
} else if (size > 0) {
ret = fstat(fd, &stat);
if (!ret)
- ret = ftruncate(fd, stat.st_size - size - 8);
+ ret = ftruncate(fd, stat.st_size
+ - size - 8 - BOOTCONFIG_MAGIC_LEN);
if (ret)
ret = -errno;
} /* Ignore if there is no boot config in initrd */
@@ -295,6 +302,12 @@ int apply_xbc(const char *path, const char *xbc_path)
pr_err("Failed to apply a boot config: %d\n", ret);
return ret;
}
+ /* Write a magic word of the bootconfig */
+ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
+ if (ret < 0) {
+ pr_err("Failed to apply a boot config magic: %d\n", ret);
+ return ret;
+ }
close(fd);
free(data);
diff --git a/tools/bootconfig/samples/bad-mixed-kv1.bconf b/tools/bootconfig/samples/bad-mixed-kv1.bconf
new file mode 100644
index 000000000000..1761547dd05c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-mixed-kv1.bconf
@@ -0,0 +1,3 @@
+# value -> subkey pattern
+key = value
+key.subkey = another-value
diff --git a/tools/bootconfig/samples/bad-mixed-kv2.bconf b/tools/bootconfig/samples/bad-mixed-kv2.bconf
new file mode 100644
index 000000000000..6b32e0c3878c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-mixed-kv2.bconf
@@ -0,0 +1,3 @@
+# subkey -> value pattern
+key.subkey = value
+key = another-value
diff --git a/tools/bootconfig/samples/bad-samekey.bconf b/tools/bootconfig/samples/bad-samekey.bconf
new file mode 100644
index 000000000000..e8d983a4563c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-samekey.bconf
@@ -0,0 +1,6 @@
+# Same key value is not allowed
+key {
+ foo = value
+ bar = value2
+}
+key.foo = value
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 1de06de328e2..1411f4c3454f 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -9,7 +9,7 @@ TEMPCONF=`mktemp temp-XXXX.bconf`
NG=0
cleanup() {
- rm -f $INITRD $TEMPCONF
+ rm -f $INITRD $TEMPCONF $OUTFILE
exit $NG
}
@@ -49,7 +49,7 @@ xpass $BOOTCONF -a $TEMPCONF $INITRD
new_size=$(stat -c %s $INITRD)
echo "File size check"
-xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9)
+xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
echo "Apply command repeat test"
xpass $BOOTCONF -a $TEMPCONF $INITRD
@@ -71,7 +71,6 @@ printf " \0\0\0 \0\0\0" >> $INITRD
$BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1
xfail grep -i "failed" $OUTFILE
xfail grep -i "error" $OUTFILE
-rm $OUTFILE
echo "Max node number check"
@@ -96,6 +95,19 @@ truncate -s 32764 $TEMPCONF
echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0')
xpass $BOOTCONF -a $TEMPCONF $INITRD
+echo "Adding same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key += qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+xpass grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
echo "=== expected failure cases ==="
for i in samples/bad-* ; do
xfail $BOOTCONF -a $i $INITRD
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c4dd23c4b478..8ead55593984 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -239,7 +239,6 @@ buildid.*::
set buildid.dir to /dev/null. The default is $HOME/.debug
annotate.*::
- These options work only for TUI.
These are in control of addresses, jump function, source code
in lines of assembly code from a specific program.
@@ -269,6 +268,8 @@ annotate.*::
│ mov (%rdi),%rdx
│ return n;
+ This option works with tui, stdio2 browsers.
+
annotate.use_offset::
Basing on a first address of a loaded function, offset can be used.
Instead of using original addresses of assembly code,
@@ -287,6 +288,8 @@ annotate.*::
368:│ mov 0x8(%r14),%rdi
+ This option works with tui, stdio2 browsers.
+
annotate.jump_arrows::
There can be jump instruction among assembly code.
Depending on a boolean value of jump_arrows,
@@ -306,6 +309,8 @@ annotate.*::
│1330: mov %r15,%r10
│1333: cmp %r15,%r14
+ This option works with tui browser.
+
annotate.show_linenr::
When showing source code if this option is 'true',
line numbers are printed as below.
@@ -325,6 +330,8 @@ annotate.*::
│ array++;
│ }
+ This option works with tui, stdio2 browsers.
+
annotate.show_nr_jumps::
Let's see a part of assembly code.
@@ -335,6 +342,8 @@ annotate.*::
│1 1382: movb $0x1,-0x270(%rbp)
+ This option works with tui, stdio2 browsers.
+
annotate.show_total_period::
To compare two records on an instruction base, with this option
provided, display total number of samples that belong to a line
@@ -348,11 +357,30 @@ annotate.*::
99.93 │ mov %eax,%eax
+ This option works with tui, stdio2, stdio browsers.
+
+ annotate.show_nr_samples::
+ By default perf annotate shows percentage of samples. This option
+ can be used to print absolute number of samples. Ex, when set as
+ false:
+
+ Percent│
+ 74.03 │ mov %fs:0x28,%rax
+
+ When set as true:
+
+ Samples│
+ 6 │ mov %fs:0x28,%rax
+
+ This option works with tui, stdio2, stdio browsers.
+
annotate.offset_level::
Default is '1', meaning just jump targets will have offsets show right beside
the instruction. When set to '2' 'call' instructions will also have its offsets
shown, 3 or higher will show offsets for all instructions.
+ This option works with tui, stdio2 browsers.
+
hist.*::
hist.percentage::
This option control the way to calculate overhead of filtered entries -
@@ -490,6 +518,12 @@ top.*::
column by default.
The default is 'true'.
+ top.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'top' subcommand. This option ONLY setup
+ the unwind method. To enable 'perf top' to actually use it,
+ the command line option -g must be specified.
+
man.*::
man.viewer::
This option can assign a tool to view manual pages when 'help'
@@ -517,6 +551,16 @@ record.*::
But if this option is 'no-cache', it will not update the build-id cache.
'skip' skips post-processing and does not update the cache.
+ record.call-graph::
+ This is identical to 'call-graph.record-mode', except it is
+ applicable only for 'record' subcommand. This option ONLY setup
+ the unwind method. To enable 'perf record' to actually use it,
+ the command line option -g must be specified.
+
+ record.aio::
+ Use 'n' control blocks in asynchronous (Posix AIO) trace writing
+ mode ('n' default: 1, max: 4).
+
diff.*::
diff.order::
This option sets the number of columns to sort the result.
@@ -566,6 +610,11 @@ trace.*::
"libbeauty", the default, to use the same argument beautifiers used in the
strace-like sys_enter+sys_exit lines.
+ftrace.*::
+ ftrace.tracer::
+ Can be used to select the default tracer. Possible values are
+ 'function' and 'function_graph'.
+
llvm.*::
llvm.clang-path::
Path to clang. If omit, search it from $PATH.
@@ -610,6 +659,29 @@ scripts.*::
The script gets the same options passed as a full perf script,
in particular -i perfdata file, --cpu, --tid
+convert.*::
+
+ convert.queue-size::
+ Limit the size of ordered_events queue, so we could control
+ allocation size of perf data files without proper finished
+ round events.
+
+intel-pt.*::
+
+ intel-pt.cache-divisor::
+
+ intel-pt.mispred-all::
+ If set, Intel PT decoder will set the mispred flag on all
+ branches.
+
+auxtrace.*::
+
+ auxtrace.dumpdir::
+ s390 only. The directory to save the auxiliary trace buffer
+ can be changed using this option. Ex, auxtrace.dumpdir=/tmp.
+ If the directory does not exist or has the wrong file type,
+ the current directory is used.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2898cfdf8fe1..941f814820b8 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
free(ptr);
}
-static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct cs_etm_recording *ptr =
- container_of(itr, struct cs_etm_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->cs_etm_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist,
- evsel, idx);
- }
-
- return -EINVAL;
-}
-
struct auxtrace_record *cs_etm_record_init(int *err)
{
struct perf_pmu *cs_etm_pmu;
@@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
}
ptr->cs_etm_pmu = cs_etm_pmu;
+ ptr->itr.pmu = cs_etm_pmu;
ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
ptr->itr.recording_options = cs_etm_recording_options;
ptr->itr.info_priv_size = cs_etm_info_priv_size;
@@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
ptr->itr.reference = cs_etm_reference;
ptr->itr.free = cs_etm_recording_free;
- ptr->itr.read_finish = cs_etm_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
*err = 0;
return &ptr->itr;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index eba6541ec0f1..8d6821d9c3f6 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
free(sper);
}
-static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct arm_spe_recording *sper =
- container_of(itr, struct arm_spe_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(sper->evlist, evsel) {
- if (evsel->core.attr.type == sper->arm_spe_pmu->type)
- return perf_evlist__enable_event_idx(sper->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *arm_spe_recording_init(int *err,
struct perf_pmu *arm_spe_pmu)
{
@@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
}
sper->arm_spe_pmu = arm_spe_pmu;
+ sper->itr.pmu = arm_spe_pmu;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;
sper->itr.free = arm_spe_recording_free;
sper->itr.reference = arm_spe_reference;
- sper->itr.read_finish = arm_spe_read_finish;
+ sper->itr.read_finish = auxtrace_record__read_finish;
sper->itr.alignment = 0;
*err = 0;
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 43f736ed47f2..35b61bfc1b1a 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -517,3 +517,5 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 nospu clone3 ppc_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 27d9e214d068..26cee1052179 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -413,20 +413,6 @@ out_err:
return err;
}
-static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_bts_recording *btsr =
- container_of(itr, struct intel_bts_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(btsr->evlist, evsel) {
- if (evsel->core.attr.type == btsr->intel_bts_pmu->type)
- return perf_evlist__enable_event_idx(btsr->evlist,
- evsel, idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_bts_recording_init(int *err)
{
struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
@@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
}
btsr->intel_bts_pmu = intel_bts_pmu;
+ btsr->itr.pmu = intel_bts_pmu;
btsr->itr.recording_options = intel_bts_recording_options;
btsr->itr.info_priv_size = intel_bts_info_priv_size;
btsr->itr.info_fill = intel_bts_info_fill;
@@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
btsr->itr.find_snapshot = intel_bts_find_snapshot;
btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
btsr->itr.reference = intel_bts_reference;
- btsr->itr.read_finish = intel_bts_read_finish;
+ btsr->itr.read_finish = auxtrace_record__read_finish;
btsr->itr.alignment = sizeof(struct branch);
return &btsr->itr;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 20df442fdf36..7eea4fd7ce58 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
-static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
-{
- struct intel_pt_recording *ptr =
- container_of(itr, struct intel_pt_recording, itr);
- struct evsel *evsel;
-
- evlist__for_each_entry(ptr->evlist, evsel) {
- if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
- return perf_evlist__enable_event_idx(ptr->evlist, evsel,
- idx);
- }
- return -EINVAL;
-}
-
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
@@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
}
ptr->intel_pt_pmu = intel_pt_pmu;
+ ptr->itr.pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
ptr->itr.info_fill = intel_pt_info_fill;
@@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
ptr->itr.find_snapshot = intel_pt_find_snapshot;
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
ptr->itr.reference = intel_pt_reference;
- ptr->itr.read_finish = intel_pt_read_finish;
+ ptr->itr.read_finish = auxtrace_record__read_finish;
/*
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
* should give at least 1 PSB per sample.
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ff61795a4d13..6c0a0412502e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -566,6 +566,8 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
return ret;
+ annotation_config__init(&annotate.opts);
+
argc = parse_options(argc, argv, options, annotate_usage, 0);
if (argc) {
/*
@@ -605,8 +607,6 @@ int cmd_annotate(int argc, const char **argv)
if (ret < 0)
goto out_delete;
- annotation_config__init();
-
symbol_conf.try_vmlinux_path = true;
ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 26bc5923e6b5..70548df2abb9 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret = probe_file__del_strlist(kfd, klist);
if (ret < 0)
goto error;
- }
+ } else if (ret == -ENOMEM)
+ goto error;
ret2 = probe_file__get_events(ufd, filter, ulist);
if (ret2 == 0) {
@@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter)
ret2 = probe_file__del_strlist(ufd, ulist);
if (ret2 < 0)
goto error;
- }
+ } else if (ret2 == -ENOMEM)
+ goto error;
if (ret == -ENOENT && ret2 == -ENOENT)
pr_warning("\"%s\" does not hit any event.\n", str);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 9483b3f0cae3..72a12b69f120 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1507,7 +1507,7 @@ repeat:
symbol_conf.priv_size += sizeof(u32);
symbol_conf.sort_by_name = true;
}
- annotation_config__init();
+ annotation_config__init(&report.annotation_opts);
}
if (symbol__init(&session->header.env) < 0)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8affcab75604..f6dd1a63f159 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
return err;
}
- err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL);
+ err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL);
if (err == 0) {
top->sym_filter_entry = he;
} else {
@@ -1683,7 +1683,7 @@ int cmd_top(int argc, const char **argv)
if (status < 0)
goto out_delete_evlist;
- annotation_config__init();
+ annotation_config__init(&top.annotation_opts);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
status = symbol__init(NULL);
diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h
index 607189a315b2..6e61c4bdf548 100644
--- a/tools/perf/include/bpf/pid_filter.h
+++ b/tools/perf/include/bpf/pid_filter.h
@@ -3,7 +3,7 @@
#ifndef _PERF_BPF_PID_FILTER_
#define _PERF_BPF_PID_FILTER_
-#include <bpf/bpf.h>
+#include <bpf.h>
#define pid_filter(name) pid_map(name, bool)
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
index 7ca6fa5463ee..316af5b2ff35 100644
--- a/tools/perf/include/bpf/stdio.h
+++ b/tools/perf/include/bpf/stdio.h
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <bpf/bpf.h>
+#include <bpf.h>
struct bpf_map SEC("maps") __bpf_stdout__ = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h
index d1a35b6c649d..ca7877f9a976 100644
--- a/tools/perf/include/bpf/unistd.h
+++ b/tools/perf/include/bpf/unistd.h
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: LGPL-2.1
-#include <bpf/bpf.h>
+#include <bpf.h>
static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 7cb99b433888..c2cc42daf924 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -14,7 +14,7 @@ add_probe_vfs_getname() {
if [ $had_vfs_getname -eq 1 ] ; then
line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
- perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
+ perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
fi
}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index badbddbb30f8..9023267e5643 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser,
"? Search string backwards\n");
continue;
case 'r':
- {
- script_browse(NULL, NULL);
- continue;
- }
+ script_browse(NULL, NULL);
+ annotate_browser__show(&browser->b, title, help);
+ continue;
case 'k':
notes->options->show_linenr = !notes->options->show_linenr;
break;
@@ -834,13 +833,13 @@ show_sup_ins:
map_symbol__annotation_dump(ms, evsel, browser->opts);
continue;
case 't':
- if (notes->options->show_total_period) {
- notes->options->show_total_period = false;
- notes->options->show_nr_samples = true;
- } else if (notes->options->show_nr_samples)
- notes->options->show_nr_samples = false;
+ if (symbol_conf.show_total_period) {
+ symbol_conf.show_total_period = false;
+ symbol_conf.show_nr_samples = true;
+ } else if (symbol_conf.show_nr_samples)
+ symbol_conf.show_nr_samples = false;
else
- notes->options->show_total_period = true;
+ symbol_conf.show_total_period = true;
annotation__update_column_widths(notes);
continue;
case 'c':
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 22cc240f7371..35f9641bf670 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
- err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL);
+ err = symbol__annotate(ms, evsel, &annotation__default_options, NULL);
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ca73fb74ad03..0ea95be84b3b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1143,93 +1143,70 @@ out:
}
struct annotate_args {
- size_t privsize;
- struct arch *arch;
- struct map_symbol ms;
- struct evsel *evsel;
+ struct arch *arch;
+ struct map_symbol ms;
+ struct evsel *evsel;
struct annotation_options *options;
- s64 offset;
- char *line;
- int line_nr;
+ s64 offset;
+ char *line;
+ int line_nr;
};
-static void annotation_line__delete(struct annotation_line *al)
+static void annotation_line__init(struct annotation_line *al,
+ struct annotate_args *args,
+ int nr)
{
- void *ptr = (void *) al - al->privsize;
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->data_nr = nr;
+}
+static void annotation_line__exit(struct annotation_line *al)
+{
free_srcline(al->path);
zfree(&al->line);
- free(ptr);
}
-/*
- * Allocating the annotation line data with following
- * structure:
- *
- * --------------------------------------
- * private space | struct annotation_line
- * --------------------------------------
- *
- * Size of the private space is stored in 'struct annotation_line'.
- *
- */
-static struct annotation_line *
-annotation_line__new(struct annotate_args *args, size_t privsize)
+static size_t disasm_line_size(int nr)
{
struct annotation_line *al;
- struct evsel *evsel = args->evsel;
- size_t size = privsize + sizeof(*al);
- int nr = 1;
-
- if (perf_evsel__is_group_event(evsel))
- nr = evsel->core.nr_members;
- size += sizeof(al->data[0]) * nr;
-
- al = zalloc(size);
- if (al) {
- al = (void *) al + privsize;
- al->privsize = privsize;
- al->offset = args->offset;
- al->line = strdup(args->line);
- al->line_nr = args->line_nr;
- al->data_nr = nr;
- }
-
- return al;
+ return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
}
/*
* Allocating the disasm annotation line data with
* following structure:
*
- * ------------------------------------------------------------
- * privsize space | struct disasm_line | struct annotation_line
- * ------------------------------------------------------------
+ * -------------------------------------------
+ * struct disasm_line | struct annotation_line
+ * -------------------------------------------
*
* We have 'struct annotation_line' member as last member
* of 'struct disasm_line' to have an easy access.
- *
*/
static struct disasm_line *disasm_line__new(struct annotate_args *args)
{
struct disasm_line *dl = NULL;
- struct annotation_line *al;
- size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+ int nr = 1;
- al = annotation_line__new(args, privsize);
- if (al != NULL) {
- dl = disasm_line(al);
+ if (perf_evsel__is_group_event(args->evsel))
+ nr = args->evsel->core.nr_members;
- if (dl->al.line == NULL)
- goto out_delete;
+ dl = zalloc(disasm_line_size(nr));
+ if (!dl)
+ return NULL;
- if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
- goto out_free_line;
+ annotation_line__init(&dl->al, args, nr);
+ if (dl->al.line == NULL)
+ goto out_delete;
- disasm_line__init_ins(dl, args->arch, &args->ms);
- }
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
}
return dl;
@@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl)
else
ins__delete(&dl->ops);
zfree(&dl->ins.name);
- annotation_line__delete(&dl->al);
+ annotation_line__exit(&dl->al);
+ free(dl);
}
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
@@ -2149,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel)
annotation__calc_percent(notes, evsel, symbol__size(sym));
}
-int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize,
+int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *options, struct arch **parch)
{
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(sym);
struct annotate_args args = {
- .privsize = privsize,
.evsel = evsel,
.options = options,
};
@@ -2644,6 +2621,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size)
struct annotation_line *al;
notes->max_line_len = 0;
+ notes->nr_entries = 0;
+ notes->nr_asm_entries = 0;
list_for_each_entry(al, &notes->src->source, node) {
size_t line_len = strlen(al->line);
@@ -2790,7 +2769,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel,
struct symbol *sym = ms->sym;
struct rb_root source_line = RB_ROOT;
- if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0)
+ if (symbol__annotate(ms, evsel, opts, NULL) < 0)
return -1;
symbol__calc_percent(sym, evsel);
@@ -2915,9 +2894,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
percent = annotation_data__percent(&al->data[i], percent_type);
obj__set_percent_color(obj, percent, current_entry);
- if (notes->options->show_total_period) {
+ if (symbol_conf.show_total_period) {
obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
- } else if (notes->options->show_nr_samples) {
+ } else if (symbol_conf.show_nr_samples) {
obj__printf(obj, "%6" PRIu64 " ",
al->data[i].he.nr_samples);
} else {
@@ -2931,8 +2910,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, "%-*s", pcnt_width, " ");
else {
obj__printf(obj, "%-*s", pcnt_width,
- notes->options->show_total_period ? "Period" :
- notes->options->show_nr_samples ? "Samples" : "Percent");
+ symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent");
}
}
@@ -3070,7 +3049,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (perf_evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
- err = symbol__annotate(ms, evsel, 0, options, parch);
+ err = symbol__annotate(ms, evsel, options, parch);
if (err)
goto out_free_offsets;
@@ -3094,69 +3073,46 @@ out_free_offsets:
return err;
}
-#define ANNOTATION__CFG(n) \
- { .name = #n, .value = &annotation__default_options.n, }
-
-/*
- * Keep the entries sorted, they are bsearch'ed
- */
-static struct annotation_config {
- const char *name;
- void *value;
-} annotation__configs[] = {
- ANNOTATION__CFG(hide_src_code),
- ANNOTATION__CFG(jump_arrows),
- ANNOTATION__CFG(offset_level),
- ANNOTATION__CFG(show_linenr),
- ANNOTATION__CFG(show_nr_jumps),
- ANNOTATION__CFG(show_nr_samples),
- ANNOTATION__CFG(show_total_period),
- ANNOTATION__CFG(use_offset),
-};
-
-#undef ANNOTATION__CFG
-
-static int annotation_config__cmp(const void *name, const void *cfgp)
-{
- const struct annotation_config *cfg = cfgp;
-
- return strcmp(name, cfg->name);
-}
-
-static int annotation__config(const char *var, const char *value,
- void *data __maybe_unused)
+static int annotation__config(const char *var, const char *value, void *data)
{
- struct annotation_config *cfg;
- const char *name;
+ struct annotation_options *opt = data;
if (!strstarts(var, "annotate."))
return 0;
- name = var + 9;
- cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
- sizeof(struct annotation_config), annotation_config__cmp);
-
- if (cfg == NULL)
- pr_debug("%s variable unknown, ignoring...", var);
- else if (strcmp(var, "annotate.offset_level") == 0) {
- perf_config_int(cfg->value, name, value);
-
- if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
- else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
- *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+ if (!strcmp(var, "annotate.offset_level")) {
+ perf_config_u8(&opt->offset_level, "offset_level", value);
+
+ if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
+ opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else if (!strcmp(var, "annotate.hide_src_code")) {
+ opt->hide_src_code = perf_config_bool("hide_src_code", value);
+ } else if (!strcmp(var, "annotate.jump_arrows")) {
+ opt->jump_arrows = perf_config_bool("jump_arrows", value);
+ } else if (!strcmp(var, "annotate.show_linenr")) {
+ opt->show_linenr = perf_config_bool("show_linenr", value);
+ } else if (!strcmp(var, "annotate.show_nr_jumps")) {
+ opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value);
+ } else if (!strcmp(var, "annotate.show_nr_samples")) {
+ symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples",
+ value);
+ } else if (!strcmp(var, "annotate.show_total_period")) {
+ symbol_conf.show_total_period = perf_config_bool("show_total_period",
+ value);
+ } else if (!strcmp(var, "annotate.use_offset")) {
+ opt->use_offset = perf_config_bool("use_offset", value);
} else {
- *(bool *)cfg->value = perf_config_bool(name, value);
+ pr_debug("%s variable unknown, ignoring...", var);
}
+
return 0;
}
-void annotation_config__init(void)
+void annotation_config__init(struct annotation_options *opt)
{
- perf_config(annotation__config, NULL);
-
- annotation__default_options.show_total_period = symbol_conf.show_total_period;
- annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples;
+ perf_config(annotation__config, opt);
}
static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 455403e8fede..001258601a37 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -83,8 +83,6 @@ struct annotation_options {
full_path,
show_linenr,
show_nr_jumps,
- show_nr_samples,
- show_total_period,
show_minmax_cycle,
show_asm_raw,
annotate_src;
@@ -141,7 +139,6 @@ struct annotation_line {
u64 cycles;
u64 cycles_max;
u64 cycles_min;
- size_t privsize;
char *path;
u32 idx;
int idx_asm;
@@ -309,7 +306,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
@@ -352,7 +349,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__annotate(struct map_symbol *ms,
- struct evsel *evsel, size_t privsize,
+ struct evsel *evsel,
struct annotation_options *options,
struct arch **parch);
int symbol__annotate2(struct map_symbol *ms,
@@ -413,7 +410,7 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
}
#endif
-void annotation_config__init(void);
+void annotation_config__init(struct annotation_options *opt);
int annotate_parse_percent_type(const struct option *opt, const char *_str,
int unset);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index eb087e7df6f4..3571ce72ca28 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
{
- if (itr)
+ if (itr) {
+ itr->evlist = evlist;
return itr->recording_options(itr, evlist, opts);
+ }
return 0;
}
@@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
return -EINVAL;
}
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct evsel *evsel;
+
+ if (!itr->evlist || !itr->pmu)
+ return -EINVAL;
+
+ evlist__for_each_entry(itr->evlist, evsel) {
+ if (evsel->core.attr.type == itr->pmu->type) {
+ if (evsel->disabled)
+ return 0;
+ return perf_evlist__enable_event_idx(itr->evlist, evsel,
+ idx);
+ }
+ }
+ return -EINVAL;
+}
+
/*
* Event record size is 16-bit which results in a maximum size of about 64KiB.
* Allow about 4KiB for the rest of the sample record, to give a maximum
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 749d72cd9c7b..e58ef160b599 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -29,6 +29,7 @@ struct record_opts;
struct perf_record_auxtrace_error;
struct perf_record_auxtrace_info;
struct events_stats;
+struct perf_pmu;
enum auxtrace_error_type {
PERF_AUXTRACE_ERROR_ITRACE = 1,
@@ -322,6 +323,8 @@ struct auxtrace_mmap_params {
* @read_finish: called after reading from an auxtrace mmap
* @alignment: alignment (if any) for AUX area data
* @default_aux_sample_size: default sample size for --aux sample option
+ * @pmu: associated pmu
+ * @evlist: selected events list
*/
struct auxtrace_record {
int (*recording_options)(struct auxtrace_record *itr,
@@ -346,6 +349,8 @@ struct auxtrace_record {
int (*read_finish)(struct auxtrace_record *itr, int idx);
unsigned int alignment;
unsigned int default_aux_sample_size;
+ struct perf_pmu *pmu;
+ struct evlist *evlist;
};
/**
@@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
struct auxtrace_mmap *mm,
unsigned char *data, u64 *head, u64 *old);
u64 auxtrace_record__reference(struct auxtrace_record *itr);
+int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx);
int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
off_t file_offset);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 0bc9c4d7fdc5..ef38eba56ed0 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value)
return 0;
}
+int perf_config_u8(u8 *dest, const char *name, const char *value)
+{
+ long ret = 0;
+
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
int ret;
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index bd0a5897c76a..c10b66dde2f3 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *);
int perf_default_config(const char *, const char *, void *);
int perf_config(config_fn_t fn, void *);
int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u8(u8 *dest, const char *name, const char *value);
int perf_config_u64(u64 *dest, const char *, const char *);
int perf_config_bool(const char *, const char *);
int config_error_nonbool(const char *);
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 5003ba403345..0f5fda11675f 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -301,10 +301,15 @@ int probe_file__get_events(int fd, struct strfilter *filter,
p = strchr(ent->s, ':');
if ((p && strfilter__compare(filter, p + 1)) ||
strfilter__compare(filter, ent->s)) {
- strlist__add(plist, ent->s);
+ ret = strlist__add(plist, ent->s);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
ret = 0;
}
}
+out:
strlist__delete(namelist);
return ret;
@@ -511,7 +516,11 @@ static int probe_cache__load(struct probe_cache *pcache)
ret = -EINVAL;
goto out;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out;
+ }
}
}
out:
@@ -672,7 +681,12 @@ int probe_cache__add_entry(struct probe_cache *pcache,
command = synthesize_probe_trace_command(&tevs[i]);
if (!command)
goto out_err;
- strlist__add(entry->tevlist, command);
+ ret = strlist__add(entry->tevlist, command);
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ goto out_err;
+ }
+
free(command);
}
list_add_tail(&entry->node, &pcache->entries);
@@ -853,9 +867,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
break;
}
- strlist__add(entry->tevlist, buf);
+ ret = strlist__add(entry->tevlist, buf);
+
free(buf);
entry = NULL;
+
+ if (ret == -ENOMEM) {
+ pr_err("strlist__add failed with -ENOMEM\n");
+ break;
+ }
}
if (entry) {
list_del_init(&entry->node);
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index e59eb9e7f923..180ad1e1b04f 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -24,6 +24,8 @@ KunitResult = namedtuple('KunitResult', ['status','result'])
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig'])
+KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
+
class KunitStatus(Enum):
SUCCESS = auto()
CONFIG_FAILURE = auto()
@@ -35,6 +37,13 @@ def create_default_kunitconfig():
shutil.copyfile('arch/um/configs/kunit_defconfig',
kunit_kernel.kunitconfig_path)
+def get_kernel_root_path():
+ parts = sys.argv[0] if not __file__ else __file__
+ parts = os.path.realpath(parts).split('tools/testing/kunit')
+ if len(parts) != 2:
+ sys.exit(1)
+ return parts[0]
+
def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
config_start = time.time()
@@ -114,6 +123,9 @@ def main(argv, linux=None):
cli_args = parser.parse_args(argv)
if cli_args.subcommand == 'run':
+ if get_kernel_root_path():
+ os.chdir(get_kernel_root_path())
+
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index cc5d844ecca1..d99ae75ef72f 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -93,6 +93,20 @@ class LinuxSourceTree(object):
return False
return True
+ def validate_config(self, build_dir):
+ kconfig_path = get_kconfig_path(build_dir)
+ validated_kconfig = kunit_config.Kconfig()
+ validated_kconfig.read_from_file(kconfig_path)
+ if not self._kconfig.is_subset_of(validated_kconfig):
+ invalid = self._kconfig.entries() - validated_kconfig.entries()
+ message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \
+ 'but not in .config: %s' % (
+ ', '.join([str(e) for e in invalid])
+ )
+ logging.error(message)
+ return False
+ return True
+
def build_config(self, build_dir):
kconfig_path = get_kconfig_path(build_dir)
if build_dir and not os.path.exists(build_dir):
@@ -103,12 +117,7 @@ class LinuxSourceTree(object):
except ConfigError as e:
logging.error(e)
return False
- validated_kconfig = kunit_config.Kconfig()
- validated_kconfig.read_from_file(kconfig_path)
- if not self._kconfig.is_subset_of(validated_kconfig):
- logging.error('Provided Kconfig is not contained in validated .config!')
- return False
- return True
+ return self.validate_config(build_dir)
def build_reconfig(self, build_dir):
"""Creates a new .config if it is not a subset of the .kunitconfig."""
@@ -133,12 +142,7 @@ class LinuxSourceTree(object):
except (ConfigError, BuildError) as e:
logging.error(e)
return False
- used_kconfig = kunit_config.Kconfig()
- used_kconfig.read_from_file(get_kconfig_path(build_dir))
- if not self._kconfig.is_subset_of(used_kconfig):
- logging.error('Provided Kconfig is not contained in final config!')
- return False
- return True
+ return self.validate_config(build_dir)
def run_kernel(self, args=[], timeout=None, build_dir=''):
args.extend(['mem=256M'])
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index cd1f5b3a7774..d6e106fbce11 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -2,7 +2,7 @@
all:
TEST_PROGS := ftracetest
-TEST_FILES := test.d
+TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
include ../lib.mk
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index 3876d8d62494..1acc9e1fa3fb 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -8,4 +8,6 @@ TEST_PROGS := \
test-state.sh \
test-ftrace.sh
+TEST_FILES := settings
+
include ../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 93de52016dde..ba450e62dc5b 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -8,6 +8,8 @@ TEST_PROGS := mptcp_connect.sh
TEST_GEN_FILES = mptcp_connect
+TEST_FILES := settings
+
EXTRA_CLEAN := *.pcap
include ../../lib.mk
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index aca21dde102a..5a4938d6dcf2 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -13,11 +13,12 @@
KSELFTEST_SKIP=4
# Available test groups:
+# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="correctness concurrency timeout"
+TESTS="reported_issues correctness concurrency timeout"
[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
# Set types, defined by TYPE_ variables below
@@ -25,6 +26,9 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
net_port_mac_proto_net"
+# Reported bugs, also described by TYPE_ variables below
+BUGS="flush_remove_add"
+
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -327,6 +331,12 @@ flood_spec ip daddr . tcp dport . meta l4proto . ip saddr
perf_duration 0
"
+# Definition of tests for bugs reported in the past:
+# display display text for test report
+TYPE_flush_remove_add="
+display Add two elements, flush, re-add
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -440,6 +450,8 @@ setup_set() {
# Check that at least one of the needed tools is available
check_tools() {
+ [ -z "${tools}" ] && return 0
+
__tools=
for tool in ${tools}; do
if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \
@@ -1025,7 +1037,7 @@ format_noconcat() {
add() {
if ! nft add element inet filter test "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1045,7 +1057,7 @@ add_perf() {
add_perf_norange() {
if ! nft add element netdev perf norange "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1054,7 +1066,7 @@ add_perf_norange() {
add_perf_noconcat() {
if ! nft add element netdev perf noconcat "${1}"; then
err "Failed to add ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1063,7 +1075,7 @@ add_perf_noconcat() {
del() {
if ! nft delete element inet filter test "${1}"; then
err "Failed to delete ${1} given ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1134,7 +1146,7 @@ send_match() {
err " $(for f in ${src}; do
eval format_\$f "${2}"; printf ' '; done)"
err "should have matched ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
nft reset counter inet filter test >/dev/null
@@ -1160,7 +1172,7 @@ send_nomatch() {
err " $(for f in ${src}; do
eval format_\$f "${2}"; printf ' '; done)"
err "should not have matched ruleset:"
- err "$(nft list ruleset -a)"
+ err "$(nft -a list ruleset)"
return 1
fi
}
@@ -1430,6 +1442,23 @@ test_performance() {
kill "${perf_pid}"
}
+test_bug_flush_remove_add() {
+ set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
+ elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
+ elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
+ for i in `seq 1 100`; do
+ nft add table t ${set_cmd} || return ${KSELFTEST_SKIP}
+ nft add element t s ${elem1} 2>/dev/null || return 1
+ nft flush set t s 2>/dev/null || return 1
+ nft add element t s ${elem2} 2>/dev/null || return 1
+ done
+ nft flush ruleset
+}
+
+test_reported_issues() {
+ eval test_bug_"${subtest}"
+}
+
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
tmp="$(mktemp)"
@@ -1438,9 +1467,15 @@ trap cleanup EXIT
# Entry point for test runs
passed=0
for name in ${TESTS}; do
- printf "TEST: %s\n" "${name}"
- for type in ${TYPES}; do
- eval desc=\$TYPE_"${type}"
+ printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')"
+ if [ "${name}" = "reported_issues" ]; then
+ SUBTESTS="${BUGS}"
+ else
+ SUBTESTS="${TYPES}"
+ fi
+
+ for subtest in ${SUBTESTS}; do
+ eval desc=\$TYPE_"${subtest}"
IFS='
'
for __line in ${desc}; do
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index d6469535630a..2af9d39a9716 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,7 +4,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
-CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ \
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
@@ -19,6 +19,8 @@ TEST_GEN_PROGS_EXTENDED = librseq.so
TEST_PROGS = run_param_test.sh
+TEST_FILES := settings
+
include ../lib.mk
$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
index 2d93d65723c9..55198ecc04db 100644
--- a/tools/testing/selftests/rtc/Makefile
+++ b/tools/testing/selftests/rtc/Makefile
@@ -6,4 +6,6 @@ TEST_GEN_PROGS = rtctest
TEST_GEN_PROGS_EXTENDED = setdate
+TEST_FILES := settings
+
include ../lib.mk
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index d65a0faa46d8..eda7b624eab8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -742,9 +742,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
guest_enter_irqoff();
if (has_vhe()) {
- kvm_arm_vhe_guest_enter();
ret = kvm_vcpu_run_vhe(vcpu);
- kvm_arm_vhe_guest_exit();
} else {
ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
}
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 204d210d01c2..cc94ccc68821 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -4,6 +4,7 @@
#include <kvm/arm_arch_timer.h>
#include <linux/tracepoint.h>
+#include <asm/kvm_arm.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm