summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS4
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst44
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt10
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml9
-rw-r--r--Documentation/driver-api/virtio/writing_virtio_drivers.rst1
-rw-r--r--Documentation/filesystems/bcachefs/index.rst11
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--MAINTAINERS54
-rw-r--r--arch/Kconfig12
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s-warp.dts1
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c23
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi40
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi8
-rw-r--r--arch/arm64/include/asm/tlbflush.h20
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi7
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts33
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi24
-rw-r--r--arch/loongarch/include/asm/addrspace.h1
-rw-r--r--arch/loongarch/include/asm/io.h20
-rw-r--r--arch/loongarch/include/asm/kfence.h9
-rw-r--r--arch/loongarch/include/asm/page.h26
-rw-r--r--arch/loongarch/mm/mmap.c4
-rw-r--r--arch/loongarch/mm/pgtable.c4
-rw-r--r--arch/mips/include/asm/ptrace.h2
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/ptrace.c15
-rw-r--r--arch/mips/kernel/scall32-o32.S23
-rw-r--r--arch/mips/kernel/scall64-n32.S3
-rw-r--r--arch/mips/kernel/scall64-n64.S3
-rw-r--r--arch/mips/kernel/scall64-o32.S33
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/entry/common.c10
-rw-r--r--arch/x86/entry/entry_64.S61
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/syscall_32.c21
-rw-r--r--arch/x86/entry/syscall_64.c19
-rw-r--r--arch/x86/entry/syscall_x32.c10
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_proc.c22
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/cpufeatures.h7
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h17
-rw-r--r--arch/x86/include/asm/syscall.h11
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c15
-rw-r--r--arch/x86/kernel/cpu/bugs.c167
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/topology.c7
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c51
-rw-r--r--arch/x86/kvm/reverse_cpuid.h3
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-cgroup.h2
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-iocost.c7
-rw-r--r--block/blk-settings.c16
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c40
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h3
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h6
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c11
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c6
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c8
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c8
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c14
-rw-r--r--drivers/acpi/scan.c3
-rw-r--r--drivers/ata/ahci.c85
-rw-r--r--drivers/ata/libata-core.c2
-rw-r--r--drivers/ata/libata-scsi.c9
-rw-r--r--drivers/cache/sifive_ccache.c72
-rw-r--r--drivers/cxl/acpi.c13
-rw-r--r--drivers/cxl/core/cdat.c152
-rw-r--r--drivers/cxl/core/mbox.c5
-rw-r--r--drivers/cxl/core/port.c114
-rw-r--r--drivers/cxl/core/regs.c5
-rw-r--r--drivers/cxl/cxl.h8
-rw-r--r--drivers/cxl/cxlmem.h2
-rw-r--r--drivers/firmware/arm_ffa/driver.c2
-rw-r--r--drivers/firmware/arm_scmi/powercap.c2
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c1
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c31
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c27
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h55
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h18
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c347
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c3
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c42
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c6
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c11
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h34
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c10
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c8
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c6
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c6
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c13
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c50
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h2
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c4
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c5
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/gpu/host1x/bus.c8
-rw-r--r--drivers/hv/channel.c29
-rw-r--r--drivers/hv/connection.c29
-rw-r--r--drivers/hv/vmbus_drv.c94
-rw-r--r--drivers/iommu/amd/init.c25
-rw-r--r--drivers/iommu/amd/iommu.c11
-rw-r--r--drivers/iommu/intel/iommu.c11
-rw-r--r--drivers/iommu/intel/perfmon.c2
-rw-r--r--drivers/iommu/intel/svm.c2
-rw-r--r--drivers/iommu/mtk_iommu.c1
-rw-r--r--drivers/iommu/mtk_iommu_v1.c1
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c8
-rw-r--r--drivers/isdn/mISDN/socket.c10
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c8
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c11
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c4
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c2
-rw-r--r--drivers/mmc/host/omap.c48
-rw-r--r--drivers/net/dsa/mt7530.c246
-rw-r--r--drivers/net/dsa/mt7530.h6
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c35
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_xdp.c4
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c13
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h2
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c3
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c6
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c82
-rw-r--r--drivers/net/ethernet/micrel/ks8851.h3
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c16
-rw-r--r--drivers/net/ethernet/micrel/ks8851_par.c11
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c11
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_port.c4
-rw-r--r--drivers/net/ethernet/realtek/r8169.h6
-rw-r--r--drivers/net/ethernet/realtek/r8169_leds.c35
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c2
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/hyperv/netvsc.c7
-rw-r--r--drivers/net/virtio_net.c26
-rw-r--r--drivers/pci/quirks.c8
-rw-r--r--drivers/platform/chrome/cros_ec_uart.c28
-rw-r--r--drivers/platform/x86/acer-wmi.c9
-rw-r--r--drivers/platform/x86/intel/hid.c9
-rw-r--r--drivers/platform/x86/intel/vbtn.c11
-rw-r--r--drivers/platform/x86/lg-laptop.c2
-rw-r--r--drivers/platform/x86/toshiba_acpi.c4
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c2
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_edif.c2
-rw-r--r--drivers/scsi/sg.c18
-rw-r--r--drivers/target/target_core_configfs.c12
-rw-r--r--drivers/uio/uio_hv_generic.c12
-rw-r--r--drivers/vhost/vhost.c26
-rw-r--r--drivers/virtio/virtio.c6
-rw-r--r--fs/bcachefs/acl.c30
-rw-r--r--fs/bcachefs/bcachefs_format.h14
-rw-r--r--fs/bcachefs/btree_gc.c13
-rw-r--r--fs/bcachefs/btree_iter.h2
-rw-r--r--fs/bcachefs/btree_journal_iter.c67
-rw-r--r--fs/bcachefs/btree_key_cache.c4
-rw-r--r--fs/bcachefs/btree_locking.c28
-rw-r--r--fs/bcachefs/btree_node_scan.c11
-rw-r--r--fs/bcachefs/btree_types.h14
-rw-r--r--fs/bcachefs/btree_update_interior.c128
-rw-r--r--fs/bcachefs/btree_update_interior.h3
-rw-r--r--fs/bcachefs/chardev.c98
-rw-r--r--fs/bcachefs/data_update.c17
-rw-r--r--fs/bcachefs/debug.c75
-rw-r--r--fs/bcachefs/eytzinger.c8
-rw-r--r--fs/bcachefs/eytzinger.h26
-rw-r--r--fs/bcachefs/journal_reclaim.c2
-rw-r--r--fs/bcachefs/journal_types.h1
-rw-r--r--fs/bcachefs/recovery.c14
-rw-r--r--fs/bcachefs/snapshot.c19
-rw-r--r--fs/bcachefs/super.c5
-rw-r--r--fs/bcachefs/sysfs.c6
-rw-r--r--fs/bcachefs/tests.c2
-rw-r--r--fs/bcachefs/util.h10
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/btrfs/root-tree.h2
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/proc/bootconfig.c12
-rw-r--r--fs/smb/client/cached_dir.c4
-rw-r--r--fs/smb/client/cifsglob.h1
-rw-r--r--fs/smb/client/connect.c8
-rw-r--r--fs/smb/client/fs_context.c21
-rw-r--r--fs/smb/client/fs_context.h2
-rw-r--r--fs/smb/client/inode.c3
-rw-r--r--fs/smb/client/misc.c1
-rw-r--r--fs/smb/client/smb2ops.c94
-rw-r--r--fs/smb/client/smb2pdu.c11
-rw-r--r--fs/tracefs/event_inode.c14
-rw-r--r--fs/zonefs/super.c2
-rw-r--r--include/acpi/acpi_bus.h8
-rw-r--r--include/asm-generic/bug.h5
-rw-r--r--include/asm-generic/hyperv-tlfs.h19
-rw-r--r--include/asm-generic/mshyperv.h14
-rw-r--r--include/linux/bootconfig.h1
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/dma-fence.h7
-rw-r--r--include/linux/gfp_types.h2
-rw-r--r--include/linux/hyperv.h1
-rw-r--r--include/linux/io_uring_types.h2
-rw-r--r--include/linux/irqflags.h2
-rw-r--r--include/linux/mm.h10
-rw-r--r--include/linux/randomize_kstack.h2
-rw-r--r--include/linux/rwbase_rt.h4
-rw-r--r--include/linux/rwsem.h6
-rw-r--r--include/linux/sockptr.h25
-rw-r--r--include/linux/u64_stats_sync.h9
-rw-r--r--include/linux/virtio.h7
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/bluetooth/bluetooth.h9
-rw-r--r--include/net/ip_tunnels.h33
-rw-r--r--include/uapi/linux/vhost.h15
-rw-r--r--init/main.c5
-rw-r--r--io_uring/io_uring.c26
-rw-r--r--io_uring/net.c1
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/dma/swiotlb.c91
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/time/tick-common.c17
-rw-r--r--kernel/time/tick-sched.c36
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--lib/checksum_kunit.c5
-rw-r--r--lib/test_ubsan.c2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/bluetooth/hci_request.c4
-rw-r--r--net/bluetooth/hci_sock.c21
-rw-r--r--net/bluetooth/hci_sync.c6
-rw-r--r--net/bluetooth/iso.c46
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/l2cap_sock.c52
-rw-r--r--net/bluetooth/rfcomm/sock.c14
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv6/addrconf.c7
-rw-r--r--net/ipv6/ip6_fib.c7
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/nfc/llcp_sock.c12
-rw-r--r--net/openvswitch/conntrack.c5
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/unix/garbage.c18
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--scripts/gcc-plugins/stackleak_plugin.c2
-rw-r--r--tools/hv/hv_kvp_daemon.c213
-rw-r--r--tools/include/linux/kernel.h1
-rw-r--r--tools/include/linux/mm.h5
-rw-r--r--tools/include/linux/panic.h19
-rw-r--r--tools/power/x86/turbostat/turbostat.818
-rw-r--r--tools/power/x86/turbostat/turbostat.c2017
-rw-r--r--tools/testing/cxl/test/cxl.c10
-rw-r--r--tools/testing/selftests/kselftest.h33
-rw-r--r--tools/testing/selftests/timers/posix_timers.c105
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c73
-rwxr-xr-xtools/testing/selftests/turbostat/defcolumns.py60
350 files changed, 5815 insertions, 2257 deletions
diff --git a/CREDITS b/CREDITS
index c55c5a0ee4ff..0107047f807b 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3146,6 +3146,10 @@ S: Triftstra=DFe 55
S: 13353 Berlin
S: Germany
+N: Gustavo Pimental
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
N: Emanuel Pirker
E: epirker@edu.uni-klu.ac.at
D: AIC5800 IEEE 1394, RAW I/O on 1394
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index cce768afec6b..25a04cda4c2c 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
Attack scenarios
----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,18 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
+ spectre_bhi=
+
+ [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the deployment
+ of the HW BHI control and the SW BHB clearing sequence.
+
+ on
+ (default) Enable the HW or SW mitigation as
+ needed.
+ off
+ Disable the mitigation.
+
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 623fce7d5fcd..902ecd92a29f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3444,6 +3444,7 @@
retbleed=off [X86]
spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
@@ -6063,6 +6064,15 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation
+ as needed.
+ off - Disable the mitigation.
+
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
index c0d6a4fdff97..e6dc5494baee 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
@@ -53,6 +53,15 @@ patternProperties:
compatible:
const: qcom,sm8150-dpu
+ "^displayport-controller@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ contains:
+ const: qcom,sm8150-dp
+
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
diff --git a/Documentation/driver-api/virtio/writing_virtio_drivers.rst b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
index e14c58796d25..e5de6f5d061a 100644
--- a/Documentation/driver-api/virtio/writing_virtio_drivers.rst
+++ b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
@@ -97,7 +97,6 @@ like this::
static struct virtio_driver virtio_dummy_driver = {
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_dummy_probe,
.remove = virtio_dummy_remove,
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
new file mode 100644
index 000000000000..e2bd61ccd96f
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ errorcodes
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 0ea1e44fa028..1f9b4c905a6a 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -69,6 +69,7 @@ Documentation for filesystem implementations.
afs
autofs
autofs-mount-control
+ bcachefs/index
befs
bfs
btrfs
diff --git a/MAINTAINERS b/MAINTAINERS
index aea47e04c3a5..c23fda1aa1f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2191,7 +2191,6 @@ N: mxs
ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
M: Shawn Guo <shawnguo@kernel.org>
-M: Li Yang <leoyang.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2708,7 +2707,7 @@ F: sound/soc/rockchip/
N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
@@ -3573,6 +3572,7 @@ S: Supported
C: irc://irc.oftc.net/bcache
T: git https://evilpiepirate.org/git/bcachefs.git
F: fs/bcachefs/
+F: Documentation/filesystems/bcachefs/
BDISP ST MEDIA DRIVER
M: Fabien Dessenne <fabien.dessenne@foss.st.com>
@@ -4869,7 +4869,6 @@ F: drivers/power/supply/cw2015_battery.c
CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: Xiubo Li <xiubli@redhat.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -4881,7 +4880,6 @@ F: net/ceph/
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: Xiubo Li <xiubli@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -5557,7 +5555,7 @@ F: drivers/cpuidle/cpuidle-big_little.c
CPUIDLE DRIVER - ARM EXYNOS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Kukjin Kim <kgene@kernel.org>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -8523,7 +8521,6 @@ S: Maintained
F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER
-M: Li Yang <leoyang.li@nxp.com>
M: Zhang Wei <zw@zh-kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
@@ -8688,10 +8685,9 @@ F: drivers/soc/fsl/qe/tsa.h
F: include/dt-bindings/soc/cpm1-fsl,tsa.h
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M: Li Yang <leoyang.li@nxp.com>
L: netdev@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/freescale/ucc_geth*
FREESCALE QUICC ENGINE UCC HDLC DRIVER
@@ -8708,10 +8704,9 @@ S: Maintained
F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
+S: Orphan
F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
@@ -8745,10 +8740,9 @@ F: Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
F: sound/soc/fsl/fsl_qmc_audio.c
FREESCALE USB PERIPHERAL DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/gadget/udc/fsl*
FREESCALE USB PHY DRIVER
@@ -9000,7 +8994,7 @@ F: drivers/i2c/muxes/i2c-mux-gpio.c
F: include/linux/platform_data/i2c-mux-gpio.h
GENERIC GPIO RESET DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: drivers/reset/reset-gpio.c
@@ -13295,7 +13289,7 @@ F: drivers/iio/adc/max11205.c
MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
R: Iskren Chernev <iskren.chernev@gmail.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Matheus Castello <matheus@castello.eng.br>
L: linux-pm@vger.kernel.org
@@ -13305,7 +13299,7 @@ F: drivers/power/supply/max17040_battery.c
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
R: Hans de Goede <hdegoede@redhat.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
R: Purism Kernel Team <kernel@puri.sm>
@@ -13363,7 +13357,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
F: drivers/power/supply/max77976_charger.c
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -13374,7 +13368,7 @@ F: drivers/power/supply/max77693_charger.c
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
M: Chanwoo Choi <cw00.choi@samsung.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -14158,7 +14152,7 @@ F: mm/mm_init.c
F: tools/testing/memblock/
MEMORY CONTROLLER DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:krzysztof.kozlowski@linaro.org
@@ -15539,7 +15533,7 @@ F: include/uapi/linux/nexthop.h
F: net/ipv4/nexthop.c
NFC SUBSYSTEM
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/
@@ -15916,7 +15910,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -16527,7 +16521,7 @@ K: of_overlay_remove
OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
M: Rob Herring <robh@kernel.org>
-M: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M: Krzysztof Kozlowski <krzk+dt@kernel.org>
M: Conor Dooley <conor+dt@kernel.org>
L: devicetree@vger.kernel.org
S: Maintained
@@ -16974,7 +16968,6 @@ F: drivers/pci/controller/dwc/pci-exynos.c
PCI DRIVER FOR SYNOPSYS DESIGNWARE
M: Jingoo Han <jingoohan1@gmail.com>
-M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
L: linux-pci@vger.kernel.org
S: Maintained
@@ -17485,7 +17478,7 @@ F: Documentation/devicetree/bindings/pinctrl/renesas,*
F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -19453,7 +19446,7 @@ F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19488,7 +19481,7 @@ S: Maintained
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19514,7 +19507,7 @@ F: drivers/media/platform/samsung/s3c-camif/
F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
F: drivers/nfc/s3fwrn5
@@ -19535,7 +19528,7 @@ S: Supported
F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Vladimir Zapolskiy <vz@mleia.com>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -19557,7 +19550,7 @@ F: Documentation/devicetree/bindings/media/samsung,fimc.yaml
F: drivers/media/platform/samsung/exynos4-is/
SAMSUNG SOC CLOCK DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
@@ -19589,7 +19582,7 @@ F: drivers/net/ethernet/samsung/sxgbe/
SAMSUNG THERMAL DRIVER
M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -22577,6 +22570,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/
B: https://bugzilla.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
F: tools/power/x86/turbostat/
+F: tools/testing/selftests/turbostat/
TW5864 VIDEO4LINUX DRIVER
M: Bluecherry Maintainers <maintainers@bluecherrydvr.com>
@@ -23785,7 +23779,7 @@ S: Orphan
F: drivers/mmc/host/vub300.c
W1 DALLAS'S 1-WIRE BUS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/w1/
F: Documentation/w1/
diff --git a/arch/Kconfig b/arch/Kconfig
index 9f066785bb71..65afb1de48b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB
config PAGE_SHIFT
int
- default 12 if PAGE_SIZE_4KB
- default 13 if PAGE_SIZE_8KB
- default 14 if PAGE_SIZE_16KB
- default 15 if PAGE_SIZE_32KB
- default 16 if PAGE_SIZE_64KB
- default 18 if PAGE_SIZE_256KB
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
index 1235a71c6abe..52869e68f833 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
@@ -666,7 +666,7 @@
bus-width = <4>;
no-1-8-v;
no-sdio;
- no-emmc;
+ no-mmc;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
index ba7231b364bb..7bab113ca6da 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
@@ -210,6 +210,7 @@
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 31755a378c73..ff2a4a4d8220 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -140,12 +138,11 @@ static int slot1_cover_open;
static int slot2_cover_open;
static struct device *mmc_device;
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}
mmc1_data.nr_slots = 2;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index 3c42240e78e2..4aaf5a0c1ed8 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index cab3468b1875..f7a91d43a0ff 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
reg = <0x5a190000 0x1000>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&adma_pwm_lpcg 1>,
- <&adma_pwm_lpcg 0>;
+ clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+ <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 7e510b21bbac..764c1a08e3b1 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 41c79d2ebdd6..f24b14744799 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -183,7 +184,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index d5c400b355af..f5491a608b2f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -202,7 +203,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 11626fae5f97..aa9f28c4431d 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -153,15 +153,15 @@
};
&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3b0e8248e1a4..a75de2665d84 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void)
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
/*
* TLB Invalidation
@@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* 3. If there is 1 page remaining, flush it through non-range operations. Range
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
index 49a70f8c3cab..b6aeb1f70e2a 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -100,6 +100,13 @@
#size-cells = <2>;
dma-coherent;
+ isa@18000000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18000000 0x4000>;
+ };
+
liointc0: interrupt-controller@1fe01400 {
compatible = "loongson,liointc-2.0";
reg = <0x0 0x1fe01400 0x0 0x40>,
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index dca91caf895e..74b99bd234cc 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -61,12 +61,45 @@
&gmac0 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <2>;
+ };
+ };
};
&gmac1 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <2>;
+ };
+ };
};
&gmac2 {
status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy2: ethernet-phy@2 {
+ reg = <0>;
+ };
+ };
};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
index a231949b5f55..9eab2d02cbe8 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -51,6 +51,13 @@
#address-cells = <2>;
#size-cells = <2>;
+ isa@18400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18400000 0x4000>;
+ };
+
pmc: power-management@100d0000 {
compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x100d0000 0x0 0x58>;
@@ -109,6 +116,8 @@
msi: msi-controller@1fe01140 {
compatible = "loongson,pch-msi-1.0";
reg = <0x0 0x1fe01140 0x0 0x8>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <192>;
@@ -140,27 +149,34 @@
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
+ msi-parent = <&msi>;
bus-range = <0x0 0xff>;
- ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
<0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
gmac0: ethernet@3,0 {
reg = <0x1800 0x0 0x0 0x0 0x0>;
- interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac1: ethernet@3,1 {
reg = <0x1900 0x0 0x0 0x0 0x0>;
- interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac2: ethernet@3,2 {
reg = <0x1a00 0x0 0x0 0x0 0x0>;
- interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+ <18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index b24437e28c6e..7bd47d65bf7a 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -11,6 +11,7 @@
#define _ASM_ADDRSPACE_H
#include <linux/const.h>
+#include <linux/sizes.h>
#include <asm/loongarch.h>
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 4a8adcca329b..c2f9979b2979 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -14,11 +14,6 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define __io_aw() mmiowb()
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \
+ page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr) \
+({ \
+ extern char *__kfence_pool; \
+ (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \
+ page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
index 6c82aea1c993..a6a5760da3a3 100644
--- a/arch/loongarch/include/asm/kfence.h
+++ b/arch/loongarch/include/asm/kfence.h
@@ -16,6 +16,7 @@
static inline bool arch_kfence_init_pool(void)
{
int err;
+ char *kaddr, *vaddr;
char *kfence_pool = __kfence_pool;
struct vm_struct *area;
@@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void)
return false;
}
+ kaddr = kfence_pool;
+ vaddr = __kfence_pool;
+ while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+ set_page_address(virt_to_page(kaddr), vaddr);
+ kaddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
return true;
}
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 44027060c54a..e85df33f11c7 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
-#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn) __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr) __phys_to_pfn(paddr)
+
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page) __va(page_to_phys(page))
+#define virt_to_page(kaddr) phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page) \
+({ \
+ extern char *__kfence_pool; \
+ (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \
+})
#define virt_to_page(kaddr) \
({ \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
+#endif
+
+#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr))
+
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index a9630a81b38a..89af7c12e8c0 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -4,6 +4,7 @@
*/
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kfence.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr)
{
unsigned long vaddr = (unsigned long)kaddr;
+ if (is_kfence_address((void *)kaddr))
+ return 1;
+
if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
return 0;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 2aae72e63871..bda018150000 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -11,13 +11,13 @@
struct page *dmw_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(virt_to_pfn(kaddr));
+ return phys_to_page(__pa(kaddr));
}
EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+ return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
}
EXPORT_SYMBOL(tlb_virt_to_page);
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index d14d0e37ad02..4a2b40ce39e0 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index d1b11f66f748..cb1045ebab06 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_REGS, thread_info, regs);
+ OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 59288c13b581..61503a36067e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
- current_thread_info()->syscall = syscall;
-
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (ptrace_report_syscall_entry(regs))
return -1;
- syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
struct seccomp_data sd;
unsigned long args[6];
- sd.nr = syscall;
+ sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
- syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
- audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+ audit_syscall_entry(current_thread_info()->syscall,
+ regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
- if (syscall < 0)
+ if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
- return syscall;
+ return current_thread_info()->syscall;
}
/*
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 18dc9b345056..2c604717e630 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -77,6 +77,18 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ */
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
SAVE_STATIC
move a0, sp
- /*
- * syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- */
- move a1, v0
- subu t2, v0, __NR_O32_Linux
- bnez t2, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp)
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 97456b2ca7dc..97788859238c 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index e6264aa62e45..be11ea5cc67e 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d3c2616cba22..7a5abb73e531 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -79,6 +79,22 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * absolute syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ * note: NR_syscall is the first O32 syscall but the macro is
+ * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+ * therefore __NR_O32_Linux is used (4000)
+ */
+
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
- /*
- * absolute syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- * note: NR_syscall is the first O32 syscall but the macro is
- * only defined when compiling with -mabi=32 (CONFIG_32BIT)
- * therefore __NR_O32_Linux is used (4000)
- */
- .set push
- .set reorder
- subu t1, v0, __NR_O32_Linux
- move a1, v0
- bnez t1, 1f /* __NR_syscall at offset 0 */
- ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
- .set pop
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fff6ed46e90..4474bf32d0a4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2633,6 +2633,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf3..6de50b80702e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8af2a26b24f6..1b5be07f8669 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index eabf48c4d4b4..c779046cc3fe 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f..c2235bae17ef 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9..33b3f09e6f15 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265..03de4a932131 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d..5b0dd07b1ef1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5fc45543e955..0569f579338b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
* IPI implementation on Hyper-V.
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
- exclude_self ? cpu_is_self : NULL);
+ nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+ exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
}
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
- ipi_arg, NULL);
+ ipi_arg, NULL);
ipi_mask_ex_done:
local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
}
static bool __send_ipi_mask(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
/*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
}
status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
- ipi_arg.cpu_mask);
+ ipi_arg.cpu_mask);
return hv_result_success(status);
do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
if (vp >= 64)
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
index 68a0843d4750..3fa1f2ee7b0d 100644
--- a/arch/x86/hyperv/hv_proc.c
+++ b/arch/x86/hyperv/hv_proc.c
@@ -3,7 +3,6 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
- struct hv_add_logical_processor_in *input;
- struct hv_add_logical_processor_out *output;
+ struct hv_input_add_logical_processor *input;
+ struct hv_output_add_logical_processor *output;
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/*
* When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input->lp_index = lp_index;
input->apic_id = apic_id;
- input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
input->vp_index = vp_index;
input->flags = flags;
input->subnode_type = HvSubnodeAny;
- if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
- } else {
- input->proximity_domain_info.as_uint64 = 0;
- }
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 94ce0f7c9d3a..e6ab0cf15ed5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a38f8f9ba657..3c7434329661 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -461,11 +461,15 @@
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
/*
* BUG word(s)
@@ -515,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 05956bd8bacf..e72c2b872957 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 170c89ed22fc..ff5f1ecc7d1e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -326,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d..2fc7bc3863ff 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a42d8a6f7149..c342c4aa9c68 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1687,11 +1687,11 @@ static int x2apic_state;
static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;
- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9bf17c9c29da..cb9eece55904 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -535,7 +535,6 @@ clear_sev:
static void early_init_amd(struct cpuinfo_x86 *c)
{
- u64 value;
u32 dummy;
if (c->x86 >= 0xf)
@@ -603,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_detect_mem_encrypt(c);
- /* Re-enable TopologyExtensions if switched off by BIOS */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
- !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
- if (msr_set_bit(0xc0011005, 54) > 0) {
- rdmsrl(0xc0011005, value);
- if (value & BIT_64(54)) {
- set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
- }
- }
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e7ba936d798b..ca295b0c1eee 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;
- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }
- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,73 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1784,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -1832,8 +1901,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1915,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@@ -2695,15 +2762,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2779,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2792,30 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char *spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled)
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";
+
+ return "; BHI: Vulnerable";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2828,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267..605c26c009c8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
- return ia32_cap;
+ return x86_arch_cap_msr;
}
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index a515328d9d7d..af5aa2c754c2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index aaca8d235dc2..d17c9b71eb4a 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
- set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
topo_info.nr_disabled_cpus++;
}
- /* Register present and possible CPUs in the domain maps */
+ /*
+ * Register present and possible CPUs in the domain
+ * maps. cpu_possible_map will be updated in
+ * topology_init_possible_cpus() after enumeration is done.
+ */
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 1a8b3ad493af..a7aa6eff4ae5 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
- topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ /*
+ * cpu_nthreads describes the number of threads in the package
+ * sft is the number of APIC ID bits per package
+ *
+ * As the number of actual threads per core is not described in
+ * this leaf, just set the CORE domain shift and let the later
+ * parsers set SMT shift. Assume one thread per core by default
+ * which is correct if there are no other CPUID leafs to parse.
+ */
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
- * If leaf 0xb is available, then SMT shift is set already. If not
- * take it from ecx.threads_per_core and use topo_update_dom() -
- * topology_set_dom() would propagate and overwrite the already
- * propagated CORE level.
+ * If leaf 0xb is available, then the domain shifts are set
+ * already and nothing to do here.
*/
if (!has_0xb) {
+ /*
+ * Leaf 0x80000008 set the CORE domain shift already.
+ * Update the SMT domain, but do not propagate it.
+ */
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
- struct {
- union {
+ union {
+ struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
- u64 msr;
};
+ u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
+static void topoext_fixup(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u64 msrval;
+
+ /* Try to re-enable TopologyExtensions if switched off by BIOS */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+ return;
+
+ if (msr_set_bit(0xc0011005, 54) <= 0)
+ return;
+
+ rdmsrl(0xc0011005, msrval);
+ if (msrval & BIT_64(54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ }
+}
+
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
+ topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 58ac8d69c94b..2f4e155080ba 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2bfbf758d061..f6986dee6f8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47d9f03b7778..984ea2089efc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bdbb557feb5a..059467086b13 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
return 0;
}
+void blkg_init_queue(struct request_queue *q)
+{
+ INIT_LIST_HEAD(&q->blkg_list);
+ mutex_init(&q->blkcg_mutex);
+}
+
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
bool preloaded;
int ret;
- INIT_LIST_HEAD(&q->blkg_list);
- mutex_init(&q->blkcg_mutex);
-
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 78b74106bf10..90b3959d88cf 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -189,6 +189,7 @@ struct blkcg_policy {
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;
+void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);
@@ -482,6 +483,7 @@ struct blkcg {
};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
diff --git a/block/blk-core.c b/block/blk-core.c
index a16b5abdbbf5..b795ac177281 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
init_waitqueue_head(&q->mq_freeze_wq);
mutex_init(&q->mq_freeze_lock);
+ blkg_init_queue(q);
+
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
@@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
+ plug->cur_ktime = 0;
current->flags &= ~PF_BLOCK_TS;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9a85bfbbc45a..baa20c85799d 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 tdelta, delay, new_delay;
+ u64 tdelta, delay, new_delay, shift;
s64 vover, vover_pct;
u32 hwa;
@@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
- if (iocg->delay)
- delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+ shift = div64_u64(tdelta, USEC_PER_SEC);
+ if (iocg->delay && shift < BITS_PER_LONG)
+ delay = iocg->delay >> shift;
else
delay = 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index cdbaef159c4b..d2731843f2fc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -182,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim)
return -EINVAL;
/*
- * Devices that require a virtual boundary do not support scatter/gather
- * I/O natively, but instead require a descriptor list entry for each
- * page (which might not be identical to the Linux PAGE_SIZE). Because
- * of that they are not limited by our notion of "segment size".
+ * Stacking device may have both virtual boundary and max segment
+ * size limit, so allow this setting now, and long-term the two
+ * might need to move out of stacking limits since we have immutable
+ * bvec and lower layer bio splitting is supposed to handle the two
+ * correctly.
*/
- if (lim->virt_boundary_mask) {
- if (WARN_ON_ONCE(lim->max_segment_size &&
- lim->max_segment_size != UINT_MAX))
- return -EINVAL;
- lim->max_segment_size = UINT_MAX;
- } else {
+ if (!lim->virt_boundary_mask) {
/*
* The maximum segment size has an odd historic 64k default that
* drivers probably should override. Just like the I/O size we
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 39f6d1b98fd6..51d3f1a55d02 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/firmware.h>
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
return 0;
}
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
- int ret;
-
- ret = ivpu_rpm_get_if_active(vdev);
- if (ret < 0)
- return ret;
-
- *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
- if (ret)
- ivpu_rpm_put(vdev);
-
- return 0;
-}
-
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- ret = ivpu_get_core_clock_rate(vdev, &args->value);
+ args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
{
int ret;
- ivpu_prepare_for_reset(vdev);
+ /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+ pci_save_state(to_pci_dev(vdev->drm.dev));
ret = ivpu_hw_power_down(vdev);
if (ret)
ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
return ret;
}
@@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
- xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
@@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_init(vdev);
if (ret)
@@ -601,10 +588,8 @@ err_mmu_rctx_fini:
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
-err_power_down:
- ivpu_hw_power_down(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+ ivpu_shutdown(vdev);
err_xa_destroy:
xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
@@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
+ ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 7be0500d9bb8..bb4374d0eaec 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
@@ -90,7 +90,6 @@
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
- bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index b2909168a0a6..094c659d2800 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -21,6 +21,7 @@ struct ivpu_hw_ops {
u32 (*profiling_freq_get)(struct ivpu_device *vdev);
void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+ u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
return vdev->hw->ops->reg_pll_freq_get(vdev);
};
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_offset_get(vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 9a0c9498baba..bd25e2d9fb0f 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include "ivpu_drv.h"
@@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
{
vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
- vdev->wa.d3hot_after_power_off = true;
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
@@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
IVPU_PRINT_WA(punit_disabled);
IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(d3hot_after_power_off);
IVPU_PRINT_WA(interrupt_clear_with_0);
}
@@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena
/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
}
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
u32 cpu_clock;
- if ((config & 0xff) == PLL_RATIO_4_3)
+ if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
cpu_clock = pll_clock * 2 / 4;
else
cpu_clock = pll_clock * 2 / 5;
@@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
if (!ivpu_is_silicon(vdev))
return PLL_SIMULATION_FREQ;
- return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+ return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
}
static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index e4eddbf5d11c..b0b88d4c8926 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
return PLL_RATIO_TO_FREQ(pll_curr_ratio);
}
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio);
+}
+
static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 04ac4b9840fb..56ff067f63e2 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/genalloc.h>
@@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
INIT_LIST_HEAD(&ipc->cb_msg_list);
- drmm_mutex_init(&vdev->drm, &ipc->lock);
+ ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+ goto err_free_rx;
+ }
ivpu_ipc_reset(vdev);
return 0;
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 91bd640655ab..2e46b322c450 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd)
case IVPU_MMU_EVT_F_VMS_FETCH:
return "Fetch of VMS caused external abort";
default:
- return "Unknown CMDQ command";
+ return "Unknown event";
}
}
@@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
{
switch (err) {
case IVPU_MMU_CERROR_NONE:
- return "No CMDQ Error";
+ return "No error";
case IVPU_MMU_CERROR_ILL:
return "Illegal command";
case IVPU_MMU_CERROR_ABT:
- return "External abort on CMDQ read";
+ return "External abort on command queue read";
case IVPU_MMU_CERROR_ATC_INV_SYNC:
return "Sync failed to complete ATS invalidation";
default:
- return "Unknown CMDQ Error";
+ return "Unknown error";
}
}
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 7cce1c928a7f..4f5ea466731f 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/highmem.h>
@@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;
- /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
- pci_save_state(to_pci_dev(vdev->drm.dev));
+ ivpu_prepare_for_reset(vdev);
ret = ivpu_shutdown(vdev);
if (ret)
- ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+ ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
return ret;
}
@@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
pci_restore_state(to_pci_dev(vdev->drm.dev));
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
-retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
@@ -100,6 +97,7 @@ err_mmu_disable:
ivpu_mmu_disable(vdev);
err_power_down:
ivpu_hw_power_down(vdev);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
if (!ivpu_fw_is_cold_boot(vdev)) {
ivpu_pm_prepare_cold_boot(vdev);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 7c157bf92695..d1464324de95 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
if (dep->honor_dep)
adev->flags.honor_deps = 1;
- adev->dep_unmet++;
+ if (!dep->met)
+ adev->dep_unmet++;
}
}
}
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 562302e2e57c..6548f10e61d9 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1;
module_param(mobile_lpm_policy, int, 0644);
MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+ "32-bits port map masks to ignore controllers ports. "
+ "Valid values are: "
+ "\"<mask>\" to apply the same mask to all AHCI controller "
+ "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+ "specify different masks for the controllers specified, "
+ "where <pci_dev> is the PCI ID of an AHCI controller in the "
+ "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv, char *mask_s)
+{
+ unsigned int mask;
+
+ if (kstrtouint(mask_s, 0, &mask)) {
+ dev_err(dev, "Invalid port map mask\n");
+ return;
+ }
+
+ hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv)
+{
+ char *param, *end, *str, *mask_s;
+ char *name;
+
+ if (!strlen(ahci_mask_port_map))
+ return;
+
+ str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+ if (!str)
+ return;
+
+ /* Handle single mask case */
+ if (!strchr(str, '=')) {
+ ahci_apply_port_map_mask(dev, hpriv, str);
+ goto free;
+ }
+
+ /*
+ * Mask list case: parse the parameter to apply the mask only if
+ * the device name matches.
+ */
+ param = str;
+ end = param + strlen(param);
+ while (param && param < end && *param) {
+ name = param;
+ param = strchr(name, '=');
+ if (!param)
+ break;
+
+ *param = '\0';
+ param++;
+ if (param >= end)
+ break;
+
+ if (strcmp(dev_name(dev), name) != 0) {
+ param = strchr(param, ',');
+ if (param)
+ param++;
+ continue;
+ }
+
+ mask_s = param;
+ param = strchr(mask_s, ',');
+ if (param) {
+ *param = '\0';
+ param++;
+ }
+
+ ahci_apply_port_map_mask(dev, hpriv, mask_s);
+ }
+
+free:
+ kfree(str);
+}
+
static void ahci_pci_save_initial_config(struct pci_dev *pdev,
struct ahci_host_priv *hpriv)
{
@@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
"Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
}
+ /* Handle port map masks passed as module parameter. */
+ if (ahci_mask_port_map)
+ ahci_get_port_map_mask(&pdev->dev, hpriv);
+
ahci_save_initial_config(&pdev->dev, hpriv);
}
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index be3412cdb22e..c449d60d9bb9 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
bool cdl_enabled;
u64 val;
- if (ata_id_major_version(dev->id) < 12)
+ if (ata_id_major_version(dev->id) < 11)
goto not_supported;
if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 2f4c58837641..e954976891a9 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4745,7 +4745,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
* bail out.
*/
if (ap->pflags & ATA_PFLAG_SUSPENDED)
- goto unlock;
+ goto unlock_ap;
if (!sdev)
continue;
@@ -4758,7 +4758,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
if (do_resume) {
ret = scsi_resume_device(sdev);
if (ret == -EWOULDBLOCK)
- goto unlock;
+ goto unlock_scan;
dev->flags &= ~ATA_DFLAG_RESUMING;
}
ret = scsi_rescan_device(sdev);
@@ -4766,12 +4766,13 @@ void ata_scsi_dev_rescan(struct work_struct *work)
spin_lock_irqsave(ap->lock, flags);
if (ret)
- goto unlock;
+ goto unlock_ap;
}
}
-unlock:
+unlock_ap:
spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
mutex_unlock(&ap->scsi_scan_mutex);
/* Reschedule with a delay if scsi_rescan_device() returned an error */
diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
index 89ed6cd6b059..e9cc8b4786fb 100644
--- a/drivers/cache/sifive_ccache.c
+++ b/drivers/cache/sifive_ccache.c
@@ -15,6 +15,8 @@
#include <linux/of_address.h>
#include <linux/device.h>
#include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/dma-noncoherent.h>
@@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
return IRQ_HANDLED;
}
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long quirks;
+ int intr_num, rc;
+
+ quirks = (unsigned long)device_get_match_data(dev);
+
+ intr_num = platform_irq_count(pdev);
+ if (!intr_num)
+ return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+ for (int i = 0; i < intr_num; i++) {
+ if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+ continue;
+
+ g_irq[i] = platform_get_irq(pdev, i);
+ if (g_irq[i] < 0)
+ return g_irq[i];
+
+ rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+ if (rc)
+ return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+ }
+
+ return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+ .probe = sifive_ccache_probe,
+ .driver = {
+ .name = "sifive_ccache",
+ .of_match_table = sifive_ccache_ids,
+ },
+};
+
static int __init sifive_ccache_init(void)
{
struct device_node *np;
struct resource res;
- int i, rc, intr_num;
const struct of_device_id *match;
unsigned long quirks;
+ int rc;
np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
if (!np)
@@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void)
goto err_unmap;
}
- intr_num = of_property_count_u32_elems(np, "interrupts");
- if (!intr_num) {
- pr_err("No interrupts property\n");
- rc = -ENODEV;
- goto err_unmap;
- }
-
- for (i = 0; i < intr_num; i++) {
- g_irq[i] = irq_of_parse_and_map(np, i);
-
- if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
- continue;
-
- rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
- NULL);
- if (rc) {
- pr_err("Could not request IRQ %d\n", g_irq[i]);
- goto err_free_irq;
- }
- }
- of_node_put(np);
-
#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
@@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void)
#ifdef CONFIG_DEBUG_FS
setup_sifive_debug();
#endif
+
+ rc = platform_driver_register(&sifive_ccache_driver);
+ if (rc)
+ goto err_unmap;
+
+ of_node_put(np);
+
return 0;
-err_free_irq:
- while (--i >= 0)
- free_irq(g_irq[i], NULL);
err_unmap:
iounmap(ccache_base);
err_node_put:
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index af5cb818f84d..cb8c155a2c9b 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
{
struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
u32 uid;
- int rc;
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
return -EINVAL;
- rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
- if (rc < 0)
- return rc;
-
- /* Adjust back to picoseconds from nanoseconds */
- for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- dport->hb_coord[i].read_latency *= 1000;
- dport->hb_coord[i].write_latency *= 1000;
- }
-
- return 0;
+ return acpi_get_genport_coordinates(uid, dport->coord);
}
static int add_host_bridge_dport(struct device *match, void *arg)
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index eddbbe21450c..bb83867d9fec 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -14,12 +14,42 @@
struct dsmas_entry {
struct range dpa_range;
u8 handle;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int entries;
int qos_class;
};
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+ u32 value;
+
+ /*
+ * Check for invalid and overflow values
+ */
+ if (entry == 0xffff || !entry)
+ return 0;
+ else if (base > (UINT_MAX / (entry)))
+ return 0;
+
+ /*
+ * CDAT fields follow the format of HMAT fields. See table 5 Device
+ * Scoped Latency and Bandwidth Information Structure in Coherent Device
+ * Attribute Table (CDAT) Specification v1.01.
+ */
+ value = entry * base;
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ value = DIV_ROUND_UP(value, 1000);
+ break;
+ default:
+ break;
+ }
+ return value;
+}
+
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
return 0;
}
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
- int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
{
switch (access) {
case ACPI_HMAT_ACCESS_LATENCY:
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
}
}
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
__le16 le_val;
u64 val;
u16 len;
- int rc;
len = le16_to_cpu((__force __le16)hdr->length);
if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)dslbis->entry_base_unit;
le_val = (__force __le16)dslbis->entry[0];
- rc = check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val);
- if (rc)
- pr_warn("DSLBIS value overflowed.\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ dslbis->data_type);
- cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+ cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
return 0;
}
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
static int cxl_port_perf_data_calculate(struct cxl_port *port,
struct xarray *dsmas_xa)
{
- struct access_coordinate ep_c;
- struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
struct dsmas_entry *dent;
int valid_entries = 0;
unsigned long index;
int rc;
- rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+ rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
if (rc) {
dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
return rc;
}
- rc = cxl_hb_get_perf_coordinates(port, coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return rc;
- }
-
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
xa_for_each(dsmas_xa, index, dent) {
int qos_class;
- cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
- /*
- * Keeping the host bridge coordinates separate from the dsmas
- * coordinates in order to allow calculation of access class
- * 0 and 1 for region later.
- */
- cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
- &coord[ACCESS_COORDINATE_CPU],
- &dent->coord);
+ cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
dent->entries = 1;
rc = cxl_root->ops->qos_class(cxl_root,
- &coord[ACCESS_COORDINATE_CPU],
+ &dent->coord[ACCESS_COORDINATE_CPU],
1, &qos_class);
if (rc != 1)
continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ dpa_perf->coord[i] = dent->coord[i];
dpa_perf->dpa_range = dent->dpa_range;
- dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
- dent->coord.read_bandwidth, dent->coord.write_bandwidth,
- dent->coord.read_latency, dent->coord.write_latency);
+ dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+ dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
- if (check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val))
- dev_warn(dev, "SSLBIS value overflowed!\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ sslbis->data_type);
xa_for_each(&port->dports, index, dport) {
if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
- dsp_id == dport->port_id)
- cxl_access_coordinate_set(&dport->sw_coord,
+ dsp_id == dport->port_id) {
+ cxl_access_coordinate_set(dport->coord,
sslbis->data_type,
val);
+ }
}
}
@@ -493,6 +514,21 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+ struct access_coordinate *c1,
+ struct access_coordinate *c2)
+{
+ if (c1->write_bandwidth && c2->write_bandwidth)
+ out->write_bandwidth = min(c1->write_bandwidth,
+ c2->write_bandwidth);
+ out->write_latency = c1->write_latency + c2->write_latency;
+
+ if (c1->read_bandwidth && c2->read_bandwidth)
+ out->read_bandwidth = min(c1->read_bandwidth,
+ c2->read_bandwidth);
+ out->read_latency = c1->read_latency + c2->read_latency;
+}
+
/**
* cxl_coordinates_combine - Combine the two input coordinates
*
@@ -504,15 +540,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
struct access_coordinate *c1,
struct access_coordinate *c2)
{
- if (c1->write_bandwidth && c2->write_bandwidth)
- out->write_bandwidth = min(c1->write_bandwidth,
- c2->write_bandwidth);
- out->write_latency = c1->write_latency + c2->write_latency;
-
- if (c1->read_bandwidth && c2->read_bandwidth)
- out->read_bandwidth = min(c1->read_bandwidth,
- c2->read_bandwidth);
- out->read_latency = c1->read_latency + c2->read_latency;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}
MODULE_IMPORT_NS(CXL);
@@ -521,17 +550,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlmd->endpoint;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
- struct access_coordinate coord;
struct range dpa = {
.start = cxled->dpa_res->start,
.end = cxled->dpa_res->end,
};
struct cxl_dpa_perf *perf;
- int rc;
switch (cxlr->mode) {
case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
if (!range_contains(&perf->dpa_range, &dpa))
return;
- rc = cxl_hb_get_perf_coordinates(port, hb_coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return;
- }
-
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- /* Pickup the host bridge coords */
- cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
/* Get total bandwidth and the worst latency for the cxl region */
cxlr->coord[i].read_latency = max_t(unsigned int,
cxlr->coord[i].read_latency,
- coord.read_latency);
+ perf->coord[i].read_latency);
cxlr->coord[i].write_latency = max_t(unsigned int,
cxlr->coord[i].write_latency,
- coord.write_latency);
- cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
- cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
- /*
- * Convert latency to nanosec from picosec to be consistent
- * with the resulting latency coordinates computed by the
- * HMAT_REPORTING code.
- */
- cxlr->coord[i].read_latency =
- DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
- cxlr->coord[i].write_latency =
- DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+ perf->coord[i].write_latency);
+ cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+ cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
}
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 9adda4795eb7..f0f54aeccc87 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
- le16_to_cpu(payload->handles[i]));
+ le16_to_cpu(payload->handles[i - 1]));
if (i == max_handles) {
payload->nr_recs = i;
@@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
- .size_out = mds->payload_size,
.min_out = struct_size(payload, records, 0),
};
do {
int rc, i;
+ mbox_cmd.size_out = mds->payload_size;
+
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(dev,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2b0cab556072..762783bb091a 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- * and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
{
- struct cxl_port *iter = port;
- struct cxl_dport *dport;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_latency += latency;
+ c[i].read_latency += latency;
+ }
+}
- if (!is_cxl_endpoint(port))
- return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ if (c[i].read_bandwidth && c[i].write_bandwidth &&
+ c[i].read_latency && c[i].write_latency)
+ continue;
+ return false;
+ }
- dport = iter->parent_dport;
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- iter = to_cxl_port(iter->dev.parent);
- dport = iter->parent_dport;
+ return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+ c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
}
+}
- coord[ACCESS_COORDINATE_LOCAL] =
- dport->hb_coord[ACCESS_COORDINATE_LOCAL];
- coord[ACCESS_COORDINATE_CPU] =
- dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+ struct access_coordinate *in)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ out[i] = in[i];
+}
- return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+ return is_cxl_root(to_cxl_port(port->dev.parent));
}
/**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord)
{
- struct access_coordinate c = {
- .read_bandwidth = UINT_MAX,
- .write_bandwidth = UINT_MAX,
+ struct access_coordinate c[] = {
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
};
struct cxl_port *iter = port;
struct cxl_dport *dport;
struct pci_dev *pdev;
unsigned int bw;
+ bool is_cxl_root;
if (!is_cxl_endpoint(port))
return -EINVAL;
- dport = iter->parent_dport;
-
/*
- * Exit the loop when the parent port of the current port is cxl root.
- * The iterative loop starts at the endpoint and gathers the
- * latency of the CXL link from the current iter to the next downstream
- * port each iteration. If the parent is cxl root then there is
- * nothing to gather.
+ * Exit the loop when the parent port of the current iter port is cxl
+ * root. The iterative loop starts at the endpoint and gathers the
+ * latency of the CXL link from the current device/port to the connected
+ * downstream port each iteration.
*/
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- cxl_coordinates_combine(&c, &c, &dport->sw_coord);
- c.write_latency += dport->link_latency;
- c.read_latency += dport->link_latency;
-
- iter = to_cxl_port(iter->dev.parent);
+ do {
dport = iter->parent_dport;
- }
+ iter = to_cxl_port(iter->dev.parent);
+ is_cxl_root = parent_port_is_cxl_root(iter);
+
+ /*
+ * There's no valid access_coordinate for a root port since RPs do not
+ * have CDAT and therefore needs to be skipped.
+ */
+ if (!is_cxl_root) {
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
+ }
+ add_latency(c, dport->link_latency);
+ } while (!is_cxl_root);
+
+ dport = iter->parent_dport;
+ /* Retrieve HB coords */
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
/* Get the calculated PCI paths bandwidth */
pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
return -ENXIO;
bw /= BITS_PER_BYTE;
- c.write_bandwidth = min(c.write_bandwidth, bw);
- c.read_bandwidth = min(c.read_bandwidth, bw);
-
- *coord = c;
+ set_min_bandwidth(c, bw);
+ set_access_coordinates(coord, c);
return 0;
}
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 372786f80955..3c42f984eeaf 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
+ u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
- &pdev->resource[bar], &offset, map->reg_type);
+ &pdev->resource[bar], &offset, reg_type);
return false;
}
- map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->reg_type = reg_type;
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 534e25e2f0a4..036d17db68e0 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -663,8 +663,7 @@ struct cxl_rcrb_info {
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
* @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
* @link_latency: calculated PCIe downstream latency
*/
struct cxl_dport {
@@ -675,8 +674,7 @@ struct cxl_dport {
bool rch;
struct cxl_port *port;
struct cxl_regs regs;
- struct access_coordinate sw_coord;
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
long link_latency;
};
@@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord);
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 20fb3b35e89e..36cee9c30ceb 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -401,7 +401,7 @@ enum cxl_devtype {
*/
struct cxl_dpa_perf {
struct range dpa_range;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int qos_class;
};
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index f2556a8e9401..9bc2e10381af 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -790,7 +790,7 @@ static void ffa_notification_info_get(void)
part_id = packed_id_list[ids_processed++];
- if (!ids_count[list]) { /* Global Notification */
+ if (ids_count[list] == 1) { /* Global Notification */
__do_sched_recv_cb(part_id, 0, false);
continue;
}
diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
index ea9201e7044c..1fa79bba492e 100644
--- a/drivers/firmware/arm_scmi/powercap.c
+++ b/drivers/firmware/arm_scmi/powercap.c
@@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_GET, 4, domain,
&fc[POWERCAP_FC_PAI].get_addr, NULL,
- &fc[POWERCAP_PAI_GET].rate_limit);
+ &fc[POWERCAP_FC_PAI].rate_limit);
*p_fc = fc;
}
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 350573518503..130d13e9cd6b 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
rd->raw = raw;
filp->private_data = rd;
- return 0;
+ return nonseekable_open(inode, filp);
}
static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..b3b84647207e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aa16d51dd842..7753a2e64d41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ip_blocks[i].status.hw = true;
}
}
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index fdd36fb027ab..ac5bf01fe8d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 80b9642f2bc4..e4277298cf1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
@@ -668,6 +669,15 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
* (0 = disabled (default), 1 = enabled)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4b3000c21ef2..e4742b65032d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
job->job_run_counter++;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index a98e03e0a51f..a00cf4756ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
@@ -1549,12 +1552,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
- mem, PAGE_SIZE, false);
+ mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
return 0;
}
-
DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
#endif
@@ -1565,7 +1567,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
- if (adev->enable_mes)
+ if (adev->enable_mes && amdgpu_mes_log_enable)
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
adev, &amdgpu_debugfs_mes_event_log_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7d4f93fea937..4c8fc3117ef8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level {
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
struct amdgpu_mes_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index d6f808acfb17..fbb43ae7624f 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+ return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
uint32_t inst_idx, struct amdgpu_ring *ring)
{
@@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
- if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ if (aqua_vanjaram_xcp_vcn_shared(adev))
inst_mask = 1 << (inst_idx * 2);
break;
default:
@@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update(
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
- /* VCN is shared by two partitions under CPX MODE */
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
- adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ aqua_vanjaram_xcp_vcn_shared(adev))
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 1770e496c1b7..f7325b02a191 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* Make sure that we can't skip the SET_Q_MODE packets when the VM
* changed in any way.
*/
+ ring->set_q_mode_offs = 0;
ring->set_q_mode_ptr = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ad..63f281a9984d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
- mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 34237a1b1f2e..82eab49be82b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
u32 sdma_cntl;
sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
- DRAM_ECC_INT_ENABLE, 0);
- WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
- break;
- /* sdma ecc interrupt is enabled by default
- * driver doesn't need to do anything to
- * enable the interrupt */
- case AMDGPU_IRQ_STATE_ENABLE:
- default:
- break;
- }
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 581a3bd11481..43ca63fe85ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
@@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
- adev->external_rev_id = adev->rev_id + 0x1;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
break;
case IP_VERSION(11, 5, 1):
adev->cg_flags =
@@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle)
return soc21_common_hw_fini(adev);
}
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+ !adev->suspend_complete) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
static int soc21_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
return soc21_common_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
index 84368cf1e175..bd57896ab85d 100644
--- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+ ring->wptr = 0;
+
data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f9631f4b1a02..55aa74cbc532 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 041ec3de55e7..719d6d365e15 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f4d395e38683..0b655555e167 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
while (halt_if_hws_hang)
schedule();
+ kfd_hws_hang(dqm);
return -ETIME;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 71d2d44681b2..6d2f60c61dec 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
@@ -3044,6 +3047,10 @@ static int dm_resume(void *handle)
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
@@ -4820,9 +4827,11 @@ static int dm_init_microcode(struct amdgpu_device *adev)
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
case IP_VERSION(3, 5, 0):
- case IP_VERSION(3, 5, 1):
fw_name_dmub = FIRMWARE_DCN_35_DMUB;
break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
default:
/* ASIC doesn't support DMUB. */
return 0;
@@ -5921,6 +5930,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
&aconnector->base.probed_modes :
&aconnector->base.modes;
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
+
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
@@ -6306,19 +6318,16 @@ create_stream_for_sink(struct drm_connector *connector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
@@ -8762,10 +8771,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
continue;
+notify:
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
-notify:
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index 16e72d623630..08c494a7a21b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
-
- return drm_add_modes_noedid(connector, dev->mode_config.max_width,
- dev->mode_config.max_height);
+ /* Maximum resolution supported by DWB */
+ return drm_add_modes_noedid(connector, 3840, 2160);
}
static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 12f3e8aa46d8..6ad4f4efec5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
return display_count;
}
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 101fe96287cb..d9c5692c86c2 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -73,6 +73,12 @@
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+#define regCLK5_0_CLK5_spll_field_8 0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
#define REG(reg_name) \
@@ -411,6 +417,17 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
{
}
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t ssc_enable;
+
+ REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
static void init_clk_states(struct clk_mgr *clk_mgr)
{
struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
@@ -428,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
init_clk_states(clk_mgr);
+
+ // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+ if (dcn35_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
}
static struct clk_bw_params dcn35_bw_params = {
.vram_type = Ddr4MemType,
@@ -517,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks;
static struct dcn35_watermarks dummy_wms = { 0 };
+static struct dcn35_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t clock_source;
+
+ REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+ // If it's DFS mode, clock_source is 0.
+ if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+ if (clk_mgr->dprefclk_ss_percentage != 0) {
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+ }
+ }
+}
+
static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
{
int i, num_valid_sets;
@@ -1061,6 +1109,8 @@ void dcn35_clk_mgr_construct(
dce_clock_read_ss_info(&clk_mgr->base);
/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+ dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
clk_mgr->base.base.bw_params = &dcn35_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 5cc7f8da209c..61986e5cb491 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -436,6 +436,15 @@ bool dc_state_add_plane(
goto out;
}
+ if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+ /* ODM combine could prevent us from supporting more planes
+ * we will reset ODM slice count back to 1 when all planes have
+ * been removed to maximize the amount of planes supported when
+ * new planes are added.
+ */
+ resource_update_pipes_for_stream_with_slice_count(
+ state, dc->current_state, dc->res_pool, stream, 1);
+
otg_master_pipe = resource_get_otg_master_for_stream(
&state->res_ctx, stream);
if (otg_master_pipe)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 970644b695cd..b5e0289d2fe8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk(
struct bp_pixel_clock_parameters bp_pc_params = {0};
enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+ // Apply ssed(spread spectrum) dpref clock for edp only.
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+ && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+ && encoding == DP_8b_10b_ENCODING)
dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
// For these signal types Driver to program DP_DTO without calling VBIOS Command table
if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
@@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz(
unsigned int modulo_hz = 0;
unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
- dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index f07a4c7e48bc..52eab8fccb7f 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
OTG_V_TOTAL_MAX_SEL, 1,
OTG_FORCE_LOCK_ON_EVENT, 0,
OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
}
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 246b211b1e85..65333141b1c1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -735,7 +735,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.state = SMU_BACO_STATE_NONE;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
return 0;
}
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if ((!adev->in_runpm) && (!adev->in_suspend) &&
+ (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev))
+ ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+
+ return ret;
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle)
adev->pm.dpm_enabled = false;
- return smu_smc_hw_cleanup(smu);
+ ret = smu_smc_hw_cleanup(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_reset_mp1_state(smu);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void smu_late_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index a870bdd49a4e..1fa81575788c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -424,6 +424,7 @@ enum smu_reset_mode {
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
+ SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 5bb7a63c0602..97522c085258 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -144,6 +144,37 @@ typedef struct {
uint32_t MaxGfxClk;
} DpmClocks_t;
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; //Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; //Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare;
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
typedef struct {
uint16_t CoreFrequency[16]; //Target core frequency [MHz]
uint16_t CorePower[16]; //CAC calculated core power [mW]
@@ -224,7 +255,7 @@ typedef enum {
#define TABLE_CUSTOM_DPM 2 // Called by Driver
#define TABLE_BIOS_GPIO_CONFIG 3 // Called by BIOS
#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
-#define TABLE_SPARE0 5 // Unused
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
#define TABLE_SMU_METRICS 7 // Called by Driver and SMF/PMF
#define TABLE_COUNT 8
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
index 356e0f57a426..ddb625860083 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
@@ -42,7 +42,7 @@
#define FEATURE_EDC_BIT 7
#define FEATURE_PLL_POWER_DOWN_BIT 8
#define FEATURE_VDDOFF_BIT 9
-#define FEATURE_VCN_DPM_BIT 10
+#define FEATURE_VCN_DPM_BIT 10 /* this is for both VCN0 and VCN1 */
#define FEATURE_DS_MPM_BIT 11
#define FEATURE_FCLK_DPM_BIT 12
#define FEATURE_SOCCLK_DPM_BIT 13
@@ -56,9 +56,9 @@
#define FEATURE_DS_GFXCLK_BIT 21
#define FEATURE_DS_SOCCLK_BIT 22
#define FEATURE_DS_LCLK_BIT 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT 24
#define FEATURE_DS_SHUBCLK_BIT 25
-#define FEATURE_SPARE0_BIT 26 //SPARE
+#define FEATURE_RESERVED0_BIT 26
#define FEATURE_ZSTATES_BIT 27
#define FEATURE_IOMMUL2_PG_BIT 28
#define FEATURE_DS_FCLK_BIT 29
@@ -66,8 +66,8 @@
#define FEATURE_DS_MP1CLK_BIT 31
#define FEATURE_WHISPER_MODE_BIT 32
#define FEATURE_SMU_LOW_POWER_BIT 33
-#define FEATURE_SMART_L3_RINSER_BIT 34
-#define FEATURE_SPARE1_BIT 35 //SPARE
+#define FEATURE_RESERVED1_BIT 34 /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT 35 /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
#define FEATURE_PSI_BIT 36
#define FEATURE_PROCHOT_BIT 37
#define FEATURE_CPUOFF_BIT 38
@@ -77,11 +77,11 @@
#define FEATURE_PERF_LIMIT_BIT 42
#define FEATURE_CORE_DLDO_BIT 43
#define FEATURE_DVO_BIT 44
-#define FEATURE_DS_VCN_BIT 45
+#define FEATURE_DS_VCN_BIT 45 /* v14_0_1 this is for both VCN0 and VCN1 */
#define FEATURE_CPPC_BIT 46
#define FEATURE_CPPC_PREFERRED_CORES 47
#define FEATURE_DF_CSTATES_BIT 48
-#define FEATURE_SPARE2_BIT 49 //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT 49 /* v14_0_0 SPARE */
#define FEATURE_ATHUB_PG_BIT 50
#define FEATURE_VDDOFF_ECO_BIT 51
#define FEATURE_ZSTATES_ECO_BIT 52
@@ -93,8 +93,8 @@
#define FEATURE_DS_IPUCLK_BIT 58
#define FEATURE_DS_VPECLK_BIT 59
#define FEATURE_VPE_DPM_BIT 60
-#define FEATURE_SPARE_61 61
-#define FEATURE_FP_DIDT 62
+#define FEATURE_SMART_L3_RINSER_BIT 61 /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT 62 /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
#define NUM_FEATURES 63
// Firmware Header/Footer
@@ -151,6 +151,43 @@ typedef struct {
// MP1_EXT_SCRATCH7 = RTOS Current Job
} FwStatus_t;
+typedef struct {
+ // MP1_EXT_SCRATCH0
+ uint32_t DpmHandlerID : 8;
+ uint32_t ActivityMonitorID : 8;
+ uint32_t DpmTimerID : 8;
+ uint32_t DpmHubID : 4;
+ uint32_t DpmHubTask : 4;
+ // MP1_EXT_SCRATCH1
+ uint32_t CclkSyncStatus : 8;
+ uint32_t ZstateStatus : 4;
+ uint32_t Cpu1VddOff : 4;
+ uint32_t DstateFun : 4;
+ uint32_t DstateDev : 4;
+ uint32_t GfxOffStatus : 2;
+ uint32_t Cpu0Off : 2;
+ uint32_t Cpu1Off : 2;
+ uint32_t Cpu0VddOff : 2;
+ // MP1_EXT_SCRATCH2
+ uint32_t P2JobHandler :32;
+ // MP1_EXT_SCRATCH3
+ uint32_t PostCode :32;
+ // MP1_EXT_SCRATCH4
+ uint32_t MsgPortBusy :15;
+ uint32_t RsmuPmiP1Pending : 1;
+ uint32_t RsmuPmiP2PendingCnt : 8;
+ uint32_t DfCstateExitPending : 1;
+ uint32_t Pc6EntryPending : 1;
+ uint32_t Pc6ExitPending : 1;
+ uint32_t WarmResetPending : 1;
+ uint32_t Mp0ClkPending : 1;
+ uint32_t InWhisperMode : 1;
+ uint32_t spare2 : 2;
+ // MP1_EXT_SCRATCH5
+ uint32_t IdleMask :32;
+ // MP1_EXT_SCRATCH6 = RTOS threads' status
+ // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
#pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index ca7ce4251482..c4dc5881d8df 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -72,23 +72,19 @@
#define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK
#define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK
#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
-
#define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17 0x17
-#define PPSMC_MSG_spare_0x18 0x18
+#define PPSMC_MSG_spare_0x17 0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18 0x18 ///< Get FCLK frequency
#define PPSMC_MSG_AllowGfxOff 0x19 ///< Inform PMFW of allowing GFXOFF entry
#define PPSMC_MSG_DisallowGfxOff 0x1A ///< Inform PMFW of disallowing GFXOFF entry
#define PPSMC_MSG_SetSoftMaxGfxClk 0x1B ///< Set soft max for GFX CLK
#define PPSMC_MSG_SetHardMinGfxClk 0x1C ///< Set hard min for GFX CLK
-
#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK
#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK
#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
-#define PPSMC_MSG_spare_0x20 0x20
+#define PPSMC_MSG_spare_0x20 0x20 ///< Set power limit percentage
#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0
#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
-
#define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK
#define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK
#define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
@@ -99,8 +95,8 @@
#define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
#define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK
#define PPSMC_MSG_SetHardMinIspxclkByFreq 0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
#define PPSMC_Message_IspStutterOn_MmhubPgDis 0x2F ///< ISP StutterOn mmHub PgDis
#define PPSMC_Message_IspStutterOff_MmhubPgEn 0x30 ///< ISP StufferOff mmHub PgEn
#define PPSMC_MSG_PowerUpVpe 0x31 ///< Power up VPE
@@ -110,7 +106,9 @@
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_Message_Count 0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
/**
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index 3f7463c1c1a9..4af1985ae446 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -27,6 +27,7 @@
#define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
#define FEATURE_MASK(feature) (1ULL << feature)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9c03296f92cd..67117ced7c6a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index bb98156b2fa1..949131bd1ecb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index 9e39f99154f9..07a65e005785 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
case IP_VERSION(14, 0, 1):
- smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+ smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
break;
default:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index d6de6d97286c..63399c00cc28 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -161,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
- SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+ SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -171,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
- smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+ smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
if (!smu_table->clocks_table)
goto err1_out;
@@ -593,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu)
return ret;
}
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ if (!clk_table || clk_type >= SMU_CLK_COUNT)
+ return -EINVAL;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ if (dpm_level >= clk_table->NumSocClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->SocClocks[dpm_level];
+ break;
+ case SMU_VCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks0[dpm_level];
+ break;
+ case SMU_DCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks0[dpm_level];
+ break;
+ case SMU_VCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks1[dpm_level];
+ break;
+ case SMU_DCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks1[dpm_level];
+ break;
+ case SMU_UCLK:
+ case SMU_MCLK:
+ if (dpm_level >= clk_table->NumMemPstatesEnabled)
+ return -EINVAL;
+ *freq = clk_table->MemPstateTable[dpm_level].MemClk;
+ break;
+ case SMU_FCLK:
+ if (dpm_level >= clk_table->NumFclkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->FclkClocks_Freq[dpm_level];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t dpm_level,
@@ -637,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+
+ return 0;
+}
+
static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
enum smu_clk_type clk_type)
{
@@ -657,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
feature_id = SMU_FEATURE_VCN_DPM_BIT;
break;
default:
@@ -666,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
return smu_cmn_feature_is_enabled(smu, feature_id);
}
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint32_t clock_limit;
+ uint32_t max_dpm_level, min_dpm_level;
+ int ret = 0;
+
+ if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ clock_limit = smu->smu_table.boot_values.uclk;
+ break;
+ case SMU_FCLK:
+ clock_limit = smu->smu_table.boot_values.fclk;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ clock_limit = smu->smu_table.boot_values.gfxclk;
+ break;
+ case SMU_SOCCLK:
+ clock_limit = smu->smu_table.boot_values.socclk;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ clock_limit = smu->smu_table.boot_values.vclk;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ clock_limit = smu->smu_table.boot_values.dclk;
+ break;
+ default:
+ clock_limit = 0;
+ break;
+ }
+
+ /* clock in Mhz unit */
+ if (min)
+ *min = clock_limit / 100;
+ if (max)
+ *max = clock_limit / 100;
+
+ return 0;
+ }
+
+ if (max) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *max = clk_table->MaxGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ case SMU_FCLK:
+ max_dpm_level = 0;
+ break;
+ case SMU_SOCCLK:
+ max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ if (ret)
+ goto failed;
+ }
+ }
+
+ if (min) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *min = clk_table->MinGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ min_dpm_level = clk_table->NumMemPstatesEnabled - 1;
+ break;
+ case SMU_FCLK:
+ min_dpm_level = clk_table->NumFclkLevelsEnabled - 1;
+ break;
+ case SMU_SOCCLK:
+ min_dpm_level = 0;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ min_dpm_level = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ if (ret)
+ goto failed;
+ }
+ }
+
+failed:
+ return ret;
+}
+
static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *min,
@@ -736,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
if (ret)
goto failed;
}
@@ -768,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
if (ret)
goto failed;
}
@@ -778,6 +967,19 @@ failed:
return ret;
}
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+ return 0;
+}
+
static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *value)
@@ -811,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
}
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ *count = clk_table->NumSocClkLevelsEnabled;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ *count = clk_table->Vcn0ClkLevelsEnabled;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ *count = clk_table->Vcn1ClkLevelsEnabled;
+ break;
+ case SMU_MCLK:
+ *count = clk_table->NumMemPstatesEnabled;
+ break;
+ case SMU_FCLK:
+ *count = clk_table->NumFclkLevelsEnabled;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *count)
@@ -840,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+
+ return 0;
+}
+
static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
@@ -866,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
case SMU_SOCCLK:
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
case SMU_MCLK:
case SMU_FCLK:
ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+ ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
if (ret)
break;
for (i = 0; i < count; i++) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
if (ret)
break;
@@ -940,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
- msg_set_min = SMU_MSG_SetHardMinVcn;
- msg_set_max = SMU_MSG_SetSoftMaxVcn;
+ msg_set_min = SMU_MSG_SetHardMinVcn0;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ msg_set_min = SMU_MSG_SetHardMinVcn1;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn1;
break;
default:
return -EINVAL;
@@ -971,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu,
case SMU_FCLK:
case SMU_VCLK:
case SMU_DCLK:
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
if (ret)
break;
@@ -1000,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
break;
case AMD_DPM_FORCED_LEVEL_LOW:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
sclk_max = sclk_min;
fclk_max = fclk_min;
socclk_max = socclk_min;
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -1067,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
return ret;
}
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
+ smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
+ smu->gfx_actual_hard_min_freq = 0;
+ smu->gfx_actual_soft_max_freq = 0;
+
+ return 0;
+}
+
static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1079,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
return 0;
}
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+
+ return 0;
+}
+
static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
bool enable)
{
@@ -1095,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
0, NULL);
}
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint8_t idx;
+
+ /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+ for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+ clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+ clock_table->SocClocks[idx].Vol = 0;
+ }
+
+ for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+ clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+ clock_table->VPEClocks[idx].Vol = 0;
+ }
+
+ return 0;
+}
+
static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1114,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
return 0;
}
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_14_0_0_get_dpm_table(smu, clock_table);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_14_0_1_get_dpm_table(smu, clock_table);
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
@@ -1135,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.set_driver_table_location = smu_v14_0_set_driver_table_location,
.gfx_off_control = smu_v14_0_gfx_off_control,
.mode2_reset = smu_v14_0_0_mode2_reset,
- .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
.print_clk_levels = smu_v14_0_0_print_clk_levels,
.force_clk_levels = smu_v14_0_0_force_clk_levels,
.set_performance_level = smu_v14_0_0_set_performance_level,
- .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+ .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
- .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+ .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index ebb6d8ebd44e..1e9259416980 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
{
struct ast_device *ast = to_ast_device(dev);
u8 video_on_off = on;
+ u32 i = 0;
// Video On/Off
ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
// wait 1 ms
mdelay(1);
+ if (++i > 200)
+ break;
}
}
}
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 871e4e2129d6..0683a129b362 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
unsigned int total_modes_count = 0;
struct drm_client_offset *offsets;
unsigned int connector_count = 0;
+ /* points to modes protected by mode_config.mutex */
struct drm_display_mode **modes;
struct drm_crtc **crtcs;
int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
drm_client_pick_crtcs(client, connectors, connector_count,
crtcs, modes, 0, width, height);
}
- mutex_unlock(&dev->mode_config.mutex);
drm_client_modeset_release(client);
@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
modeset->y = offset->y;
}
}
+ mutex_unlock(&dev->mode_config.mutex);
mutex_unlock(&client->modeset_mutex);
out:
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index ed89b86ea625..f672bfd70d45 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ struct intel_cdclk_config cdclk_config;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_pre_notify(state);
- if (pipe == INVALID_PIPE ||
- old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (new_cdclk_state->disable_pipes) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ } else {
+ if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = new_cdclk_state->pipe;
+ } else {
+ cdclk_config = old_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ }
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+ cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+ old_cdclk_state->actual.voltage_level);
}
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+ intel_set_cdclk(i915, &cdclk_config, pipe);
}
/**
@@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_post_notify(state);
- if (pipe != INVALID_PIPE &&
- old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (!new_cdclk_state->disable_pipes &&
+ new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+ pipe = new_cdclk_state->pipe;
+ else
+ pipe = INVALID_PIPE;
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
- }
+ intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
}
static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
@@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
return NULL;
cdclk_state->pipe = INVALID_PIPE;
+ cdclk_state->disable_pipes = false;
return &cdclk_state->base;
}
@@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
if (ret)
return ret;
+ new_cdclk_state->disable_pipes = true;
+
drm_dbg_kms(&dev_priv->drm,
"Modeset required for cdclk change\n");
}
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 48fd7d39e0cd..71bc032bfef1 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -51,6 +51,9 @@ struct intel_cdclk_state {
/* bitmask of active pipes */
u8 active_pipes;
+
+ /* update cdclk with pipes disabled */
+ bool disable_pipes;
};
int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c587a8efeafc..c17462b4c2ac 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
const struct intel_crtc_state *crtc_state2)
{
+ /*
+ * FIXME the modeset sequence is currently wrong and
+ * can't deal with bigjoiner + port sync at the same time.
+ */
return crtc_state1->hw.active && crtc_state2->hw.active &&
+ !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
crtc_state1->output_types == crtc_state2->output_types &&
crtc_state1->output_format == crtc_state2->output_format &&
crtc_state1->lane_count == crtc_state2->lane_count &&
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index abd62bebc46d..e583515f9b25 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2725,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
int pixel_clock;
- if (has_seamless_m_n(connector))
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that when updating M/N live.
+ */
+ if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
pipe_config->update_m_n = true;
if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index b98a87883fef..9db43bd81ce2 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector,
u8 bcaps;
int ret;
+ *hdcp_capable = false;
+ *hdcp2_capable = false;
if (!intel_encoder_is_mst(connector->encoder))
return -EINVAL;
ret = _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
if (ret)
- return ret;
+ drm_dbg_kms(&i915->drm,
+ "HDCP2 DPCD capability read failed err: %d\n", ret);
ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
if (ret)
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index b6e539f1342c..aabd018bd737 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
return;
}
+ /*
+ * FIXME figure out what is wrong with PSR+bigjoiner and
+ * fix it. Presumably something related to the fact that
+ * PSR is a transcoder level feature.
+ */
+ if (crtc_state->bigjoiner_pipes) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR disabled due to bigjoiner\n");
+ return;
+ }
+
if (CAN_PANEL_REPLAY(intel_dp))
crtc_state->has_panel_replay = true;
else
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index eb5bd0743902..f542ee1db1d9 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
const struct drm_display_info *info = &connector->base.display_info;
int vmin, vmax;
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that during VRR toggle/push/etc.
+ */
+ if (crtc_state->bigjoiner_pipes)
+ return;
+
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f3dcae4b9d45..0f83c6d4376f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
* Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
* every possible call stack is unfeasible. It would be too intrusive to many
* areas that really don't care about the GuC backend. However, there is the
- * 'reset_in_progress' flag available, so just use that.
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
*
* And note that in the case of a reset occurring during driver unload
- * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
- * is fine because there is another cancel in _finish (when the reset flag is
- * not).
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
*/
- if (guc_to_gt(guc)->uc.reset_in_progress)
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
cancel_delayed_work(&guc->timestamp.work);
else
cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
@@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
- /*
- * Ensure the busyness worker gets cancelled even on a fatal wedge.
- * Note that reset_prepare is not allowed to because it confuses lockdep.
- */
- if (guc_submission_initialized(guc))
- guc_cancel_busyness_worker(guc);
-
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
@@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 6dfe5d9456c6..399bc319180b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 0674aca0f8a3..cf0b1de1c071 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
if (adreno_is_a618(gpu))
gpu->ubwc_config.highest_bank_bit = 14;
+ if (adreno_is_a619(gpu))
+ /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+ gpu->ubwc_config.highest_bank_bit = 13;
+
if (adreno_is_a619_holi(gpu))
gpu->ubwc_config.highest_bank_bit = 13;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 1f5245fc2cdc..a847a0f7a73c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu,
(block->type << 8) | i);
in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
- block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+ block->size, out);
out += block->size * sizeof(u32);
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
index 9a9f7092c526..a3e60ac70689 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
@@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = {
},
};
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
static const struct dpu_intf_cfg x1e80100_intf[] = {
{
.name = "intf_0", .id = INTF_0,
@@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.name = "intf_3", .id = INTF_3,
.base = 0x37000, .len = 0x280,
.features = INTF_SC7280_MASK,
- .type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_1,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
@@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.base = 0x38000, .len = 0x280,
.features = INTF_SC7280_MASK,
.type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_2,
+ .controller_id = MSM_DP_CONTROLLER_1,
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
@@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+ }, {
+ .name = "intf_6", .id = INTF_6,
+ .base = 0x3A000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_DP,
+ .controller_id = MSM_DP_CONTROLLER_2,
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+ }, {
+ .name = "intf_7", .id = INTF_7,
+ .base = 0x3b000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_2, /* pair with intf_6 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+ }, {
+ .name = "intf_8", .id = INTF_8,
+ .base = 0x3c000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_1, /* pair with intf_4 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
},
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index ef871239adb2..68fae048a9a8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
&perf->core_clk_rate);
debugfs_create_u32("enable_bw_release", 0600, entry,
(u32 *)&perf->enable_bw_release);
- debugfs_create_u32("threshold_low", 0600, entry,
+ debugfs_create_u32("threshold_low", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_low);
- debugfs_create_u32("threshold_high", 0600, entry,
+ debugfs_create_u32("threshold_high", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_high);
- debugfs_create_u32("min_core_ib", 0600, entry,
+ debugfs_create_u32("min_core_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_core_ib);
- debugfs_create_u32("min_llcc_ib", 0600, entry,
+ debugfs_create_u32("min_llcc_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_llcc_ib);
- debugfs_create_u32("min_dram_ib", 0600, entry,
+ debugfs_create_u32("min_dram_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_dram_ib);
debugfs_create_file("perf_mode", 0600, entry,
(u32 *)perf, &dpu_core_perf_mode_fops);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 946dd0135dff..6a0a74832fb6 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
int ret;
if (!irq_cb) {
- DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+ DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
return -EINVAL;
}
if (!dpu_core_irq_is_valid(irq_idx)) {
- DPU_ERROR("invalid IRQ=[%d, %d]\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+ DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index c4cb82af5c2f..ffbfde922589 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp)
}
}
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
{
int rc = 0;
@@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev)
drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
dp->hpd_state, sink_request);
if (sink_request & DS_PORT_STATUS_CHANGED)
- rc = dp_display_handle_port_ststus_changed(dp);
+ rc = dp_display_handle_port_status_changed(dp);
else
rc = dp_display_handle_irq_hpd(dp);
}
@@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
ret = dp_display_usbpd_configure_cb(&pdev->dev);
if (ret) { /* link train failed */
dp->hpd_state = ST_DISCONNECTED;
+ pm_runtime_put_sync(&pdev->dev);
} else {
dp->hpd_state = ST_MAINLINK_READY;
}
@@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
dp_display_host_phy_exit(dp);
dp->hpd_state = ST_DISCONNECTED;
dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
mutex_unlock(&dp->event_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index e3f61c39df69..80166f702a0d 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
for (i = 0; i < n; i++) {
ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
- drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+ drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
fb->base.id, i, msm_fb->iova[i], ret);
if (ret)
return ret;
@@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
const struct msm_format *format;
int ret, i, n;
- drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+ drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
mode_cmd, mode_cmd->width, mode_cmd->height,
(char *)&mode_cmd->pixel_format);
@@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
refcount_set(&msm_fb->dirtyfb, 1);
- drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+ drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
return fb;
diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index 84c21ec2ceea..af6a6fcb1173 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return -ENXIO;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
return vblank_ctrl_queue_work(priv, crtc, true);
}
@@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
vblank_ctrl_queue_work(priv, crtc, false);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
index 4bf486b57101..cb05f7f48a98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
return ERR_PTR(-EINVAL);
}
+static void of_fini(void *p)
+{
+ kfree(p);
+}
+
const struct nvbios_source
nvbios_of = {
.name = "OpenFirmware",
.init = of_init,
- .fini = (void(*)(void *))kfree,
+ .fini = of_fini,
.read = of_read,
.size = of_size,
.rw = false,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
index 7bcbc4895ec2..271bfa038f5b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
@@ -25,6 +25,7 @@
#include <subdev/bios.h>
#include <subdev/bios/init.h>
+#include <subdev/gsp.h>
void
gm107_devinit_disable(struct nvkm_devinit *init)
@@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
u32 r021c00 = nvkm_rd32(device, 0x021c00);
u32 r021c04 = nvkm_rd32(device, 0x021c04);
- if (r021c00 & 0x00000001)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
- if (r021c00 & 0x00000004)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ /* gsp only wants to enable/disable display */
+ if (!nvkm_gsp_rm(device->gsp)) {
+ if (r021c00 & 0x00000001)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+ if (r021c00 & 0x00000004)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ }
if (r021c04 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
index 11b4c9c274a1..666eb93b1742 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
@@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
rm->dtor = r535_devinit_dtor;
rm->post = hw->post;
+ rm->disable = hw->disable;
ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 9994cbd6f1c4..9858c1438aa7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
- strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+ strings = (char *)rpc + str_offset;
for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
int name_len = strlen(r535_registry_entries[i].name) + 1;
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index f38385fe76bb..b91019cd5acb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
mapping_set_unevictable(mapping);
for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+ /* Can happen if the last fault only partially filled this
+ * section of the pages array before failing. In that case
+ * we skip already filled pages.
+ */
+ if (pages[i])
+ continue;
+
pages[i] = shmem_read_mapping_page(mapping, i);
if (IS_ERR(pages[i])) {
ret = PTR_ERR(pages[i]);
pages[i] = NULL;
- goto err_pages;
+ goto err_unlock;
}
}
@@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
if (ret)
- goto err_pages;
+ goto err_unlock;
ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret)
@@ -537,8 +544,6 @@ out:
err_map:
sg_free_table(sgt);
-err_pages:
- drm_gem_shmem_put_pages(&bo->base);
err_unlock:
dma_resv_unlock(obj->resv);
err_bo:
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a..9febc8b73f09 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
signed long timeout)
{
struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
- if (!wait_event_timeout(qdev->release_event,
- (dma_fence_is_signaled(fence) ||
- (qxl_io_notify_oom(qdev), 0)),
- timeout))
- return 0;
+retry:
+ sc++;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+ }
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ DMA_FENCE_WARN(fence,
+ "failed to wait on release %llu after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */
+signaled:
cur = jiffies;
if (time_after(cur, end))
return 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c7d90f96d16a..0a304706e013 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
- /* TTM currently doesn't fully support SEV encryption. */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -EINVAL;
-
- if (vmw_force_coherent)
+ /*
+ * When running with SEV we always want dma mappings, because
+ * otherwise ttm tt pool pages will bounce through swiotlb running
+ * out of available space.
+ */
+ if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index e4db069f0db3..6ec375c1c4b6 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe)
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
drmm_mutex_init(&xe->drm, &xe->sb_lock);
- drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
- drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
- drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
xe->enabled_irq_mask = ~0;
err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 0b1266c88a6a..deddc8be48c0 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -125,7 +125,7 @@
#define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
-#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index b82233a41606..9ac7fbe201b3 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
* As y can be < 2, we compute tau4 = (4 | x) << y
* and then add 2 when doing the final right shift to account for units
*/
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
/* val in hwmon interface units (millisec) */
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
@@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
if (val > max_win)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 1426febe86eb..57066faf575e 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -525,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+ CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
/* TODO: Timestamp */
}
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index ee1bb938c493..2ba4fb9511f6 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (vm->flags & XE_VM_FLAG_64K && level == 1)
flags = XE_PDE_64K;
- entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+ entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
XE_PAGE_SIZE, pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
@@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Write PDE's that point to our BO. */
for (i = 0; i < num_entries - num_level; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
@@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
drm_suballoc_manager_init(&m->vm_update_sa,
- (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+ (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
NUM_VMUSA_UNIT_PER_PAGE, 0);
m->pt_bo = bo;
@@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m,
struct xe_vm *vm = m->q->vm;
u16 pat_index;
u32 ptes;
- u64 ofs = at_pt * XE_PAGE_SIZE;
+ u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
u64 cur_ofs;
/* Indirect access needs compression enabled uncached PAT index */
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 783975d1384f..7c52757a89db 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev,
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
@@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index adbf674355b2..fb8cd8469328 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel)
hv_ringbuffer_cleanup(&channel->inbound);
if (channel->ringbuffer_page) {
- __free_pages(channel->ringbuffer_page,
+ /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+ if (!channel->ringbuffer_gpadlhandle.decrypted)
+ __free_pages(channel->ringbuffer_page,
get_order(channel->ringbuffer_pagecount
<< PAGE_SHIFT));
channel->ringbuffer_page = NULL;
@@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
- if (ret)
+ if (ret) {
+ gpadl->decrypted = false;
return ret;
+ }
+ /*
+ * Set the "decrypted" flag to true for the set_memory_decrypted()
+ * success case. In the failure case, the encryption state of the
+ * memory is unknown. Leave "decrypted" as true to ensure the
+ * memory will be leaked instead of going back on the free list.
+ */
+ gpadl->decrypted = true;
ret = set_memory_decrypted((unsigned long)kbuffer,
PFN_UP(size));
if (ret) {
@@ -527,9 +538,15 @@ cleanup:
kfree(msginfo);
- if (ret)
- set_memory_encrypted((unsigned long)kbuffer,
- PFN_UP(size));
+ if (ret) {
+ /*
+ * If set_memory_encrypted() fails, the decrypted flag is
+ * left as true so the memory is leaked instead of being
+ * put back on the free list.
+ */
+ if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+ gpadl->decrypted = false;
+ }
return ret;
}
@@ -850,6 +867,8 @@ post_msg_err:
if (ret)
pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
+ gpadl->decrypted = ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 3cabeeabb1ca..f001ae880e1d 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -237,8 +237,17 @@ int vmbus_connect(void)
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
- if (ret)
+ if (ret) {
+ /*
+ * If set_memory_decrypted() fails, the encryption state
+ * of the memory is unknown. So leak the memory instead
+ * of risking returning decrypted memory to the free list.
+ * For simplicity, always handle both pages the same.
+ */
+ vmbus_connection.monitor_pages[0] = NULL;
+ vmbus_connection.monitor_pages[1] = NULL;
goto cleanup;
+ }
/*
* Set_memory_decrypted() will change the memory contents if
@@ -337,13 +346,19 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+ if (vmbus_connection.monitor_pages[0]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[0], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ vmbus_connection.monitor_pages[0] = NULL;
+ }
- hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
- hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
- vmbus_connection.monitor_pages[0] = NULL;
- vmbus_connection.monitor_pages[1] = NULL;
+ if (vmbus_connection.monitor_pages[1]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[1], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[1] = NULL;
+ }
}
/*
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4cb17603a828..12a707ab73f8 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);
@@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->state);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);
@@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);
@@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_type);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
@@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_instance);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+ return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
@@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+ return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
@@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
@@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
@@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);
@@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);
@@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);
@@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
@@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
@@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
@@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
@@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
@@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
@@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
@@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
@@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- int buf_size = PAGE_SIZE, n_written, tot_written;
+ int n_written;
struct list_head *cur;
if (!channel)
@@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
mutex_lock(&vmbus_connection.channel_mutex);
- tot_written = snprintf(buf, buf_size, "%u:%u\n",
- channel->offermsg.child_relid, channel->target_cpu);
+ n_written = sysfs_emit(buf, "%u:%u\n",
+ channel->offermsg.child_relid,
+ channel->target_cpu);
list_for_each(cur, &channel->sc_list) {
- if (tot_written >= buf_size - 1)
- break;
cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
- n_written = scnprintf(buf + tot_written,
- buf_size - tot_written,
- "%u:%u\n",
- cur_sc->offermsg.child_relid,
- cur_sc->target_cpu);
- tot_written += n_written;
+ n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+ cur_sc->offermsg.child_relid,
+ cur_sc->target_cpu);
}
mutex_unlock(&vmbus_connection.channel_mutex);
- return tot_written;
+ return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);
@@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);
@@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->device_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);
@@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev,
ssize_t len;
device_lock(dev);
- len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
device_unlock(dev);
return len;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 33228c1c8980..ac6754a85f35 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3232,28 +3232,29 @@ static void iommu_snp_enable(void)
return;
/*
* The SNP support requires that IOMMU must be enabled, and is
- * not configured in the passthrough mode.
+ * configured with V1 page table (DTE[Mode] = 0 is not supported).
*/
if (no_iommu || iommu_default_passthrough()) {
- pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
- cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
- return;
+ pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+ goto disable_snp;
}
amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en) {
- pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
- cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
- return;
+ pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+ goto disable_snp;
}
pr_info("IOMMU SNP support enabled.\n");
+ return;
- /* Enforce IOMMU v1 pagetable when SNP is enabled. */
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
- pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
- }
+disable_snp:
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d35c1b8c8e65..e692217fcb28 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
static u16 domain_id_alloc(void)
{
+ unsigned long flags;
int id;
- spin_lock(&pd_bitmap_lock);
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
return id;
}
static void domain_id_free(int id)
{
- spin_lock(&pd_bitmap_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
}
static void free_gcr3_tbl_level1(u64 *tbl)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 50eb9aed47cc..a7ecd90303dc 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
dev_iommu_priv_set(dev, info);
- ret = device_rbtree_insert(iommu, info);
- if (ret)
- goto free;
+ if (pdev && pci_ats_supported(pdev)) {
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;
+ }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
@@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev)
struct intel_iommu *iommu = info->iommu;
mutex_lock(&iommu->iopf_lock);
- device_rbtree_remove(info);
+ if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+ device_rbtree_remove(info);
mutex_unlock(&iommu->iopf_lock);
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index cf43e798eca4..44083d01852d 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
iommu_pmu_set_filter(domain, event->attr.config1,
IOMMU_PMU_FILTER_DOMAIN, idx,
event->attr.config1);
- iommu_pmu_set_filter(pasid, event->attr.config1,
+ iommu_pmu_set_filter(pasid, event->attr.config2,
IOMMU_PMU_FILTER_PASID, idx,
event->attr.config1);
iommu_pmu_set_filter(ats, event->attr.config2,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c1bed89b1026..ee3b469e2da1 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
struct page *pages;
int irq, ret;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index b8c47f18bc26..6a2707fe7a78 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
static struct platform_driver mtk_iommu_driver = {
.probe = mtk_iommu_probe,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a9fa2a54dc9b..d6e4002200bd 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = {
{ .compatible = "mediatek,mt2701-m4u", },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
static const struct component_master_ops mtk_iommu_v1_com_ops = {
.bind = mtk_iommu_v1_bind,
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fca888b36680..2a537cbfcb07 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
struct its_cmd_block *cmd,
struct its_cmd_desc *desc)
{
+ struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
unsigned long vpt_addr, vconf_addr;
u64 target;
bool alloc;
@@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
if (is_v4_1(its)) {
alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
its_encode_alloc(cmd, alloc);
+ /*
+ * Unmapping a VPE is self-synchronizing on GICv4.1,
+ * no need to issue a VSYNC.
+ */
+ vpe = NULL;
}
goto out;
@@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
out:
its_fixup_cmd(cmd);
- return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+ return vpe;
}
static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 2776ca5fc33f..b215b28cad7b 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
static int data_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0, opt = 0;
if (*debug & DEBUG_SOCKET)
printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
- level, optname, len);
+ level, optname, optlen);
lock_sock(sk);
switch (optname) {
case MISDN_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt)
_pms(sk)->cmask |= MISDN_TIME_STAMP;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index be8ac24f50b6..7b8a71ca66dd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- free_r1bio(r1_bio);
+ mempool_free(r1_bio, &conf->r1bio_pool);
allow_barrier(conf, bio->bi_iter.bi_sector);
if (bio->bi_opf & REQ_NOWAIT) {
diff --git a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
index 4c34344dc7dc..d7027d600208 100644
--- a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
+++ b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
@@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
@@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
index f47c98faf068..2073781ccadb 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
@@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file)
ctx->dev->vdec_pdata->init_vdec_params(ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mtk_vcodec_dbgfs_create(ctx);
mutex_unlock(&dev->dev_mutex);
@@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
mtk_vcodec_dbgfs_remove(dev, ctx->id);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
index 849b89dd205c..85b2c0d3d8bc 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
@@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx {
*
* @dec_mutex: decoder hardware lock
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @decode_workqueue: decode work queue
*
* @irqlock: protect data access by irq handler and work thread
@@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev {
/* decoder hardware mutex lock */
struct mutex dec_mutex[MTK_VDEC_HW_MAX];
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *decode_workqueue;
spinlock_t irqlock;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
index 06ed47df693b..21836dd6ef85 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
@@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
inst->vpu.codec_type = ctx->current_codec;
inst->vpu.capture_type = ctx->capture_fourcc;
- ctx->drv_handle = inst;
err = vpu_dec_init(&inst->vpu);
if (err) {
mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
@@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
inst, inst->vpu.codec_type);
+ ctx->drv_handle = inst;
return 0;
error_free_inst:
kfree(inst);
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
index 19407f9bc773..987b3d71b662 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
@@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
inst->cur_fb = fb;
- dec->bs_dma = (unsigned long)bs->dma_addr;
+ dec->bs_dma = (uint64_t)bs->dma_addr;
dec->bs_sz = bs->size;
dec->cur_y_fb_dma = y_fb_dma;
dec->cur_c_fb_dma = c_fb_dma;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
index 55355fa70090..039082f600c8 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
@@ -16,6 +16,7 @@
#include "../vdec_drv_base.h"
#include "../vdec_vpu_if.h"
+#define VP9_MAX_SUPER_FRAMES_NUM 8
#define VP9_SUPER_FRAME_BS_SZ 64
#define MAX_VP9_DPB_SIZE 9
@@ -133,11 +134,11 @@ struct vp9_sf_ref_fb {
*/
struct vdec_vp9_vsi {
unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
- struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+ struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
int sf_next_ref_fb_idx;
unsigned int sf_frm_cnt;
- unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
- unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+ unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
unsigned int sf_frm_idx;
unsigned int sf_init;
struct vdec_fb fb;
@@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
/* if this super frame and it is not last sub-frame, get next fb for
* sub-frame decode
*/
- if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+ if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
}
@@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
struct vdec_vp9_vsi *vsi) {
- if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+ if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
return -EIO;
}
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
index cf48d09b78d7..eea709d93820 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
@@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
unsigned int mi_row;
unsigned int mi_col;
unsigned int offset;
- unsigned int pa;
+ dma_addr_t pa;
unsigned int size;
struct vdec_vp9_slice_tiles *tiles;
unsigned char *pos;
@@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
pos = va + offset;
end = va + bs->size;
/* truncated */
- pa = (unsigned int)bs->dma_addr + offset;
+ pa = bs->dma_addr + offset;
tb = instance->tile.va;
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
index 82e57ae983d5..da6be556727b 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
@@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde
struct mtk_vcodec_dec_ctx *ctx;
int ret = false;
+ mutex_lock(&dec_dev->dev_ctx_lock);
list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&dec_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
index 6319f24bc714..3cb8a1622222 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
@@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file)
mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
ctx->id, ctx, ctx->m2m_ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mutex_unlock(&dev->dev_mutex);
mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
@@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_fh_exit(&ctx->fh);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
mutex_init(&dev->enc_mutex);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
index a042f607ed8d..0bd85d0fb379 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
@@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx {
*
* @enc_mutex: encoder hardware lock.
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @encode_workqueue: encode work queue
*
* @enc_irq: h264 encoder irq resource
@@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev {
/* encoder hardware mutex lock */
struct mutex enc_mutex;
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *encode_workqueue;
int enc_irq;
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
index 84ad1cc6ad17..51bb7ee141b9 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
@@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven
struct mtk_vcodec_enc_ctx *ctx;
int ret = false;
+ mutex_lock(&enc_dev->dev_ctx_lock);
list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&enc_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 088f8ed4fdc4..a8ee0df47148 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
host = slot->host;
- if (slot->vsd)
- gpiod_set_value(slot->vsd, power_on);
- if (slot->vio)
- gpiod_set_value(slot->vio, power_on);
+ if (power_on) {
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(1);
+ }
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(1);
+ }
+ } else {
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(50);
+ }
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(50);
+ }
+ }
if (slot->pdata->set_power != NULL)
slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
slot->pdata = &host->pdata->slots[id];
/* Check for some optional GPIO controls */
- slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
- id, GPIOD_OUT_LOW);
+ slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vsd))
return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
"error looking up VSD GPIO\n");
- slot->vio = gpiod_get_index_optional(host->dev, "vio",
- id, GPIOD_OUT_LOW);
+ slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vio))
return dev_err_probe(host->dev, PTR_ERR(slot->vio),
"error looking up VIO GPIO\n");
- slot->cover = gpiod_get_index_optional(host->dev, "cover",
- id, GPIOD_IN);
+ slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+ id, GPIOD_IN);
if (IS_ERR(slot->cover))
return dev_err_probe(host->dev, PTR_ERR(slot->cover),
"error looking up cover switch GPIO\n");
@@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
if (IS_ERR(host->virt_base))
return PTR_ERR(host->virt_base);
- host->slot_switch = gpiod_get_optional(host->dev, "switch",
- GPIOD_OUT_LOW);
- if (IS_ERR(host->slot_switch))
- return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
- "error looking up slot switch GPIO\n");
-
-
INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
@@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
host->dev = &pdev->dev;
platform_set_drvdata(pdev, host);
+ host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(host->slot_switch))
+ return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+ "error looking up slot switch GPIO\n");
+
host->id = pdev->id;
host->irq = irq;
host->phys_base = res->start;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 1035820c2377..c0d0bce0b594 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
mutex_unlock(&priv->reg_mutex);
}
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Qâ„¢-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
*
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ * PDUs transmitted using this destination address, or any other addresses
+ * that appear in Table 8-1, Table 8-2, and Table 8-3
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ * therefore travel no further than those stations that can be reached via a
+ * single individual LAN from the originating station.
+ *
+ * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ * address that no conformant S-VLAN component, C-VLAN component, or MAC
+ * Bridge can forward; however, this address is relayed by a TPMR component.
+ * PDUs using this destination address, or any of the other addresses that
+ * appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ * any TPMRs but will propagate no further than the nearest S-VLAN component,
+ * C-VLAN component, or MAC Bridge.
+ *
+ * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ * that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ * relayed by TPMR components and S-VLAN components. PDUs using this
+ * destination address, or any of the other addresses that appear in Table 8-1
+ * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ * will be relayed by TPMR components and S-VLAN components but will propagate
+ * no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ * Link-local frames with these destination addresses will be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-00
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
*/
static void
mt753x_trap_frames(struct mt7530_priv *priv)
@@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
/* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
* VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
- MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
- MT753X_BPDU_PORT_FW_MASK,
- MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_BPC,
+ MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+ MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+ MT753X_BPDU_PORT_FW_MASK,
+ MT753X_PAE_BPDU_FR |
+ MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
- MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
- MT753X_R01_PORT_FW_MASK,
- MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC1,
+ MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+ MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+ MT753X_R02_BPDU_FR |
+ MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
- MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
- MT753X_R03_PORT_FW_MASK,
- MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC2,
+ MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+ MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+ MT753X_R0E_BPDU_FR |
+ MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
}
static void
@@ -2505,18 +2666,25 @@ mt7531_setup(struct dsa_switch *ds)
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
- /* Enable PHY core PLL, since phy_device has not yet been created
- * provided for phy_[read,write]_mmd_indirect is called, we provide
- * our own mt7531_ind_mmd_phy_[read,write] to complete this
- * function.
+ /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+ * phy_device has not yet been created provided for
+ * phy_[read,write]_mmd_indirect is called, we provide our own
+ * mt7531_ind_mmd_phy_[read,write] to complete this function.
*/
val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
MDIO_MMD_VEND2, CORE_PLL_GROUP4);
- val |= MT7531_PHY_PLL_BYPASS_MODE;
+ val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
val &= ~MT7531_PHY_PLL_OFF;
mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
CORE_PLL_GROUP4, val);
+ /* Disable EEE advertisement on the switch PHYs. */
+ for (i = MT753X_CTRL_PHY_ADDR;
+ i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+ mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+ 0);
+ }
+
mt7531_setup_common(ds);
/* Setup VLAN ID 0 for VLAN-unaware bridges */
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index d17b318e6ee4..585db03c0548 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -65,6 +65,7 @@ enum mt753x_id {
/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
+#define MT753X_PAE_BPDU_FR BIT(25)
#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
@@ -75,20 +76,24 @@ enum mt753x_id {
/* Register for :01 and :02 MAC DA frame control */
#define MT753X_RGAC1 0x28
+#define MT753X_R02_BPDU_FR BIT(25)
#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define MT753X_R01_BPDU_FR BIT(9)
#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0)
/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_BPDU_FR BIT(25)
#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define MT753X_R03_BPDU_FR BIT(9)
#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0)
@@ -616,6 +621,7 @@ enum mt7531_clk_skew {
#define RG_SYSPLL_DDSFBK_EN BIT(12)
#define RG_SYSPLL_BIAS_EN BIT(11)
#define RG_SYSPLL_BIAS_LPF_EN BIT(10)
+#define MT7531_RG_SYSPLL_DMY2 BIT(6)
#define MT7531_PHY_PLL_OFF BIT(5)
#define MT7531_PHY_PLL_BYPASS_MODE BIT(4)
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 9e9e4a03f1a8..2d8a66ea82fa 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;
- size = io_sq->bounce_buf_ctrl.buffer_size *
+ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;
dev_node = dev_to_node(ena_dev->dmadev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 09e7da1a69c9..be5acfa41ee0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring,
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
bool print_once = true;
+ bool is_xdp_ring;
u32 i;
+ is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
@@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
ena_unmap_tx_buff(tx_ring, tx_info);
- dev_kfree_skb_any(tx_info->skb);
+ if (is_xdp_ring)
+ xdp_return_frame(tx_info->xdpf);
+ else
+ dev_kfree_skb_any(tx_info->skb);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->qid));
+
+ if (!is_xdp_ring)
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
}
static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- int i, budget, rc;
+ int qid, budget, rc;
int io_queue_count;
io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
@@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;
- budget = ENA_MONITORED_TX_QUEUES;
+ budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
- for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
- tx_ring = &adapter->tx_ring[i];
- rx_ring = &adapter->rx_ring[i];
+ qid = adapter->last_monitored_tx_qid;
+
+ while (budget) {
+ qid = (qid + 1) % io_queue_count;
+
+ tx_ring = &adapter->tx_ring[qid];
+ rx_ring = &adapter->rx_ring[qid];
rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;
- rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;
budget--;
- if (!budget)
- break;
}
- adapter->last_monitored_tx_qid = i % io_queue_count;
+ adapter->last_monitored_tx_qid = qid;
}
/* trigger napi schedule after 2 consecutive detections */
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
index 337c435d3ce9..5b175e7e92a1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
if (unlikely(rc))
- return rc;
+ goto err;
ena_tx_ctx.req_id = req_id;
@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
error_unmap_dma:
ena_unmap_tx_buff(tx_ring, tx_info);
+err:
tx_info->xdpf = NULL;
+
return rc;
}
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 9662ee72814c..536635e57727 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -593,6 +593,16 @@ err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+ struct pci_dev *pdev = pdsc->pdev;
+
+ pci_dev_get(pdev);
+ pci_reset_function(pdev);
+ pci_dev_put(pdev);
+}
+
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
if (fw_status != PDS_RC_BAD_PCI)
return;
- pci_reset_function(pdsc->pdev);
+ /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+ queue_work(pdsc->wq, &pdsc->pci_reset_work);
}
void pdsc_health_thread(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 92d7657dd614..a3e17a0c187a 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -197,6 +197,7 @@ struct pdsc {
struct pdsc_qcq notifyqcq;
u64 last_eid;
struct pdsc_viftype *viftype_status;
+ struct work_struct pci_reset_work;
};
/** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
void pdsc_fw_down(struct pdsc *pdsc);
void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index e494e1298dc9..495ef4ef8c10 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
.reset.opcode = PDS_CORE_CMD_RESET,
};
+ if (!pdsc_is_fw_running(pdsc))
+ return 0;
+
return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index ab6133e7db42..660268ff9562 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
pdsc->wq = create_singlethread_workqueue(wq_name);
INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 493b724848c8..57e61f963167 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11758,6 +11758,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ if (bp->ptp_cfg)
+ atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
bnxt_cfg_usr_fltrs(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 93f9bd55020f..195c02dc0683 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
if (err)
return;
+ if (edev->ulp_tbl->msix_requested)
+ bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
if (aux_priv) {
struct auxiliary_device *adev;
@@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!edev)
goto aux_dev_uninit;
+ aux_priv->edev = edev;
+
ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
if (!ulp)
goto aux_dev_uninit;
edev->ulp_tbl = ulp;
- aux_priv->edev = edev;
bp->edev = edev;
bnxt_set_edev_info(edev, bp);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d39001cdc707..00af8888e329 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4819,18 +4819,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
*/
rvu_write64(rvu, blkaddr, NIX_AF_CFG,
rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+ }
- /* Set chan/link to backpressure TL3 instead of TL2 */
- rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+ /* Set chan/link to backpressure TL3 instead of TL2 */
+ rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
- /* Disable SQ manager's sticky mode operation (set TM6 = 0)
- * This sticky mode is known to cause SQ stalls when multiple
- * SQs are mapped to same SMQ and transmitting pkts at a time.
- */
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
- cfg &= ~BIT_ULL(15);
- rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
- }
+ /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+ * This sticky mode is known to cause SQ stalls when multiple
+ * SQs are mapped to same SMQ and transmitting pkts at a time.
+ */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+ cfg &= ~BIT_ULL(15);
+ rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
ltdefs = rvu->kpu.lt_def;
/* Calibrate X2P bus to check if CGX/LBK links are fine */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 1e77bbf5d22a..1723e9912ae0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
otx2_qos_read_txschq_cfg_tl(node, cfg);
cnt = cfg->static_node_pos[node->level];
cfg->schq_contig_list[node->level][cnt] = node->schq;
+ cfg->schq_index_used[node->level][cnt] = true;
cfg->schq_contig[node->level]++;
cfg->static_node_pos[node->level]++;
otx2_qos_read_txschq_cfg_schq(node, cfg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 86f1854698b4..883c044852f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
}
static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+ return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
{
- return fifo->data[fifo->mask & fifo->cc++];
+ fifo->cc++;
}
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index e87e26f2c669..6743806b8480 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node_qid > priv->htb_max_qos_sqs);
- if (node_qid == priv->htb_max_qos_sqs) {
- struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
- if (priv->htb_max_qos_sqs == 0) {
- stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
- sizeof(*stats_list),
- GFP_KERNEL);
- if (!stats_list)
- return -ENOMEM;
- }
+ WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+ if (!priv->htb_qos_sq_stats) {
+ struct mlx5e_sq_stats **stats_list;
+
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list), GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ }
+
+ if (!priv->htb_qos_sq_stats[node_qid]) {
+ struct mlx5e_sq_stats *stats;
+
stats = kzalloc(sizeof(*stats), GFP_KERNEL);
- if (!stats) {
- kvfree(stats_list);
+ if (!stats)
return -ENOMEM;
- }
- if (stats_list)
- WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
/* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
index bcafb4bf9415..8d9a3b5ec973 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
return min_t(u32, rqt_size, max_cap_rqt_size);
}
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+ return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
{
mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
index e0bc30308c77..2f9e04a8418f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
}
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
unsigned int num_rqns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
index f675b1926340..f66bbc846464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
+ mutex_lock(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);
kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
kvfree(selq->standby);
selq->standby = NULL;
+ mutex_unlock(selq->state_lock);
}
void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc51ce16df14..8f101181648c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
+ if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto out;
+ }
+ }
+
+ /* If RXFH is configured, changing the channels number is allowed only if
+ * it does not require resizing the RSS table. This is because the previous
+ * configuration may no longer be compatible with the new RSS table.
+ */
+ if (netif_is_rxfh_configured(priv->netdev)) {
+ int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+ int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+ if (new_rqt_size != cur_rqt_size) {
+ err = -EINVAL;
+ netdev_err(priv->netdev,
+ "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+ __func__, new_rqt_size, cur_rqt_size);
+ goto out;
+ }
+ }
+
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
@@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
struct mlx5e_priv *priv = netdev_priv(dev);
u32 *rss_context = &rxfh->rss_context;
u8 hfunc = rxfh->hfunc;
+ unsigned int count;
int err;
mutex_lock(&priv->state_lock);
+
+ count = priv->channels.params.num_channels;
+
+ if (hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto unlock;
+ }
+ }
+
if (*rss_context && rxfh->rss_delete) {
err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
goto unlock;
}
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
- unsigned int count = priv->channels.params.num_channels;
-
err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
if (err)
goto unlock;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 91848eae4565..b375ef268671 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
- mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
- mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb_max_qos_sqs; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2fa076b23fbe..e21a3b4128ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
- if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
- mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
- be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
eseg->flow_table_metadata =
- cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+ cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
}
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 3047d7015c52..1789800faaeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
if (err)
goto abort;
+ dev->priv.eswitch = esw;
err = esw_offloads_init(esw);
if (err)
goto reps_err;
@@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (MLX5_ESWITCH_MANAGER(dev) &&
- mlx5_esw_vport_match_metadata_supported(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
- dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
esw_info(dev,
@@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
reps_err:
mlx5_esw_vports_cleanup(esw);
+ dev->priv.eswitch = NULL;
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
@@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
- esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);
@@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup(esw);
+ esw->dev->priv.eswitch = NULL;
mlx5_esw_vports_cleanup(esw);
debugfs_remove_recursive(esw->debugfs_root);
devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index baaae628b0a0..1f60954c12f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,7 @@
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "lib/eq.h"
#include "lib/fs_chains.h"
@@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
if (err)
return err;
+ if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
err = devl_params_register(priv_to_devlink(esw->dev),
esw_devlink_params,
ARRAY_SIZE(esw_devlink_params));
@@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+ return -EPERM;
+ }
+
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e6bfa7e4f146..cf085a478e3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
return err;
}
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+ struct mlx5_pkt_reformat *p2)
+{
+ return p1->owner == p2->owner &&
+ (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+ p1->id == p2->id :
+ mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+ mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.pkt_reformat->id ==
- d2->vport.pkt_reformat->id) : true)) ||
+ mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+ d2->vport.pkt_reformat) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
}
trace_mlx5_fs_set_fte(fte, false);
+ /* Link newly added rules into the tree. */
for (i = 0; i < handle->num_rules; i++) {
- if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ if (!handle->rule[i]->node.parent) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c2593625c09a..59806553889e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
if (err)
goto err_register;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ err = mlx5_hwmon_dev_register(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
mutex_unlock(&dev->intf_state_mutex);
return 0;
@@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err;
devl_lock(devlink);
+ devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
+ if (err)
+ devl_unregister(devlink);
devl_unlock(devlink);
return err;
}
@@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
+ mlx5_hwmon_dev_unregister(dev);
+ mlx5_crdump_disable(dev);
mlx5_unregister_device(dev);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unregister(devlink);
devl_unlock(devlink);
}
@@ -1680,16 +1694,20 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
}
devl_lock(devlink);
+ devl_register(devlink);
+
err = mlx5_devlink_params_register(priv_to_devlink(dev));
- devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
goto query_hca_caps_err;
}
+ devl_unlock(devlink);
return 0;
query_hca_caps_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1702,6 +1720,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
devl_lock(devlink);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
+ devl_unregister(devlink);
devl_unlock(devlink);
if (dev->state != MLX5_DEVICE_STATE_UP)
return;
@@ -1943,16 +1962,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_init_one;
}
- err = mlx5_crdump_enable(dev);
- if (err)
- dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
- err = mlx5_hwmon_dev_register(dev);
- if (err)
- mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
pci_save_state(pdev);
- devlink_register(devlink);
return 0;
err_init_one:
@@ -1973,16 +1983,9 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
- * devlink notify APIs.
- * Hence, we must drain them before unregistering the devlink.
- */
mlx5_drain_fw_reset(dev);
mlx5_drain_health_wq(dev);
- devlink_unregister(devlink);
mlx5_sriov_disable(pdev, false);
- mlx5_hwmon_dev_unregister(dev);
- mlx5_crdump_disable(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 4dcf995cb1a2..6bac8ad70ba6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -19,6 +19,7 @@
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}
+ vecidx -= MLX5_IRQ_VEC_COMP_BASE;
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
- int offset = 1;
+ int offset = MLX5_IRQ_VEC_COMP_BASE;
if (!pool->xa_num_irqs.max)
offset = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index bc863e1f062e..e3bf8c7e4baa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -101,7 +101,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
devlink = priv_to_devlink(mdev);
set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
mlx5_drain_health_wq(mdev);
- devlink_unregister(devlink);
if (mlx5_dev_is_lightweight(mdev))
mlx5_uninit_one_light(mdev);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index 64f4cc284aea..030a5776c937 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
}
static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
struct mlx5dr_rule_action_member *action_mem)
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 hit_tbl_ptr, miss_tbl_ptr;
u32 hit_tbl_id, miss_tbl_id;
int ret;
@@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
}
static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
bool is_rx, const u64 rule_id, u8 format_ver)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char hw_ste_dump[DR_HEX_SIZE];
u32 mem_rec_type;
int ret;
@@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
}
static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+ struct mlx5dr_rule_rx_tx *rule_rx_tx,
bool is_rx, const u64 rule_id, u8 format_ver)
{
struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
@@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
while (i--) {
- ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
format_ver);
if (ret < 0)
return ret;
@@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
}
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
{
struct mlx5dr_rule_action_member *action_mem;
const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
@@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
return ret;
if (rx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
if (ret < 0)
return ret;
}
if (tx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
if (ret < 0)
return ret;
}
list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
- ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
if (ret < 0)
return ret;
}
@@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
}
static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+ struct mlx5dr_match_param *mask,
u8 criteria, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char dump[DR_HEX_SIZE];
int ret;
@@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
}
static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+ struct mlx5dr_ste_build *builder,
u32 index, bool is_rx, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
}
static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr, e_icm_addr;
int i, ret;
@@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return ret;
for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
- ret = dr_dump_matcher_builder(file,
+ ret = dr_dump_matcher_builder(file, buff,
&matcher_rx_tx->ste_builder[i],
i, is_rx, matcher_id);
if (ret < 0)
@@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
{
struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
@@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
if (ret)
return ret;
- ret = dr_dump_matcher_mask(file, &matcher->mask,
+ ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
matcher->match_criteria, matcher_id);
if (ret < 0)
return ret;
if (rx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
if (ret < 0)
return ret;
}
if (tx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
if (ret < 0)
return ret;
}
@@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
}
static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_table_rx_tx *table_rx_tx,
const u64 table_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr;
int ret;
@@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
{
struct mlx5dr_table_rx_tx *rx = &table->rx;
struct mlx5dr_table_rx_tx *tx = &table->tx;
@@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
return ret;
if (rx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, true, rx,
+ ret = dr_dump_table_rx_tx(file, buff, true, rx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
}
if (tx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, false, tx,
+ ret = dr_dump_table_rx_tx(file, buff, false, tx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
@@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
}
static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+ struct mlx5dr_send_ring *ring,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
return 0;
}
-static noinline_for_stack int
+static int
dr_dump_domain_info_flex_parser(struct seq_file *file,
+ char *buff,
const char *flex_parser_name,
const u8 flex_parser_value,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file,
return 0;
}
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+ struct mlx5dr_cmd_caps *caps,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_cmd_vport_cap *vport_caps;
unsigned long i, vports_num;
int ret;
@@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
}
static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+ struct mlx5dr_domain_info *info,
const u64 domain_id)
{
int ret;
- ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
info->caps.flex_parser_id_icmp_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
info->caps.flex_parser_id_icmp_dw1,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
info->caps.flex_parser_id_icmpv6_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
info->caps.flex_parser_id_icmpv6_dw1,
domain_id);
if (ret < 0)
@@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
if (ret)
return ret;
- ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
if (ret < 0)
return ret;
if (dmn->info.supp_sw_steering) {
- ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
if (ret < 0)
return ret;
}
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index e5ec0a363aff..31f75b4a67fd 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
* @rdfifo: FIFO read callback
* @wrfifo: FIFO write callback
* @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
* @flush_tx_work: flush_tx_work() implementation callback
*
* The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
struct sk_buff *txp, bool irq);
netdev_tx_t (*start_xmit)(struct sk_buff *skb,
struct net_device *dev);
- void (*rx_skb)(struct ks8851_net *ks,
- struct sk_buff *skb);
void (*flush_tx_work)(struct ks8851_net *ks);
};
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 0bf13b38b8f5..d4cdf3d4f552 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -232,16 +232,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
}
/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
- ks->rx_skb(ks, skb);
-}
-
-/**
* ks8851_rx_pkts - receive packets from the host
* @ks: The device information.
*
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
ks8851_dbg_dumpkkt(ks, rxpkt);
skb->protocol = eth_type_trans(skb, ks->netdev);
- ks8851_rx_skb(ks, skb);
+ __netif_rx(skb);
ks->netdev->stats.rx_packets++;
ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
unsigned long flags;
unsigned int status;
+ local_bh_disable();
+
ks8851_lock(ks, &flags);
status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
if (status & IRQ_LCI)
mii_check_link(&ks->mii);
+ local_bh_enable();
+
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 2a7f29854267..381b9cd285eb 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
}
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
{
return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
ks->rdfifo = ks8851_rdfifo_par;
ks->wrfifo = ks8851_wrfifo_par;
ks->start_xmit = ks8851_start_xmit_par;
- ks->rx_skb = ks8851_rx_skb_par;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
IRQ_RXI | /* RX done */ \
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 2f803377c9f9..670c1de966db 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -299,16 +299,6 @@ static unsigned int calc_txlen(unsigned int len)
}
/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
-/**
* ks8851_tx_work - process tx packet(s)
* @work: The work strucutre what was scheduled.
*
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
ks->rdfifo = ks8851_rdfifo_spi;
ks->wrfifo = ks8851_wrfifo_spi;
ks->start_xmit = ks8851_start_xmit_spi;
- ks->rx_skb = ks8851_rx_skb_spi;
ks->flush_tx_work = ks8851_flush_tx_work_spi;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index 3a1b1a1f5a19..60dd2fd603a8 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
bool sgmii = false, inband_aneg = false;
int err;
- if (port->conf.inband) {
+ if (conf->inband) {
if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
conf->portmode == PHY_INTERFACE_MODE_QSGMII)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
if (err)
return -EINVAL;
- if (port->conf.inband) {
+ if (conf->inband) {
/* Enable/disable 1G counters in ASM */
spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
ASM_PORT_CFG_CSC_STAT_DIS,
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 4c043052198d..00882ffc7a02 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -73,6 +73,7 @@ enum mac_version {
};
struct rtl8169_private;
+struct r8169_led_classdev;
void r8169_apply_firmware(struct rtl8169_private *tp);
u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
@@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
char *buf, int buf_len);
int rtl8168_get_led_mode(struct rtl8169_private *tp);
int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c
index 7c5dc9d0df85..e10bee706bc6 100644
--- a/drivers/net/ethernet/realtek/r8169_leds.c
+++ b/drivers/net/ethernet/realtek/r8169_leds.c
@@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8168_NUM_LEDS; i++)
rtl8168_setup_ldev(leds + i, ndev, i);
+
+ return leds;
}
static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
@@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8125_NUM_LEDS; i++)
rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+ return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+ if (!leds)
+ return;
+
+ for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+ led_classdev_unregister(&l->led);
+
+ kfree(leds);
}
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 6f1e6f386b7b..0fc5fe564ae5 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -647,6 +647,8 @@ struct rtl8169_private {
const char *fw_name;
struct rtl_fw *rtl_fw;
+ struct r8169_led_classdev *leds;
+
u32 ocp_base;
};
@@ -5044,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
cancel_work_sync(&tp->wk.work);
+ if (IS_ENABLED(CONFIG_R8169_LEDS))
+ r8169_remove_leds(tp->leds);
+
unregister_netdev(tp->dev);
if (tp->dash_type != RTL_DASH_NONE)
@@ -5501,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (IS_ENABLED(CONFIG_R8169_LEDS)) {
if (rtl_is_8125(tp))
- rtl8125_init_leds(dev);
+ tp->leds = rtl8125_init_leds(dev);
else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
- rtl8168_init_leds(dev);
+ tp->leds = rtl8168_init_leds(dev);
}
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index dff02d75d519..5d1ea3e07459 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -52,6 +52,7 @@ struct stmmac_counters {
unsigned int mmc_tx_excessdef;
unsigned int mmc_tx_pause_frame;
unsigned int mmc_tx_vlan_frame_g;
+ unsigned int mmc_tx_oversize_g;
unsigned int mmc_tx_lpi_usec;
unsigned int mmc_tx_lpi_tran;
@@ -80,6 +81,7 @@ struct stmmac_counters {
unsigned int mmc_rx_fifo_overflow;
unsigned int mmc_rx_vlan_frames_gb;
unsigned int mmc_rx_watchdog_error;
+ unsigned int mmc_rx_error;
unsigned int mmc_rx_lpi_usec;
unsigned int mmc_rx_lpi_tran;
unsigned int mmc_rx_discard_frames_gb;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 7eb477faa75a..0fab842902a8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -53,6 +53,7 @@
#define MMC_TX_EXCESSDEF 0x6c
#define MMC_TX_PAUSE_FRAME 0x70
#define MMC_TX_VLAN_FRAME_G 0x74
+#define MMC_TX_OVERSIZE_G 0x78
/* MMC RX counter registers */
#define MMC_RX_FRAMECOUNT_GB 0x80
@@ -79,6 +80,13 @@
#define MMC_RX_FIFO_OVERFLOW 0xd4
#define MMC_RX_VLAN_FRAMES_GB 0xd8
#define MMC_RX_WATCHDOG_ERROR 0xdc
+#define MMC_RX_ERROR 0xe0
+
+#define MMC_TX_LPI_USEC 0xec
+#define MMC_TX_LPI_TRAN 0xf0
+#define MMC_RX_LPI_USEC 0xf4
+#define MMC_RX_LPI_TRAN 0xf8
+
/* IPC*/
#define MMC_RX_IPC_INTR_MASK 0x100
#define MMC_RX_IPC_INTR 0x108
@@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+ mmc->mmc_tx_oversize_g += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+ mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+ mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
/* MMC RX counter registers */
mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
@@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+ mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+ mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+ mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
/* IPv4 */
mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index e1537a57815f..542e2633a6f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_tx_excessdef),
STMMAC_MMC_STAT(mmc_tx_pause_frame),
STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+ STMMAC_MMC_STAT(mmc_tx_oversize_g),
STMMAC_MMC_STAT(mmc_tx_lpi_usec),
STMMAC_MMC_STAT(mmc_tx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_framecount_gb),
@@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+ STMMAC_MMC_STAT(mmc_rx_error),
STMMAC_MMC_STAT(mmc_rx_lpi_usec),
STMMAC_MMC_STAT(mmc_rx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 2f6739fe78af..6c2835086b57 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs4)
@@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs6)
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index a6fcbda64ecc..2b6ec979a62f 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (!nvdev->recv_buf_gpadl_handle.decrypted)
+ vfree(nvdev->recv_buf);
+ if (!nvdev->send_buf_gpadl_handle.decrypted)
+ vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a133..115c3c5414f2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
+ bool update = false;
int i;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
@@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
if (rxfh->indir) {
+ if (!vi->has_rss)
+ return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+ update = true;
}
- if (rxfh->key)
+
+ if (rxfh->key) {
+ /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+ * device provides hash calculation capabilities, that is,
+ * hash_key is configured.
+ */
+ if (!vi->has_rss && !vi->has_rss_hash_report)
+ return -EOPNOTSUPP;
+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+ update = true;
+ }
- virtnet_commit_rss_command(vi);
+ if (update)
+ virtnet_commit_rss_command(vi);
return 0;
}
@@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
- if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+ }
+
+ if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bf4833221816..eff7f5df08e2 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3766,14 +3766,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan. Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
-/*
* Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS
* automatically disables LTSSM when Secondary Bus Reset is received and
* the device stops working. Prevent bus reset for these devices. With
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
index 8ea867c2a01a..62bc24f6dcc7 100644
--- a/drivers/platform/chrome/cros_ec_uart.c
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
if (!ec_dev)
return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
return ret;
}
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index ee2e164f86b9..38c932df6446 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -598,6 +598,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
.driver_data = &quirk_acer_predator_v4,
},
{
+ .callback = dmi_matched,
+ .ident = "Acer Predator PH18-71",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+ },
+ .driver_data = &quirk_acer_predator_v4,
+ },
+ {
.callback = set_force_caps,
.ident = "Acer Aspire Switch 10E SW3-016",
.matches = {
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 7457ca2b27a6..c7a827645864 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = {
{"INTC1076", 0},
{"INTC1077", 0},
{"INTC1078", 0},
+ {"INTC107B", 0},
+ {"INTC10CB", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
@@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
struct platform_device *device = context;
struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
unsigned long long ev_index;
+ struct key_entry *ke;
int err;
/*
@@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
if (event == 0xc0 || !priv->array)
return;
- if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+ ke = sparse_keymap_entry_from_scancode(priv->array, event);
+ if (!ke) {
dev_info(&device->dev, "unknown event 0x%x\n", event);
return;
}
+ if (ke->type == KE_IGNORE)
+ return;
+
wakeup:
pm_wakeup_hard_event(&device->dev);
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 084c355c86f5..79bb2c801daa 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device)
priv->switches_dev->id.bustype = BUS_HOST;
if (priv->has_switches) {
- detect_tablet_mode(&device->dev);
-
ret = input_register_device(priv->switches_dev);
if (ret)
return ret;
@@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
{
- unsigned long long vgbs;
- acpi_status status;
-
/* See dual_accel_detect.h for more info */
if (dual_accel)
return false;
@@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
if (!dmi_check_system(dmi_switches_allow_list))
return false;
- status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
- return ACPI_SUCCESS(status);
+ return acpi_has_method(handle, "VGBS");
}
static int intel_vbtn_probe(struct platform_device *device)
@@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device)
if (ACPI_FAILURE(status))
dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
}
+ // Check switches after buttons since VBDL may have side effects.
+ if (has_switches)
+ detect_tablet_mode(&device->dev);
device_init_wakeup(&device->dev, true);
/*
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index ad3c39e9e9f5..e714ee6298dd 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device)
default:
year = 2019;
}
- pr_info("product: %s year: %d\n", product, year);
+ pr_info("product: %s year: %d\n", product ?: "unknown", year);
if (year >= 2019)
battery_limit_use_wmbb = 1;
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 291f14ef6702..77244c9aa60d 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = {
{ KE_KEY, 0xb32, { KEY_NEXTSONG } },
{ KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
{ KE_KEY, 0xb5a, { KEY_MEDIA } },
+ { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
{ KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
@@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
(dev->kbd_mode == SCI_KBD_MODE_ON) ?
LED_FULL : LED_OFF);
break;
+ case 0x8e: /* Power button pressed */
+ break;
case 0x85: /* Unknown */
case 0x8d: /* Unknown */
- case 0x8e: /* Unknown */
case 0x94: /* Unknown */
case 0x95: /* Unknown */
default:
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 097dfe4b620d..35f8e00850d6 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
if (dev_is_sata(device)) {
struct ata_link *link = &device->sata_dev.ap->link;
- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+ rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
smp_ata_check_ready_type);
} else {
msleep(2000);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 7d2a33514538..34f96cc35342 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
(sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
- ts->stat = SAS_PROTO_RESPONSE;
+ if (task->ata_task.use_ncq) {
+ struct domain_device *device = task->dev;
+ struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+ sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+ slot->abort = 1;
+ } else {
+ ts->stat = SAS_PROTO_RESPONSE;
+ }
} else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 26e6b3e3af43..dcde55c8ee5d 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
if (fcport->edif.enable) {
- if (pcnt > app_req.num_ports)
+ if (pcnt >= app_req.num_ports)
break;
app_reply->elem[pcnt].rekey_count =
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 386981c6976a..baf870a03ecf 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
int dev = iminor(inode);
int flags = filp->f_flags;
struct request_queue *q;
+ struct scsi_device *device;
Sg_device *sdp;
Sg_fd *sfp;
int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)
/* This driver's module count bumped by fops_get in <linux/fs.h> */
/* Prevent the device driver from vanishing while we sleep */
- retval = scsi_device_get(sdp->device);
+ device = sdp->device;
+ retval = scsi_device_get(device);
if (retval)
goto sg_put;
- retval = scsi_autopm_get_device(sdp->device);
+ retval = scsi_autopm_get_device(device);
if (retval)
goto sdp_put;
@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
* check if O_NONBLOCK. Permits SCSI commands to be issued
* during error recovery. Tread carefully. */
if (!((flags & O_NONBLOCK) ||
- scsi_block_when_processing_errors(sdp->device))) {
+ scsi_block_when_processing_errors(device))) {
retval = -ENXIO;
/* we are in error recovery for this device */
goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)
if (sdp->open_cnt < 1) { /* no existing opens */
sdp->sgdebug = 0;
- q = sdp->device->request_queue;
+ q = device->request_queue;
sdp->sg_tablesize = queue_max_segments(q);
}
sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ out_undo:
error_mutex_locked:
mutex_unlock(&sdp->open_rel_lock);
error_out:
- scsi_autopm_put_device(sdp->device);
+ scsi_autopm_put_device(device);
sdp_put:
- scsi_device_put(sdp->device);
- goto sg_put;
+ kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
+ return retval;
}
/* Release resources associated with a successful sg_open()
@@ -2233,7 +2236,6 @@ sg_remove_sfp_usercontext(struct work_struct *work)
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);
- WARN_ON_ONCE(kref_read(&sdp->d_ref) != 1);
kref_put(&sdp->d_ref, sg_device_destroy);
scsi_device_put(device);
module_put(THIS_MODULE);
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index c1fbcdd16182..c40217f44b1b 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void)
{
struct configfs_subsystem *subsys = &target_core_fabrics;
struct t10_alua_lu_gp *lu_gp;
+ struct cred *kern_cred;
+ const struct cred *old_cred;
int ret;
pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
@@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void)
if (ret < 0)
goto out;
+ /* We use the kernel credentials to access the target directory */
+ kern_cred = prepare_kernel_cred(&init_task);
+ if (!kern_cred) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ old_cred = override_creds(kern_cred);
target_init_dbroot();
+ revert_creds(old_cred);
+ put_cred(kern_cred);
return 0;
out:
+ target_xcopy_release_pt();
configfs_unregister_subsystem(subsys);
core_dev_release_virtual_lun0();
rd_module_exit();
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 20d9762331bd..6be3462b109f 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
{
if (pdata->send_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
}
if (pdata->recv_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
}
}
@@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
RECV_BUFFER_SIZE, &pdata->recv_gpadl);
if (ret) {
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
goto fail_close;
}
@@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->send_buf,
SEND_BUFFER_SIZE, &pdata->send_gpadl);
if (ret) {
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
goto fail_close;
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 045f666b4f12..8995730ce0bf 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
+ vq_err(vq, "Guest moved avail index from %u to %u",
last_avail_idx, vq->avail_idx);
return -EFAULT;
}
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
r = vhost_get_avail_idx(vq, &avail_idx);
if (unlikely(r))
return false;
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return false;
+ }
- return vq->avail_idx == vq->last_avail_idx;
+ return true;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->avail->idx, r);
return false;
}
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return true;
+ }
- return vq->avail_idx != vq->last_avail_idx;
+ return false;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f173587893cb..9510c551dce8 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = {
.remove = virtio_dev_remove,
};
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
+ driver->driver.owner = owner;
+
return driver_register(&driver->driver);
}
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
void unregister_virtio_driver(struct virtio_driver *driver)
{
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index 3640f417cce1..5c180fdc3efb 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
- struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
@@ -290,28 +289,27 @@ retry:
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
- if (ret) {
- if (!bch2_err_matches(ret, ENOENT))
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
- if (ret) {
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
- xattr = bkey_s_c_to_xattr(k);
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
- le16_to_cpu(xattr.v->x_val_len));
+ le16_to_cpu(xattr.v->x_val_len));
+ ret = PTR_ERR_OR_ZERO(acl);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
- if (!IS_ERR(acl))
+ if (ret)
+ acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+ if (!IS_ERR_OR_NULL(acl))
set_cached_acl(&inode->v, type, acl);
-out:
- if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
- goto retry;
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 63102992d955..364ae42022af 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1535,6 +1535,20 @@ enum btree_id {
BTREE_ID_NR
};
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+ switch (id) {
+ case BTREE_ID_alloc:
+ case BTREE_ID_backpointers:
+ case BTREE_ID_need_discard:
+ case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
+ return true;
+ default:
+ return false;
+ }
+}
+
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 6280da1244b5..d2555da55c6d 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -368,11 +368,16 @@ again:
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
- bch2_journal_key_delete(c, b->c.btree_id,
- b->c.level, cur_k.k->k.p);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
if (ret)
break;
+
+ if (!btree_id_is_alloc(b->c.btree_id)) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+ }
continue;
}
@@ -544,12 +549,12 @@ reconstruct_root:
bch2_btree_root_alloc_fake(c, i, 0);
} else {
bch2_btree_root_alloc_fake(c, i, 1);
+ bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
if (ret)
break;
}
- bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
reconstructed_root = true;
}
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 24772538e4cc..1d58d447b386 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -642,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+ if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
return __bch2_btree_trans_too_many_iters(trans);
return 0;
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 5cbcbfe85235..1e8cf49a6935 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
}
+static void journal_iter_verify(struct journal_iter *iter)
+{
+ struct journal_keys *keys = iter->keys;
+ size_t gap_size = keys->size - keys->nr;
+
+ BUG_ON(iter->idx >= keys->gap &&
+ iter->idx < keys->gap + gap_size);
+
+ if (iter->idx < keys->size) {
+ struct journal_key *k = keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ BUG_ON(cmp < 0);
+ }
+}
+
static void journal_iters_fix(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
/* The key we just inserted is immediately before the gap: */
size_t gap_end = keys->gap + (keys->size - keys->nr);
- struct btree_and_journal_iter *iter;
+ struct journal_key *new_key = &keys->data[keys->gap - 1];
+ struct journal_iter *iter;
/*
* If an iterator points one after the key we just inserted, decrement
@@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c)
* decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
* handle that:
*/
- list_for_each_entry(iter, &c->journal_iters, journal.list)
- if (iter->journal.idx == gap_end)
- iter->journal.idx = keys->gap - 1;
+ list_for_each_entry(iter, &c->journal_iters, list) {
+ journal_iter_verify(iter);
+ if (iter->idx == gap_end &&
+ new_key->btree_id == iter->btree_id &&
+ new_key->level == iter->level)
+ iter->idx = keys->gap - 1;
+ journal_iter_verify(iter);
+ }
}
static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
if (idx > keys->gap)
idx -= keys->size - keys->nr;
+ size_t old_gap = keys->gap;
+
if (keys->nr == keys->size) {
+ journal_iters_move_gap(c, old_gap, keys->size);
+ old_gap = keys->size;
+
struct journal_keys new_keys = {
.nr = keys->nr,
.size = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
keys->gap = keys->nr;
}
- journal_iters_move_gap(c, keys->gap, idx);
+ journal_iters_move_gap(c, old_gap, idx);
move_gap(keys, idx);
@@ -301,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct journal_key *k = iter->keys->data + iter->idx;
+ journal_iter_verify(iter);
+
+ while (iter->idx < iter->keys->size) {
+ struct journal_key *k = iter->keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ if (cmp > 0)
+ break;
+ BUG_ON(cmp);
- while (k < iter->keys->data + iter->keys->size &&
- k->btree_id == iter->btree_id &&
- k->level == iter->level) {
if (!k->overwritten)
return bkey_i_to_s_c(k->k);
bch2_journal_iter_advance(iter);
- k = iter->keys->data + iter->idx;
}
return bkey_s_c_null;
@@ -330,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
iter->level = level;
iter->keys = &c->journal_keys;
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+ journal_iter_verify(iter);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -434,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
iter->trans = trans;
iter->b = b;
iter->node_iter = node_iter;
- bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
- INIT_LIST_HEAD(&iter->journal.list);
iter->pos = b->data->min_key;
iter->at_end = false;
+ INIT_LIST_HEAD(&iter->journal.list);
+
+ if (trans->journal_replay_not_finished) {
+ bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+ if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+ list_add(&iter->journal.list, &trans->c->journal_iters);
+ }
}
/*
@@ -452,9 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
bch2_btree_node_iter_init_from_start(&node_iter, b);
__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
- if (trans->journal_replay_not_finished &&
- !test_bit(BCH_FS_may_go_rw, &trans->c->flags))
- list_add(&iter->journal.list, &trans->c->journal_iters);
}
/* sort and dedup all keys in the journal: */
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 581edcb0911b..88a3582a3275 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
} else {
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_pcpu);
+ bc->nr_freed_pcpu++;
mutex_unlock(&bc->lock);
}
}
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (!list_empty(&bc->freed_pcpu)) {
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_pcpu--;
}
mutex_unlock(&bc->lock);
}
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
- j->watermark == BCH_WATERMARK_stripe)
+ !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
ret = bch2_btree_iter_traverse(&b_iter) ?:
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index b9b151e693ed..f2caf491957e 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
- struct btree_path *linked;
- unsigned i, iter;
- int ret;
-
- /*
- * XXX BIG FAT NOTICE
- *
- * Drop all read locks before taking a write lock:
- *
- * This is a hack, because bch2_btree_node_lock_write_nofail() is a
- * hack - but by dropping read locks first, this should never fail, and
- * we only use this in code paths where whatever read locks we've
- * already taken are no longer needed:
- */
-
- trans_for_each_path(trans, linked, iter) {
- if (!linked->nodes_locked)
- continue;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_read_locked(linked, i)) {
- btree_node_unlock(trans, linked, i);
- btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
- }
- }
-
- ret = __btree_node_lock_write(trans, path, b, true);
+ int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 3f33be7e5e5c..556f76f5c84e 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -133,6 +133,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
if (le64_to_cpu(bn->magic) != bset_magic(c))
return;
+ if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+ return;
+
rcu_read_lock();
struct found_btree_node n = {
.btree_id = BTREE_NODE_ID(bn),
@@ -213,6 +216,9 @@ static int read_btree_nodes(struct find_btree_nodes *f)
closure_init_stack(&cl);
for_each_online_member(c, ca) {
+ if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+ continue;
+
struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
struct task_struct *t;
@@ -290,7 +296,7 @@ again:
found_btree_node_to_text(&buf, c, n);
bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
- return -1;
+ return -BCH_ERR_fsck_repair_unimplemented;
}
}
@@ -436,6 +442,9 @@ bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos node_min, struct bpos node_max)
{
+ if (btree_id_is_alloc(btree))
+ return 0;
+
struct find_btree_nodes *f = &c->found_btree_nodes;
int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 9404d96c38f3..e0c982a4195c 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -364,7 +364,21 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
+/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL 64
+/*
+ * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iter()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT 256
+/* never exceed limit */
#define BTREE_ITER_MAX (1U << 10)
struct btree_trans_commit_hook;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 32397b99752f..c4a5e83a56a4 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -26,9 +26,9 @@
#include <linux/random.h>
-const char * const bch2_btree_update_modes[] = {
+static const char * const bch2_btree_update_modes[] = {
#define x(t) #t,
- BCH_WATERMARKS()
+ BTREE_UPDATE_MODES()
#undef x
NULL
};
@@ -704,9 +704,13 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s", bch2_err_str(ret));
err:
- if (as->b) {
-
- b = as->b;
+ /*
+ * We have to be careful because another thread might be getting ready
+ * to free as->b and calling btree_update_reparent() on us - we'll
+ * recheck under btree_update_lock below:
+ */
+ b = READ_ONCE(as->b);
+ if (b) {
btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
as->btree_id, b->c.level, b->key.k.p);
struct btree_path *path = trans->paths + path_idx;
@@ -850,15 +854,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
{
struct bch_fs *c = as->c;
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
BUG_ON(as->mode != BTREE_UPDATE_none);
+ BUG_ON(as->update_level_end < b->c.level);
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
as->mode = BTREE_UPDATE_node;
as->b = b;
+ as->update_level_end = b->c.level;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
@@ -1100,7 +1106,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level, bool split, unsigned flags)
+ unsigned level_start, bool split, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1108,7 +1114,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2] = { 0, 0 };
- unsigned update_level = level;
+ unsigned level_end = level_start;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
@@ -1123,34 +1129,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (watermark < c->journal.watermark) {
- struct journal_res res = { 0 };
- unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+ if (watermark < BCH_WATERMARK_reclaim &&
+ test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+ if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+ return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
- if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark < BCH_WATERMARK_reclaim)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-
- ret = drop_locks_do(trans,
- bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ bch2_trans_unlock(trans);
+ wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+ ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
while (1) {
- nr_nodes[!!update_level] += 1 + split;
- update_level++;
+ nr_nodes[!!level_end] += 1 + split;
+ level_end++;
- ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
if (ret)
return ERR_PTR(ret);
- if (!btree_path_node(path, update_level)) {
+ if (!btree_path_node(path, level_end)) {
/* Allocating new root? */
nr_nodes[1] += split;
- update_level = BTREE_MAX_DEPTH;
+ level_end = BTREE_MAX_DEPTH;
break;
}
@@ -1158,11 +1160,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
* Always check for space for two keys, even if we won't have to
* split at prior level - it might have been a merge instead:
*/
- if (bch2_btree_node_insert_fits(path->l[update_level].b,
+ if (bch2_btree_node_insert_fits(path->l[level_end].b,
BKEY_BTREE_PTR_U64s_MAX * 2))
break;
- split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+ split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
}
if (!down_read_trylock(&c->gc_lock)) {
@@ -1176,14 +1178,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
- as->c = c;
- as->start_time = start_time;
- as->ip_started = _RET_IP_;
- as->mode = BTREE_UPDATE_none;
- as->watermark = watermark;
- as->took_gc_lock = true;
- as->btree_id = path->btree_id;
- as->update_level = update_level;
+ as->c = c;
+ as->start_time = start_time;
+ as->ip_started = _RET_IP_;
+ as->mode = BTREE_UPDATE_none;
+ as->watermark = watermark;
+ as->took_gc_lock = true;
+ as->btree_id = path->btree_id;
+ as->update_level_start = level_start;
+ as->update_level_end = level_end;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1373,12 +1376,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
}
static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct btree_node_iter node_iter,
- struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct keylist *keys)
{
struct bkey_i *insert = bch2_keylist_front(keys);
struct bkey_packed *k;
@@ -1534,7 +1537,7 @@ static void btree_split_insert_keys(struct btree_update *as,
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
BUG_ON(bch2_btree_node_check_topology(trans, b));
}
@@ -1714,27 +1717,6 @@ err:
goto out;
}
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct keylist *keys)
-{
- struct btree_path *linked;
- unsigned i;
-
- __bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
- btree_update_updated_node(as, b);
-
- trans_for_each_path_with_node(trans, b, linked, i)
- bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
- bch2_trans_verify_paths(trans);
-}
-
/**
* bch2_btree_insert_node - insert bkeys into a given btree node
*
@@ -1755,7 +1737,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
struct keylist *keys)
{
struct bch_fs *c = as->c;
- struct btree_path *path = trans->paths + path_idx;
+ struct btree_path *path = trans->paths + path_idx, *linked;
+ unsigned i;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
@@ -1784,7 +1767,13 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
return ret;
}
- bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
+
+ bch2_trans_verify_paths(trans);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1798,6 +1787,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
+ btree_update_updated_node(as, b);
bch2_btree_node_unlock_write(trans, path, b);
BUG_ON(bch2_btree_node_check_topology(trans, b));
@@ -1807,7 +1797,7 @@ split:
* We could attempt to avoid the transaction restart, by calling
* bch2_btree_path_upgrade() and allocating more nodes:
*/
- if (b->c.level >= as->update_level) {
+ if (b->c.level >= as->update_level_end) {
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
}
@@ -2519,9 +2509,11 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
- prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
(void *) as->ip_started,
bch2_btree_id_str(as->btree_id),
+ as->update_level_start,
+ as->update_level_end,
bch2_watermarks[as->watermark],
bch2_btree_update_modes[as->mode],
as->nodes_written,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 88dcf5a22a3b..c1a479ebaad1 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -57,7 +57,8 @@ struct btree_update {
unsigned took_gc_lock:1;
enum btree_id btree_id;
- unsigned update_level;
+ unsigned update_level_start;
+ unsigned update_level_end;
struct disk_reservation disk_res;
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index cbfa6459bdbc..72781aad6ba7 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
struct fsck_thread {
struct thread_with_stdio thr;
struct bch_fs *c;
- char **devs;
- size_t nr_devs;
struct bch_opts opts;
};
static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
- if (thr->devs)
- for (size_t i = 0; i < thr->nr_devs; i++)
- kfree(thr->devs[i]);
- kfree(thr->devs);
kfree(thr);
}
static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
- struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
- if (IS_ERR(c))
- return PTR_ERR(c);
+ struct bch_fs *c = thr->c;
- int ret = 0;
- if (test_bit(BCH_FS_errors_fixed, &c->flags))
- ret |= 1;
- if (test_bit(BCH_FS_error, &c->flags))
- ret |= 4;
+ int ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
+ return ret;
- bch2_fs_stop(c);
+ ret = bch2_fs_start(thr->c);
+ if (ret)
+ goto err;
- if (ret & 1)
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
- if (ret & 4)
+ ret |= 1;
+ }
+ if (test_bit(BCH_FS_error, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+ ret |= 4;
+ }
+err:
+ bch2_fs_stop(c);
return ret;
}
@@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- u64 *devs = NULL;
+ darray_str(devs) = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
- !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
- !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
- ret = -ENOMEM;
- goto err;
- }
+ for (size_t i = 0; i < arg.nr_devs; i++) {
+ u64 dev_u64;
+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+ if (ret)
+ goto err;
- thr->opts = bch2_opts_empty();
- thr->nr_devs = arg.nr_devs;
+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(dev_str);
+ if (ret)
+ goto err;
- if (copy_from_user(devs, &user_arg->devs[0],
- array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
- ret = -EINVAL;
- goto err;
+ ret = darray_push(&devs, dev_str);
+ if (ret) {
+ kfree(dev_str);
+ goto err;
+ }
}
- for (size_t i = 0; i < arg.nr_devs; i++) {
- thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
- ret = PTR_ERR_OR_ZERO(thr->devs[i]);
- if (ret)
- goto err;
+ thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+ if (!thr) {
+ ret = -ENOMEM;
+ goto err;
}
+ thr->opts = bch2_opts_empty();
+
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
@@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
+ /* We need request_key() to be called before we punt to kthread: */
+ opt_set(thr->opts, nostart, true);
+
+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+ if (!IS_ERR(thr->c) &&
+ thr->c->opts.errors == BCH_ON_ERROR_panic)
+ thr->c->opts.errors = BCH_ON_ERROR_ro;
+
ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
- if (ret < 0) {
- if (thr)
- bch2_fsck_thread_exit(&thr->thr);
- pr_err("ret %s", bch2_err_str(ret));
- }
- kfree(devs);
+out:
+ darray_for_each(devs, i)
+ kfree(*i);
+ darray_exit(&devs);
return ret;
+err:
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ pr_err("ret %s", bch2_err_str(ret));
+ goto out;
}
static long bch2_global_ioctl(unsigned cmd, void __user *arg)
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 34731ee0217f..0022b51ce3c0 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -598,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans,
i++;
}
+ unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
/*
* If current extent durability is less than io_opts.data_replicas,
* we're not trying to rereplicate the extent up to data_replicas here -
@@ -607,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
- durability_have >= io_opts.data_replicas) {
+ !durability_required) {
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
@@ -616,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans,
goto done;
}
- m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->op.nr_replicas = min(durability_removing, durability_required) +
m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
- BUG_ON(!m->op.nr_replicas);
+ /*
+ * If device(s) were set to durability=0 after data was written to them
+ * we can end up with a duribilty=0 extent, and the normal algorithm
+ * that tries not to increase durability doesn't work:
+ */
+ if (!(durability_have + durability_removing))
+ m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+ m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 208ce6f0fc43..cd99b7399414 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -13,6 +13,7 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
@@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- do {
+ while (1) {
err = flush_buf(i);
if (err)
return err;
@@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
if (!i->size)
break;
+ if (done)
+ break;
+
done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
i->iter++;
- } while (!done);
+ }
if (i->buf.allocation_failure)
return -ENOMEM;
@@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = {
.read = bch2_journal_pins_read,
};
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ bch2_btree_updates_to_text(&i->buf, c);
+ i->iter++;
+ }
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_updates_read,
+};
+
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
struct bch_fs *c = inode->i_private;
struct dump_iter *i;
i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
if (!i)
return -ENOMEM;
@@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c)
debugfs_remove_recursive(c->fs_debug_dir);
}
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+ debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+ debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+ debugfs_create_file("bfloat-failed", 0400, d, bd,
+ &bfloat_failed_debug_ops);
+}
+
void bch2_fs_debug_init(struct bch_fs *c)
{
struct btree_debug *bd;
@@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_updates_ops);
+
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &btree_transaction_stats_op);
@@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
- debugfs_create_file(bch2_btree_id_str(bd->id),
- 0400, c->btree_debug_dir, bd,
- &btree_debug_ops);
-
- snprintf(name, sizeof(name), "%s-formats",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &btree_format_debug_ops);
-
- snprintf(name, sizeof(name), "%s-bfloat-failed",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &bfloat_failed_debug_ops);
+ bch2_fs_debug_btree_init(c, bd);
}
}
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
index 4ce5e957a6e9..0f955c3c76a7 100644
--- a/fs/bcachefs/eytzinger.c
+++ b/fs/bcachefs/eytzinger.c
@@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n)
struct wrapper {
cmp_func_t cmp;
- swap_func_t swap;
+ swap_func_t swap_func;
};
/*
@@ -125,7 +125,7 @@ struct wrapper {
static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
{
if (swap_func == SWAP_WRAPPER) {
- ((const struct wrapper *)priv)->swap(a, b, (int)size);
+ ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
return;
}
@@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
int i, c, r;
/* called from 'sort' without swap function, let's pick the default */
- if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
swap_func = NULL;
if (!swap_func) {
@@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
{
struct wrapper w = {
.cmp = cmp_func,
- .swap = swap_func,
+ .swap_func = swap_func,
};
return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index ee0e2df33322..24840aee335c 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -242,8 +242,8 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
(_i) = eytzinger0_next((_i), (_size)))
/* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
- cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
{
unsigned i, n = 0;
@@ -256,18 +256,32 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
} while (n < nr);
if (n & 1) {
- /* @i was greater than @search, return previous node: */
+ /*
+ * @i was greater than @search, return previous node:
+ *
+ * if @i was leftmost/smallest element,
+ * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
+ */
return eytzinger0_prev(i, nr);
} else {
return i;
}
}
-static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
- cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
{
ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
- return eytzinger0_next(idx, size);
+
+ /*
+ * if eytitzinger0_find_le() returned -1 - no element was <= search - we
+ * want to return the first element; next/prev identities mean this work
+ * as expected
+ *
+ * similarly if find_le() returns last element, we should return -1;
+ * identities mean this all works out:
+ */
+ return eytzinger0_next(idx, nr);
}
#define eytzinger0_find(base, nr, size, _cmp, search) \
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index ab811c0dad26..04a577848b01 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
+ mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 8c053cb64ca5..b5161b5d76a0 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -134,6 +134,7 @@ enum journal_flags {
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
JOURNAL_NEED_FLUSH_WRITE,
+ JOURNAL_SPACE_LOW,
};
/* Reasons we may fail to get a journal reservation: */
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index b76c16152579..0f328aba9760 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -47,20 +47,6 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
}
}
-static bool btree_id_is_alloc(enum btree_id id)
-{
- switch (id) {
- case BTREE_ID_alloc:
- case BTREE_ID_backpointers:
- case BTREE_ID_need_discard:
- case BTREE_ID_freespace:
- case BTREE_ID_bucket_gens:
- return true;
- default:
- return false;
- }
-}
-
/* for -o reconstruct_alloc: */
static void bch2_reconstruct_alloc(struct bch_fs *c)
{
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 0e806f04f3d7..544322d5c251 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -125,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
return s->parent;
}
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return false;
+
+ return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
@@ -140,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- if (id && id < ancestor) {
- ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+ ret = id && id < ancestor
+ ? test_ancestor_bitmap(t, id, ancestor)
+ : id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
- } else {
- ret = id == ancestor;
- }
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
out:
rcu_read_unlock();
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index ed63018f21be..8daf80a38d60 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -288,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_emergency_ro, &c->flags))
set_bit(BCH_FS_clean_shutdown, &c->flags);
+
bch2_fs_journal_stop(&c->journal);
+ bch_info(c, "%sshutdown complete, journal seq %llu",
+ test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+ c->journal.seq_ondisk);
+
/*
* After stopping journal:
*/
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index c86a93a8d8fc..b18b0cc81b59 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -17,7 +17,6 @@
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_update.h"
-#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
@@ -166,7 +165,6 @@ read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
-read_attribute(btree_updates);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
@@ -415,9 +413,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
- if (attr == &sysfs_btree_updates)
- bch2_btree_updates_to_text(out, c);
-
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
@@ -639,7 +634,6 @@ SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_flags,
&sysfs_journal_debug,
- &sysfs_btree_updates,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_new_stripes,
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index b3fe9fc57747..bfec656f94c0 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index b7e7c29278fc..5cf885b09986 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
#endif
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+ if (v)
+ set_bit(nr, addr);
+ else
+ clear_bit(nr, addr);
+}
+
static inline void __set_bit_le64(size_t bit, __le64 *addr)
{
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
@@ -795,7 +803,7 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
static inline void __clear_bit_le64(size_t bit, __le64 *addr)
{
- addr[bit / 64] &= !cpu_to_le64(BIT_ULL(bit % 64));
+ addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
}
static inline bool test_bit_le64(size_t bit, __le64 *addr)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index dd6f566a383f..121ab890bd05 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 37701531eeb1..c65fe5de4022 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if (bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len, false);
+ btrfs_delalloc_release_metadata(inode, len, true);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
+ u64 qgroup_reserved = 0;
int ret;
down_write(&fs_info->subvol_sem);
@@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
+ ret = btrfs_record_root_in_trans(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
@@ -4611,7 +4620,9 @@ out_end_trans:
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv);
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 294e31edec9d..55f3ba6a831c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
int ret;
dev_t anon_dev;
u64 objectid;
+ u64 qgroup_reserved = 0;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans_num_items, false);
if (ret)
goto out_new_inode_args;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv);
- goto out_new_inode_args;
+ goto out_release_rsv;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret)
+ goto out;
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
@@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv);
-
btrfs_end_transaction(trans);
+out_release_rsv:
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv *block_rsv;
+ u64 qgroup_reserved = 0;
int ret;
/* We do not support snapshotting right now. */
@@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
- btrfs_init_block_rsv(&pending_snapshot->block_rsv,
- BTRFS_BLOCK_RSV_TEMP);
+ block_rsv = &pending_snapshot->block_rsv;
+ btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
- ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv,
+ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
+ qgroup_reserved = block_rsv->qgroup_rsv_reserved;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(trans);
goto fail;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto fail;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->pending_snapshot = pending_snapshot;
@@ -881,7 +898,9 @@ fail:
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
- btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+ btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5f90f0605b12..cf8820ce7aa2 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 4bb538a372ce..7007f9e0c972 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
return ret;
}
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 qgroup_to_release;
-
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
- btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index 6f929cf3bd49..8f5739e732b9 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -18,8 +18,6 @@ struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 46e8426adf4f..85f359e0e0a7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -745,14 +745,6 @@ again:
h->reloc_reserved = reloc_reserved;
}
- /*
- * Now that we have found a transaction to be a part of, convert the
- * qgroup reservation from prealloc to pertrans. A different transaction
- * can't race in and free our pertrans out from under us.
- */
- if (qgroup_reserved)
- btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
got_it:
if (!current->journal_info)
current->journal_info = h;
@@ -786,8 +778,15 @@ got_it:
* not just freed.
*/
btrfs_end_transaction(h);
- return ERR_PTR(ret);
+ goto reserve_fail;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
return h;
@@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
+ btrfs_qgroup_free_meta_all_pertrans(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
@@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1340d77124ae..ee9caf7916fb 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
ihold(inode);
if (wbc->sync_mode == WB_SYNC_NONE &&
- ceph_inode_to_fs_client(inode)->write_congested)
+ ceph_inode_to_fs_client(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
wait_on_page_fscache(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 55051ad09c19..c4941ba245ac 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
ceph_vinop(inode));
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
ci->i_ceph_flags |= CEPH_I_FLUSH;
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add_tail(&ci->i_cap_delay_list,
&mdsc->cap_unlink_delay_list);
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
/*
* Fire the work immediately, because the MDS maybe
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3ab9c268a8bb..360b686c3c67 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
struct ceph_client *cl = mdsc->fsc->client;
doutc(cl, "begin\n");
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
while (!list_empty(&mdsc->cap_unlink_delay_list)) {
struct ceph_inode_info *ci;
struct inode *inode;
@@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
inode = igrab(&ci->netfs.inode);
if (inode) {
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
iput(inode);
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
}
}
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "done\n");
}
@@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
- spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 03f8ff00874f..b88e80415224 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -461,9 +461,8 @@ struct ceph_mds_client {
struct delayed_work delayed_work; /* delayed work */
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
- spinlock_t cap_delay_lock; /* protects cap_delay_list */
struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
- spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
index 902b326e1e56..87dcaae32ff8 100644
--- a/fs/proc/bootconfig.c
+++ b/fs/proc/bootconfig.c
@@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
break;
dst += ret;
}
- if (ret >= 0 && boot_command_line[0]) {
- ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
- boot_command_line);
- if (ret > 0)
- dst += ret;
- }
+ }
+ if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) {
+ ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
+ boot_command_line);
+ if (ret > 0)
+ dst += ret;
}
out:
kfree(key);
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 13a9d7acf8f8..0ff2491c311d 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -433,8 +433,8 @@ smb2_close_cached_fid(struct kref *ref)
if (cfid->is_open) {
rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- if (rc != -EBUSY && rc != -EAGAIN)
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+ cifs_dbg(VFS, "close cached dir rc %d\n", rc);
}
free_cached_dir(cfid);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index f6a302205f89..d6669ce4ae87 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1077,6 +1077,7 @@ struct cifs_ses {
and after mount option parsing we fill it */
char *domainName;
char *password;
+ char *password2; /* When key rotation used, new password may be set before it expires */
char workstation_name[CIFS_MAX_WORKSTATION_LEN];
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 85679ae106fd..4e35970681bf 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2183,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
++delim;
+ /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
ctx->password = kstrndup(delim, len, GFP_KERNEL);
if (!ctx->password) {
cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
@@ -2206,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
kfree(ctx->username);
ctx->username = NULL;
kfree_sensitive(ctx->password);
+ /* no need to free ctx->password2 since not allocated in this path */
ctx->password = NULL;
goto out_key_put;
}
@@ -2317,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
if (!ses->password)
goto get_ses_fail;
}
+ /* ctx->password freed at unmount */
+ if (ctx->password2) {
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+ if (!ses->password2)
+ goto get_ses_fail;
+ }
if (ctx->domainname) {
ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
if (!ses->domainName)
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index b7bfe705b2c4..6c727d8c31e8 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
fsparam_string("domain", Opt_domain),
@@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
new_ctx->nodename = NULL;
new_ctx->username = NULL;
new_ctx->password = NULL;
+ new_ctx->password2 = NULL;
new_ctx->server_hostname = NULL;
new_ctx->domainname = NULL;
new_ctx->UNC = NULL;
@@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
DUP_CTX_STR(prepath);
DUP_CTX_STR(username);
DUP_CTX_STR(password);
+ DUP_CTX_STR(password2);
DUP_CTX_STR(server_hostname);
DUP_CTX_STR(UNC);
DUP_CTX_STR(source);
@@ -905,6 +908,8 @@ static int smb3_reconfigure(struct fs_context *fc)
else {
kfree_sensitive(ses->password);
ses->password = kstrdup(ctx->password, GFP_KERNEL);
+ kfree_sensitive(ses->password2);
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
}
STEAL_STRING(cifs_sb, ctx, domainname);
STEAL_STRING(cifs_sb, ctx, nodename);
@@ -1305,6 +1310,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
break;
+ case Opt_pass2:
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
+ if (strlen(param->string) == 0)
+ break;
+
+ ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+ if (ctx->password2 == NULL) {
+ cifs_errorf(fc, "OOM when copying password2 string\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_ip:
if (strlen(param->string) == 0) {
ctx->got_ip = false;
@@ -1608,6 +1625,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_parse_mount_err:
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
return -EINVAL;
}
@@ -1713,6 +1732,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
ctx->username = NULL;
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
kfree(ctx->server_hostname);
ctx->server_hostname = NULL;
kfree(ctx->UNC);
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 8a35645e0b65..a947bddeba27 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -145,6 +145,7 @@ enum cifs_param {
Opt_source,
Opt_user,
Opt_pass,
+ Opt_pass2,
Opt_ip,
Opt_domain,
Opt_srcaddr,
@@ -177,6 +178,7 @@ struct smb3_fs_context {
char *username;
char *password;
+ char *password2;
char *domainname;
char *source;
char *server_hostname;
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 91b07ef9e25c..60afab5c83d4 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
} else {
cifs_open_info_to_fattr(fattr, data, sb);
}
- if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+ if (!rc && *inode &&
+ (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
cifs_mark_open_handles_for_deleted_file(*inode, full_path);
break;
case -EREMOTE:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 33ac4f8f5050..7d15a1969b81 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
kfree_sensitive(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password2);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index b156eefa75d7..78c94d0350fe 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4964,68 +4964,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf,
return 0;
}
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
- struct dentry *dentry, struct cifs_tcon *tcon,
- const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_open_info_data buf = {};
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_fid fid;
unsigned int bytes_written;
- struct win_dev *pdev;
+ struct win_dev pdev = {};
struct kvec iov[2];
__u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
int rc;
- if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+ switch (mode & S_IFMT) {
+ case S_IFCHR:
+ strscpy(pdev.type, "IntxCHR");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFBLK:
+ strscpy(pdev.type, "IntxBLK");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFIFO:
+ strscpy(pdev.type, "LnxFIFO");
+ break;
+ default:
return -EPERM;
+ }
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = GENERIC_WRITE,
- .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
- CREATE_OPTION_SPECIAL),
- .disposition = FILE_CREATE,
- .path = full_path,
- .fid = &fid,
- };
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+ FILE_CREATE, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+ oparms.fid = &fid;
- rc = server->ops->open(xid, &oparms, &oplock, &buf);
+ rc = server->ops->open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
- /*
- * BB Do not bother to decode buf since no local inode yet to put
- * timestamps in, but we can reuse it safely.
- */
- pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
- io_parms.length = sizeof(*pdev);
- iov[1].iov_base = pdev;
- iov[1].iov_len = sizeof(*pdev);
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISFIFO(mode)) {
- memcpy(pdev->type, "LnxFIFO", 8);
- }
+ io_parms.length = sizeof(pdev);
+ iov[1].iov_base = &pdev;
+ iov[1].iov_len = sizeof(pdev);
rc = server->ops->sync_write(xid, &fid, &io_parms,
&bytes_written, iov, 1);
server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
- /* FIXME: add code here to set EAs */
- cifs_free_open_info(&buf);
+ return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
+{
+ struct inode *new = NULL;
+ int rc;
+
+ rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
+ if (rc)
+ return rc;
+
+ if (tcon->posix_extensions) {
+ rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid);
+ } else if (tcon->unix_ext) {
+ rc = cifs_get_inode_info_unix(&new, full_path,
+ inode->i_sb, xid);
+ } else {
+ rc = cifs_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid, NULL);
+ }
+ if (!rc)
+ d_instantiate(dentry, new);
return rc;
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index c0c4933af5fc..86c647a947cc 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -367,6 +367,17 @@ again:
}
rc = cifs_setup_session(0, ses, server, nls_codepage);
+ if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect (key rotation
+ * could be enabled on the server e.g.) if an alternate
+ * password is available and the current password is expired,
+ * but do not swap on non pwd related errors like host down
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+
if ((rc == -EACCES) && !tcon->retry) {
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index dc067eeb6387..894c6ca1e500 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
/**
* lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
* @dentry: the dentry to look up
* @mode: the permission that the file should have.
* @attr: saved attributes changed by user
@@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
/**
* lookup_dir_entry - look up a dir in the tracefs filesystem
* @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
* @ei: the eventfs_inode that represents the directory to create
*
* This function will look up a dentry for a directory represented by
@@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry)
/**
* lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
* @ei: the eventfs_inode that the file will be created under
* @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
* @mode: The mode of the file.
* @data: The data to use to set the inode of the file with on open()
* @fops: The fops of the file to be created.
*
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
*/
static struct dentry *
lookup_file_dentry(struct dentry *dentry,
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index c6a124e8d565..964fa7f24003 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb,
zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
zonefs_zgroup_name(ztype),
zgroup->g_nr_zones,
- zgroup->g_nr_zones > 1 ? "s" : "");
+ str_plural(zgroup->g_nr_zones));
return 0;
}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 5de954e2b18a..e7796f373d0d 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2)
* acpi_dev_hid_uid_match - Match device by supplied HID and UID
* @adev: ACPI device to match.
* @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
*
* Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
* will be treated as a match. If user wants to validate @uid2, it should be
* done before calling this function.
*
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
*/
#define acpi_dev_hid_uid_match(adev, hid2, uid2) \
(acpi_dev_hid_match(adev, hid2) && \
- (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+ /* Distinguish integer 0 from NULL @uid2 */ \
+ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+ acpi_dev_uid_match(adev, uid2)))
void acpi_dev_clear_dependencies(struct acpi_device *supplier);
bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 6e794420bd39..b7de3a4eade1 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do { \
+ do {} while (1); \
+ unreachable(); \
+} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 87e3d49a4e29..814207e7c37f 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -512,13 +512,9 @@ struct hv_proximity_domain_flags {
u32 proximity_info_valid : 1;
} __packed;
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
- struct {
- u32 domain_id;
- struct hv_proximity_domain_flags flags;
- };
- u64 as_uint64;
+struct hv_proximity_domain_info {
+ u32 domain_id;
+ struct hv_proximity_domain_flags flags;
} __packed;
struct hv_lp_startup_status {
@@ -532,14 +528,13 @@ struct hv_lp_startup_status {
} __packed;
/* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
u32 lp_index;
u32 apic_id;
- union hv_proximity_domain_info proximity_domain_info;
- u64 flags;
+ struct hv_proximity_domain_info proximity_domain_info;
} __packed;
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
struct hv_lp_startup_status startup_status;
} __packed;
@@ -560,7 +555,7 @@ struct hv_create_vp {
u8 padding[3];
u8 subnode_type;
u64 subnode_id;
- union hv_proximity_domain_info proximity_domain_info;
+ struct hv_proximity_domain_info proximity_domain_info;
u64 flags;
} __packed;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 99935779682d..8fe7aaab2599 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -21,6 +21,7 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
@@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+ struct hv_proximity_domain_info pxm_info = {};
+
+ if (node != NUMA_NO_NODE) {
+ pxm_info.domain_id = node_to_pxm(node);
+ pxm_info.flags.proximity_info_valid = 1;
+ pxm_info.flags.proximity_preferred = 1;
+ }
+
+ return pxm_info;
+}
+
/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
static inline int hv_result(u64 status)
{
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index ca73940e26df..e5ee2c694401 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
#else /* !__KERNEL__ */
/*
* NOTE: This is only for tools/bootconfig, because tools/bootconfig will
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c00cc6c0878a..8c252e073bd8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
* - When one operand is a null pointer constant (i.e. when x is an integer
* constant expression) and the other is an object pointer (i.e. our
* third operand), the conditional operator returns the type of the
- * object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ * object pointer operand (i.e. "int *"). Here, within the sizeof(), we
* would then get:
* sizeof(*((int *)(...)) == sizeof(int) == 4
* - When one operand is a void pointer (i.e. when x is not an integer
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index e06bad467f55..c3f9bb6602ba 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
}
+#define DMA_FENCE_WARN(f, fmt, args...) \
+ do { \
+ struct dma_fence *__ff = (f); \
+ pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+ ##args); \
+ } while (0)
+
#endif /* __LINUX_DMA_FENCE_H */
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 868c8fb1bbc1..13becafe41df 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
+#include <linux/bits.h>
+
/* The typedef is in types.h but we want the documentation here */
#if 0
/**
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6ef0557b4bff..96ceb4095425 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -832,6 +832,7 @@ struct vmbus_gpadl {
u32 gpadl_handle;
u32 size;
void *buffer;
+ bool decrypted;
};
struct vmbus_channel {
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 05df0e399d7c..ac333ea81d31 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -13,7 +13,7 @@ enum {
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
- * Must not be used wirh requests generating more than one CQE.
+ * Must not be used with requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 147feebd508c..3f003d5fde53 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -114,7 +114,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0436b919f1c7..7b0ee64225de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2207,11 +2207,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
*/
#include <linux/vmstat.h>
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
- return page_to_virt(page);
-}
-
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif
@@ -2234,6 +2229,11 @@ void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+ return page_to_virt(page);
+}
+
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address) do { } while(0)
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 5d868505a94e..6d92b68efbf6 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- offset ^= (rand); \
+ offset = ror32(offset, 5) ^ (rand); \
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h
index 29c4e4f243e4..f2394a409c9d 100644
--- a/include/linux/rwbase_rt.h
+++ b/include/linux/rwbase_rt.h
@@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
return atomic_read(&rwb->readers) != READER_BIAS;
}
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
{
- WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+ return atomic_read(&rwb->readers) == WRITER_BIAS;
}
static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 4f1c18992f76..c8b543d428b0 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
return rw_base_is_locked(&sem->rwbase);
}
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
{
WARN_ON(!rwsem_is_locked(sem));
}
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
{
- rw_base_assert_held_write(sem);
+ WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
}
static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index 307961b41541..317200cd3a60 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
return 0;
}
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
{
return copy_from_sockptr_offset(dst, src, 0, size);
}
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst: Destination address, in kernel space. This buffer must be @ksize
+ * bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ * * -EINVAL: @optlen < @ksize
+ * * -EFAULT: access to userspace failed.
+ * * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+ sockptr_t optval, unsigned int optlen)
+{
+ if (optlen < ksize)
+ return -EINVAL;
+ return copy_from_sockptr(dst, optval, ksize);
+}
+
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
sockptr_t src, size_t usize)
{
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index ffe48e69b3f3..457879938fc1 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
p->v++;
}
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b0201747a263..26c4325aa373 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
/**
* struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
* @feature_table_size: number of entries in the feature table array.
@@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
return container_of(drv, struct virtio_driver, driver);
}
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+ __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
void unregister_virtio_driver(struct virtio_driver *drv);
/* module_virtio_driver() - Helper macro for drivers that don't do
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9d06eb945509..62a407db1bf5 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
refcount_inc(&ifp->refcnt);
}
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+ return refcount_inc_not_zero(&ifp->refcnt);
+}
/*
* compute link-local solicited-node multicast address
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 9fe95a22abeb..eaec5d6caa29 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
return skb;
}
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+ sockptr_t src, size_t src_size)
+{
+ if (dst_size > src_size)
+ return -EINVAL;
+
+ return copy_from_sockptr(dst, src, dst_size);
+}
+
int bt_to_errno(u16 code);
__u8 bt_status(int err);
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5cd64bb2104d..c286cc2e766e 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
return pskb_network_may_pull(skb, nhlen);
}
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+ int nhlen = 0, maclen = ETH_HLEN;
+ __be16 type = skb->protocol;
+
+ /* Essentially this is skb_protocol(skb, true)
+ * And we get MAC len.
+ */
+ if (eth_type_vlan(type))
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ }
+ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+ * a base network header in skb->head.
+ */
+ if (!pskb_may_pull(skb, maclen + nhlen))
+ return false;
+
+ skb_set_network_header(skb, maclen);
+ return true;
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bea697390613..b95dd84eef2d 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@@ -228,10 +222,17 @@
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif
diff --git a/init/main.c b/init/main.c
index 2ca52474d0c3..881f6230ee59 100644
--- a/init/main.c
+++ b/init/main.c
@@ -487,6 +487,11 @@ static int __init warn_bootconfig(char *str)
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4521c2b66b98..c170a2b8d2cf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2602,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (__io_cqring_events_user(ctx) >= min_events)
return 0;
- if (sig) {
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- sigsz);
- else
-#endif
- ret = set_user_sigmask(sig, sigsz);
-
- if (ret)
- return ret;
- }
-
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
INIT_LIST_HEAD(&iowq.wq.entry);
@@ -2633,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
io_napi_adjust_timeout(ctx, &iowq, &ts);
}
+ if (sig) {
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ sigsz);
+ else
+#endif
+ ret = set_user_sigmask(sig, sigsz);
+
+ if (ret)
+ return ret;
+ }
+
io_napi_busy_loop(ctx, &iowq);
trace_io_uring_cqring_wait(ctx, min_events);
diff --git a/io_uring/net.c b/io_uring/net.c
index 1e7665ff6ef7..4afb475d4197 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8f6affd051f7..07ad53b7f119 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3207,7 +3207,8 @@ enum cpu_mitigations {
};
static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+ IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+ CPU_MITIGATIONS_OFF;
static int __init mitigations_parse_cmdline(char *arg)
{
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b5958..a5e0dfc44d24 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,11 +69,14 @@
* @alloc_size: Size of the allocated buffer.
* @list: The free list describing the number of free entries available
* from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};
static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
#endif
}
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
*/
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
{
- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
/*
@@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
- unsigned int tlb_offset, orig_addr_offset;
+ int tlb_offset;
if (orig_addr == INVALID_PHYS_ADDR)
return;
- tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
- orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
- if (tlb_offset < orig_addr_offset) {
- dev_WARN_ONCE(dev, 1,
- "Access before mapping start detected. orig offset %u, requested offset %u.\n",
- orig_addr_offset, tlb_offset);
- return;
- }
-
- tlb_offset -= orig_addr_offset;
- if (tlb_offset > alloc_size) {
- dev_WARN_ONCE(dev, 1,
- "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
- alloc_size, size, tlb_offset);
- return;
- }
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
orig_addr += tlb_offset;
alloc_size -= tlb_offset;
@@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
@@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;
if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
@@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
if (index == -1) {
@@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
{
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
- unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
@@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
/*
@@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
- atomic_long_set(&mem->total_used, 0);
- atomic_long_set(&mem->used_hiwater, 0);
-
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
@@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
#ifdef CONFIG_SWIOTLB_DYNAMIC
- atomic_long_set(&mem->transient_nslabs, 0);
debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
mem, &fops_io_tlb_transient_used);
#endif
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9d9095e81792..65adc815fc6e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
jump_label_lock();
preempt_disable();
- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}
- /* Check if 'p' is probing a module. */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e3ae93bbcb9b..09f8397bae15 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);
+ /*
+ * Kick all CPUs to ensure that they resume their timers and restore
+ * consistent system state.
+ */
+ wake_up_all_idle_cpus();
+
cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec8719a..d88b13076b79 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1331216a9cae..71a792cd8936 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61c541c36596..47345bf1d4a9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
config FTRACE_RECORD_RECURSION_SIZE
int "Max number of recursed functions to record"
- default 128
+ default 128
depends on FTRACE_RECORD_RECURSION
help
This defines the limit of number of functions that can be
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 25476ead681b..6511dc3a00da 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
- local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_set(&next_page->page->commit, 0);
- /* Again, either we update tail_page or an interrupt does */
- (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+ /* Either we update tail_page or an interrupt does */
+ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+ local_inc(&cpu_buffer->pages_touched);
}
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7c364b87352e..52f75c36bbca 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
@@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
+#endif
static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = {
.release = seq_release,
};
+#ifdef CONFIG_PERF_EVENTS
static const struct file_operations ftrace_event_id_fops = {
.read = event_id_read,
.llseek = default_llseek,
};
+#endif
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_file_tr,
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index bf70850035c7..404dba36bae3 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test)
static void test_csum_ipv6_magic(struct kunit *test)
{
-#if defined(CONFIG_NET)
const struct in6_addr *saddr;
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
__wsum csum;
+ if (!IS_ENABLED(CONFIG_NET))
+ return;
+
const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
@@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test)
CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
-#endif /* !CONFIG_NET */
}
static struct kunit_case __refdata checksum_test_cases[] = {
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 276c12140ee2..c288df9372ed 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = {
};
/* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
};
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b95c36765d04..2243cec18ecc 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 00e02138003e..efea25eb56ce 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 4ee1b976678b..703b84bd48d5 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
@@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
@@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
uf.event_mask[1] = *((u32 *) f->event_mask + 1);
}
- len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_sockptr(&uf, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+ if (err)
break;
- }
if (!capable(CAP_NET_RAW)) {
uf.type_mask &= hci_sec_filter.type_mask;
@@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
goto done;
}
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
hci_pi(sk)->mtu = opt;
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8fe02921adf1..c5d8799046cc 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2814,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
hci_le_scan_phy_params(phy, type,
- interval,
- window);
+ interval * 3,
+ window * 3);
num_phy++;
phy++;
}
@@ -2835,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
- hci_le_scan_phy_params(phy, type, interval, window);
+ hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
num_phy++;
phy++;
}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index c8793e57f4b5..ef0cc80b4c0c 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_factor == 0x00)
- return false;
+ if (!qos->bcast.sync_factor)
+ qos->bcast.sync_factor = 0x01;
if (qos->bcast.packing > 0x01)
return false;
@@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.skip > 0x01f3)
return false;
+ if (!qos->bcast.sync_timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
return false;
@@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.mse > 0x1f)
return false;
+ if (!qos->bcast.timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
return false;
@@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_iso_qos qos = default_qos;
u32 opt;
@@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- len = min_t(unsigned int, sizeof(qos), optlen);
-
- if (copy_from_sockptr(&qos, optval, len)) {
- err = -EFAULT;
- break;
- }
-
- if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
- err = -EINVAL;
+ err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+ if (err)
break;
- }
iso_pi(sk)->qos = qos;
iso_pi(sk)->qos_user_set = true;
@@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (optlen > sizeof(iso_pi(sk)->base)) {
- err = -EOVERFLOW;
+ err = -EINVAL;
break;
}
- len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
- if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+ optlen);
+ if (err)
break;
- }
- iso_pi(sk)->base_len = len;
+ iso_pi(sk)->base_len = optlen;
break;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 467b242d8be0..dc0897408793 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
return -EPROTO;
hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 4287aa6cc988..e7d810b23082 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -727,7 +727,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
- int len, err = 0;
+ int err = 0;
u32 opt;
BT_DBG("sk %p", sk);
@@ -754,11 +754,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.max_tx = chan->max_tx;
opts.txwin_size = chan->tx_win;
- len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_sockptr(&opts, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+ if (err)
break;
- }
if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
err = -EINVAL;
@@ -801,10 +799,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt & L2CAP_LM_FIPS) {
err = -EINVAL;
@@ -885,7 +882,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
struct bt_security sec;
struct bt_power pwr;
struct l2cap_conn *conn;
- int len, err = 0;
+ int err = 0;
u32 opt;
u16 mtu;
u8 mode;
@@ -911,11 +908,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level < BT_SECURITY_LOW ||
sec.level > BT_SECURITY_FIPS) {
@@ -960,10 +955,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt) {
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -975,10 +969,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt > BT_FLUSHABLE_ON) {
err = -EINVAL;
@@ -1010,11 +1003,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
- len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_sockptr(&pwr, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+ if (err)
break;
- }
if (pwr.force_active)
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
@@ -1023,10 +1014,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
err = -EOPNOTSUPP;
break;
@@ -1055,10 +1045,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+ if (err)
break;
- }
if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
sk->sk_state == BT_CONNECTED)
@@ -1086,10 +1075,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+ if (err)
break;
- }
BT_DBG("mode %u", mode);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index b54e8a530f55..29aa07e9db9d 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
switch (optname) {
case RFCOMM_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
err = -EFAULT;
break;
}
@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct bt_security sec;
int err = 0;
- size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level > BT_SECURITY_HIGH) {
err = -EINVAL;
@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 43daf965a01e..368e026f4d15 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
struct bt_codecs *codecs;
@@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }
/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(buffer, optval, optlen)) {
+ err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ if (err) {
hci_dev_put(hdev);
- err = -EFAULT;
break;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b150c9929b12..14365b20f1c5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 487670759578..fe89a056eb06 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1118,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1504,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c8f76f56dc16..d36ace160d42 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 92db9b474f2b..779aa6ecdd49 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7209419cfb0e..c1f62352a481 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 636b360311c5..131f7bb2110d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1135,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1513,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 819157bbb5a2..d5344563e525 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_RW) {
err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_MIUX) {
err = -EINVAL;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3019a4406ca4..74b63cdb5992 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5b41e2321209..d032eb5fa6df 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2665,7 +2665,9 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
}
} else if (!(flags & MSG_PEEK)) {
skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
+ WRITE_ONCE(u->oob_skb, NULL);
+ if (!WARN_ON_ONCE(skb_unref(skb)))
+ kfree_skb(skb);
skb = skb_peek(&sk->sk_receive_queue);
}
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index fa39b6265238..6433a414acf8 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- total_refs = file_count(u->sk.sk_socket->file);
+ total_refs = file_count(sk->sk_socket->file);
WARN_ON_ONCE(!u->inflight);
WARN_ON_ONCE(total_refs < u->inflight);
@@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock(sk);
+ unix_state_unlock(sk);
+ }
}
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3404d076a8a3..727aa20be4bd 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index c5c2ce113c92..d20c47d21ad8 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -467,6 +467,8 @@ static bool stackleak_gate(void)
return false;
if (STRING_EQUAL(section, ".entry.text"))
return false;
+ if (STRING_EQUAL(section, ".head.text"))
+ return false;
}
return track_frame_size >= 0;
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 318e2dad27e0..ae57bf69ad4a 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,6 +76,12 @@ enum {
DNS
};
+enum {
+ IPV4 = 1,
+ IPV6,
+ IP_TYPE_MAX
+};
+
static int in_hand_shake;
static char *os_name = "";
@@ -102,6 +108,11 @@ static struct utsname uts_buf;
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
struct kvp_record {
char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
return 0;
}
+int ip_version_check(const char *input_addr)
+{
+ struct in6_addr addr;
+
+ if (inet_pton(AF_INET, input_addr, &addr))
+ return IPV4;
+ else if (inet_pton(AF_INET6, input_addr, &addr))
+ return IPV6;
+
+ return -EINVAL;
+}
+
/*
* Only IPv4 subnet strings needs to be converted to plen
* For IPv6 the subnet is already privided in plen format
@@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
return plen;
}
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+ int ip_sec)
+{
+ char addr[INET6_ADDRSTRLEN], *output_str;
+ int ip_offset = 0, error = 0, ip_ver;
+ char *param_name;
+
+ if (type == DNS)
+ param_name = "dns";
+ else if (type == GATEWAY)
+ param_name = "gateway";
+ else
+ return -EINVAL;
+
+ output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+ if (!output_str)
+ return -ENOMEM;
+
+ while (1) {
+ memset(addr, 0, sizeof(addr));
+
+ if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+ (MAX_IP_ADDR_SIZE * 2)))
+ break;
+
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+ (ip_ver == IPV6 && ip_sec == IPV6)) {
+ /*
+ * do a bound check to avoid out-of bound writes
+ */
+ if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+ (strlen(addr) + 1)) {
+ strncat(output_str, addr,
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ strncat(output_str, ",",
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (strlen(output_str)) {
+ /*
+ * This is to get rid of that extra comma character
+ * in the end of the string
+ */
+ output_str[strlen(output_str) - 1] = '\0';
+ error = fprintf(f, "%s=%s\n", param_name, output_str);
+ }
+
+ free(output_str);
+ return error;
+}
+
static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
- int is_ipv6)
+ int ip_sec)
{
char addr[INET6_ADDRSTRLEN];
char subnet_addr[INET6_ADDRSTRLEN];
- int error, i = 0;
+ int error = 0, i = 0;
int ip_offset = 0, subnet_offset = 0;
- int plen;
+ int plen, ip_ver;
memset(addr, 0, sizeof(addr));
memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
subnet_addr,
(MAX_IP_ADDR_SIZE *
2))) {
- if (!is_ipv6)
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if (ip_ver == IPV4 && ip_sec == IPV4)
plen = kvp_subnet_to_plen((char *)subnet_addr);
- else
+ else if (ip_ver == IPV6 && ip_sec == IPV6)
plen = atoi(subnet_addr);
+ else
+ continue;
if (plen < 0)
return plen;
@@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
memset(subnet_addr, 0, sizeof(subnet_addr));
}
- return 0;
+ return error;
}
static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
{
- int error = 0;
+ int error = 0, ip_ver;
char if_filename[PATH_MAX];
char nm_filename[PATH_MAX];
FILE *ifcfg_file, *nmfile;
char cmd[PATH_MAX];
- int is_ipv6 = 0;
char *mac_addr;
int str_len;
@@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error)
goto setval_error;
- if (new_val->addr_family & ADDR_FAMILY_IPV6) {
- error = fprintf(nmfile, "\n[ipv6]\n");
- if (error < 0)
- goto setval_error;
- is_ipv6 = 1;
- } else {
- error = fprintf(nmfile, "\n[ipv4]\n");
- if (error < 0)
- goto setval_error;
- }
-
/*
* Now we populate the keyfile format
+ *
+ * The keyfile format expects the IPv6 and IPv4 configuration in
+ * different sections. Therefore we iterate through the list twice,
+ * once to populate the IPv4 section and the next time for IPv6
*/
+ ip_ver = IPV4;
+ do {
+ if (ip_ver == IPV4) {
+ error = fprintf(nmfile, "\n[ipv4]\n");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = fprintf(nmfile, "\n[ipv6]\n");
+ if (error < 0)
+ goto setval_error;
+ }
- if (new_val->dhcp_enabled) {
- error = kvp_write_file(nmfile, "method", "", "auto");
- if (error < 0)
- goto setval_error;
- } else {
- error = kvp_write_file(nmfile, "method", "", "manual");
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name services
+ */
+ error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+ (char *)new_val->sub_net,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
- /*
- * Write the configuration for ipaddress, netmask, gateway and
- * name services
- */
- error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
- (char *)new_val->sub_net, is_ipv6);
- if (error < 0)
- goto setval_error;
+ /*
+ * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+ * methods for ipv6 based on dhcp_enabled flag.
+ *
+ * For ipv4, set method to manual only when dhcp_enabled is
+ * false and specific ipv4 addresses are configured. If neither
+ * dhcp_enabled is true and no ipv4 addresses are configured,
+ * set method to 'disabled'.
+ *
+ * For ipv6, set method to manual when we configure ipv6
+ * addresses. Otherwise set method to 'auto' so that SLAAC from
+ * RA may be used.
+ */
+ if (ip_ver == IPV4) {
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ } else if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "disabled");
+ if (error < 0)
+ goto setval_error;
+ }
+ } else if (ip_ver == IPV6) {
+ if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ }
+ }
- /* we do not want ipv4 addresses in ipv6 section and vice versa */
- if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->gate_way,
+ GATEWAY, ip_ver);
if (error < 0)
goto setval_error;
- }
- if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->dns_addr, DNS,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
+
+ ip_ver++;
+ } while (ip_ver < IP_TYPE_MAX);
+
fclose(nmfile);
fclose(ifcfg_file);
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 4b0673bf52c2..07cfad817d53 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index f3c82ab5b14c..7d73da098047 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644
index 000000000000..9c8f17a41ce8
--- /dev/null
+++ b/tools/include/linux/panic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 8f08c3fd498d..0d3672e5d9ed 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
.PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
Busy% = MPERF_delta/TSC_delta
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
Note that these calculations depend on TSC_delta, so they
are not reliable during intervals when TSC_MHz is not running at the base frequency.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a334377f92b..98256468e248 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -36,6 +36,8 @@
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
#define UNUSED(x) (void)(x)
@@ -53,9 +55,13 @@
#define NAME_BYTES 20
#define PATH_BYTES 128
+#define MAX_NOFILE 0x8000
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
struct msr_counter {
unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
+#define BIC_SAM_mc6 (1ULL << 55)
+#define BIC_SAMMHz (1ULL << 56)
+#define BIC_SAMACTMHz (1ULL << 57)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+struct amperf_group_fd;
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+ GFX_rc6,
+ GFX_MHz,
+ GFX_ACTMHz,
+ SAM_mc6,
+ SAM_MHz,
+ SAM_ACTMHz,
+ GFX_MAX
+};
+
+struct gfx_sysfs_info {
+ const char *path;
+ FILE *fp;
+ unsigned int val;
+ unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
+ .has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+ RAPL_RCI_INDEX_ENERGY_PKG = 0,
+ RAPL_RCI_INDEX_ENERGY_CORES = 1,
+ RAPL_RCI_INDEX_DRAM = 2,
+ RAPL_RCI_INDEX_GFX = 3,
+ RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+ RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+ RAPL_RCI_INDEX_CORE_ENERGY = 6,
+ NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+ RAPL_UNIT_INVALID,
+ RAPL_UNIT_JOULES,
+ RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+ unsigned long long data[NUM_RAPL_COUNTERS];
+ enum rapl_source source[NUM_RAPL_COUNTERS];
+ unsigned long long flags[NUM_RAPL_COUNTERS];
+ double scale[NUM_RAPL_COUNTERS];
+ enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+ union {
+ /* Active when source == RAPL_SOURCE_MSR */
+ struct {
+ unsigned long long msr[NUM_RAPL_COUNTERS];
+ unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+ int msr_shift[NUM_RAPL_COUNTERS];
+ };
+ };
+
+ int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+ int feature_mask; /* Mask for testing if the counter is supported on host */
+ const char *perf_subsys;
+ const char *perf_name;
+ unsigned long long msr;
+ unsigned long long msr_mask;
+ int msr_shift; /* Positive mean shift right, negative mean shift left */
+ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+ unsigned int rci_index; /* Maps data from perf counters to global variables */
+ unsigned long long bic;
+ double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */
+ unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+ {
+ .feature_mask = RAPL_PKG,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_CORE_ENERGY_STATUS,
+ .perf_subsys = "power",
+ .perf_name = "energy-cores",
+ .msr = MSR_PP0_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM,
+ .perf_subsys = "power",
+ .perf_name = "energy-ram",
+ .msr = MSR_DRAM_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_dram_energy_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM,
+ .bic = BIC_RAMWatt | BIC_RAM_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_GFX,
+ .perf_subsys = "power",
+ .perf_name = "energy-gpu",
+ .msr = MSR_PP1_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_GFX,
+ .bic = BIC_GFXWatt | BIC_GFX_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_PKG_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_PKG_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+ .bic = BIC_PKG__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_DRAM_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+ .bic = BIC_RAM__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_CORE_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = 0,
+ },
+};
+
+struct rapl_counter {
+ unsigned long long raw_value;
+ enum rapl_unit unit;
+ double scale;
+};
+
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
- unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
+ struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
- unsigned long long cpu_lpi;
- unsigned long long sys_lpi;
+ long long cpu_lpi;
+ long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
+ long long sam_mc6_ms;
+ unsigned int sam_mhz;
+ unsigned int sam_act_mhz;
unsigned int package_id;
- unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
struct msr_counter *pp;
} sys;
+void free_sys_counters(void)
+{
+ struct msr_counter *p = sys.tp, *pnext = NULL;
+
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.cp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.pp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ sys.added_thread_counters = 0;
+ sys.added_core_counters = 0;
+ sys.added_package_counters = 0;
+ sys.tp = NULL;
+ sys.cp = NULL;
+ sys.pp = NULL;
+}
+
struct system_summary {
struct thread_data threads;
struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+ "or run with --no-msr, or run as root", pathname);
fd_percpu[cpu] = fd;
return fd;
}
+static void bic_disable_msr_access(void)
+{
+ const unsigned long bic_msrs =
+ BIC_SMI |
+ BIC_CPU_c1 |
+ BIC_CPU_c3 |
+ BIC_CPU_c6 |
+ BIC_CPU_c7 |
+ BIC_Mod_c6 |
+ BIC_CoreTmp |
+ BIC_Totl_c0 |
+ BIC_Any_c0 |
+ BIC_GFX_c0 |
+ BIC_CPUGFX |
+ BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+ bic_enabled &= ~bic_msrs;
+
+ free_sys_counters();
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
+ assert(!no_perf);
+
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
- struct perf_event_attr pea;
- int fd;
+ struct perf_event_attr attr;
+ const pid_t pid = -1;
+ const unsigned long flags = 0;
- memset(&pea, 0, sizeof(struct perf_event_attr));
- pea.type = PERF_TYPE_HARDWARE;
- pea.size = sizeof(struct perf_event_attr);
- pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+ assert(!no_perf);
- /* counter for cpu_num, including user + kernel and all processes */
- fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1) {
- warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
- BIC_NOT_PRESENT(BIC_IPC);
- }
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 0;
+ attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+ attr.read_format = read_format;
+
+ const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
return fd;
}
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
if (fd_instr_count_percpu[cpu])
return fd_instr_count_percpu[cpu];
- fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+ fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
return fd_instr_count_percpu[cpu];
}
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
+ assert(!no_msr);
+
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
+int probe_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long dummy;
+
+ assert(!no_msr);
+
+ retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+ if (retval != sizeof(dummy))
+ return 1;
+
+ return 0;
+}
+
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
+ " -M, --no-msr Disable all uses of the MSR driver\n"
+ " -P, --no-perf Disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SAM_mc6))
+ outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+ outp +=
+ sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ t->counter[i], mp->path);
}
}
- if (c) {
+ if (c && is_cpu_first_thread_in_core(t, c, p)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
- outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+ const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+ const double energy_scale = c->core_energy.scale;
+
+ if (c->core_energy.unit == RAPL_UNIT_JOULES)
+ outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+ outp +=
+ sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ c->counter[i], mp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p) {
+ if (p && is_cpu_first_core_in_package(t, c, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+ outp +=
+ sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ p->counter[i], mp->path);
}
}
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
}
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+ assert(desired_unit != RAPL_UNIT_INVALID);
+
+ /*
+ * For now we don't expect anything other than joules,
+ * so just simplify the logic.
+ */
+ assert(c->unit == RAPL_UNIT_JOULES);
+
+ const double scaled = c->raw_value * c->scale;
+
+ if (desired_unit == RAPL_UNIT_WATTS)
+ return scaled / interval;
+ return scaled;
+}
+
/*
* column formatting convention & formats
*/
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+ /* SAMmc6 */
+ if (DO_BIC(BIC_SAM_mc6)) {
+ if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->sam_mc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* SAMMHz */
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+ /* SAMACTMHz */
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
- if (DO_BIC(BIC_CPU_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
- if (DO_BIC(BIC_SYS_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_CPU_LPI)) {
+ if (p->cpu_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_SYS_LPI)) {
+ if (p->sys_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_PkgWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- p->energy_dram * rapl_dram_energy_units / interval_float);
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
- old->energy_pkg = new->energy_pkg - old->energy_pkg;
- old->energy_cores = new->energy_cores - old->energy_cores;
- old->energy_gfx = new->energy_gfx - old->energy_gfx;
- old->energy_dram = new->energy_dram - old->energy_dram;
- old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
- old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+ /* flag an error when mc6 counter resets/wraps */
+ if (old->sam_mc6_ms > new->sam_mc6_ms)
+ old->sam_mc6_ms = -1;
+ else
+ old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+ old->sam_mhz = new->sam_mhz;
+ old->sam_act_mhz = new->sam_act_mhz;
+
+ old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+ old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+ old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+ old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+ old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value =
+ new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
- DELTA_WRAP32(new->core_energy, old->core_energy);
+ DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
return retval;
}
+void rapl_counter_clear(struct rapl_counter *c)
+{
+ c->raw_value = 0;
+ c->scale = 0.0;
+ c->unit = RAPL_UNIT_INVALID;
+}
+
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
- c->core_energy = 0;
+ rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->cpu_lpi = 0;
p->sys_lpi = 0;
- p->energy_pkg = 0;
- p->energy_dram = 0;
- p->energy_cores = 0;
- p->energy_gfx = 0;
- p->rapl_pkg_perf_status = 0;
- p->rapl_dram_perf_status = 0;
+ rapl_counter_clear(&p->energy_pkg);
+ rapl_counter_clear(&p->energy_dram);
+ rapl_counter_clear(&p->energy_cores);
+ rapl_counter_clear(&p->energy_gfx);
+ rapl_counter_clear(&p->rapl_pkg_perf_status);
+ rapl_counter_clear(&p->rapl_dram_perf_status);
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
+ p->sam_mc6_ms = 0;
+ p->sam_mhz = 0;
+ p->sam_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->counter[i] = 0;
}
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+ /* Copy unit and scale from src if dst is not initialized */
+ if (dst->unit == RAPL_UNIT_INVALID) {
+ dst->unit = src->unit;
+ dst->scale = src->scale;
+ }
+
+ assert(dst->unit == src->unit);
+ assert(dst->scale == src->scale);
+
+ dst->raw_value += src->raw_value;
+}
+
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
- average.cores.core_energy += c->core_energy;
+ rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.cpu_lpi = p->cpu_lpi;
average.packages.sys_lpi = p->sys_lpi;
- average.packages.energy_pkg += p->energy_pkg;
- average.packages.energy_dram += p->energy_dram;
- average.packages.energy_cores += p->energy_cores;
- average.packages.energy_gfx += p->energy_gfx;
+ rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+ rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+ rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+ rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
+ average.packages.sam_mc6_ms = p->sam_mc6_ms;
+ average.packages.sam_mhz = p->sam_mhz;
+ average.packages.sam_act_mhz = p->sam_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
- average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
- average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+ rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+ rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW)
- continue;
- average.packages.counter[i] += p->counter[i];
+ if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+ average.packages.counter[i] = p->counter[i];
+ else
+ average.packages.counter[i] += p->counter[i];
}
return 0;
}
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
if (mp->msr_num != 0) {
+ assert(!no_msr);
if (get_msr(cpu, mp->msr_num, counterp))
return -1;
} else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
{
char path[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
die);
return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
return epb;
msr_fallback:
+ if (no_msr)
+ return -1;
+
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
return msr & 0xf;
@@ -2700,6 +3099,351 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
return 0;
}
+struct amperf_group_fd {
+ int aperf; /* Also the group descriptor */
+ int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
+{
+ int fdmt;
+ int bytes_read;
+ char buf[64];
+ int ret = -1;
+
+ fdmt = open(path, O_RDONLY, 0);
+ if (fdmt == -1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+ if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ buf[bytes_read] = '\0';
+
+ if (sscanf(buf, parse_format, value_ptr) != 1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ ret = 0;
+
+cleanup_and_exit:
+ close(fdmt);
+ return ret;
+}
+
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+ unsigned int v;
+ int status;
+
+ status = read_perf_counter_info(path, parse_format, &v);
+ if (status)
+ v = -1;
+
+ return v;
+}
+
+static unsigned int read_msr_type(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/type";
+ const char *const format = "%u";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_aperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_mperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_type(const char *subsys)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+ const char *const format = "%u";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+ const char *const format = "event=%x";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+ const char *const format = "%s";
+ char path[128];
+ char unit_buffer[16];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ read_perf_counter_info(path, format, &unit_buffer);
+ if (strcmp("Joules", unit_buffer) == 0)
+ return RAPL_UNIT_JOULES;
+
+ return RAPL_UNIT_INVALID;
+}
+
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+ const char *const format = "%lf";
+ char path[128];
+ double scale;
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ if (read_perf_counter_info(path, format, &scale))
+ return 0.0;
+
+ return scale;
+}
+
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+ const unsigned int msr_type = read_msr_type();
+ const unsigned int aperf_config = read_aperf_config();
+ const unsigned int mperf_config = read_mperf_config();
+ struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+ fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+ fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+ return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+ assert(fd_amperf_percpu);
+
+ if (fd_amperf_percpu[cpu].aperf)
+ return fd_amperf_percpu[cpu].aperf;
+
+ fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+ return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+ union {
+ struct {
+ unsigned long nr_entries;
+ unsigned long aperf;
+ unsigned long mperf;
+ };
+
+ unsigned long as_array[3];
+ } cnt;
+
+ const int fd_amperf = get_amperf_fd(cpu);
+
+ /*
+ * Read the TSC with rdtsc, because we want the absolute value and not
+ * the offset from the start of the counter.
+ */
+ t->tsc = rdtsc();
+
+ const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+ if (n != sizeof(cnt.as_array))
+ return -2;
+
+ t->aperf = cnt.aperf * aperf_mperf_multiplier;
+ t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ int aperf_mperf_retry_count = 0;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+retry:
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+ size_t ret = 0;
+
+ for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+ if (rci->source[i] == RAPL_SOURCE_PERF)
+ ++ret;
+
+ return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+ rc->raw_value = rci->data[idx];
+ rc->unit = rci->unit[idx];
+ rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+ if (debug)
+ fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+ assert(rapl_counter_info_perdomain);
+
+ /*
+ * If we have any perf counters to read, read them all now, in bulk
+ */
+ if (rci->fd_perf != -1) {
+ size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+ const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+ const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+ if (actual_read_size != expected_read_size)
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+ actual_read_size);
+ }
+
+ for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+ switch (rci->source[i]) {
+ case RAPL_SOURCE_NONE:
+ break;
+
+ case RAPL_SOURCE_PERF:
+ assert(pi < ARRAY_SIZE(perf_data));
+ assert(rci->fd_perf != -1);
+
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+ i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+ rci->data[i] = perf_data[pi];
+
+ ++pi;
+ break;
+
+ case RAPL_SOURCE_MSR:
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+ assert(!no_msr);
+ if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+ if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ } else {
+ if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ }
+
+ rci->data[i] &= rci->msr_mask[i];
+ if (rci->msr_shift[i] >= 0)
+ rci->data[i] >>= abs(rci->msr_shift[i]);
+ else
+ rci->data[i] <<= abs(rci->msr_shift[i]);
+
+ break;
+ }
+ }
+
+ _Static_assert(NUM_RAPL_COUNTERS == 7);
+ write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+ write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+ write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+ write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+ write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+ write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+ write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2709,12 +3453,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int cpu = t->cpu_id;
unsigned long long msr;
- int aperf_mperf_retry_count = 0;
struct msr_counter *mp;
int i;
+ int status;
if (cpu_migrate(cpu)) {
- fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
return -1;
}
@@ -2722,63 +3466,26 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (first_counter_read)
get_apic_id(t);
-retry:
+
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
|| soft_c1_residency_display(BIC_Avg_MHz)) {
- unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
-
- /*
- * The TSC, APERF and MPERF must be read together for
- * APERF/MPERF and MPERF/TSC to give accurate results.
- *
- * Unfortunately, APERF and MPERF are read by
- * individual system call, so delays may occur
- * between them. If the time to read them
- * varies by a large amount, we re-read them.
- */
-
- /*
- * This initial dummy APERF read has been seen to
- * reduce jitter in the subsequent reads.
- */
-
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
-
- t->tsc = rdtsc(); /* re-read close to APERF */
-
- tsc_before = t->tsc;
+ int status = -1;
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+ assert(!no_perf || !no_msr);
- tsc_between = rdtsc();
-
- if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
- return -4;
-
- tsc_after = rdtsc();
-
- aperf_time = tsc_between - tsc_before;
- mperf_time = tsc_after - tsc_between;
-
- /*
- * If the system call latency to read APERF and MPERF
- * differ by more than 2x, then try again.
- */
- if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
- aperf_mperf_retry_count++;
- if (aperf_mperf_retry_count < 5)
- goto retry;
- else
- warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ switch (amperf_source) {
+ case AMPERF_SOURCE_PERF:
+ status = read_aperf_mperf_tsc_perf(t, cpu);
+ break;
+ case AMPERF_SOURCE_MSR:
+ status = read_aperf_mperf_tsc_msr(t, cpu);
+ break;
}
- aperf_mperf_retry_count = 0;
- t->aperf = t->aperf * aperf_mperf_multiplier;
- t->mperf = t->mperf * aperf_mperf_multiplier;
+ if (status != 0)
+ return status;
}
if (DO_BIC(BIC_IPC))
@@ -2806,6 +3513,12 @@ retry:
if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
+ if (platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, c->core_id, c, p);
+ if (status != 0)
+ return status;
+ }
+
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
@@ -2846,12 +3559,6 @@ retry:
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
- return -14;
- c->core_energy = msr & 0xFFFFFFFF;
- }
-
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &c->counter[i]))
return -10;
@@ -2911,59 +3618,39 @@ retry:
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (platform->rapl_msrs & RAPL_PKG) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
- return -13;
- p->energy_pkg = msr;
- }
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
- if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
- return -14;
- p->energy_cores = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM) {
- if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
- return -15;
- p->energy_dram = msr;
- }
- if (platform->rapl_msrs & RAPL_GFX) {
- if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
- return -16;
- p->energy_gfx = msr;
- }
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
- return -16;
- p->rapl_pkg_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
- return -16;
- p->rapl_dram_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
- return -13;
- p->energy_pkg = msr;
+ if (!platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, p->package_id, c, p);
+ if (status != 0)
+ return status;
}
+
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
- if (DO_BIC(BIC_GFX_rc6))
- p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
/* n.b. assume die0 uncore frequency applies to whole package */
if (DO_BIC(BIC_UNCORE_MHZ))
p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
if (DO_BIC(BIC_GFXMHz))
- p->gfx_mhz = gfx_cur_mhz;
+ p->gfx_mhz = gfx_info[GFX_MHz].val;
if (DO_BIC(BIC_GFXACTMHz))
- p->gfx_act_mhz = gfx_act_mhz;
+ p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+ if (DO_BIC(BIC_SAM_mc6))
+ p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+ if (DO_BIC(BIC_SAMMHz))
+ p->sam_mhz = gfx_info[SAM_MHz].val;
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
unsigned long long msr;
int *pkg_cstate_limits;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
{
unsigned long long msr;
- if (!platform->has_irtl_msrs)
+ if (!platform->has_irtl_msrs || no_msr)
return;
if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
{
int i;
+ if (!fd_percpu)
+ return;
+
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
if (fd_percpu[i] != 0)
close(fd_percpu[i]);
}
free(fd_percpu);
+ fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+ int i;
+
+ if (!fd_amperf_percpu)
+ return;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_amperf_percpu[i].mperf != 0)
+ close(fd_amperf_percpu[i].mperf);
+
+ if (fd_amperf_percpu[i].aperf != 0)
+ close(fd_amperf_percpu[i].aperf);
+ }
+
+ free(fd_amperf_percpu);
+ fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+ if (!fd_instr_count_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_instr_count_percpu[i] != 0)
+ close(fd_instr_count_percpu[i]);
+ }
+
+ free(fd_instr_count_percpu);
+ fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+ if (!rapl_counter_info_perdomain)
+ return;
+
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+ close(rapl_counter_info_perdomain[domain_id].fd_perf);
+ }
+
+ free(rapl_counter_info_perdomain);
}
void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
outp = NULL;
free_fd_percpu();
+ free_fd_instr_count_percpu();
+ free_fd_amperf_percpu();
+ free_fd_rapl_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
void re_initialize(void)
{
free_all_buffers();
setup_all_buffers(false);
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+ linux_perf_init();
+ rapl_perf_init();
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+ topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
}
/*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
*
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
*
* return 1 if config change requires a restart, else return 0
*/
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
- retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
- if (retval != 1)
- err(1, "GFX rc6");
-
- fclose(fp);
-
- return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
- }
-
- retval = fscanf(fp, "%d", &gfx_cur_mhz);
- if (retval != 1)
- err(1, "GFX MHz");
-
- return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
+ switch (idx) {
+ case GFX_rc6:
+ case SAM_mc6:
+ fp = fopen_or_die(gfx_info[idx].path, "r");
+ retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+ if (retval != 1)
+ err(1, "rc6");
+ fclose(fp);
+ return 0;
+ case GFX_MHz:
+ case GFX_ACTMHz:
+ case SAM_MHz:
+ case SAM_ACTMHz:
+ if (gfx_info[idx].fp == NULL) {
+ gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+ } else {
+ rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
+ }
+ retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+ if (retval != 1)
+ err(1, "MHz");
+ return 0;
+ default:
+ return -EINVAL;
}
-
- retval = fscanf(fp, "%d", &gfx_act_mhz);
- if (retval != 1)
- err(1, "GFX ACT MHz");
-
- return 0;
}
/*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
return 1;
if (DO_BIC(BIC_GFX_rc6))
- snapshot_gfx_rc6_ms();
+ snapshot_graphics(GFX_rc6);
if (DO_BIC(BIC_GFXMHz))
- snapshot_gfx_mhz();
+ snapshot_graphics(GFX_MHz);
if (DO_BIC(BIC_GFXACTMHz))
- snapshot_gfx_act_mhz();
+ snapshot_graphics(GFX_ACTMHz);
+
+ if (DO_BIC(BIC_SAM_mc6))
+ snapshot_graphics(SAM_mc6);
+
+ if (DO_BIC(BIC_SAMMHz))
+ snapshot_graphics(SAM_MHz);
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ snapshot_graphics(SAM_ACTMHz);
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
int ret, idx;
unsigned long long msr_cur, msr_last;
+ assert(!no_msr);
+
if (!per_cpu_msr_sum)
return 1;
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
UNUSED(c);
UNUSED(p);
+ assert(!no_msr);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
/*
* set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
*/
int set_my_sched_priority(int priority)
{
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
errno = 0;
original_priority = getpriority(PRIO_PROCESS, 0);
if (errno && (original_priority == -1))
- err(errno, "getpriority");
+ return -21;
retval = setpriority(PRIO_PROCESS, 0, priority);
if (retval)
- errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+ return -21;
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+ return -21;
return original_priority;
}
@@ -4314,6 +5034,9 @@ void turbostat_loop()
/*
* elevate own priority for interval mode
+ *
+ * ignore on error - we probably don't have permission to set it, but
+ * it's not a big deal
*/
set_my_sched_priority(-20);
@@ -4399,10 +5122,13 @@ void check_dev_msr()
struct stat sb;
char pathname[32];
+ if (no_msr)
+ return;
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ no_msr = 1;
}
/*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
{
cap_t caps;
cap_flag_value_t cap_flag_value;
+ int ret = 0;
caps = cap_get_proc();
if (caps == NULL)
- err(-6, "cap_get_proc\n");
+ return 1;
- if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
- err(-6, "cap_get\n");
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+ ret = 1;
+ goto free_and_exit;
+ }
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- return 1;
+ ret = 1;
+ goto free_and_exit;
}
+free_and_exit:
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
- return 0;
+ return ret;
}
-void check_permissions(void)
+void check_msr_permission(void)
{
- int do_exit = 0;
+ int failed = 0;
char pathname[32];
+ if (no_msr)
+ return;
+
/* check for CAP_SYS_RAWIO */
- do_exit += check_for_cap_sys_rawio();
+ failed += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
- do_exit++;
- warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ failed++;
}
- /* if all else fails, thell them to be root */
- if (do_exit)
- if (getuid() != 0)
- warnx("... or simply run as root");
-
- if (do_exit)
- exit(-6);
+ if (failed) {
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+ no_msr = 1;
+ }
}
void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
unsigned long long msr;
unsigned int base_ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
if (!has_turbo)
return;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
static void probe_intel_uncore_frequency(void)
{
int i, j;
- char path[128];
+ char path[256];
if (!genuine_intel)
return;
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- /* Cluster level sysfs not supported yet. */
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
- return;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ goto probe_cluster;
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
+ BIC_PRESENT(BIC_UNCORE_MHZ);
if (quiet)
return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
int k, l;
+ char path_base[128];
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+ j);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
- i, j);
+ sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
- i, j);
+ sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
}
}
+ return;
+
+probe_cluster:
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+ return;
+
+ if (quiet)
+ return;
+
+ for (i = 0;; ++i) {
+ int k, l;
+ char path_base[128];
+ int package_id, domain_id, cluster_id;
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+ if (access(path_base, R_OK))
+ break;
+
+ sprintf(path, "%s/package_id", path_base);
+ package_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/domain_id", path_base);
+ domain_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/fabric_cluster_id", path_base);
+ cluster_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+ cluster_id, k / 1000, l / 1000);
+
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
+ }
}
static void probe_graphics(void)
{
+ /* Xe graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+ FILE *fp;
+ char buf[8];
+ bool gt0_is_gt;
+ int idx;
+
+ fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+ if (!fp)
+ goto next;
+
+ if (!fread(buf, sizeof(char), 7, fp)) {
+ fclose(fp);
+ goto next;
+ }
+ fclose(fp);
+
+ if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+ gt0_is_gt = true;
+ else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+ gt0_is_gt = false;
+ else
+ goto next;
+
+ idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+ idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+ idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+ idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+ goto end;
+ }
+
+next:
+ /* New i915 graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+ gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+ gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+ gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+ goto end;
+ }
+
+ /* Fall back to traditional i915 graphics sysfs knobs */
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
- if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
- if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+ if (gfx_info[GFX_rc6].path)
+ BIC_PRESENT(BIC_GFX_rc6);
+ if (gfx_info[GFX_MHz].path)
+ BIC_PRESENT(BIC_GFXMHz);
+ if (gfx_info[GFX_ACTMHz].path)
BIC_PRESENT(BIC_GFXACTMHz);
+ if (gfx_info[SAM_mc6].path)
+ BIC_PRESENT(BIC_SAM_mc6);
+ if (gfx_info[SAM_MHz].path)
+ BIC_PRESENT(BIC_SAMMHz);
+ if (gfx_info[SAM_ACTMHz].path)
+ BIC_PRESENT(BIC_SAMACTMHz);
}
static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!has_hwp)
return 0;
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
cpu = t->cpu_id;
/* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
unsigned long long msr;
unsigned int time_unit;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
- if (rapl_joules) {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_Pkg_J);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_Cor_J);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAM_J);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFX_J);
- } else {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_PkgWatt);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_CorWatt);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAMWatt);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFXWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
- BIC_PRESENT(BIC_PKG__);
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
- BIC_PRESENT(BIC_RAM__);
+ if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ bic_enabled &= ~BIC_PKG__;
+ if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ bic_enabled &= ~BIC_RAM__;
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
{
unsigned long long msr;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs)
+ if (!platform->rapl_msrs || no_msr)
return;
if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!(do_dts || do_ptm))
return 0;
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!genuine_intel)
return;
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_feature_control)
return;
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_pwr_mgmt)
return;
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_c6_demotion_policy_config)
return;
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
fd = open(path, O_RDONLY);
if (fd < 0) {
- warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+ if (debug)
+ warnx("Read %s failed", path);
return;
}
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
close(fd);
}
+static int has_instr_count_access(void)
+{
+ int fd;
+ int has_access;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+ has_access = fd != -1;
+
+ if (fd != -1)
+ close(fd);
+
+ if (!has_access)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "instructions retired perf counter", "--no-perf");
+
+ return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+ return BIC_IS_ENABLED(BIC_Avg_MHz)
+ || BIC_IS_ENABLED(BIC_Busy)
+ || BIC_IS_ENABLED(BIC_Bzy_MHz)
+ || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
+{
+ if (no_perf)
+ return -1;
+
+ const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+ if (scale == 0.0)
+ return -1;
+
+ const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+ if (unit == RAPL_UNIT_INVALID)
+ return -1;
+
+ const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+ const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+ const int fd_counter =
+ open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (fd_counter == -1)
+ return -1;
+
+ /* If it's the first counter opened, make it a group descriptor */
+ if (rci->fd_perf == -1)
+ rci->fd_perf = fd_counter;
+
+ *scale_ = scale;
+ *unit_ = unit;
+ return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale, enum rapl_unit *unit)
+{
+ int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+ if (debug)
+ fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+ return ret;
+}
+
/*
* Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
{
- if (!BIC_IS_ENABLED(BIC_IPC))
- return;
-
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
- if (fd_instr_count_percpu == NULL)
- err(-1, "calloc fd_instr_count_percpu");
+ if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+ }
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+ fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+ if (fd_amperf_percpu == NULL)
+ err(-1, "calloc fd_amperf_percpu");
+ }
+}
+
+void rapl_perf_init(void)
+{
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+ bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+ rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+ if (rapl_counter_info_perdomain == NULL)
+ err(-1, "calloc rapl_counter_info_percpu");
+
+ /*
+ * Initialize rapl_counter_info_percpu
+ */
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+ rci->fd_perf = -1;
+ for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+ rci->data[i] = 0;
+ rci->source[i] = RAPL_SOURCE_NONE;
+ }
+ }
- BIC_PRESENT(BIC_IPC);
+ /*
+ * Open/probe the counters
+ * If can't get it via perf, fallback to MSR
+ */
+ for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+ const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+ bool has_counter = 0;
+ double scale;
+ enum rapl_unit unit;
+ int next_domain;
+
+ memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+ for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ /* Skip already seen and handled RAPL domains */
+ next_domain =
+ platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+ if (domain_visited[next_domain])
+ continue;
+
+ domain_visited[next_domain] = 1;
+
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+ /* Check if the counter is enabled and accessible */
+ if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+ /* Use perf API for this counter */
+ if (!no_perf && cai->perf_name
+ && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
+ }
+ }
+
+ if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+ has_counter = 1;
+ }
+
+ /* If any CPU has access to the counter, make it present */
+ if (has_counter)
+ BIC_PRESENT(cai->bic);
+ }
+
+ free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+ if (no_msr)
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_APERF))
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_MPERF))
+ return 0;
+
+ return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+ struct amperf_group_fd fds;
+
+ /*
+ * Cache the last result, so we don't warn the user multiple times
+ *
+ * Negative means cached, no access
+ * Zero means not cached
+ * Positive means cached, has access
+ */
+ static int has_access_cached;
+
+ if (no_perf)
+ return 0;
+
+ if (has_access_cached != 0)
+ return has_access_cached > 0;
+
+ fds = open_amperf_fd(base_cpu);
+ has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+ if (fds.aperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "APERF perf counter", "--no-perf");
+ else
+ close(fds.aperf);
+
+ if (fds.mperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "MPERF perf counter", "--no-perf");
+ else
+ close(fds.mperf);
+
+ if (has_access_cached == 0)
+ has_access_cached = -1;
+
+ return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+ if (!is_aperf_access_required())
+ return 0;
+
+ if (!no_msr && has_amperf_access_via_msr())
+ return 1;
+
+ if (!no_perf && has_amperf_access_via_perf())
+ return 1;
+
+ return 0;
}
void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
if (platform->has_msr_module_c6_res_ms)
BIC_PRESENT(BIC_Mod_c6);
- if (platform->has_ext_cst_msrs) {
+ if (platform->has_ext_cst_msrs && !no_msr) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned long long ucode_patch = 0;
+ bool ucode_patch_valid = false;
eax = ebx = ecx = edx = 0;
@@ -5650,8 +6762,12 @@ void process_cpuid()
ecx_flags = ecx;
edx_flags = edx;
- if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
- warnx("get_msr(UCODE)");
+ if (!no_msr) {
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)");
+ else
+ ucode_patch_valid = true;
+ }
/*
* check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
- family, model, stepping, family, model, stepping,
- (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+ family, model, stepping, family, model, stepping);
+ if (ucode_patch_valid)
+ fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fputc('\n', outf);
+
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
- if (has_aperf) {
+ if (has_aperf && has_amperf_access()) {
BIC_PRESENT(BIC_Avg_MHz);
BIC_PRESENT(BIC_Busy);
BIC_PRESENT(BIC_Bzy_MHz);
+ BIC_PRESENT(BIC_IPC);
}
do_dts = eax & (1 << 0);
if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+ bclk = bus_mhz;
+
+ base_hz = base_mhz * 1000000;
+ has_base_hz = 1;
+
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+
if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
probe_thermal();
- if (platform->has_nhm_msrs)
+ if (platform->has_nhm_msrs && !no_msr)
BIC_PRESENT(BIC_SMI);
if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
topo.allowed_packages = 0;
for_all_cpus(update_topo, ODD_COUNTERS);
}
+
void setup_all_buffers(bool startup)
{
topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
}
+static void set_amperf_source(void)
+{
+ amperf_source = AMPERF_SOURCE_PERF;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+ amperf_source = AMPERF_SOURCE_MSR;
+
+ if (quiet || !debug)
+ return;
+
+ fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+ /*
+ * It only makes sense to call this after the command line is parsed,
+ * otherwise sys structure is not populated.
+ */
+
+ return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+ if (no_msr)
+ return false;
+
+ if (has_added_counters())
+ return true;
+
+ return BIC_IS_ENABLED(BIC_SMI)
+ || BIC_IS_ENABLED(BIC_CPU_c1)
+ || BIC_IS_ENABLED(BIC_CPU_c3)
+ || BIC_IS_ENABLED(BIC_CPU_c6)
+ || BIC_IS_ENABLED(BIC_CPU_c7)
+ || BIC_IS_ENABLED(BIC_Mod_c6)
+ || BIC_IS_ENABLED(BIC_CoreTmp)
+ || BIC_IS_ENABLED(BIC_Totl_c0)
+ || BIC_IS_ENABLED(BIC_Any_c0)
+ || BIC_IS_ENABLED(BIC_GFX_c0)
+ || BIC_IS_ENABLED(BIC_CPUGFX)
+ || BIC_IS_ENABLED(BIC_Pkgpc3)
+ || BIC_IS_ENABLED(BIC_Pkgpc6)
+ || BIC_IS_ENABLED(BIC_Pkgpc2)
+ || BIC_IS_ENABLED(BIC_Pkgpc7)
+ || BIC_IS_ENABLED(BIC_Pkgpc8)
+ || BIC_IS_ENABLED(BIC_Pkgpc9)
+ || BIC_IS_ENABLED(BIC_Pkgpc10)
+ /* TODO: Multiplex access with perf */
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_PkgWatt)
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_GFXWatt)
+ || BIC_IS_ENABLED(BIC_RAMWatt)
+ || BIC_IS_ENABLED(BIC_Pkg_J)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_GFX_J)
+ || BIC_IS_ENABLED(BIC_RAM_J)
+ || BIC_IS_ENABLED(BIC_PKG__)
+ || BIC_IS_ENABLED(BIC_RAM__)
+ || BIC_IS_ENABLED(BIC_PkgTmp)
+ || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+ if (!is_msr_access_required())
+ no_msr = 1;
+
+ check_dev_msr();
+ check_msr_permission();
+
+ if (no_msr)
+ bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+ const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+ if (no_perf || !intrcount_required || !has_instr_count_access())
+ bic_enabled &= ~BIC_IPC;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (!aperf_required || !has_amperf_access()) {
+ bic_enabled &= ~BIC_Avg_MHz;
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ bic_enabled &= ~BIC_IPC;
+ }
+}
+
void turbostat_init()
{
setup_all_buffers(true);
set_base_cpu();
- check_dev_msr();
- check_permissions();
+ check_msr_access();
+ check_perf_access();
process_cpuid();
probe_pm_features();
+ set_amperf_source();
linux_perf_init();
+ rapl_perf_init();
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
+
+ /*
+ * If TSC tweak is needed, but couldn't get it,
+ * disable more BICs, since it can't be reported accurately.
+ */
+ if (platform->enable_tsc_tweak && !has_base_hz) {
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ }
}
int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
{
struct msr_counter *msrp;
+ if (no_msr && msr_num)
+ errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
msrp = calloc(1, sizeof(struct msr_counter));
if (msrp == NULL) {
perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
{ "list", no_argument, 0, 'l' },
{ "out", required_argument, 0, 'o' },
{ "quiet", no_argument, 0, 'q' },
+ { "no-msr", no_argument, 0, 'M' },
+ { "no-perf", no_argument, 0, 'P' },
{ "show", required_argument, 0, 's' },
{ "Summary", no_argument, 0, 'S' },
{ "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+ /*
+ * Parse some options early, because they may make other options invalid,
+ * like adding the MSR counter with --add and at the same time using --no-msr.
+ */
+ while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'M':
+ no_msr = 1;
+ break;
+ case 'P':
+ no_perf = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 0;
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+ case 'M':
+ case 'P':
+ /* Parsed earlier */
+ break;
case 'n':
num_iterations = strtod(optarg, NULL);
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
}
}
+void set_rlimit(void)
+{
+ struct rlimit limit;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to get rlimit");
+
+ if (limit.rlim_max < MAX_NOFILE)
+ limit.rlim_max = MAX_NOFILE;
+ if (limit.rlim_cur < MAX_NOFILE)
+ limit.rlim_cur = MAX_NOFILE;
+
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to set rlimit");
+}
+
int main(int argc, char **argv)
{
int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
probe_sysfs();
+ if (!getuid())
+ set_rlimit();
+
turbostat_init();
- msr_sum_record();
+ if (!no_msr)
+ msr_sum_record();
/* dump counters and exit */
if (dump_only)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 908e0d083936..61c69297e797 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
- dpa_perf->coord.read_latency = 500;
- dpa_perf->coord.write_latency = 500;
- dpa_perf->coord.read_bandwidth = 1000;
- dpa_perf->coord.write_bandwidth = 1000;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 541bf192e30e..7b89362da4bf 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -51,6 +51,7 @@
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
#endif
#ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
/* counters */
@@ -299,13 +303,13 @@ void ksft_test_result_code(int exit_code, const char *test_name,
va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -332,7 +336,7 @@ static inline int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -347,19 +351,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -388,4 +392,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+#ifdef NOLIBC
+ ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+ return 0;
+#else
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index d49dd3ffd0d9..c001dd79179d 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
return 0;
}
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
{
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
- return NULL;
+ if (pthread_self() != ctd_thread)
+ ctd_failed = 1;
+ ctd_count--;
}
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
{
- if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
- __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
- int err, i;
- timer_t id;
- const int nthreads = 10;
- pthread_t threads[nthreads];
struct itimerspec val = {
.it_value.tv_sec = 0,
.it_value.tv_nsec = 1000 * 1000,
.it_interval.tv_sec = 0,
.it_interval.tv_nsec = 1000 * 1000,
};
+ timer_t id;
- remain = nthreads + 1; /* worker threads + this thread */
- signal(SIGALRM, distribution_handler);
- err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ /* 1/10 seconds to ensure the leader sleeps */
+ usleep(10000);
- for (i = 0; i < nthreads; i++) {
- err = pthread_create(&threads[i], NULL, distribution_thread,
- NULL);
- if (err) {
- ksft_print_msg("Can't create thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ ctd_count = 100;
+ if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+ return "Can't create timer\n";
+ if (timer_settime(id, 0, &val, NULL))
+ return "Can't set timer\n";
- /* Wait for all threads to receive the signal. */
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ while (ctd_count > 0 && !ctd_failed)
+ ;
- for (i = 0; i < nthreads; i++) {
- err = pthread_join(threads[i], NULL);
- if (err) {
- ksft_print_msg("Can't join thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ if (timer_delete(id))
+ return "Can't delete timer\n";
- if (timer_delete(id)) {
- ksft_perror("Can't delete timer");
- return -1;
- }
+ return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+ const char *errmsg;
- ksft_test_result_pass("check_timer_distribution\n");
+ signal(SIGALRM, ctd_sighandler);
+
+ errmsg = "Can't create thread\n";
+ if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+ goto err;
+
+ errmsg = "Can't join thread\n";
+ if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+ goto err;
+
+ if (!ctd_failed)
+ ksft_test_result_pass("check signal distribution\n");
+ else if (ksft_min_kernel_version(6, 3))
+ ksft_test_result_fail("check signal distribution\n");
+ else
+ ksft_test_result_skip("check signal distribution (old kernel)\n");
return 0;
+err:
+ ksft_print_msg("%s", errmsg);
+ return -1;
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 48b9a803235a..d13ebde20322 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -21,9 +21,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-
-
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
#define NUM_FREQ_OUTOFRANGE 4
#define NUM_FREQ_INVALID 2
+#define SHIFTED_PPM (1 << 16)
+
long valid_freq[NUM_FREQ_VALID] = {
- -499<<16,
- -450<<16,
- -400<<16,
- -350<<16,
- -300<<16,
- -250<<16,
- -200<<16,
- -150<<16,
- -100<<16,
- -75<<16,
- -50<<16,
- -25<<16,
- -10<<16,
- -5<<16,
- -1<<16,
+ -499 * SHIFTED_PPM,
+ -450 * SHIFTED_PPM,
+ -400 * SHIFTED_PPM,
+ -350 * SHIFTED_PPM,
+ -300 * SHIFTED_PPM,
+ -250 * SHIFTED_PPM,
+ -200 * SHIFTED_PPM,
+ -150 * SHIFTED_PPM,
+ -100 * SHIFTED_PPM,
+ -75 * SHIFTED_PPM,
+ -50 * SHIFTED_PPM,
+ -25 * SHIFTED_PPM,
+ -10 * SHIFTED_PPM,
+ -5 * SHIFTED_PPM,
+ -1 * SHIFTED_PPM,
-1000,
- 1<<16,
- 5<<16,
- 10<<16,
- 25<<16,
- 50<<16,
- 75<<16,
- 100<<16,
- 150<<16,
- 200<<16,
- 250<<16,
- 300<<16,
- 350<<16,
- 400<<16,
- 450<<16,
- 499<<16,
+ 1 * SHIFTED_PPM,
+ 5 * SHIFTED_PPM,
+ 10 * SHIFTED_PPM,
+ 25 * SHIFTED_PPM,
+ 50 * SHIFTED_PPM,
+ 75 * SHIFTED_PPM,
+ 100 * SHIFTED_PPM,
+ 150 * SHIFTED_PPM,
+ 200 * SHIFTED_PPM,
+ 250 * SHIFTED_PPM,
+ 300 * SHIFTED_PPM,
+ 350 * SHIFTED_PPM,
+ 400 * SHIFTED_PPM,
+ 450 * SHIFTED_PPM,
+ 499 * SHIFTED_PPM,
};
long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
- -1000<<16,
- -550<<16,
- 550<<16,
- 1000<<16,
+ -1000 * SHIFTED_PPM,
+ -550 * SHIFTED_PPM,
+ 550 * SHIFTED_PPM,
+ 1000 * SHIFTED_PPM,
};
#define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755
index 000000000000..d9b042097da7
--- /dev/null
+++ b/tools/testing/selftests/turbostat/defcolumns.py
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+print('OK')