summaryrefslogtreecommitdiffstats
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/Kconfig32
-rw-r--r--drivers/misc/Makefile2
-rw-r--r--drivers/misc/altera-stapl/Kconfig1
-rw-r--r--drivers/misc/c2port/Kconfig2
-rw-r--r--drivers/misc/cb710/Kconfig1
-rw-r--r--drivers/misc/cxl/Kconfig3
-rw-r--r--drivers/misc/echo/Kconfig1
-rw-r--r--drivers/misc/eeprom/ee1004.c43
-rw-r--r--drivers/misc/eeprom/idt_89hpesx.c6
-rw-r--r--drivers/misc/fsa9480.c547
-rw-r--r--drivers/misc/genwqe/Kconfig1
-rw-r--r--drivers/misc/habanalabs/asid.c2
-rw-r--r--drivers/misc/habanalabs/command_submission.c10
-rw-r--r--drivers/misc/habanalabs/context.c11
-rw-r--r--drivers/misc/habanalabs/debugfs.c54
-rw-r--r--drivers/misc/habanalabs/device.c189
-rw-r--r--drivers/misc/habanalabs/firmware_if.c51
-rw-r--r--drivers/misc/habanalabs/goya/goya.c635
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h16
-rw-r--r--drivers/misc/habanalabs/goya/goya_security.c16
-rw-r--r--drivers/misc/habanalabs/habanalabs.h93
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c66
-rw-r--r--drivers/misc/habanalabs/habanalabs_ioctl.c11
-rw-r--r--drivers/misc/habanalabs/hw_queue.c2
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h418
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h1
-rw-r--r--drivers/misc/habanalabs/memory.c13
-rw-r--r--drivers/misc/habanalabs/mmu.c20
-rw-r--r--drivers/misc/habanalabs/pci.c10
-rw-r--r--drivers/misc/habanalabs/sysfs.c4
-rw-r--r--drivers/misc/isl29003.c4
-rw-r--r--drivers/misc/lis3lv02d/Kconfig2
-rw-r--r--drivers/misc/lkdtm/Makefile3
-rw-r--r--drivers/misc/lkdtm/bugs.c66
-rw-r--r--drivers/misc/lkdtm/core.c1
-rw-r--r--drivers/misc/lkdtm/lkdtm.h1
-rw-r--r--drivers/misc/mei/debugfs.c184
-rw-r--r--drivers/misc/mei/hdcp/mei_hdcp.c11
-rw-r--r--drivers/misc/mic/scif/scif_main.c1
-rw-r--r--drivers/misc/ocxl/Kconfig1
-rw-r--r--drivers/misc/ocxl/context.c9
-rw-r--r--drivers/misc/ocxl/link.c28
-rw-r--r--drivers/misc/sgi-xp/xpc_partition.c2
-rw-r--r--drivers/misc/tsl2550.c2
-rw-r--r--drivers/misc/vmw_balloon.c489
-rw-r--r--drivers/misc/vmw_vmci/vmci_context.c80
-rw-r--r--drivers/misc/vmw_vmci/vmci_handle_array.c38
-rw-r--r--drivers/misc/vmw_vmci/vmci_handle_array.h29
-rw-r--r--drivers/misc/xilinx_sdfec.c345
49 files changed, 2265 insertions, 1292 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 85fc77148d19..6abfc8e92fcc 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -9,7 +9,6 @@ config SENSORS_LIS3LV02D
tristate
depends on INPUT
select INPUT_POLLDEV
- default n
config AD525X_DPOT
tristate "Analog Devices Digital Potentiometers"
@@ -62,7 +61,6 @@ config ATMEL_TCLIB
config DUMMY_IRQ
tristate "Dummy IRQ handler"
- default n
---help---
This module accepts a single 'irq' parameter, which it should register for.
The sole purpose of this module is to help with debugging of systems on
@@ -118,7 +116,6 @@ config PHANTOM
config INTEL_MID_PTI
tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
- default n
help
The PTI (Parallel Trace Interface) driver directs
trace data routed from various parts in the system out
@@ -194,7 +191,6 @@ config ATMEL_SSC
config ENCLOSURE_SERVICES
tristate "Enclosure Services"
- default n
help
Provides support for intelligent enclosures (bays which
contain storage devices). You also need either a host
@@ -218,7 +214,6 @@ config SGI_XP
config CS5535_MFGPT
tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
depends on MFD_CS5535
- default n
help
This driver provides access to MFGPT functionality for other
drivers that need timers. MFGPTs are available in the CS5535 and
@@ -251,7 +246,6 @@ config CS5535_CLOCK_EVENT_SRC
config HP_ILO
tristate "Channel interface driver for the HP iLO processor"
depends on PCI
- default n
help
The channel interface driver allows applications to communicate
with iLO management processors present on HP ProLiant servers.
@@ -286,7 +280,6 @@ config QCOM_FASTRPC
config SGI_GRU
tristate "SGI GRU driver"
depends on X86_UV && SMP
- default n
select MMU_NOTIFIER
---help---
The GRU is a hardware resource located in the system chipset. The GRU
@@ -301,7 +294,6 @@ config SGI_GRU
config SGI_GRU_DEBUG
bool "SGI GRU driver debug"
depends on SGI_GRU
- default n
---help---
This option enables additional debugging code for the SGI GRU driver.
If you are unsure, say N.
@@ -359,7 +351,6 @@ config SENSORS_BH1770
config SENSORS_APDS990X
tristate "APDS990X combined als and proximity sensors"
depends on I2C
- default n
---help---
Say Y here if you want to build a driver for Avago APDS990x
combined ambient light and proximity sensor chip.
@@ -387,7 +378,6 @@ config DS1682
config SPEAR13XX_PCIE_GADGET
bool "PCIe gadget support for SPEAr13XX platform"
depends on ARCH_SPEAR13XX && BROKEN
- default n
help
This option enables gadget support for PCIe controller. If
board file defines any controller as PCIe endpoint then a sysfs
@@ -397,6 +387,7 @@ config SPEAR13XX_PCIE_GADGET
config VMWARE_BALLOON
tristate "VMware Balloon Driver"
depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
+ select MEMORY_BALLOON
help
This is VMware physical memory management driver which acts
like a "balloon" that can be inflated to reclaim physical pages
@@ -431,15 +422,6 @@ config PCH_PHUB
To compile this driver as a module, choose M here: the module will
be called pch_phub.
-config USB_SWITCH_FSA9480
- tristate "FSA9480 USB Switch"
- depends on I2C
- help
- The FSA9480 is a USB port accessory detector and switch.
- The FSA9480 is fully controlled using I2C and enables USB data,
- stereo and mono audio, video, microphone and UART data to use
- a common connector port.
-
config LATTICE_ECP3_CONFIG
tristate "Lattice ECP3 FPGA bitstream configuration via SPI"
depends on SPI && SYSFS
@@ -481,6 +463,18 @@ config PCI_ENDPOINT_TEST
Enable this configuration option to enable the host side test driver
for PCI Endpoint.
+config XILINX_SDFEC
+ tristate "Xilinx SDFEC 16"
+ help
+ This option enables support for the Xilinx SDFEC (Soft Decision
+ Forward Error Correction) driver. This enables a char driver
+ for the SDFEC.
+
+ You may select this driver if your design instantiates the
+ SDFEC(16nm) hardened block. To compile this as a module choose M.
+
+ If unsure, say N.
+
config MISC_RTSX
tristate
default MISC_RTSX_PCI || MISC_RTSX_USB
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index b9affcdaa3d6..abd8ae249746 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o
obj-$(CONFIG_PCH_PHUB) += pch_phub.o
obj-y += ti-st/
obj-y += lis3lv02d/
-obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/
obj-$(CONFIG_INTEL_MEI) += mei/
obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/
@@ -59,3 +58,4 @@ obj-$(CONFIG_OCXL) += ocxl/
obj-y += cardreader/
obj-$(CONFIG_PVPANIC) += pvpanic.o
obj-$(CONFIG_HABANA_AI) += habanalabs/
+obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
diff --git a/drivers/misc/altera-stapl/Kconfig b/drivers/misc/altera-stapl/Kconfig
index b34863544365..6c4c6575ec31 100644
--- a/drivers/misc/altera-stapl/Kconfig
+++ b/drivers/misc/altera-stapl/Kconfig
@@ -5,6 +5,5 @@ comment "Altera FPGA firmware download module (requires I2C)"
config ALTERA_STAPL
tristate "Altera FPGA firmware download module"
depends on I2C
- default n
help
An Altera FPGA module. Say Y when you want to support this tool.
diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig
index 192e25094bd4..e20516ffd91e 100644
--- a/drivers/misc/c2port/Kconfig
+++ b/drivers/misc/c2port/Kconfig
@@ -5,7 +5,6 @@
menuconfig C2PORT
tristate "Silicon Labs C2 port support"
- default n
help
This option enables support for Silicon Labs C2 port used to
program Silicon micro controller chips (and other 8051 compatible).
@@ -24,7 +23,6 @@ if C2PORT
config C2PORT_DURAMAR_2150
tristate "C2 port support for Eurotech's Duramar 2150"
depends on X86
- default n
help
This option enables C2 support for the Eurotech's Duramar 2150
on board micro controller.
diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig
index 3c7356d55423..a696d7509024 100644
--- a/drivers/misc/cb710/Kconfig
+++ b/drivers/misc/cb710/Kconfig
@@ -15,7 +15,6 @@ config CB710_CORE
config CB710_DEBUG
bool "Enable driver debugging"
depends on CB710_CORE != n
- default n
help
This is an option for use by developers; most people should
say N here. This adds a lot of debugging output to dmesg.
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index f1d9a843e361..39eec9031487 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -5,16 +5,13 @@
config CXL_BASE
bool
- default n
select PPC_COPRO_BASE
config CXL_AFU_DRIVER_OPS
bool
- default n
config CXL_LIB
bool
- default n
config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig
index 39656413e70d..be70b263e271 100644
--- a/drivers/misc/echo/Kconfig
+++ b/drivers/misc/echo/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config ECHO
tristate "Line Echo Canceller support"
- default n
---help---
This driver provides line echo cancelling support for mISDN and
Zaptel drivers.
diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c
index be3263df278a..6f00c33cfe22 100644
--- a/drivers/misc/eeprom/ee1004.c
+++ b/drivers/misc/eeprom/ee1004.c
@@ -2,7 +2,7 @@
/*
* ee1004 - driver for DDR4 SPD EEPROMs
*
- * Copyright (C) 2017 Jean Delvare
+ * Copyright (C) 2017-2019 Jean Delvare
*
* Based on the at24 driver:
* Copyright (C) 2005-2007 David Brownell
@@ -53,6 +53,24 @@ MODULE_DEVICE_TABLE(i2c, ee1004_ids);
/*-------------------------------------------------------------------------*/
+static int ee1004_get_current_page(void)
+{
+ int err;
+
+ err = i2c_smbus_read_byte(ee1004_set_page[0]);
+ if (err == -ENXIO) {
+ /* Nack means page 1 is selected */
+ return 1;
+ }
+ if (err < 0) {
+ /* Anything else is a real error, bail out */
+ return err;
+ }
+
+ /* Ack means page 0 is selected, returned value meaningless */
+ return 0;
+}
+
static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf,
unsigned int offset, size_t count)
{
@@ -102,6 +120,16 @@ static ssize_t ee1004_read(struct file *filp, struct kobject *kobj,
/* Data is ignored */
status = i2c_smbus_write_byte(ee1004_set_page[page],
0x00);
+ if (status == -ENXIO) {
+ /*
+ * Don't give up just yet. Some memory
+ * modules will select the page but not
+ * ack the command. Check which page is
+ * selected now.
+ */
+ if (ee1004_get_current_page() == page)
+ status = 0;
+ }
if (status < 0) {
dev_err(dev, "Failed to select page %d (%d)\n",
page, status);
@@ -186,17 +214,10 @@ static int ee1004_probe(struct i2c_client *client,
}
/* Remember current page to avoid unneeded page select */
- err = i2c_smbus_read_byte(ee1004_set_page[0]);
- if (err == -ENXIO) {
- /* Nack means page 1 is selected */
- ee1004_current_page = 1;
- } else if (err < 0) {
- /* Anything else is a real error, bail out */
+ err = ee1004_get_current_page();
+ if (err < 0)
goto err_clients;
- } else {
- /* Ack means page 0 is selected, returned value meaningless */
- ee1004_current_page = 0;
- }
+ ee1004_current_page = err;
dev_dbg(&client->dev, "Currently selected page: %d\n",
ee1004_current_page);
mutex_unlock(&ee1004_bus_lock);
diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
index 8a4659518c33..81c70e5bc168 100644
--- a/drivers/misc/eeprom/idt_89hpesx.c
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -115,7 +115,6 @@ static struct dentry *csr_dbgdir;
* @client: i2c client used to perform IO operations
*
* @ee_file: EEPROM read/write sysfs-file
- * @csr_file: CSR read/write debugfs-node
*/
struct idt_smb_seq;
struct idt_89hpesx_dev {
@@ -137,7 +136,6 @@ struct idt_89hpesx_dev {
struct bin_attribute *ee_file;
struct dentry *csr_dir;
- struct dentry *csr_file;
};
/*
@@ -1378,8 +1376,8 @@ static void idt_create_dbgfs_files(struct idt_89hpesx_dev *pdev)
pdev->csr_dir = debugfs_create_dir(fname, csr_dbgdir);
/* Create Debugfs file for CSR read/write operations */
- pdev->csr_file = debugfs_create_file(cli->name, 0600,
- pdev->csr_dir, pdev, &csr_dbgfs_ops);
+ debugfs_create_file(cli->name, 0600, pdev->csr_dir, pdev,
+ &csr_dbgfs_ops);
}
/*
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c
deleted file mode 100644
index fab02f2da077..000000000000
--- a/drivers/misc/fsa9480.c
+++ /dev/null
@@ -1,547 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * fsa9480.c - FSA9480 micro USB switch device driver
- *
- * Copyright (C) 2010 Samsung Electronics
- * Minkyu Kang <mk7.kang@samsung.com>
- * Wonguk Jeong <wonguk.jeong@samsung.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/platform_data/fsa9480.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/pm_runtime.h>
-
-/* FSA9480 I2C registers */
-#define FSA9480_REG_DEVID 0x01
-#define FSA9480_REG_CTRL 0x02
-#define FSA9480_REG_INT1 0x03
-#define FSA9480_REG_INT2 0x04
-#define FSA9480_REG_INT1_MASK 0x05
-#define FSA9480_REG_INT2_MASK 0x06
-#define FSA9480_REG_ADC 0x07
-#define FSA9480_REG_TIMING1 0x08
-#define FSA9480_REG_TIMING2 0x09
-#define FSA9480_REG_DEV_T1 0x0a
-#define FSA9480_REG_DEV_T2 0x0b
-#define FSA9480_REG_BTN1 0x0c
-#define FSA9480_REG_BTN2 0x0d
-#define FSA9480_REG_CK 0x0e
-#define FSA9480_REG_CK_INT1 0x0f
-#define FSA9480_REG_CK_INT2 0x10
-#define FSA9480_REG_CK_INTMASK1 0x11
-#define FSA9480_REG_CK_INTMASK2 0x12
-#define FSA9480_REG_MANSW1 0x13
-#define FSA9480_REG_MANSW2 0x14
-
-/* Control */
-#define CON_SWITCH_OPEN (1 << 4)
-#define CON_RAW_DATA (1 << 3)
-#define CON_MANUAL_SW (1 << 2)
-#define CON_WAIT (1 << 1)
-#define CON_INT_MASK (1 << 0)
-#define CON_MASK (CON_SWITCH_OPEN | CON_RAW_DATA | \
- CON_MANUAL_SW | CON_WAIT)
-
-/* Device Type 1 */
-#define DEV_USB_OTG (1 << 7)
-#define DEV_DEDICATED_CHG (1 << 6)
-#define DEV_USB_CHG (1 << 5)
-#define DEV_CAR_KIT (1 << 4)
-#define DEV_UART (1 << 3)
-#define DEV_USB (1 << 2)
-#define DEV_AUDIO_2 (1 << 1)
-#define DEV_AUDIO_1 (1 << 0)
-
-#define DEV_T1_USB_MASK (DEV_USB_OTG | DEV_USB)
-#define DEV_T1_UART_MASK (DEV_UART)
-#define DEV_T1_CHARGER_MASK (DEV_DEDICATED_CHG | DEV_USB_CHG)
-
-/* Device Type 2 */
-#define DEV_AV (1 << 6)
-#define DEV_TTY (1 << 5)
-#define DEV_PPD (1 << 4)
-#define DEV_JIG_UART_OFF (1 << 3)
-#define DEV_JIG_UART_ON (1 << 2)
-#define DEV_JIG_USB_OFF (1 << 1)
-#define DEV_JIG_USB_ON (1 << 0)
-
-#define DEV_T2_USB_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON)
-#define DEV_T2_UART_MASK (DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
-#define DEV_T2_JIG_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \
- DEV_JIG_UART_OFF | DEV_JIG_UART_ON)
-
-/*
- * Manual Switch
- * D- [7:5] / D+ [4:2]
- * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO
- */
-#define SW_VAUDIO ((4 << 5) | (4 << 2))
-#define SW_UART ((3 << 5) | (3 << 2))
-#define SW_AUDIO ((2 << 5) | (2 << 2))
-#define SW_DHOST ((1 << 5) | (1 << 2))
-#define SW_AUTO ((0 << 5) | (0 << 2))
-
-/* Interrupt 1 */
-#define INT_DETACH (1 << 1)
-#define INT_ATTACH (1 << 0)
-
-struct fsa9480_usbsw {
- struct i2c_client *client;
- struct fsa9480_platform_data *pdata;
- int dev1;
- int dev2;
- int mansw;
-};
-
-static struct fsa9480_usbsw *chip;
-
-static int fsa9480_write_reg(struct i2c_client *client,
- int reg, int value)
-{
- int ret;
-
- ret = i2c_smbus_write_byte_data(client, reg, value);
-
- if (ret < 0)
- dev_err(&client->dev, "%s: err %d\n", __func__, ret);
-
- return ret;
-}
-
-static int fsa9480_read_reg(struct i2c_client *client, int reg)
-{
- int ret;
-
- ret = i2c_smbus_read_byte_data(client, reg);
-
- if (ret < 0)
- dev_err(&client->dev, "%s: err %d\n", __func__, ret);
-
- return ret;
-}
-
-static int fsa9480_read_irq(struct i2c_client *client, int *value)
-{
- int ret;
-
- ret = i2c_smbus_read_i2c_block_data(client,
- FSA9480_REG_INT1, 2, (u8 *)value);
- *value &= 0xffff;
-
- if (ret < 0)
- dev_err(&client->dev, "%s: err %d\n", __func__, ret);
-
- return ret;
-}
-
-static void fsa9480_set_switch(const char *buf)
-{
- struct fsa9480_usbsw *usbsw = chip;
- struct i2c_client *client = usbsw->client;
- unsigned int value;
- unsigned int path = 0;
-
- value = fsa9480_read_reg(client, FSA9480_REG_CTRL);
-
- if (!strncmp(buf, "VAUDIO", 6)) {
- path = SW_VAUDIO;
- value &= ~CON_MANUAL_SW;
- } else if (!strncmp(buf, "UART", 4)) {
- path = SW_UART;
- value &= ~CON_MANUAL_SW;
- } else if (!strncmp(buf, "AUDIO", 5)) {
- path = SW_AUDIO;
- value &= ~CON_MANUAL_SW;
- } else if (!strncmp(buf, "DHOST", 5)) {
- path = SW_DHOST;
- value &= ~CON_MANUAL_SW;
- } else if (!strncmp(buf, "AUTO", 4)) {
- path = SW_AUTO;
- value |= CON_MANUAL_SW;
- } else {
- printk(KERN_ERR "Wrong command\n");
- return;
- }
-
- usbsw->mansw = path;
- fsa9480_write_reg(client, FSA9480_REG_MANSW1, path);
- fsa9480_write_reg(client, FSA9480_REG_CTRL, value);
-}
-
-static ssize_t fsa9480_get_switch(char *buf)
-{
- struct fsa9480_usbsw *usbsw = chip;
- struct i2c_client *client = usbsw->client;
- unsigned int value;
-
- value = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
-
- if (value == SW_VAUDIO)
- return sprintf(buf, "VAUDIO\n");
- else if (value == SW_UART)
- return sprintf(buf, "UART\n");
- else if (value == SW_AUDIO)
- return sprintf(buf, "AUDIO\n");
- else if (value == SW_DHOST)
- return sprintf(buf, "DHOST\n");
- else if (value == SW_AUTO)
- return sprintf(buf, "AUTO\n");
- else
- return sprintf(buf, "%x", value);
-}
-
-static ssize_t fsa9480_show_device(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev);
- struct i2c_client *client = usbsw->client;
- int dev1, dev2;
-
- dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
- dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
-
- if (!dev1 && !dev2)
- return sprintf(buf, "NONE\n");
-
- /* USB */
- if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK)
- return sprintf(buf, "USB\n");
-
- /* UART */
- if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK)
- return sprintf(buf, "UART\n");
-
- /* CHARGER */
- if (dev1 & DEV_T1_CHARGER_MASK)
- return sprintf(buf, "CHARGER\n");
-
- /* JIG */
- if (dev2 & DEV_T2_JIG_MASK)
- return sprintf(buf, "JIG\n");
-
- return sprintf(buf, "UNKNOWN\n");
-}
-
-static ssize_t fsa9480_show_manualsw(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return fsa9480_get_switch(buf);
-
-}
-
-static ssize_t fsa9480_set_manualsw(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- fsa9480_set_switch(buf);
-
- return count;
-}
-
-static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL);
-static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR,
- fsa9480_show_manualsw, fsa9480_set_manualsw);
-
-static struct attribute *fsa9480_attributes[] = {
- &dev_attr_device.attr,
- &dev_attr_switch.attr,
- NULL
-};
-
-static const struct attribute_group fsa9480_group = {
- .attrs = fsa9480_attributes,
-};
-
-static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr)
-{
- int val1, val2, ctrl;
- struct fsa9480_platform_data *pdata = usbsw->pdata;
- struct i2c_client *client = usbsw->client;
-
- val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
- val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
- ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL);
-
- dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n",
- intr, val1, val2);
-
- if (!intr)
- goto out;
-
- if (intr & INT_ATTACH) { /* Attached */
- /* USB */
- if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) {
- if (pdata->usb_cb)
- pdata->usb_cb(FSA9480_ATTACHED);
-
- if (usbsw->mansw) {
- fsa9480_write_reg(client,
- FSA9480_REG_MANSW1, usbsw->mansw);
- }
- }
-
- /* UART */
- if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) {
- if (pdata->uart_cb)
- pdata->uart_cb(FSA9480_ATTACHED);
-
- if (!(ctrl & CON_MANUAL_SW)) {
- fsa9480_write_reg(client,
- FSA9480_REG_MANSW1, SW_UART);
- }
- }
-
- /* CHARGER */
- if (val1 & DEV_T1_CHARGER_MASK) {
- if (pdata->charger_cb)
- pdata->charger_cb(FSA9480_ATTACHED);
- }
-
- /* JIG */
- if (val2 & DEV_T2_JIG_MASK) {
- if (pdata->jig_cb)
- pdata->jig_cb(FSA9480_ATTACHED);
- }
- } else if (intr & INT_DETACH) { /* Detached */
- /* USB */
- if (usbsw->dev1 & DEV_T1_USB_MASK ||
- usbsw->dev2 & DEV_T2_USB_MASK) {
- if (pdata->usb_cb)
- pdata->usb_cb(FSA9480_DETACHED);
- }
-
- /* UART */
- if (usbsw->dev1 & DEV_T1_UART_MASK ||
- usbsw->dev2 & DEV_T2_UART_MASK) {
- if (pdata->uart_cb)
- pdata->uart_cb(FSA9480_DETACHED);
- }
-
- /* CHARGER */
- if (usbsw->dev1 & DEV_T1_CHARGER_MASK) {
- if (pdata->charger_cb)
- pdata->charger_cb(FSA9480_DETACHED);
- }
-
- /* JIG */
- if (usbsw->dev2 & DEV_T2_JIG_MASK) {
- if (pdata->jig_cb)
- pdata->jig_cb(FSA9480_DETACHED);
- }
- }
-
- usbsw->dev1 = val1;
- usbsw->dev2 = val2;
-
-out:
- ctrl &= ~CON_INT_MASK;
- fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
-}
-
-static irqreturn_t fsa9480_irq_handler(int irq, void *data)
-{
- struct fsa9480_usbsw *usbsw = data;
- struct i2c_client *client = usbsw->client;
- int intr;
-
- /* clear interrupt */
- fsa9480_read_irq(client, &intr);
-
- /* device detection */
- fsa9480_detect_dev(usbsw, intr);
-
- return IRQ_HANDLED;
-}
-
-static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw)
-{
- struct fsa9480_platform_data *pdata = usbsw->pdata;
- struct i2c_client *client = usbsw->client;
- int ret;
- int intr;
- unsigned int ctrl = CON_MASK;
-
- /* clear interrupt */
- fsa9480_read_irq(client, &intr);
-
- /* unmask interrupt (attach/detach only) */
- fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc);
- fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f);
-
- usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1);
-
- if (usbsw->mansw)
- ctrl &= ~CON_MANUAL_SW; /* Manual Switching Mode */
-
- fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl);
-
- if (pdata && pdata->cfg_gpio)
- pdata->cfg_gpio();
-
- if (client->irq) {
- ret = request_threaded_irq(client->irq, NULL,
- fsa9480_irq_handler,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- "fsa9480 micro USB", usbsw);
- if (ret) {
- dev_err(&client->dev, "failed to request IRQ\n");
- return ret;
- }
-
- if (pdata)
- device_init_wakeup(&client->dev, pdata->wakeup);
- }
-
- return 0;
-}
-
-static int fsa9480_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
-{
- struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
- struct fsa9480_usbsw *usbsw;
- int ret = 0;
-
- if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
- return -EIO;
-
- usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL);
- if (!usbsw) {
- dev_err(&client->dev, "failed to allocate driver data\n");
- return -ENOMEM;
- }
-
- usbsw->client = client;
- usbsw->pdata = client->dev.platform_data;
-
- chip = usbsw;
-
- i2c_set_clientdata(client, usbsw);
-
- ret = fsa9480_irq_init(usbsw);
- if (ret)
- goto fail1;
-
- ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group);
- if (ret) {
- dev_err(&client->dev,
- "failed to create fsa9480 attribute group\n");
- goto fail2;
- }
-
- /* ADC Detect Time: 500ms */
- fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6);
-
- if (chip->pdata->reset_cb)
- chip->pdata->reset_cb();
-
- /* device detection */
- fsa9480_detect_dev(usbsw, INT_ATTACH);
-
- pm_runtime_set_active(&client->dev);
-
- return 0;
-
-fail2:
- if (client->irq)
- free_irq(client->irq, usbsw);
-fail1:
- kfree(usbsw);
- return ret;
-}
-
-static int fsa9480_remove(struct i2c_client *client)
-{
- struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
-
- if (client->irq)
- free_irq(client->irq, usbsw);
-
- sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
- device_init_wakeup(&client->dev, 0);
- kfree(usbsw);
- return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-
-static int fsa9480_suspend(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
- struct fsa9480_platform_data *pdata = usbsw->pdata;
-
- if (device_may_wakeup(&client->dev) && client->irq)
- enable_irq_wake(client->irq);
-
- if (pdata->usb_power)
- pdata->usb_power(0);
-
- return 0;
-}
-
-static int fsa9480_resume(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
- int dev1, dev2;
-
- if (device_may_wakeup(&client->dev) && client->irq)
- disable_irq_wake(client->irq);
-
- /*
- * Clear Pending interrupt. Note that detect_dev does what
- * the interrupt handler does. So, we don't miss pending and
- * we reenable interrupt if there is one.
- */
- fsa9480_read_reg(client, FSA9480_REG_INT1);
- fsa9480_read_reg(client, FSA9480_REG_INT2);
-
- dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1);
- dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2);
-
- /* device detection */
- fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH);
-
- return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(fsa9480_pm_ops, fsa9480_suspend, fsa9480_resume);
-#define FSA9480_PM_OPS (&fsa9480_pm_ops)
-
-#else
-
-#define FSA9480_PM_OPS NULL
-
-#endif /* CONFIG_PM_SLEEP */
-
-static const struct i2c_device_id fsa9480_id[] = {
- {"fsa9480", 0},
- {}
-};
-MODULE_DEVICE_TABLE(i2c, fsa9480_id);
-
-static struct i2c_driver fsa9480_i2c_driver = {
- .driver = {
- .name = "fsa9480",
- .pm = FSA9480_PM_OPS,
- },
- .probe = fsa9480_probe,
- .remove = fsa9480_remove,
- .id_table = fsa9480_id,
-};
-
-module_i2c_driver(fsa9480_i2c_driver);
-
-MODULE_AUTHOR("Minkyu Kang <mk7.kang@samsung.com>");
-MODULE_DESCRIPTION("FSA9480 USB Switch driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
index a8a608713d26..97f64bcf9fe0 100644
--- a/drivers/misc/genwqe/Kconfig
+++ b/drivers/misc/genwqe/Kconfig
@@ -7,7 +7,6 @@ menuconfig GENWQE
tristate "GenWQE PCIe Accelerator"
depends on PCI && 64BIT
select CRC_ITU_T
- default n
help
Enables PCIe card driver for IBM GenWQE accelerators.
The user-space interface is described in
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c
index f54e7971a762..2c01461701a3 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/asid.c
@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)
mutex_init(&hdev->asid_mutex);
- /* ASID 0 is reserved for KMD */
+ /* ASID 0 is reserved for KMD and device CPU */
set_bit(0, hdev->asid_bitmap);
return 0;
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 6fe785e26859..6ad83d5ef4b0 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -682,14 +682,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
u32 tmp;
rc = hl_poll_timeout_memory(hdev,
- (u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
- jiffies_to_usecs(hdev->timeout_jiffies),
- &tmp);
+ &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+ 100, jiffies_to_usecs(hdev->timeout_jiffies));
- if (rc || !tmp) {
+ if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
- "context switch phase didn't finish in time\n");
- rc = -ETIMEDOUT;
+ "context switch phase timeout (%d)\n", tmp);
goto out;
}
}
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index f4c92f110a72..8682590e3f6e 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -31,9 +31,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
*/
- hdev->asic_funcs->halt_coresight(hdev);
+ if (hdev->in_debug)
+ hl_device_set_debug_mode(hdev, false);
+
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
+ } else {
+ hl_mmu_ctx_fini(ctx);
}
}
@@ -117,6 +121,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
+ rc = hl_mmu_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+ goto mem_ctx_err;
+ }
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index ba418aaa404c..18e499c900c7 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- struct hl_ctx *ctx = hdev->user_ctx;
+ struct hl_ctx *ctx;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data)
if (!hdev->mmu_enable)
return 0;
+ if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
+ ctx = hdev->kernel_ctx;
+ else
+ ctx = hdev->user_ctx;
+
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return 0;
@@ -495,6 +500,36 @@ err:
return -EINVAL;
}
+static int engines_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+
+ return 0;
+}
+
+static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ if (!hdev->mmu_enable)
+ goto out;
+
+ if (hdev->dram_supports_virtual_memory &&
+ addr >= prop->va_space_dram_start_address &&
+ addr < prop->va_space_dram_end_address)
+ return true;
+
+ if (addr >= prop->va_space_host_start_address &&
+ addr < prop->va_space_host_end_address)
+ return true;
+out:
+ return false;
+}
+
static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
@@ -568,7 +603,6 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
char tmp_buf[32];
u64 addr = entry->addr;
u32 val;
@@ -577,11 +611,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
if (*ppos)
return 0;
- if (addr >= prop->va_space_dram_start_address &&
- addr < prop->va_space_dram_end_address &&
- hdev->mmu_enable &&
- hdev->dram_supports_virtual_memory) {
- rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -602,7 +633,6 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = entry->addr;
u32 value;
ssize_t rc;
@@ -611,11 +641,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;
- if (addr >= prop->va_space_dram_start_address &&
- addr < prop->va_space_dram_end_address &&
- hdev->mmu_enable &&
- hdev->dram_supports_virtual_memory) {
- rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -877,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"userptr", userptr_show, NULL},
{"vm", vm_show, NULL},
{"mmu", mmu_show, mmu_write},
+ {"engines", engines_show, NULL}
};
static int hl_debugfs_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 0b19d3eefb98..0c4894dd9c02 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->fd_open_cnt_lock);
mutex_init(&hdev->send_cpu_message_lock);
+ mutex_init(&hdev->debug_lock);
mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
spin_lock_init(&hdev->hw_queues_mirror_lock);
@@ -262,6 +263,7 @@ early_fini:
static void device_early_fini(struct hl_device *hdev)
{
mutex_destroy(&hdev->mmu_cache_lock);
+ mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -324,7 +326,15 @@ static int device_late_init(struct hl_device *hdev)
{
int rc;
- INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+ if (hdev->asic_funcs->late_init) {
+ rc = hdev->asic_funcs->late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed late initialization for the H/W\n");
+ return rc;
+ }
+ }
+
hdev->high_pll = hdev->asic_prop.high_pll;
/* force setting to low frequency */
@@ -335,17 +345,9 @@ static int device_late_init(struct hl_device *hdev)
else
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
- if (hdev->asic_funcs->late_init) {
- rc = hdev->asic_funcs->late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "failed late initialization for the H/W\n");
- return rc;
- }
- }
-
+ INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
if (hdev->heartbeat) {
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
@@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
return 1;
}
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+ int rc = 0;
+
+ mutex_lock(&hdev->debug_lock);
+
+ if (!enable) {
+ if (!hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to disable debug mode because device was not in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ hdev->asic_funcs->halt_coresight(hdev);
+ hdev->in_debug = 0;
+
+ goto out;
+ }
+
+ if (hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to enable debug mode because device is already in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ mutex_lock(&hdev->fd_open_cnt_lock);
+
+ if (atomic_read(&hdev->fd_open_cnt) > 1) {
+ dev_err(hdev->dev,
+ "Failed to enable debug mode. More then a single user is using the device\n");
+ rc = -EPERM;
+ goto unlock_fd_open_lock;
+ }
+
+ hdev->in_debug = 1;
+
+unlock_fd_open_lock:
+ mutex_unlock(&hdev->fd_open_cnt_lock);
+out:
+ mutex_unlock(&hdev->debug_lock);
+
+ return rc;
+}
+
/*
* hl_device_suspend - initiate device suspend
*
@@ -647,13 +695,6 @@ again:
hdev->hard_reset_pending = true;
- if (!hdev->pdev) {
- dev_err(hdev->dev,
- "Reset action is NOT supported in simulator\n");
- rc = -EINVAL;
- goto out_err;
- }
-
device_reset_work = kzalloc(sizeof(*device_reset_work),
GFP_ATOMIC);
if (!device_reset_work) {
@@ -704,6 +745,7 @@ again:
if (hard_reset) {
hl_vm_fini(hdev);
+ hl_mmu_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue);
}
@@ -731,6 +773,13 @@ again:
goto out_err;
}
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to initialize MMU S/W after hard reset\n");
+ goto out_err;
+ }
+
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
GFP_KERNEL);
@@ -902,11 +951,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto cq_fini;
}
+ /* MMU S/W must be initialized before kernel context is created */
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
+ goto eq_fini;
+ }
+
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
- goto eq_fini;
+ goto mmu_fini;
}
hdev->user_ctx = NULL;
@@ -954,8 +1010,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto out_disabled;
}
- /* After test_queues, KMD can start sending messages to device CPU */
-
rc = device_late_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed late initialization\n");
@@ -1001,6 +1055,8 @@ release_ctx:
"kernel ctx is still alive on initialization failure\n");
free_ctx:
kfree(hdev->kernel_ctx);
+mmu_fini:
+ hl_mmu_fini(hdev);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
@@ -1105,6 +1161,8 @@ void hl_device_fini(struct hl_device *hdev)
hl_vm_fini(hdev);
+ hl_mmu_fini(hdev);
+
hl_eq_fini(hdev, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
@@ -1126,95 +1184,6 @@ void hl_device_fini(struct hl_device *hdev)
}
/*
- * hl_poll_timeout_memory - Periodically poll a host memory address
- * until it is not zero or a timeout occurs
- * @hdev: pointer to habanalabs device structure
- * @addr: Address to poll
- * @timeout_us: timeout in us
- * @val: Variable to read the value into
- *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
- * The function sleeps for 100us with timeout value of
- * timeout_us
- */
-int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
- u32 timeout_us, u32 *val)
-{
- /*
- * address in this function points always to a memory location in the
- * host's (server's) memory. That location is updated asynchronously
- * either by the direct access of the device or by another core
- */
- u32 *paddr = (u32 *) (uintptr_t) addr;
- ktime_t timeout;
-
- /* timeout should be longer when working with simulator */
- if (!hdev->pdev)
- timeout_us *= 10;
-
- timeout = ktime_add_us(ktime_get(), timeout_us);
-
- might_sleep();
-
- for (;;) {
- /*
- * Flush CPU read/write buffers to make sure we read updates
- * done by other cores or by the device
- */
- mb();
- *val = *paddr;
- if (*val)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- *val = *paddr;
- break;
- }
- usleep_range((100 >> 2) + 1, 100);
- }
-
- return *val ? 0 : -ETIMEDOUT;
-}
-
-/*
- * hl_poll_timeout_devicememory - Periodically poll a device memory address
- * until it is not zero or a timeout occurs
- * @hdev: pointer to habanalabs device structure
- * @addr: Device address to poll
- * @timeout_us: timeout in us
- * @val: Variable to read the value into
- *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
- * The function sleeps for 100us with timeout value of
- * timeout_us
- */
-int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
- u32 timeout_us, u32 *val)
-{
- ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
-
- might_sleep();
-
- for (;;) {
- *val = readl(addr);
- if (*val)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- *val = readl(addr);
- break;
- }
- usleep_range((100 >> 2) + 1, 100);
- }
-
- return *val ? 0 : -ETIMEDOUT;
-}
-
-/*
* MMIO register access helper functions.
*/
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index eda5d7fcb79f..cc8168bacb24 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -29,13 +29,13 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
rc = request_firmware(&fw, fw_name, hdev->dev);
if (rc) {
- dev_err(hdev->dev, "Failed to request %s\n", fw_name);
+ dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
goto out;
}
fw_size = fw->size;
if ((fw_size % 4) != 0) {
- dev_err(hdev->dev, "illegal %s firmware size %zu\n",
+ dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
fw_name, fw_size);
rc = -EINVAL;
goto out;
@@ -85,12 +85,6 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u32 tmp;
int rc = 0;
- if (len > HL_CPU_CB_SIZE) {
- dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
- len);
- return -ENOMEM;
- }
-
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
&pkt_dma_addr);
if (!pkt) {
@@ -117,33 +111,28 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
- timeout, &tmp);
+ rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
+ (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "Timeout while waiting for device CPU\n");
+ dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
hdev->device_cpu_disabled = true;
goto out;
}
- if (tmp == ARMCP_PACKET_FENCE_VAL) {
- u32 ctl = le32_to_cpu(pkt->ctl);
+ tmp = le32_to_cpu(pkt->ctl);
- rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
- if (rc) {
- dev_err(hdev->dev,
- "F/W ERROR %d for CPU packet %d\n",
- rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK)
+ rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+ if (rc) {
+ dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
+ rc,
+ (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
>> ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = -EINVAL;
- } else if (result) {
- *result = (long) le64_to_cpu(pkt->result);
- }
- } else {
- dev_err(hdev->dev, "CPU packet wrong fence value\n");
- rc = -EINVAL;
+ rc = -EIO;
+ } else if (result) {
+ *result = (long) le64_to_cpu(pkt->result);
}
out:
@@ -186,9 +175,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
{
u64 kernel_addr;
- /* roundup to HL_CPU_PKT_SIZE */
- size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
*dma_handle = hdev->cpu_accessible_dma_address +
@@ -200,9 +186,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr)
{
- /* roundup to HL_CPU_PKT_SIZE */
- size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
size);
}
@@ -256,7 +239,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
HL_ARMCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
dev_err(hdev->dev,
- "Failed to send armcp info pkt, error %d\n", rc);
+ "Failed to send ArmCP info pkt, error %d\n", rc);
goto out;
}
@@ -291,7 +274,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
max_size, &eeprom_info_dma_addr);
if (!eeprom_info_cpu_addr) {
dev_err(hdev->dev,
- "Failed to allocate DMA memory for EEPROM info packet\n");
+ "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
return -ENOMEM;
}
@@ -307,7 +290,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
if (rc) {
dev_err(hdev->dev,
- "Failed to send armcp EEPROM pkt, error %d\n", rc);
+ "Failed to send ArmCP EEPROM packet, error %d\n", rc);
goto out;
}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 02d116b01a1a..75294ec65257 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -14,6 +14,8 @@
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
/*
* GOYA security scheme:
@@ -89,6 +91,30 @@
#define GOYA_CB_POOL_CB_CNT 512
#define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */
+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+ (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+ (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+ engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+ !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+ (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
"goya cq 4", "goya cpu eq"
@@ -297,6 +323,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+
void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
- rc = hl_pci_init(hdev, 39);
+ rc = hl_pci_init(hdev, 48);
if (rc)
return rc;
@@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
+ goya_fetch_psoc_frequency(hdev);
+
+ rc = goya_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to clear MMU page tables range %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_set_dram_default_page(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_add_mappings_for_device_cpu(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_init_cpu_queues(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_test_cpu_queue(hdev);
+ if (rc)
+ return rc;
+
rc = goya_armcp_info_get(hdev);
if (rc) {
- dev_err(hdev->dev, "Failed to get armcp info\n");
+ dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
return rc;
}
@@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev)
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ dev_err(hdev->dev,
+ "Failed to enable PCI access from CPU %d\n", rc);
return rc;
}
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
- goya_fetch_psoc_frequency(hdev);
-
- rc = goya_mmu_clear_pgt_range(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
- goto disable_pci_access;
- }
-
- rc = goya_mmu_set_dram_default_page(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to set DRAM default page\n");
- goto disable_pci_access;
- }
-
return 0;
-
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-
- return rc;
}
/*
@@ -655,7 +695,10 @@ static int goya_sw_init(struct hl_device *hdev)
goto free_dma_pool;
}
- hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1);
+ dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n",
+ hdev->cpu_accessible_dma_address);
+
+ hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
if (!hdev->cpu_accessible_dma_pool) {
dev_err(hdev->dev,
"Failed to create CPU accessible DMA pool\n");
@@ -786,7 +829,6 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
else
sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
- WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
@@ -973,9 +1015,9 @@ int goya_init_cpu_queues(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
- lower_32_bits(hdev->cpu_accessible_dma_address));
+ lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
- upper_32_bits(hdev->cpu_accessible_dma_address));
+ upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
@@ -1001,7 +1043,7 @@ int goya_init_cpu_queues(struct hl_device *hdev)
if (err) {
dev_err(hdev->dev,
- "Failed to communicate with ARM CPU (ArmCP timeout)\n");
+ "Failed to setup communication with device CPU\n");
return -EIO;
}
@@ -2061,10 +2103,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
- if (hard_reset)
+ if (hard_reset) {
goya_disable_msix(hdev);
- else
+ goya_mmu_remove_device_cpu_mappings(hdev);
+ } else {
goya_sync_irqs(hdev);
+ }
}
/*
@@ -2277,14 +2321,14 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
- if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
- goto out;
-
if (!hdev->fw_loading) {
dev_info(hdev->dev, "Skip loading FW\n");
goto out;
}
+ if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+ goto out;
+
rc = goya_push_linux_to_device(hdev);
if (rc)
return rc;
@@ -2466,34 +2510,11 @@ static int goya_hw_init(struct hl_device *hdev)
if (rc)
goto disable_queues;
- rc = goya_init_cpu_queues(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
- rc);
- goto disable_msix;
- }
-
- /*
- * Check if we managed to set the DMA mask to more then 32 bits. If so,
- * let's try to increase it again because in Goya we set the initial
- * dma mask to less then 39 bits so that the allocation of the memory
- * area for the device's cpu will be under 39 bits
- */
- if (hdev->dma_mask > 32) {
- rc = hl_pci_set_dma_mask(hdev, 48);
- if (rc)
- goto disable_pci_access;
- }
-
/* Perform read from the device to flush all MSI-X configuration */
val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
return 0;
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-disable_msix:
- goya_disable_msix(hdev);
disable_queues:
goya_disable_internal_queues(hdev);
goya_disable_external_queues(hdev);
@@ -2629,7 +2650,6 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
u32 db_reg_offset, db_value;
- bool invalid_queue = false;
switch (hw_queue_id) {
case GOYA_QUEUE_ID_DMA_0:
@@ -2653,10 +2673,7 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;
case GOYA_QUEUE_ID_CPU_PQ:
- if (hdev->cpu_queues_enable)
- db_reg_offset = mmCPU_IF_PF_PQ_PI;
- else
- invalid_queue = true;
+ db_reg_offset = mmCPU_IF_PF_PQ_PI;
break;
case GOYA_QUEUE_ID_MME:
@@ -2696,12 +2713,8 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
break;
default:
- invalid_queue = true;
- }
-
- if (invalid_queue) {
/* Should never get here */
- dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
+ dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
hw_queue_id);
return;
}
@@ -2808,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr;
struct hl_cb *cb;
u32 tmp, timeout;
- char buf[16] = {};
int rc;
if (hdev->pldm)
@@ -2816,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;
- if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
+ if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 because %s is busy\n",
- buf);
+ "Can't send KMD job on QMAN0 because the device is not idle\n");
return -EBUSY;
}
@@ -2831,16 +2842,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
return -ENOMEM;
}
- *fence_ptr = 0;
-
goya_qman0_set_security(hdev, true);
- /*
- * goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
- * synchronized kernel jobs we only need space for 1 packet_msg_prot
- */
- job->job_cb_size -= sizeof(struct packet_msg_prot);
-
cb = job->patched_cb;
fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
@@ -2860,14 +2863,14 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
goto free_fence_ptr;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr, timeout,
- &tmp);
+ rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
+ (tmp == GOYA_QMAN0_FENCE_VAL), 1000, timeout);
hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
- if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
- dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
- rc = -ETIMEDOUT;
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
+ goto free_fence_ptr;
}
free_fence_ptr:
@@ -2941,20 +2944,19 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
goto free_pkt;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
- GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
+ rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
+ 1000, GOYA_TEST_QUEUE_WAIT_USEC);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
- if ((!rc) && (tmp == fence_val)) {
- dev_info(hdev->dev,
- "queue test on H/W queue %d succeeded\n",
- hw_queue_id);
- } else {
+ if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
- rc = -EINVAL;
+ rc = -EIO;
+ } else {
+ dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
+ hw_queue_id);
}
free_pkt:
@@ -2990,12 +2992,6 @@ int goya_test_queues(struct hl_device *hdev)
ret_val = -EINVAL;
}
- if (hdev->cpu_queues_enable) {
- rc = goya_test_cpu_queue(hdev);
- if (rc)
- ret_val = -EINVAL;
- }
-
return ret_val;
}
@@ -3028,7 +3024,13 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle)
{
- return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
+ void *vaddr;
+
+ vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
+ *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
+ VA_CPU_ACCESSIBLE_MEM_ADDR;
+
+ return vaddr;
}
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
@@ -3907,8 +3909,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
return goya_parse_cb_no_mmu(hdev, parser);
}
-void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
- u32 cq_val, u32 msix_vec)
+void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
+ u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
{
struct packet_msg_prot *cq_pkt;
u32 tmp;
@@ -3939,6 +3941,11 @@ void goya_update_eq_ci(struct hl_device *hdev, u32 val)
void goya_restore_phase_topology(struct hl_device *hdev)
{
+
+}
+
+static void goya_clear_sm_regs(struct hl_device *hdev)
+{
int i, num_of_sob_in_longs, num_of_mon_in_longs;
num_of_sob_in_longs =
@@ -3958,10 +3965,11 @@ void goya_restore_phase_topology(struct hl_device *hdev)
}
/*
- * goya_debugfs_read32 - read a 32bit value from a given device address
+ * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
+ * address.
*
* @hdev: pointer to hl_device structure
- * @addr: address in device
+ * @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4002,6 +4010,10 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
+
} else {
rc = -EFAULT;
}
@@ -4010,10 +4022,11 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
}
/*
- * goya_debugfs_write32 - write a 32bit value to a given device address
+ * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
+ * address.
*
* @hdev: pointer to hl_device structure
- * @addr: address in device
+ * @addr: device or host mapped address
* @val: returned value
*
* In case of DDR address that is not mapped into the default aperture that
@@ -4054,6 +4067,10 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
}
if (ddr_bar_addr == U64_MAX)
rc = -EIO;
+
+ } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
+ *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
+
} else {
rc = -EFAULT;
}
@@ -4086,6 +4103,47 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
static const char *_goya_get_event_desc(u16 event_type)
{
switch (event_type) {
+ case GOYA_ASYNC_EVENT_ID_PCIE_IF:
+ return "PCIe_if";
+ case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
+ return "TPC%d_ecc";
+ case GOYA_ASYNC_EVENT_ID_MME_ECC:
+ return "MME_ecc";
+ case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
+ return "MME_ecc_ext";
+ case GOYA_ASYNC_EVENT_ID_MMU_ECC:
+ return "MMU_ecc";
+ case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
+ return "DMA_macro";
+ case GOYA_ASYNC_EVENT_ID_DMA_ECC:
+ return "DMA_ecc";
+ case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
+ return "CPU_if_ecc";
+ case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
+ return "PSOC_mem";
+ case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
+ return "PSOC_coresight";
+ case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
+ return "SRAM%d";
+ case GOYA_ASYNC_EVENT_ID_GIC500:
+ return "GIC500";
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
+ return "PLL%d";
+ case GOYA_ASYNC_EVENT_ID_AXI_ECC:
+ return "AXI_ecc";
+ case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
+ return "L2_ram_ecc";
+ case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
+ return "PSOC_gpio_05_sw_reset";
+ case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
+ return "PSOC_gpio_10_vrhot_icrit";
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
return "PCIe_dec";
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
@@ -4128,6 +4186,17 @@ static const char *_goya_get_event_desc(u16 event_type)
return "DMA%d_qm";
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
return "DMA%d_ch";
+ case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
+ return "TPC%d_bmon_spmu";
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ return "DMA_bm_ch%d";
default:
return "N/A";
}
@@ -4138,6 +4207,25 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
u8 index;
switch (event_type) {
+ case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
+ case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
+ index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
+ index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
+ index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
@@ -4176,6 +4264,21 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
+ case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
+ case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
+ index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
+ snprintf(desc, size, _goya_get_event_desc(event_type), index);
+ break;
default:
snprintf(desc, size, _goya_get_event_desc(event_type));
break;
@@ -4226,7 +4329,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
}
}
-static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
+static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
+ bool razwi)
{
char desc[20] = "";
@@ -4234,8 +4338,10 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
event_type, desc);
- goya_print_razwi_info(hdev);
- goya_print_mmu_error_info(hdev);
+ if (razwi) {
+ goya_print_razwi_info(hdev);
+ goya_print_mmu_error_info(hdev);
+ }
}
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
@@ -4339,19 +4445,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
case GOYA_ASYNC_EVENT_ID_GIC500:
- case GOYA_ASYNC_EVENT_ID_PLL0:
- case GOYA_ASYNC_EVENT_ID_PLL1:
- case GOYA_ASYNC_EVENT_ID_PLL3:
- case GOYA_ASYNC_EVENT_ID_PLL4:
- case GOYA_ASYNC_EVENT_ID_PLL5:
- case GOYA_ASYNC_EVENT_ID_PLL6:
+ case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
- dev_err(hdev->dev,
- "Received H/W interrupt %d, reset the chip\n",
- event_type);
+ goya_print_irq_info(hdev, event_type, false);
hl_device_reset(hdev, true, false);
break;
@@ -4382,7 +4481,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
- goya_print_irq_info(hdev, event_type);
+ goya_print_irq_info(hdev, event_type, true);
goya_unmask_irq(hdev, event_type);
break;
@@ -4394,12 +4493,9 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
- case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
- dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
+ case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
+ goya_print_irq_info(hdev, event_type, false);
+ goya_unmask_irq(hdev, event_type);
break;
default:
@@ -4418,36 +4514,47 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
return goya->events_stat;
}
-static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
+static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
u64 val, bool is_dram)
{
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_job *job;
u32 cb_size, ctl;
struct hl_cb *cb;
- int rc;
+ int rc, lin_dma_pkts_cnt;
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
+ cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
+ sizeof(struct packet_msg_prot);
+ cb = hl_cb_kernel_create(hdev, cb_size);
if (!cb)
- return -EFAULT;
+ return -ENOMEM;
lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
- memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
- cb_size = sizeof(*lin_dma_pkt);
-
- ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
- (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
- (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
- (1 << GOYA_PKT_CTL_RB_SHIFT) |
- (1 << GOYA_PKT_CTL_MB_SHIFT));
- ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
- GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
- lin_dma_pkt->ctl = cpu_to_le32(ctl);
+ do {
+ memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
+
+ ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+ (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+ (1 << GOYA_PKT_CTL_RB_SHIFT) |
+ (1 << GOYA_PKT_CTL_MB_SHIFT));
+ ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
+ GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+ lin_dma_pkt->ctl = cpu_to_le32(ctl);
+
+ lin_dma_pkt->src_addr = cpu_to_le64(val);
+ lin_dma_pkt->dst_addr = cpu_to_le64(addr);
+ if (lin_dma_pkts_cnt > 1)
+ lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
+ else
+ lin_dma_pkt->tsize = cpu_to_le32(size);
- lin_dma_pkt->src_addr = cpu_to_le64(val);
- lin_dma_pkt->dst_addr = cpu_to_le64(addr);
- lin_dma_pkt->tsize = cpu_to_le32(size);
+ size -= SZ_2G;
+ addr += SZ_2G;
+ lin_dma_pkt++;
+ } while (--lin_dma_pkts_cnt);
job = hl_cs_allocate_job(hdev, true);
if (!job) {
@@ -4462,8 +4569,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
job->user_cb_size = cb_size;
job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
job->patched_cb = job->user_cb;
- job->job_cb_size = job->user_cb_size +
- sizeof(struct packet_msg_prot) * 2;
+ job->job_cb_size = job->user_cb_size;
hl_debugfs_add_job(hdev, job);
@@ -4485,10 +4591,12 @@ release_cb:
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 addr = prop->sram_base_address;
+ u64 addr = prop->sram_base_address, sob_addr;
u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
u64 val = 0x7777777777777777ull;
- int rc;
+ int rc, dma_id;
+ u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
+ mmDMA_CH_0_WR_COMP_ADDR_LO;
rc = goya_memset_device_memory(hdev, addr, size, val, false);
if (rc) {
@@ -4496,13 +4604,27 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
return rc;
}
+ /* we need to reset registers that the user is allowed to change */
+ sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
+ WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
+
+ for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
+ sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
+ (dma_id - 1) * 4;
+ WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
+ lower_32_bits(sob_addr));
+ }
+
WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
+
goya_mmu_prepare(hdev, asid);
+ goya_clear_sm_regs(hdev);
+
return 0;
}
-int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -4516,7 +4638,7 @@ int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, 0, true);
}
-int goya_mmu_set_dram_default_page(struct hl_device *hdev)
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
@@ -4529,7 +4651,123 @@ int goya_mmu_set_dram_default_page(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, val, true);
}
-void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ s64 off, cpu_off;
+ int rc;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return 0;
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
+ rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
+ prop->dram_base_address + off, PAGE_SIZE_2MB);
+ if (rc) {
+ dev_err(hdev->dev, "Map failed for address 0x%llx\n",
+ prop->dram_base_address + off);
+ goto unmap;
+ }
+ }
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ off -= PAGE_SIZE_2MB;
+ goto unmap;
+ }
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
+ rc = hl_mmu_map(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ hdev->cpu_accessible_dma_address + cpu_off,
+ PAGE_SIZE_4KB);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Map failed for CPU accessible memory\n");
+ cpu_off -= PAGE_SIZE_4KB;
+ goto unmap_cpu;
+ }
+ }
+ }
+
+ goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
+ goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
+ WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
+ WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
+
+ /* Make sure configuration is flushed to device */
+ RREG32(mmCPU_IF_AWUSER_OVR_EN);
+
+ goya->device_cpu_mmu_mappings_done = true;
+
+ return 0;
+
+unmap_cpu:
+ for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+unmap:
+ for (; off >= 0 ; off -= PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ return rc;
+}
+
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct goya_device *goya = hdev->asic_specific;
+ u32 off, cpu_off;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ if (!goya->device_cpu_mmu_mappings_done)
+ return;
+
+ WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
+ WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
+
+ if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
+ if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+ PAGE_SIZE_2MB))
+ dev_warn(hdev->dev,
+ "Failed to unmap CPU accessible memory\n");
+ } else {
+ for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
+ PAGE_SIZE_4KB))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap address 0x%llx\n",
+ VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
+ }
+
+ for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
+ if (hl_mmu_unmap(hdev->kernel_ctx,
+ prop->dram_base_address + off, PAGE_SIZE_2MB))
+ dev_warn_ratelimited(hdev->dev,
+ "Failed to unmap address 0x%llx\n",
+ prop->dram_base_address + off);
+
+ goya->device_cpu_mmu_mappings_done = false;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct goya_device *goya = hdev->asic_specific;
int i;
@@ -4676,57 +4914,82 @@ int goya_armcp_info_get(struct hl_device *hdev)
return 0;
}
-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
+ struct seq_file *s)
{
- u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+ const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+ const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+ u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+ mme_arch_sts;
+ bool is_idle = true, is_eng_idle;
+ u64 offset;
int i;
+ if (s)
+ seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
+ "--- ------- ------------ -------------\n");
+
offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
for (i = 0 ; i < DMA_MAX_NUM ; i++) {
- dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+ qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+ dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+ is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+ IS_DMA_IDLE(dma_core_sts0);
+ is_idle &= is_eng_idle;
- if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
- DMA_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+ if (mask)
+ *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
+ if (s)
+ seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, dma_core_sts0);
}
+ if (s)
+ seq_puts(s,
+ "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
+ "--- ------- ------------ -------------- ----------\n");
+
offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
for (i = 0 ; i < TPC_MAX_NUM ; i++) {
- tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
- tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
- tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
-
- if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
- TPC_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
- if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
- TPC_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
- if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
- TPC_CFG_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
- }
-
- if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
- MME_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_QM");
-
- if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
- MME_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_CMDQ");
-
- if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
- MME_ARCH_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_ARCH");
-
- if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME");
-
- return true;
+ qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+ cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+ tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+ is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+ IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_TPC_IDLE(tpc_cfg_sts);
+ is_idle &= is_eng_idle;
+
+ if (mask)
+ *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
+ if (s)
+ seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
+ }
+
+ if (s)
+ seq_puts(s,
+ "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
+ "--- ------- ------------ -------------- -----------\n");
+
+ qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+ cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+ mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+ is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+ IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_MME_IDLE(mme_arch_sts);
+ is_idle &= is_eng_idle;
+
+ if (mask)
+ *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
+ if (s) {
+ seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+ cmdq_glbl_sts0, mme_arch_sts);
+ seq_puts(s, "\n");
+ }
+
+ return is_idle;
}
static void goya_hw_queues_lock(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index c83cab0d641e..f8c611883dc1 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -126,6 +126,12 @@
#define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \
VA_DDR_SPACE_START) /* 128GB */
+#if (HL_CPU_ACCESSIBLE_MEM_SIZE != SZ_2M)
+#error "HL_CPU_ACCESSIBLE_MEM_SIZE must be exactly 2MB to enable MMU mapping"
+#endif
+
+#define VA_CPU_ACCESSIBLE_MEM_ADDR 0x8000000000ull
+
#define DMA_MAX_TRANSFER_SIZE U32_MAX
#define HW_CAP_PLL 0x00000001
@@ -157,6 +163,7 @@ struct goya_device {
u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
+ u8 device_cpu_mmu_mappings_done;
};
void goya_get_fixed_properties(struct hl_device *hdev);
@@ -204,18 +211,14 @@ int goya_armcp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, void *data);
void goya_halt_coresight(struct hl_device *hdev);
-void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-int goya_mmu_clear_pgt_range(struct hl_device *hdev);
-int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
-void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
- u32 cq_val, u32 msix_vec);
+void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
+ u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len);
@@ -225,5 +228,6 @@ void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle);
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
+void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya_security.c b/drivers/misc/habanalabs/goya/goya_security.c
index d95d1b2f860d..d6ec12b3e692 100644
--- a/drivers/misc/habanalabs/goya/goya_security.c
+++ b/drivers/misc/habanalabs/goya/goya_security.c
@@ -677,6 +677,17 @@ static void goya_init_tpc_protection_bits(struct hl_device *hdev)
goya_pb_set_block(hdev, mmTPC0_RD_REGULATOR_BASE);
goya_pb_set_block(hdev, mmTPC0_WR_REGULATOR_BASE);
+ pb_addr = (mmTPC0_CFG_SEMAPHORE & ~0xFFF) + PROT_BITS_OFFS;
+ word_offset = ((mmTPC0_CFG_SEMAPHORE & PROT_BITS_OFFS) >> 7) << 2;
+
+ mask = 1 << ((mmTPC0_CFG_SEMAPHORE & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_LFSR_POLYNOM & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_STATUS & 0x7F) >> 2);
+
+ WREG32(pb_addr + word_offset, ~mask);
+
pb_addr = (mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & ~0xFFF) + PROT_BITS_OFFS;
word_offset = ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH &
PROT_BITS_OFFS) >> 7) << 2;
@@ -684,6 +695,11 @@ static void goya_init_tpc_protection_bits(struct hl_device *hdev)
mask |= 1 << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1 << ((mmTPC0_CFG_SM_BASE_ADDRESS_LOW & 0x7F) >> 2);
mask |= 1 << ((mmTPC0_CFG_SM_BASE_ADDRESS_HIGH & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_TPC_INTR_CAUSE & 0x7F) >> 2);
+ mask |= 1 << ((mmTPC0_CFG_TPC_INTR_MASK & 0x7F) >> 2);
WREG32(pb_addr + word_offset, ~mask);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index adef7d9d7488..10da9940ee0d 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -34,6 +34,8 @@
#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
+
#define HL_MAX_QUEUES 128
#define HL_MAX_JOBS_PER_CS 64
@@ -123,7 +125,7 @@ enum hl_device_hw_state {
/**
* struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties.
- * @armcp_info: received various information from ArmCP regarding the H/W. e.g.
+ * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
* available sensors.
* @uboot_ver: F/W U-boot version.
* @preboot_ver: F/W Preboot version.
@@ -318,18 +320,8 @@ struct hl_cs_job;
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
-#define HL_CPU_PKT_SHIFT 5
-#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
-#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
-#define HL_CPU_MAX_PKTS_IN_CB 32
-#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
- HL_CPU_MAX_PKTS_IN_CB)
-#define HL_CPU_CB_QUEUE_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
-
-/* KMD <-> ArmCP shared memory size (EQ + PQ + CPU CB queue) */
-#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_EQ_SIZE_IN_BYTES + \
- HL_QUEUE_SIZE_IN_BYTES + \
- HL_CPU_CB_QUEUE_SIZE)
+/* KMD <-> ArmCP shared memory size */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
/**
* struct hl_hw_queue - describes a H/W transport queue.
@@ -543,8 +535,9 @@ struct hl_asic_funcs {
enum dma_data_direction dir);
u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
struct sg_table *sgt);
- void (*add_end_of_cb_packets)(u64 kernel_address, u32 len, u64 cq_addr,
- u32 cq_val, u32 msix_num);
+ void (*add_end_of_cb_packets)(struct hl_device *hdev,
+ u64 kernel_address, u32 len,
+ u64 cq_addr, u32 cq_val, u32 msix_num);
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
int (*context_switch)(struct hl_device *hdev, u32 asid);
void (*restore_phase_topology)(struct hl_device *hdev);
@@ -564,7 +557,8 @@ struct hl_asic_funcs {
u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
+ bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+ struct seq_file *s);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1065,12 +1059,59 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(cond) ? 0 : -ETIMEDOUT; \
})
+/*
+ * address in this macro points always to a memory location in the
+ * host's (server's) memory. That location is updated asynchronously
+ * either by the direct access of the device or by another core
+ */
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us) \
+({ \
+ ktime_t __timeout; \
+ /* timeout should be longer when working with simulator */ \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ /* Verify we read updates done by other cores or by device */ \
+ mb(); \
+ (val) = *((u32 *) (uintptr_t) (addr)); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = *((u32 *) (uintptr_t) (addr)); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
-#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
- if (buf) \
- snprintf(buf, size, fmt, ##__VA_ARGS__); \
- false; \
- })
+#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
+ timeout_us) \
+({ \
+ ktime_t __timeout; \
+ /* timeout should be longer when working with simulator */ \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(), (timeout_us * 10)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ (val) = readl(addr); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = readl(addr); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
struct hwmon_chip_info;
@@ -1117,6 +1158,7 @@ struct hl_device_reset_work {
* lock here so we can flush user processes which are opening
* the device while we are trying to hard reset it
* @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
@@ -1159,6 +1201,8 @@ struct hl_device_reset_work {
* @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
* @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fd_open_cnt, enforces
+ * that only a single user is configuring the debug infrastructure.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1188,6 +1232,7 @@ struct hl_device {
/* TODO: remove fd_open_cnt_lock for multiple process support */
struct mutex fd_open_cnt_lock;
struct mutex send_cpu_message_lock;
+ struct mutex debug_lock;
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
@@ -1230,6 +1275,7 @@ struct hl_device {
u8 init_done;
u8 device_cpu_disabled;
u8 dma_mask;
+ u8 in_debug;
/* Parameters for bring-up */
u8 mmu_enable;
@@ -1325,13 +1371,10 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
int hl_device_open(struct inode *inode, struct file *filp);
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
enum hl_asic_type asic_type, int minor);
void destroy_hdev(struct hl_device *hdev);
-int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
- u32 *val);
-int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
- u32 timeout_us, u32 *val);
int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 5f4d155be767..6f6dbe93f1df 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -105,9 +105,17 @@ int hl_device_open(struct inode *inode, struct file *filp)
return -EPERM;
}
+ if (hdev->in_debug) {
+ dev_err_ratelimited(hdev->dev,
+ "Can't open %s because it is being debugged by another user\n",
+ dev_name(hdev->dev));
+ mutex_unlock(&hdev->fd_open_cnt_lock);
+ return -EPERM;
+ }
+
if (atomic_read(&hdev->fd_open_cnt)) {
dev_info_ratelimited(hdev->dev,
- "Device %s is already attached to application\n",
+ "Can't open %s because another user is working on it\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EBUSY;
@@ -164,6 +172,17 @@ close_device:
return rc;
}
+static void set_driver_behavior_per_device(struct hl_device *hdev)
+{
+ hdev->mmu_enable = 1;
+ hdev->cpu_enable = 1;
+ hdev->fw_loading = 1;
+ hdev->cpu_queues_enable = 1;
+ hdev->heartbeat = 1;
+
+ hdev->reset_pcilink = 0;
+}
+
/*
* create_hdev - create habanalabs device instance
*
@@ -188,29 +207,25 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
if (!hdev)
return -ENOMEM;
+ /* First, we must find out which ASIC are we handling. This is needed
+ * to configure the behavior of the driver (kernel parameters)
+ */
+ if (pdev) {
+ hdev->asic_type = get_asic_type(pdev->device);
+ if (hdev->asic_type == ASIC_INVALID) {
+ dev_err(&pdev->dev, "Unsupported ASIC\n");
+ rc = -ENODEV;
+ goto free_hdev;
+ }
+ } else {
+ hdev->asic_type = asic_type;
+ }
+
hdev->major = hl_major;
hdev->reset_on_lockup = reset_on_lockup;
-
- /* Parameters for bring-up - set them to defaults */
- hdev->mmu_enable = 1;
- hdev->cpu_enable = 1;
- hdev->reset_pcilink = 0;
- hdev->cpu_queues_enable = 1;
- hdev->fw_loading = 1;
hdev->pldm = 0;
- hdev->heartbeat = 1;
-
- /* If CPU is disabled, no point in loading FW */
- if (!hdev->cpu_enable)
- hdev->fw_loading = 0;
- /* If we don't load FW, no need to initialize CPU queues */
- if (!hdev->fw_loading)
- hdev->cpu_queues_enable = 0;
-
- /* If CPU queues not enabled, no way to do heartbeat */
- if (!hdev->cpu_queues_enable)
- hdev->heartbeat = 0;
+ set_driver_behavior_per_device(hdev);
if (timeout_locked)
hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
@@ -220,17 +235,6 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->disabled = true;
hdev->pdev = pdev; /* can be NULL in case of simulator device */
- if (pdev) {
- hdev->asic_type = get_asic_type(pdev->device);
- if (hdev->asic_type == ASIC_INVALID) {
- dev_err(&pdev->dev, "Unsupported ASIC\n");
- rc = -ENODEV;
- goto free_hdev;
- }
- } else {
- hdev->asic_type = asic_type;
- }
-
/* Set default DMA mask to 32 bits */
hdev->dma_mask = 32;
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index b7a0eecf6b6c..07127576b3e8 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
+ hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
+ &hw_idle.busy_engines_mask, NULL);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -254,10 +255,18 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
case HL_DEBUG_OP_BMON:
case HL_DEBUG_OP_SPMU:
case HL_DEBUG_OP_TIMESTAMP:
+ if (!hdev->in_debug) {
+ dev_err_ratelimited(hdev->dev,
+ "Rejecting debug configuration request because device not in debug mode\n");
+ return -EFAULT;
+ }
args->input_size =
min(args->input_size, hl_debug_struct_size[args->op]);
rc = debug_coresight(hdev, args);
break;
+ case HL_DEBUG_OP_SET_MODE:
+ rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+ break;
default:
dev_err(hdev->dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 2894d8975933..e3b5517897ea 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -265,7 +265,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
cq = &hdev->completion_queue[q->hw_queue_id];
cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
- hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
+ hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
cq_addr,
__le32_to_cpu(cq_pkt.data),
q->hw_queue_id);
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
new file mode 100644
index 000000000000..028143408401
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ ** DO NOT EDIT BELOW **
+ ************************************/
+
+#ifndef ASIC_REG_DMA_CH_0_MASKS_H_
+#define ASIC_REG_DMA_CH_0_MASKS_H_
+
+/*
+ *****************************************
+ * DMA_CH_0 (Prototype: DMA_CH)
+ *****************************************
+ */
+
+/* DMA_CH_0_CFG0 */
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT 0
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK 0x3FF
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_SHIFT 16
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_MASK 0xFFF0000
+
+/* DMA_CH_0_CFG1 */
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_SHIFT 0
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_MASK 0x3FF
+
+/* DMA_CH_0_ERRMSG_ADDR_LO */
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_ADDR_HI */
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_WDATA */
+#define DMA_CH_0_ERRMSG_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_LO */
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_HI */
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_WDATA */
+#define DMA_CH_0_RD_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_LO */
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_HI */
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_WDATA */
+#define DMA_CH_0_WR_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_LO */
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_HI */
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_LO */
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_HI */
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_TSIZE */
+#define DMA_CH_0_LDMA_TSIZE_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_TSIZE_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_COMIT_TRANSFER */
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_SHIFT 0
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_MASK 0x1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_SHIFT 1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_MASK 0x2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_SHIFT 2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_MASK 0x4
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_SHIFT 3
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_MASK 0x8
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_SHIFT 4
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_MASK 0x10
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_SHIFT 5
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_MASK 0x20
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_SHIFT 6
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_MASK 0x40
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_SHIFT 15
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_MASK 0x8000
+#define DMA_CH_0_COMIT_TRANSFER_CTL_SHIFT 16
+#define DMA_CH_0_COMIT_TRANSFER_CTL_MASK 0xFFFF0000
+
+/* DMA_CH_0_STS0 */
+#define DMA_CH_0_STS0_DMA_BUSY_SHIFT 0
+#define DMA_CH_0_STS0_DMA_BUSY_MASK 0x1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_SHIFT 1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_MASK 0x2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_SHIFT 2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_MASK 0x4
+
+/* DMA_CH_0_STS1 */
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS2 */
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS3 */
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS4 */
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_LO_STS */
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_HI_STS */
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_TSIZE_STS */
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_LO_STS */
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_HI_STS */
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_TSIZE_STS */
+#define DMA_CH_0_DST_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_EN */
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_RD_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_SAT */
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_TOUT */
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_EN */
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_WR_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_SAT */
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_TOUT */
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_CFG2 */
+#define DMA_CH_0_CFG2_FORCE_WORD_SHIFT 0
+#define DMA_CH_0_CFG2_FORCE_WORD_MASK 0x1
+
+/* DMA_CH_0_TDMA_CTL */
+#define DMA_CH_0_TDMA_CTL_DTYPE_SHIFT 0
+#define DMA_CH_0_TDMA_CTL_DTYPE_MASK 0x7
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_0 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_1 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_2 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_3 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_4 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_0 */
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_1 */
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_2 */
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_3 */
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_4 */
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_MEM_INIT_BUSY */
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_SHIFT 0
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_MASK 0xFF
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_SHIFT 8
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_MASK 0x100
+
+#endif /* ASIC_REG_DMA_CH_0_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 506e71e201e1..19b0f0ef1d0b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -88,6 +88,7 @@
#include "psoc_global_conf_masks.h"
#include "dma_macro_masks.h"
#include "dma_qm_0_masks.h"
+#include "dma_ch_0_masks.h"
#include "tpc0_qm_masks.h"
#include "tpc0_cmdq_masks.h"
#include "mme_qm_masks.h"
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 693877e37fd8..42d237cae1dc 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -1657,17 +1657,10 @@ int hl_vm_init(struct hl_device *hdev)
struct hl_vm *vm = &hdev->vm;
int rc;
- rc = hl_mmu_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to init MMU\n");
- return rc;
- }
-
vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
if (!vm->dram_pg_pool) {
dev_err(hdev->dev, "Failed to create dram page pool\n");
- rc = -ENOMEM;
- goto pool_create_err;
+ return -ENOMEM;
}
kref_init(&vm->dram_pg_pool_refcount);
@@ -1693,8 +1686,6 @@ int hl_vm_init(struct hl_device *hdev)
pool_add_err:
gen_pool_destroy(vm->dram_pg_pool);
-pool_create_err:
- hl_mmu_fini(hdev);
return rc;
}
@@ -1724,7 +1715,5 @@ void hl_vm_fini(struct hl_device *hdev)
dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
__func__);
- hl_mmu_fini(hdev);
-
vm->init_done = false;
}
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 10aee3141444..176c315836f1 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -241,8 +241,9 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
hop2_pte_addr, hop3_pte_addr, pte_val;
int rc, i, j, hop3_allocated = 0;
- if (!hdev->dram_supports_virtual_memory ||
- !hdev->dram_default_page_mapping)
+ if ((!hdev->dram_supports_virtual_memory) ||
+ (!hdev->dram_default_page_mapping) ||
+ (ctx->asid == HL_KERNEL_ASID_ID))
return 0;
num_of_hop3 = prop->dram_size_for_default_page_mapping;
@@ -340,8 +341,9 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
hop2_pte_addr, hop3_pte_addr;
int i, j;
- if (!hdev->dram_supports_virtual_memory ||
- !hdev->dram_default_page_mapping)
+ if ((!hdev->dram_supports_virtual_memory) ||
+ (!hdev->dram_default_page_mapping) ||
+ (ctx->asid == HL_KERNEL_ASID_ID))
return;
num_of_hop3 = prop->dram_size_for_default_page_mapping;
@@ -385,12 +387,8 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
* @hdev: habanalabs device structure.
*
* This function does the following:
- * - Allocate max_asid zeroed hop0 pgts so no mapping is available.
- * - Enable MMU in H/W.
- * - Invalidate the MMU cache.
* - Create a pool of pages for pgt_infos.
- *
- * This function depends on DMA QMAN to be working!
+ * - Create a shadow table for pgt
*
* Return: 0 for success, non-zero for failure.
*/
@@ -915,6 +913,10 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
return -EFAULT;
}
+ WARN_ONCE((phys_addr & (real_page_size - 1)),
+ "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
+ phys_addr, real_page_size);
+
npages = page_size / real_page_size;
real_virt_addr = virt_addr;
real_phys_addr = phys_addr;
diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c
index 0e78a04d63f4..c98d88c7a5c6 100644
--- a/drivers/misc/habanalabs/pci.c
+++ b/drivers/misc/habanalabs/pci.c
@@ -10,6 +10,8 @@
#include <linux/pci.h>
+#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
+
/**
* hl_pci_bars_map() - Map PCI BARs.
* @hdev: Pointer to hl_device structure.
@@ -88,8 +90,14 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
{
struct pci_dev *pdev = hdev->pdev;
ktime_t timeout;
+ u64 msec;
u32 val;
+ if (hdev->pldm)
+ msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+ else
+ msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
/* Clear previous status */
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
@@ -98,7 +106,7 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
PCI_CONFIG_ELBI_CTRL_WRITE);
- timeout = ktime_add_ms(ktime_get(), 10);
+ timeout = ktime_add_ms(ktime_get(), msec);
for (;;) {
pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
if (val & PCI_CONFIG_ELBI_STS_MASK)
diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c
index c900ab15cceb..25eb46d29d88 100644
--- a/drivers/misc/habanalabs/sysfs.c
+++ b/drivers/misc/habanalabs/sysfs.c
@@ -328,10 +328,6 @@ static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
{
struct hl_device *hdev = dev_get_drvdata(dev);
- /* Use dummy, fixed address for simulator */
- if (!hdev->pdev)
- return sprintf(buf, "0000:%02d:00.0\n", hdev->id);
-
return sprintf(buf, "%04x:%02x:%02x.%x\n",
pci_domain_nr(hdev->pdev->bus),
hdev->pdev->bus->number,
diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
index 3431a825f24e..c12406f610d5 100644
--- a/drivers/misc/isl29003.c
+++ b/drivers/misc/isl29003.c
@@ -3,7 +3,7 @@
* isl29003.c - Linux kernel module for
* Intersil ISL29003 ambient light sensor
*
- * See file:Documentation/misc-devices/isl29003
+ * See file:Documentation/misc-devices/isl29003.rst
*
* Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
*
@@ -377,7 +377,7 @@ static int isl29003_init_client(struct i2c_client *client)
static int isl29003_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct i2c_adapter *adapter = client->adapter;
struct isl29003_data *data;
int err = 0;
diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig
index 4cfad45229c8..bb2fec4b5880 100644
--- a/drivers/misc/lis3lv02d/Kconfig
+++ b/drivers/misc/lis3lv02d/Kconfig
@@ -7,7 +7,6 @@ config SENSORS_LIS3_SPI
tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)"
depends on !ACPI && SPI_MASTER && INPUT
select SENSORS_LIS3LV02D
- default n
help
This driver provides support for the LIS3LV02Dx accelerometer connected
via SPI. The accelerometer data is readable via
@@ -24,7 +23,6 @@ config SENSORS_LIS3_I2C
tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)"
depends on I2C && INPUT
select SENSORS_LIS3LV02D
- default n
help
This driver provides support for the LIS3LV02Dx accelerometer connected
via I2C. The accelerometer data is readable via
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 951c984de61a..fb10eafe9bde 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -15,8 +15,7 @@ KCOV_INSTRUMENT_rodata.o := n
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
- --set-section-flags .text=alloc,readonly \
- --rename-section .text=.rodata
+ --rename-section .text=.rodata,alloc,readonly,load
targets += rodata.o rodata_objcopy.o
$(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE
$(call if_changed,objcopy)
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index d9fcfd3b5af0..1606658b9b7e 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -266,3 +266,69 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void)
pr_err("FAIL: accessed page after stack!\n");
}
+
+void lkdtm_UNSET_SMEP(void)
+{
+#ifdef CONFIG_X86_64
+#define MOV_CR4_DEPTH 64
+ void (*direct_write_cr4)(unsigned long val);
+ unsigned char *insn;
+ unsigned long cr4;
+ int i;
+
+ cr4 = native_read_cr4();
+
+ if ((cr4 & X86_CR4_SMEP) != X86_CR4_SMEP) {
+ pr_err("FAIL: SMEP not in use\n");
+ return;
+ }
+ cr4 &= ~(X86_CR4_SMEP);
+
+ pr_info("trying to clear SMEP normally\n");
+ native_write_cr4(cr4);
+ if (cr4 == native_read_cr4()) {
+ pr_err("FAIL: pinning SMEP failed!\n");
+ cr4 |= X86_CR4_SMEP;
+ pr_info("restoring SMEP\n");
+ native_write_cr4(cr4);
+ return;
+ }
+ pr_info("ok: SMEP did not get cleared\n");
+
+ /*
+ * To test the post-write pinning verification we need to call
+ * directly into the middle of native_write_cr4() where the
+ * cr4 write happens, skipping any pinning. This searches for
+ * the cr4 writing instruction.
+ */
+ insn = (unsigned char *)native_write_cr4;
+ for (i = 0; i < MOV_CR4_DEPTH; i++) {
+ /* mov %rdi, %cr4 */
+ if (insn[i] == 0x0f && insn[i+1] == 0x22 && insn[i+2] == 0xe7)
+ break;
+ /* mov %rdi,%rax; mov %rax, %cr4 */
+ if (insn[i] == 0x48 && insn[i+1] == 0x89 &&
+ insn[i+2] == 0xf8 && insn[i+3] == 0x0f &&
+ insn[i+4] == 0x22 && insn[i+5] == 0xe0)
+ break;
+ }
+ if (i >= MOV_CR4_DEPTH) {
+ pr_info("ok: cannot locate cr4 writing call gadget\n");
+ return;
+ }
+ direct_write_cr4 = (void *)(insn + i);
+
+ pr_info("trying to clear SMEP with call gadget\n");
+ direct_write_cr4(cr4);
+ if (native_read_cr4() & X86_CR4_SMEP) {
+ pr_info("ok: SMEP removal was reverted\n");
+ } else {
+ pr_err("FAIL: cleared SMEP not detected!\n");
+ cr4 |= X86_CR4_SMEP;
+ pr_info("restoring SMEP\n");
+ native_write_cr4(cr4);
+ }
+#else
+ pr_err("FAIL: this test is x86_64-only\n");
+#endif
+}
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index bba49abb6750..756794ac8fd8 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -114,6 +114,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(CORRUPT_USER_DS),
CRASHTYPE(STACK_GUARD_PAGE_LEADING),
CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
+ CRASHTYPE(UNSET_SMEP),
CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE),
CRASHTYPE(OVERWRITE_ALLOCATION),
CRASHTYPE(WRITE_AFTER_FREE),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 23dc565b4307..bbcd370786d4 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -26,6 +26,7 @@ void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
void lkdtm_STACK_GUARD_PAGE_LEADING(void);
void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
+void lkdtm_UNSET_SMEP(void);
/* lkdtm_heap.c */
void lkdtm_OVERWRITE_ALLOCATION(void);
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index 0970142bcace..47cfd5005e1b 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include <linux/mei.h>
@@ -15,104 +16,56 @@
#include "client.h"
#include "hw.h"
-static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int mei_dbgfs_meclients_show(struct seq_file *m, void *unused)
{
- struct mei_device *dev = fp->private_data;
+ struct mei_device *dev = m->private;
struct mei_me_client *me_cl;
- size_t bufsz = 1;
- char *buf;
int i = 0;
- int pos = 0;
- int ret;
-#define HDR \
-" |id|fix| UUID |con|msg len|sb|refc|\n"
+ if (!dev)
+ return -ENODEV;
down_read(&dev->me_clients_rwsem);
- list_for_each_entry(me_cl, &dev->me_clients, list)
- bufsz++;
- bufsz *= sizeof(HDR) + 1;
- buf = kzalloc(bufsz, GFP_KERNEL);
- if (!buf) {
- up_read(&dev->me_clients_rwsem);
- return -ENOMEM;
- }
-
- pos += scnprintf(buf + pos, bufsz - pos, HDR);
-#undef HDR
+ seq_puts(m, " |id|fix| UUID |con|msg len|sb|refc|\n");
/* if the driver is not enabled the list won't be consistent */
if (dev->dev_state != MEI_DEV_ENABLED)
goto out;
list_for_each_entry(me_cl, &dev->me_clients, list) {
-
- if (mei_me_cl_get(me_cl)) {
- pos += scnprintf(buf + pos, bufsz - pos,
- "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n",
- i++, me_cl->client_id,
- me_cl->props.fixed_address,
- &me_cl->props.protocol_name,
- me_cl->props.max_number_of_connections,
- me_cl->props.max_msg_length,
- me_cl->props.single_recv_buf,
- kref_read(&me_cl->refcnt));
-
- mei_me_cl_put(me_cl);
- }
+ if (!mei_me_cl_get(me_cl))
+ continue;
+
+ seq_printf(m, "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n",
+ i++, me_cl->client_id,
+ me_cl->props.fixed_address,
+ &me_cl->props.protocol_name,
+ me_cl->props.max_number_of_connections,
+ me_cl->props.max_msg_length,
+ me_cl->props.single_recv_buf,
+ kref_read(&me_cl->refcnt));
+ mei_me_cl_put(me_cl);
}
out:
up_read(&dev->me_clients_rwsem);
- ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
- kfree(buf);
- return ret;
+ return 0;
}
+DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_meclients);
-static const struct file_operations mei_dbgfs_fops_meclients = {
- .open = simple_open,
- .read = mei_dbgfs_read_meclients,
- .llseek = generic_file_llseek,
-};
-
-static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int mei_dbgfs_active_show(struct seq_file *m, void *unused)
{
- struct mei_device *dev = fp->private_data;
+ struct mei_device *dev = m->private;
struct mei_cl *cl;
- size_t bufsz = 1;
- char *buf;
int i = 0;
- int pos = 0;
- int ret;
-
-#define HDR " |me|host|state|rd|wr|wrq\n"
if (!dev)
return -ENODEV;
mutex_lock(&dev->device_lock);
- /*
- * if the driver is not enabled the list won't be consistent,
- * we output empty table
- */
- if (dev->dev_state == MEI_DEV_ENABLED)
- list_for_each_entry(cl, &dev->file_list, link)
- bufsz++;
-
- bufsz *= sizeof(HDR) + 1;
-
- buf = kzalloc(bufsz, GFP_KERNEL);
- if (!buf) {
- mutex_unlock(&dev->device_lock);
- return -ENOMEM;
- }
-
- pos += scnprintf(buf + pos, bufsz - pos, HDR);
-#undef HDR
+ seq_puts(m, " |me|host|state|rd|wr|wrq\n");
/* if the driver is not enabled the list won't be consistent */
if (dev->dev_state != MEI_DEV_ENABLED)
@@ -120,76 +73,44 @@ static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf,
list_for_each_entry(cl, &dev->file_list, link) {
- pos += scnprintf(buf + pos, bufsz - pos,
- "%3d|%2d|%4d|%5d|%2d|%2d|%3u\n",
- i, mei_cl_me_id(cl), cl->host_client_id, cl->state,
- !list_empty(&cl->rd_completed), cl->writing_state,
- cl->tx_cb_queued);
+ seq_printf(m, "%3d|%2d|%4d|%5d|%2d|%2d|%3u\n",
+ i, mei_cl_me_id(cl), cl->host_client_id, cl->state,
+ !list_empty(&cl->rd_completed), cl->writing_state,
+ cl->tx_cb_queued);
i++;
}
out:
mutex_unlock(&dev->device_lock);
- ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
- kfree(buf);
- return ret;
+ return 0;
}
+DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_active);
-static const struct file_operations mei_dbgfs_fops_active = {
- .open = simple_open,
- .read = mei_dbgfs_read_active,
- .llseek = generic_file_llseek,
-};
-
-static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int mei_dbgfs_devstate_show(struct seq_file *m, void *unused)
{
- struct mei_device *dev = fp->private_data;
- const size_t bufsz = 1024;
- char *buf = kzalloc(bufsz, GFP_KERNEL);
- int pos = 0;
- int ret;
-
- if (!buf)
- return -ENOMEM;
+ struct mei_device *dev = m->private;
- pos += scnprintf(buf + pos, bufsz - pos, "dev: %s\n",
- mei_dev_state_str(dev->dev_state));
- pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n",
- mei_hbm_state_str(dev->hbm_state));
+ seq_printf(m, "dev: %s\n", mei_dev_state_str(dev->dev_state));
+ seq_printf(m, "hbm: %s\n", mei_hbm_state_str(dev->hbm_state));
if (dev->hbm_state >= MEI_HBM_ENUM_CLIENTS &&
dev->hbm_state <= MEI_HBM_STARTED) {
- pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n");
- pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n",
- dev->hbm_f_pg_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n",
- dev->hbm_f_dc_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tIE: %01d\n",
- dev->hbm_f_ie_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n",
- dev->hbm_f_dot_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n",
- dev->hbm_f_ev_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n",
- dev->hbm_f_fa_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n",
- dev->hbm_f_os_supported);
- pos += scnprintf(buf + pos, bufsz - pos, "\tDR: %01d\n",
- dev->hbm_f_dr_supported);
+ seq_puts(m, "hbm features:\n");
+ seq_printf(m, "\tPG: %01d\n", dev->hbm_f_pg_supported);
+ seq_printf(m, "\tDC: %01d\n", dev->hbm_f_dc_supported);
+ seq_printf(m, "\tIE: %01d\n", dev->hbm_f_ie_supported);
+ seq_printf(m, "\tDOT: %01d\n", dev->hbm_f_dot_supported);
+ seq_printf(m, "\tEV: %01d\n", dev->hbm_f_ev_supported);
+ seq_printf(m, "\tFA: %01d\n", dev->hbm_f_fa_supported);
+ seq_printf(m, "\tOS: %01d\n", dev->hbm_f_os_supported);
+ seq_printf(m, "\tDR: %01d\n", dev->hbm_f_dr_supported);
}
- pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n",
- mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED",
- mei_pg_state_str(mei_pg_state(dev)));
- ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos);
- kfree(buf);
- return ret;
+ seq_printf(m, "pg: %s, %s\n",
+ mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED",
+ mei_pg_state_str(mei_pg_state(dev)));
+ return 0;
}
-static const struct file_operations mei_dbgfs_fops_devstate = {
- .open = simple_open,
- .read = mei_dbgfs_read_devstate,
- .llseek = generic_file_llseek,
-};
+DEFINE_SHOW_ATTRIBUTE(mei_dbgfs_devstate);
static ssize_t mei_dbgfs_write_allow_fa(struct file *file,
const char __user *user_buf,
@@ -208,7 +129,7 @@ static ssize_t mei_dbgfs_write_allow_fa(struct file *file,
return ret;
}
-static const struct file_operations mei_dbgfs_fops_allow_fa = {
+static const struct file_operations mei_dbgfs_allow_fa_fops = {
.open = simple_open,
.read = debugfs_read_file_bool,
.write = mei_dbgfs_write_allow_fa,
@@ -247,26 +168,26 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
dev->dbgfs_dir = dir;
f = debugfs_create_file("meclients", S_IRUSR, dir,
- dev, &mei_dbgfs_fops_meclients);
+ dev, &mei_dbgfs_meclients_fops);
if (!f) {
dev_err(dev->dev, "meclients: registration failed\n");
goto err;
}
f = debugfs_create_file("active", S_IRUSR, dir,
- dev, &mei_dbgfs_fops_active);
+ dev, &mei_dbgfs_active_fops);
if (!f) {
dev_err(dev->dev, "active: registration failed\n");
goto err;
}
f = debugfs_create_file("devstate", S_IRUSR, dir,
- dev, &mei_dbgfs_fops_devstate);
+ dev, &mei_dbgfs_devstate_fops);
if (!f) {
dev_err(dev->dev, "devstate: registration failed\n");
goto err;
}
f = debugfs_create_file("allow_fixed_address", S_IRUSR | S_IWUSR, dir,
&dev->allow_fixed_address,
- &mei_dbgfs_fops_allow_fa);
+ &mei_dbgfs_allow_fa_fops);
if (!f) {
dev_err(dev->dev, "allow_fixed_address: registration failed\n");
goto err;
@@ -276,4 +197,3 @@ err:
mei_dbgfs_deregister(dev);
return -ENODEV;
}
-
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
index b07000202d4a..ed816939fb32 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.c
+++ b/drivers/misc/mei/hdcp/mei_hdcp.c
@@ -2,7 +2,7 @@
/*
* Copyright © 2019 Intel Corporation
*
- * Mei_hdcp.c: HDCP client driver for mei bus
+ * mei_hdcp.c: HDCP client driver for mei bus
*
* Author:
* Ramalingam C <ramalingam.c@intel.com>
@@ -11,12 +11,9 @@
/**
* DOC: MEI_HDCP Client Driver
*
- * This is a client driver to the mei_bus to make the HDCP2.2 services of
- * ME FW available for the interested consumers like I915.
- *
- * This module will act as a translation layer between HDCP protocol
- * implementor(I915) and ME FW by translating HDCP2.2 authentication
- * messages to ME FW command payloads and vice versa.
+ * The mei_hdcp driver acts as a translation layer between HDCP 2.2
+ * protocol implementer (I915) and ME FW by translating HDCP2.2
+ * negotiation messages to ME FW command payloads and vice versa.
*/
#include <linux/module.h>
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
index 490e3bdc1941..e2278bf9f11d 100644
--- a/drivers/misc/mic/scif/scif_main.c
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -133,6 +133,7 @@ static int scif_setup_scifdev(void)
static void scif_destroy_scifdev(void)
{
kfree(scif_dev);
+ scif_dev = NULL;
}
static int scif_probe(struct scif_hw_dev *sdev)
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
index 7fb6d39d4c5a..1916fa65f2f2 100644
--- a/drivers/misc/ocxl/Kconfig
+++ b/drivers/misc/ocxl/Kconfig
@@ -5,7 +5,6 @@
config OCXL_BASE
bool
- default n
select PPC_COPRO_BASE
config OCXL
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index bab9c9364184..994563a078eb 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -69,6 +69,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
{
int rc;
+ unsigned long pidr = 0;
// Locks both status & tidr
mutex_lock(&ctx->status_mutex);
@@ -77,9 +78,11 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
goto out;
}
- rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
- mm->context.id, ctx->tidr, amr, mm,
- xsl_fault_error, ctx);
+ if (mm)
+ pidr = mm->context.id;
+
+ rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr,
+ amr, mm, xsl_fault_error, ctx);
if (rc)
goto out;
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index cce5b0d64505..58d111afd9f6 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -224,6 +224,17 @@ static irqreturn_t xsl_fault_handler(int irq, void *data)
ack_irq(spa, ADDRESS_ERROR);
return IRQ_HANDLED;
}
+
+ if (!pe_data->mm) {
+ /*
+ * translation fault from a kernel context - an OpenCAPI
+ * device tried to access a bad kernel address
+ */
+ rcu_read_unlock();
+ pr_warn("Unresolved OpenCAPI xsl fault in kernel context\n");
+ ack_irq(spa, ADDRESS_ERROR);
+ return IRQ_HANDLED;
+ }
WARN_ON(pe_data->mm->context.id != pid);
if (mmget_not_zero(pe_data->mm)) {
@@ -523,7 +534,13 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
pe->amr = cpu_to_be64(amr);
pe->software_state = cpu_to_be32(SPA_PE_VALID);
- mm_context_add_copro(mm);
+ /*
+ * For user contexts, register a copro so that TLBIs are seen
+ * by the nest MMU. If we have a kernel context, TLBIs are
+ * already global.
+ */
+ if (mm)
+ mm_context_add_copro(mm);
/*
* Barrier is to make sure PE is visible in the SPA before it
* is used by the device. It also helps with the global TLBI
@@ -546,7 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
* have a reference on mm_users. Incrementing mm_count solves
* the problem.
*/
- mmgrab(mm);
+ if (mm)
+ mmgrab(mm);
trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
unlock:
mutex_unlock(&spa->spa_lock);
@@ -652,8 +670,10 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
if (!pe_data) {
WARN(1, "Couldn't find pe data when removing PE\n");
} else {
- mm_context_remove_copro(pe_data->mm);
- mmdrop(pe_data->mm);
+ if (pe_data->mm) {
+ mm_context_remove_copro(pe_data->mm);
+ mmdrop(pe_data->mm);
+ }
kfree_rcu(pe_data, rcu);
}
unlock:
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 3eba1c420cc0..782ce95d3f17 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -70,7 +70,7 @@ xpc_get_rsvd_page_pa(int nasid)
unsigned long rp_pa = nasid; /* seed with nasid */
size_t len = 0;
size_t buf_len = 0;
- void *buf = buf;
+ void *buf = NULL;
void *buf_base = NULL;
enum xp_retval (*get_partition_rsvd_page_pa)
(void *, u64 *, unsigned long *, size_t *) =
diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c
index 5b7afd6190fe..09db397df287 100644
--- a/drivers/misc/tsl2550.c
+++ b/drivers/misc/tsl2550.c
@@ -336,7 +336,7 @@ static struct i2c_driver tsl2550_driver;
static int tsl2550_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
- struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+ struct i2c_adapter *adapter = client->adapter;
struct tsl2550_data *data;
int *opmode, err = 0;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index ad807d5a3141..043eed845246 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -28,6 +28,8 @@
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/mount.h>
+#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>
@@ -38,25 +40,20 @@ MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
-/*
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
- * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
- * allocation failure warnings. Disallow access to emergency low-memory pools.
- */
-#define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
- __GFP_NOMEMALLOC)
+static bool __read_mostly vmwballoon_shrinker_enable;
+module_param(vmwballoon_shrinker_enable, bool, 0444);
+MODULE_PARM_DESC(vmwballoon_shrinker_enable,
+ "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");
-/*
- * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
- * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
- * failure warnings. Disallow access to emergency low-memory pools.
- */
-#define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \
- __GFP_NOMEMALLOC|__GFP_NORETRY)
+/* Delay in seconds after shrink before inflation. */
+#define VMBALLOON_SHRINK_DELAY (5)
/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED 16
+/* Magic number for the balloon mount-point */
+#define BALLOON_VMW_MAGIC 0x0ba11007
+
/*
* Hypervisor communication port definitions.
*/
@@ -229,29 +226,26 @@ enum vmballoon_stat_general {
VMW_BALLOON_STAT_TIMER,
VMW_BALLOON_STAT_DOORBELL,
VMW_BALLOON_STAT_RESET,
- VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET
+ VMW_BALLOON_STAT_SHRINK,
+ VMW_BALLOON_STAT_SHRINK_FREE,
+ VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
};
#define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1)
-
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);
struct vmballoon_ctl {
struct list_head pages;
struct list_head refused_pages;
+ struct list_head prealloc_pages;
unsigned int n_refused_pages;
unsigned int n_pages;
enum vmballoon_page_size_type page_size;
enum vmballoon_op op;
};
-struct vmballoon_page_size {
- /* list of reserved physical pages */
- struct list_head pages;
-};
-
/**
* struct vmballoon_batch_entry - a batch entry for lock or unlock.
*
@@ -266,8 +260,6 @@ struct vmballoon_batch_entry {
} __packed;
struct vmballoon {
- struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
-
/**
* @max_page_size: maximum supported page size for ballooning.
*
@@ -340,6 +332,15 @@ struct vmballoon {
*/
struct page *page;
+ /**
+ * @shrink_timeout: timeout until the next inflation.
+ *
+ * After an shrink event, indicates the time in jiffies after which
+ * inflation is allowed again. Can be written concurrently with reads,
+ * so must use READ_ONCE/WRITE_ONCE when accessing.
+ */
+ unsigned long shrink_timeout;
+
/* statistics */
struct vmballoon_stats *stats;
@@ -348,9 +349,21 @@ struct vmballoon {
struct dentry *dbg_entry;
#endif
+ /**
+ * @b_dev_info: balloon device information descriptor.
+ */
+ struct balloon_dev_info b_dev_info;
+
struct delayed_work dwork;
/**
+ * @huge_pages - list of the inflated 2MB pages.
+ *
+ * Protected by @b_dev_info.pages_lock .
+ */
+ struct list_head huge_pages;
+
+ /**
* @vmci_doorbell.
*
* Protected by @conf_sem.
@@ -368,6 +381,20 @@ struct vmballoon {
* Lock ordering: @conf_sem -> @comm_lock .
*/
spinlock_t comm_lock;
+
+ /**
+ * @shrinker: shrinker interface that is used to avoid over-inflation.
+ */
+ struct shrinker shrinker;
+
+ /**
+ * @shrinker_registered: whether the shrinker was registered.
+ *
+ * The shrinker interface does not handle gracefully the removal of
+ * shrinker that was not registered before. This indication allows to
+ * simplify the unregistration process.
+ */
+ bool shrinker_registered;
};
static struct vmballoon balloon;
@@ -642,15 +669,25 @@ static int vmballoon_alloc_page_list(struct vmballoon *b,
unsigned int i;
for (i = 0; i < req_n_pages; i++) {
- if (ctl->page_size == VMW_BALLOON_2M_PAGE)
- page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
- VMW_BALLOON_2M_ORDER);
- else
- page = alloc_page(VMW_PAGE_ALLOC_FLAGS);
-
- /* Update statistics */
- vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
- ctl->page_size);
+ /*
+ * First check if we happen to have pages that were allocated
+ * before. This happens when 2MB page rejected during inflation
+ * by the hypervisor, and then split into 4KB pages.
+ */
+ if (!list_empty(&ctl->prealloc_pages)) {
+ page = list_first_entry(&ctl->prealloc_pages,
+ struct page, lru);
+ list_del(&page->lru);
+ } else {
+ if (ctl->page_size == VMW_BALLOON_2M_PAGE)
+ page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
+ __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
+ else
+ page = balloon_page_alloc();
+
+ vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
+ ctl->page_size);
+ }
if (page) {
vmballoon_mark_page_offline(page, ctl->page_size);
@@ -896,7 +933,8 @@ static void vmballoon_release_page_list(struct list_head *page_list,
__free_pages(page, vmballoon_page_order(page_size));
}
- *n_pages = 0;
+ if (n_pages)
+ *n_pages = 0;
}
@@ -942,6 +980,10 @@ static int64_t vmballoon_change(struct vmballoon *b)
size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
return 0;
+ /* If an out-of-memory recently occurred, inflation is disallowed. */
+ if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
+ return 0;
+
return target - size;
}
@@ -961,9 +1003,22 @@ static void vmballoon_enqueue_page_list(struct vmballoon *b,
unsigned int *n_pages,
enum vmballoon_page_size_type page_size)
{
- struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
+ unsigned long flags;
+
+ if (page_size == VMW_BALLOON_4K_PAGE) {
+ balloon_page_list_enqueue(&b->b_dev_info, pages);
+ } else {
+ /*
+ * Keep the huge pages in a local list which is not available
+ * for the balloon compaction mechanism.
+ */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ list_splice_init(pages, &b->huge_pages);
+ __count_vm_events(BALLOON_INFLATE, *n_pages *
+ vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+ }
- list_splice_init(pages, &page_size_info->pages);
*n_pages = 0;
}
@@ -986,19 +1041,58 @@ static void vmballoon_dequeue_page_list(struct vmballoon *b,
enum vmballoon_page_size_type page_size,
unsigned int n_req_pages)
{
- struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size];
struct page *page, *tmp;
unsigned int i = 0;
+ unsigned long flags;
+
+ /* In the case of 4k pages, use the compaction infrastructure */
+ if (page_size == VMW_BALLOON_4K_PAGE) {
+ *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
+ n_req_pages);
+ return;
+ }
- list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) {
+ /* 2MB pages */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+ list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
list_move(&page->lru, pages);
if (++i == n_req_pages)
break;
}
+
+ __count_vm_events(BALLOON_DEFLATE,
+ i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
*n_pages = i;
}
/**
+ * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
+ *
+ * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
+ * due to one or few 4KB pages. These 2MB pages may keep being allocated and
+ * then being refused. To prevent this case, this function splits the refused
+ * pages into 4KB pages and adds them into @prealloc_pages list.
+ *
+ * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
+ */
+static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
+{
+ struct page *page, *tmp;
+ unsigned int i, order;
+
+ order = vmballoon_page_order(ctl->page_size);
+
+ list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
+ list_del(&page->lru);
+ split_page(page, order);
+ for (i = 0; i < (1 << order); i++)
+ list_add(&page[i].lru, &ctl->prealloc_pages);
+ }
+ ctl->n_refused_pages = 0;
+}
+
+/**
* vmballoon_inflate() - Inflate the balloon towards its target size.
*
* @b: pointer to the balloon.
@@ -1009,6 +1103,7 @@ static void vmballoon_inflate(struct vmballoon *b)
struct vmballoon_ctl ctl = {
.pages = LIST_HEAD_INIT(ctl.pages),
.refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
+ .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
.page_size = b->max_page_size,
.op = VMW_BALLOON_INFLATE
};
@@ -1056,10 +1151,10 @@ static void vmballoon_inflate(struct vmballoon *b)
break;
/*
- * Ignore errors from locking as we now switch to 4k
- * pages and we might get different errors.
+ * Split the refused pages to 4k. This will also empty
+ * the refused pages list.
*/
- vmballoon_release_refused_pages(b, &ctl);
+ vmballoon_split_refused_pages(&ctl);
ctl.page_size--;
}
@@ -1073,6 +1168,8 @@ static void vmballoon_inflate(struct vmballoon *b)
*/
if (ctl.n_refused_pages != 0)
vmballoon_release_refused_pages(b, &ctl);
+
+ vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
}
/**
@@ -1411,6 +1508,90 @@ static void vmballoon_work(struct work_struct *work)
}
+/**
+ * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
+ * @shrinker: pointer to the balloon shrinker.
+ * @sc: page reclaim information.
+ *
+ * Returns: number of pages that were freed during deflation.
+ */
+static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct vmballoon *b = &balloon;
+ unsigned long deflated_frames;
+
+ pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size));
+
+ vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);
+
+ /*
+ * If the lock is also contended for read, we cannot easily reclaim and
+ * we bail out.
+ */
+ if (!down_read_trylock(&b->conf_sem))
+ return 0;
+
+ deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);
+
+ vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
+ deflated_frames);
+
+ /*
+ * Delay future inflation for some time to mitigate the situations in
+ * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
+ * the access is asynchronous.
+ */
+ WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);
+
+ up_read(&b->conf_sem);
+
+ return deflated_frames;
+}
+
+/**
+ * vmballoon_shrinker_count() - return the number of ballooned pages.
+ * @shrinker: pointer to the balloon shrinker.
+ * @sc: page reclaim information.
+ *
+ * Returns: number of 4k pages that are allocated for the balloon and can
+ * therefore be reclaimed under pressure.
+ */
+static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct vmballoon *b = &balloon;
+
+ return atomic64_read(&b->size);
+}
+
+static void vmballoon_unregister_shrinker(struct vmballoon *b)
+{
+ if (b->shrinker_registered)
+ unregister_shrinker(&b->shrinker);
+ b->shrinker_registered = false;
+}
+
+static int vmballoon_register_shrinker(struct vmballoon *b)
+{
+ int r;
+
+ /* Do nothing if the shrinker is not enabled */
+ if (!vmwballoon_shrinker_enable)
+ return 0;
+
+ b->shrinker.scan_objects = vmballoon_shrinker_scan;
+ b->shrinker.count_objects = vmballoon_shrinker_count;
+ b->shrinker.seeks = DEFAULT_SEEKS;
+
+ r = register_shrinker(&b->shrinker);
+
+ if (r == 0)
+ b->shrinker_registered = true;
+
+ return r;
+}
+
/*
* DEBUGFS Interface
*/
@@ -1428,6 +1609,8 @@ static const char * const vmballoon_stat_names[] = {
[VMW_BALLOON_STAT_TIMER] = "timer",
[VMW_BALLOON_STAT_DOORBELL] = "doorbell",
[VMW_BALLOON_STAT_RESET] = "reset",
+ [VMW_BALLOON_STAT_SHRINK] = "shrink",
+ [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree"
};
static int vmballoon_enable_stats(struct vmballoon *b)
@@ -1552,9 +1735,204 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_BALLOON_COMPACTION
+
+static struct dentry *vmballoon_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ static const struct dentry_operations ops = {
+ .d_dname = simple_dname,
+ };
+
+ return mount_pseudo(fs_type, "balloon-vmware:", NULL, &ops,
+ BALLOON_VMW_MAGIC);
+}
+
+static struct file_system_type vmballoon_fs = {
+ .name = "balloon-vmware",
+ .mount = vmballoon_mount,
+ .kill_sb = kill_anon_super,
+};
+
+static struct vfsmount *vmballoon_mnt;
+
+/**
+ * vmballoon_migratepage() - migrates a balloon page.
+ * @b_dev_info: balloon device information descriptor.
+ * @newpage: the page to which @page should be migrated.
+ * @page: a ballooned page that should be migrated.
+ * @mode: migration mode, ignored.
+ *
+ * This function is really open-coded, but that is according to the interface
+ * that balloon_compaction provides.
+ *
+ * Return: zero on success, -EAGAIN when migration cannot be performed
+ * momentarily, and -EBUSY if migration failed and should be retried
+ * with that specific page.
+ */
+static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
+{
+ unsigned long status, flags;
+ struct vmballoon *b;
+ int ret;
+
+ b = container_of(b_dev_info, struct vmballoon, b_dev_info);
+
+ /*
+ * If the semaphore is taken, there is ongoing configuration change
+ * (i.e., balloon reset), so try again.
+ */
+ if (!down_read_trylock(&b->conf_sem))
+ return -EAGAIN;
+
+ spin_lock(&b->comm_lock);
+ /*
+ * We must start by deflating and not inflating, as otherwise the
+ * hypervisor may tell us that it has enough memory and the new page is
+ * not needed. Since the old page is isolated, we cannot use the list
+ * interface to unlock it, as the LRU field is used for isolation.
+ * Instead, we use the native interface directly.
+ */
+ vmballoon_add_page(b, 0, page);
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
+ VMW_BALLOON_DEFLATE);
+
+ if (status == VMW_BALLOON_SUCCESS)
+ status = vmballoon_status_page(b, 0, &page);
+
+ /*
+ * If a failure happened, let the migration mechanism know that it
+ * should not retry.
+ */
+ if (status != VMW_BALLOON_SUCCESS) {
+ spin_unlock(&b->comm_lock);
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ /*
+ * The page is isolated, so it is safe to delete it without holding
+ * @pages_lock . We keep holding @comm_lock since we will need it in a
+ * second.
+ */
+ balloon_page_delete(page);
+
+ put_page(page);
+
+ /* Inflate */
+ vmballoon_add_page(b, 0, newpage);
+ status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
+ VMW_BALLOON_INFLATE);
+
+ if (status == VMW_BALLOON_SUCCESS)
+ status = vmballoon_status_page(b, 0, &newpage);
+
+ spin_unlock(&b->comm_lock);
+
+ if (status != VMW_BALLOON_SUCCESS) {
+ /*
+ * A failure happened. While we can deflate the page we just
+ * inflated, this deflation can also encounter an error. Instead
+ * we will decrease the size of the balloon to reflect the
+ * change and report failure.
+ */
+ atomic64_dec(&b->size);
+ ret = -EBUSY;
+ } else {
+ /*
+ * Success. Take a reference for the page, and we will add it to
+ * the list after acquiring the lock.
+ */
+ get_page(newpage);
+ ret = MIGRATEPAGE_SUCCESS;
+ }
+
+ /* Update the balloon list under the @pages_lock */
+ spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
+
+ /*
+ * On inflation success, we already took a reference for the @newpage.
+ * If we succeed just insert it to the list and update the statistics
+ * under the lock.
+ */
+ if (ret == MIGRATEPAGE_SUCCESS) {
+ balloon_page_insert(&b->b_dev_info, newpage);
+ __count_vm_event(BALLOON_MIGRATE);
+ }
+
+ /*
+ * We deflated successfully, so regardless to the inflation success, we
+ * need to reduce the number of isolated_pages.
+ */
+ b->b_dev_info.isolated_pages--;
+ spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
+
+out_unlock:
+ up_read(&b->conf_sem);
+ return ret;
+}
+
+/**
+ * vmballoon_compaction_deinit() - removes compaction related data.
+ *
+ * @b: pointer to the balloon.
+ */
+static void vmballoon_compaction_deinit(struct vmballoon *b)
+{
+ if (!IS_ERR(b->b_dev_info.inode))
+ iput(b->b_dev_info.inode);
+
+ b->b_dev_info.inode = NULL;
+ kern_unmount(vmballoon_mnt);
+ vmballoon_mnt = NULL;
+}
+
+/**
+ * vmballoon_compaction_init() - initialized compaction for the balloon.
+ *
+ * @b: pointer to the balloon.
+ *
+ * If during the initialization a failure occurred, this function does not
+ * perform cleanup. The caller must call vmballoon_compaction_deinit() in this
+ * case.
+ *
+ * Return: zero on success or error code on failure.
+ */
+static __init int vmballoon_compaction_init(struct vmballoon *b)
+{
+ vmballoon_mnt = kern_mount(&vmballoon_fs);
+ if (IS_ERR(vmballoon_mnt))
+ return PTR_ERR(vmballoon_mnt);
+
+ b->b_dev_info.migratepage = vmballoon_migratepage;
+ b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
+
+ if (IS_ERR(b->b_dev_info.inode))
+ return PTR_ERR(b->b_dev_info.inode);
+
+ b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+ return 0;
+}
+
+#else /* CONFIG_BALLOON_COMPACTION */
+
+static void vmballoon_compaction_deinit(struct vmballoon *b)
+{
+}
+
+static int vmballoon_compaction_init(struct vmballoon *b)
+{
+ return 0;
+}
+
+#endif /* CONFIG_BALLOON_COMPACTION */
+
static int __init vmballoon_init(void)
{
- enum vmballoon_page_size_type page_size;
int error;
/*
@@ -1564,17 +1942,26 @@ static int __init vmballoon_init(void)
if (x86_hyper_type != X86_HYPER_VMWARE)
return -ENODEV;
- for (page_size = VMW_BALLOON_4K_PAGE;
- page_size <= VMW_BALLOON_LAST_SIZE; page_size++)
- INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages);
-
-
INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
+ error = vmballoon_register_shrinker(&balloon);
+ if (error)
+ goto fail;
+
error = vmballoon_debugfs_init(&balloon);
if (error)
- return error;
+ goto fail;
+
+ /*
+ * Initialization of compaction must be done after the call to
+ * balloon_devinfo_init() .
+ */
+ balloon_devinfo_init(&balloon.b_dev_info);
+ error = vmballoon_compaction_init(&balloon);
+ if (error)
+ goto fail;
+ INIT_LIST_HEAD(&balloon.huge_pages);
spin_lock_init(&balloon.comm_lock);
init_rwsem(&balloon.conf_sem);
balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
@@ -1585,6 +1972,10 @@ static int __init vmballoon_init(void)
queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
return 0;
+fail:
+ vmballoon_unregister_shrinker(&balloon);
+ vmballoon_compaction_deinit(&balloon);
+ return error;
}
/*
@@ -1597,6 +1988,7 @@ late_initcall(vmballoon_init);
static void __exit vmballoon_exit(void)
{
+ vmballoon_unregister_shrinker(&balloon);
vmballoon_vmci_cleanup(&balloon);
cancel_delayed_work_sync(&balloon.dwork);
@@ -1609,5 +2001,8 @@ static void __exit vmballoon_exit(void)
*/
vmballoon_send_start(&balloon, 0);
vmballoon_pop(&balloon);
+
+ /* Only once we popped the balloon, compaction can be deinit */
+ vmballoon_compaction_deinit(&balloon);
}
module_exit(vmballoon_exit);
diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c
index 300ed69fe2c7..16695366ec92 100644
--- a/drivers/misc/vmw_vmci/vmci_context.c
+++ b/drivers/misc/vmw_vmci/vmci_context.c
@@ -21,6 +21,9 @@
#include "vmci_driver.h"
#include "vmci_event.h"
+/* Use a wide upper bound for the maximum contexts. */
+#define VMCI_MAX_CONTEXTS 2000
+
/*
* List of current VMCI contexts. Contexts can be added by
* vmci_ctx_create() and removed via vmci_ctx_destroy().
@@ -117,19 +120,22 @@ struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
/* Initialize host-specific VMCI context. */
init_waitqueue_head(&context->host_context.wait_queue);
- context->queue_pair_array = vmci_handle_arr_create(0);
+ context->queue_pair_array =
+ vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
if (!context->queue_pair_array) {
error = -ENOMEM;
goto err_free_ctx;
}
- context->doorbell_array = vmci_handle_arr_create(0);
+ context->doorbell_array =
+ vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
if (!context->doorbell_array) {
error = -ENOMEM;
goto err_free_qp_array;
}
- context->pending_doorbell_array = vmci_handle_arr_create(0);
+ context->pending_doorbell_array =
+ vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
if (!context->pending_doorbell_array) {
error = -ENOMEM;
goto err_free_db_array;
@@ -204,7 +210,7 @@ static int ctx_fire_notification(u32 context_id, u32 priv_flags)
* We create an array to hold the subscribers we find when
* scanning through all contexts.
*/
- subscriber_array = vmci_handle_arr_create(0);
+ subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
if (subscriber_array == NULL)
return VMCI_ERROR_NO_MEM;
@@ -623,20 +629,26 @@ int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
spin_lock(&context->lock);
- list_for_each_entry(n, &context->notifier_list, node) {
- if (vmci_handle_is_equal(n->handle, notifier->handle)) {
- exists = true;
- break;
+ if (context->n_notifiers < VMCI_MAX_CONTEXTS) {
+ list_for_each_entry(n, &context->notifier_list, node) {
+ if (vmci_handle_is_equal(n->handle, notifier->handle)) {
+ exists = true;
+ break;
+ }
}
- }
- if (exists) {
- kfree(notifier);
- result = VMCI_ERROR_ALREADY_EXISTS;
+ if (exists) {
+ kfree(notifier);
+ result = VMCI_ERROR_ALREADY_EXISTS;
+ } else {
+ list_add_tail_rcu(&notifier->node,
+ &context->notifier_list);
+ context->n_notifiers++;
+ result = VMCI_SUCCESS;
+ }
} else {
- list_add_tail_rcu(&notifier->node, &context->notifier_list);
- context->n_notifiers++;
- result = VMCI_SUCCESS;
+ kfree(notifier);
+ result = VMCI_ERROR_NO_MEM;
}
spin_unlock(&context->lock);
@@ -721,8 +733,7 @@ static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
u32 *buf_size, void **pbuf)
{
struct dbell_cpt_state *dbells;
- size_t n_doorbells;
- int i;
+ u32 i, n_doorbells;
n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
if (n_doorbells > 0) {
@@ -860,7 +871,8 @@ int vmci_ctx_rcv_notifications_get(u32 context_id,
spin_lock(&context->lock);
*db_handle_array = context->pending_doorbell_array;
- context->pending_doorbell_array = vmci_handle_arr_create(0);
+ context->pending_doorbell_array =
+ vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
if (!context->pending_doorbell_array) {
context->pending_doorbell_array = *db_handle_array;
*db_handle_array = NULL;
@@ -942,12 +954,11 @@ int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
return VMCI_ERROR_NOT_FOUND;
spin_lock(&context->lock);
- if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) {
- vmci_handle_arr_append_entry(&context->doorbell_array, handle);
- result = VMCI_SUCCESS;
- } else {
+ if (!vmci_handle_arr_has_entry(context->doorbell_array, handle))
+ result = vmci_handle_arr_append_entry(&context->doorbell_array,
+ handle);
+ else
result = VMCI_ERROR_DUPLICATE_ENTRY;
- }
spin_unlock(&context->lock);
vmci_ctx_put(context);
@@ -1083,15 +1094,16 @@ int vmci_ctx_notify_dbell(u32 src_cid,
if (!vmci_handle_arr_has_entry(
dst_context->pending_doorbell_array,
handle)) {
- vmci_handle_arr_append_entry(
+ result = vmci_handle_arr_append_entry(
&dst_context->pending_doorbell_array,
handle);
-
- ctx_signal_notify(dst_context);
- wake_up(&dst_context->host_context.wait_queue);
-
+ if (result == VMCI_SUCCESS) {
+ ctx_signal_notify(dst_context);
+ wake_up(&dst_context->host_context.wait_queue);
+ }
+ } else {
+ result = VMCI_SUCCESS;
}
- result = VMCI_SUCCESS;
}
spin_unlock(&dst_context->lock);
}
@@ -1118,13 +1130,11 @@ int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
if (context == NULL || vmci_handle_is_invalid(handle))
return VMCI_ERROR_INVALID_ARGS;
- if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) {
- vmci_handle_arr_append_entry(&context->queue_pair_array,
- handle);
- result = VMCI_SUCCESS;
- } else {
+ if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle))
+ result = vmci_handle_arr_append_entry(
+ &context->queue_pair_array, handle);
+ else
result = VMCI_ERROR_DUPLICATE_ENTRY;
- }
return result;
}
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c
index c527388f5d7b..de7fee7ead1b 100644
--- a/drivers/misc/vmw_vmci/vmci_handle_array.c
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.c
@@ -8,24 +8,29 @@
#include <linux/slab.h>
#include "vmci_handle_array.h"
-static size_t handle_arr_calc_size(size_t capacity)
+static size_t handle_arr_calc_size(u32 capacity)
{
- return sizeof(struct vmci_handle_arr) +
+ return VMCI_HANDLE_ARRAY_HEADER_SIZE +
capacity * sizeof(struct vmci_handle);
}
-struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity)
+struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity)
{
struct vmci_handle_arr *array;
+ if (max_capacity == 0 || capacity > max_capacity)
+ return NULL;
+
if (capacity == 0)
- capacity = VMCI_HANDLE_ARRAY_DEFAULT_SIZE;
+ capacity = min((u32)VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY,
+ max_capacity);
array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC);
if (!array)
return NULL;
array->capacity = capacity;
+ array->max_capacity = max_capacity;
array->size = 0;
return array;
@@ -36,27 +41,34 @@ void vmci_handle_arr_destroy(struct vmci_handle_arr *array)
kfree(array);
}
-void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
- struct vmci_handle handle)
+int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+ struct vmci_handle handle)
{
struct vmci_handle_arr *array = *array_ptr;
if (unlikely(array->size >= array->capacity)) {
/* reallocate. */
struct vmci_handle_arr *new_array;
- size_t new_capacity = array->capacity * VMCI_ARR_CAP_MULT;
- size_t new_size = handle_arr_calc_size(new_capacity);
+ u32 capacity_bump = min(array->max_capacity - array->capacity,
+ array->capacity);
+ size_t new_size = handle_arr_calc_size(array->capacity +
+ capacity_bump);
+
+ if (array->size >= array->max_capacity)
+ return VMCI_ERROR_NO_MEM;
new_array = krealloc(array, new_size, GFP_ATOMIC);
if (!new_array)
- return;
+ return VMCI_ERROR_NO_MEM;
- new_array->capacity = new_capacity;
+ new_array->capacity += capacity_bump;
*array_ptr = array = new_array;
}
array->entries[array->size] = handle;
array->size++;
+
+ return VMCI_SUCCESS;
}
/*
@@ -66,7 +78,7 @@ struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
struct vmci_handle entry_handle)
{
struct vmci_handle handle = VMCI_INVALID_HANDLE;
- size_t i;
+ u32 i;
for (i = 0; i < array->size; i++) {
if (vmci_handle_is_equal(array->entries[i], entry_handle)) {
@@ -101,7 +113,7 @@ struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array)
* Handle at given index, VMCI_INVALID_HANDLE if invalid index.
*/
struct vmci_handle
-vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index)
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index)
{
if (unlikely(index >= array->size))
return VMCI_INVALID_HANDLE;
@@ -112,7 +124,7 @@ vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index)
bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
struct vmci_handle entry_handle)
{
- size_t i;
+ u32 i;
for (i = 0; i < array->size; i++)
if (vmci_handle_is_equal(array->entries[i], entry_handle))
diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h
index bd1559a548e9..96193f85be5b 100644
--- a/drivers/misc/vmw_vmci/vmci_handle_array.h
+++ b/drivers/misc/vmw_vmci/vmci_handle_array.h
@@ -9,32 +9,41 @@
#define _VMCI_HANDLE_ARRAY_H_
#include <linux/vmw_vmci_defs.h>
+#include <linux/limits.h>
#include <linux/types.h>
-#define VMCI_HANDLE_ARRAY_DEFAULT_SIZE 4
-#define VMCI_ARR_CAP_MULT 2 /* Array capacity multiplier */
-
struct vmci_handle_arr {
- size_t capacity;
- size_t size;
+ u32 capacity;
+ u32 max_capacity;
+ u32 size;
+ u32 pad;
struct vmci_handle entries[];
};
-struct vmci_handle_arr *vmci_handle_arr_create(size_t capacity);
+#define VMCI_HANDLE_ARRAY_HEADER_SIZE \
+ offsetof(struct vmci_handle_arr, entries)
+/* Select a default capacity that results in a 64 byte sized array */
+#define VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY 6
+/* Make sure that the max array size can be expressed by a u32 */
+#define VMCI_HANDLE_ARRAY_MAX_CAPACITY \
+ ((U32_MAX - VMCI_HANDLE_ARRAY_HEADER_SIZE - 1) / \
+ sizeof(struct vmci_handle))
+
+struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity);
void vmci_handle_arr_destroy(struct vmci_handle_arr *array);
-void vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
- struct vmci_handle handle);
+int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr,
+ struct vmci_handle handle);
struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array,
struct vmci_handle
entry_handle);
struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array);
struct vmci_handle
-vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, size_t index);
+vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index);
bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array,
struct vmci_handle entry_handle);
struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array);
-static inline size_t vmci_handle_arr_get_size(
+static inline u32 vmci_handle_arr_get_size(
const struct vmci_handle_arr *array)
{
return array->size;
diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
new file mode 100644
index 000000000000..f257d3812110
--- /dev/null
+++ b/drivers/misc/xilinx_sdfec.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx SDFEC
+ *
+ * Copyright (C) 2019 Xilinx, Inc.
+ *
+ * Description:
+ * This driver is developed for SDFEC16 (Soft Decision FEC 16nm)
+ * IP. It exposes a char device which supports file operations
+ * like open(), close() and ioctl().
+ */
+
+#include <linux/miscdevice.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+
+#define DEV_NAME_LEN 12
+
+static struct idr dev_idr;
+static struct mutex dev_idr_lock;
+
+/**
+ * struct xsdfec_clks - For managing SD-FEC clocks
+ * @core_clk: Main processing clock for core
+ * @axi_clk: AXI4-Lite memory-mapped clock
+ * @din_words_clk: DIN Words AXI4-Stream Slave clock
+ * @din_clk: DIN AXI4-Stream Slave clock
+ * @dout_clk: DOUT Words AXI4-Stream Slave clock
+ * @dout_words_clk: DOUT AXI4-Stream Slave clock
+ * @ctrl_clk: Control AXI4-Stream Slave clock
+ * @status_clk: Status AXI4-Stream Slave clock
+ */
+struct xsdfec_clks {
+ struct clk *core_clk;
+ struct clk *axi_clk;
+ struct clk *din_words_clk;
+ struct clk *din_clk;
+ struct clk *dout_clk;
+ struct clk *dout_words_clk;
+ struct clk *ctrl_clk;
+ struct clk *status_clk;
+};
+
+/**
+ * struct xsdfec_dev - Driver data for SDFEC
+ * @regs: device physical base address
+ * @dev: pointer to device struct
+ * @miscdev: Misc device handle
+ * @error_data_lock: Error counter and states spinlock
+ * @clks: Clocks managed by the SDFEC driver
+ * @dev_name: Device name
+ * @dev_id: Device ID
+ *
+ * This structure contains necessary state for SDFEC driver to operate
+ */
+struct xsdfec_dev {
+ void __iomem *regs;
+ struct device *dev;
+ struct miscdevice miscdev;
+ /* Spinlock to protect state_updated and stats_updated */
+ spinlock_t error_data_lock;
+ struct xsdfec_clks clks;
+ char dev_name[DEV_NAME_LEN];
+ int dev_id;
+};
+
+static const struct file_operations xsdfec_fops = {
+ .owner = THIS_MODULE,
+};
+
+static int xsdfec_clk_init(struct platform_device *pdev,
+ struct xsdfec_clks *clks)
+{
+ int err;
+
+ clks->core_clk = devm_clk_get(&pdev->dev, "core_clk");
+ if (IS_ERR(clks->core_clk)) {
+ dev_err(&pdev->dev, "failed to get core_clk");
+ return PTR_ERR(clks->core_clk);
+ }
+
+ clks->axi_clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
+ if (IS_ERR(clks->axi_clk)) {
+ dev_err(&pdev->dev, "failed to get axi_clk");
+ return PTR_ERR(clks->axi_clk);
+ }
+
+ clks->din_words_clk = devm_clk_get(&pdev->dev, "s_axis_din_words_aclk");
+ if (IS_ERR(clks->din_words_clk)) {
+ if (PTR_ERR(clks->din_words_clk) != -ENOENT) {
+ err = PTR_ERR(clks->din_words_clk);
+ return err;
+ }
+ clks->din_words_clk = NULL;
+ }
+
+ clks->din_clk = devm_clk_get(&pdev->dev, "s_axis_din_aclk");
+ if (IS_ERR(clks->din_clk)) {
+ if (PTR_ERR(clks->din_clk) != -ENOENT) {
+ err = PTR_ERR(clks->din_clk);
+ return err;
+ }
+ clks->din_clk = NULL;
+ }
+
+ clks->dout_clk = devm_clk_get(&pdev->dev, "m_axis_dout_aclk");
+ if (IS_ERR(clks->dout_clk)) {
+ if (PTR_ERR(clks->dout_clk) != -ENOENT) {
+ err = PTR_ERR(clks->dout_clk);
+ return err;
+ }
+ clks->dout_clk = NULL;
+ }
+
+ clks->dout_words_clk =
+ devm_clk_get(&pdev->dev, "s_axis_dout_words_aclk");
+ if (IS_ERR(clks->dout_words_clk)) {
+ if (PTR_ERR(clks->dout_words_clk) != -ENOENT) {
+ err = PTR_ERR(clks->dout_words_clk);
+ return err;
+ }
+ clks->dout_words_clk = NULL;
+ }
+
+ clks->ctrl_clk = devm_clk_get(&pdev->dev, "s_axis_ctrl_aclk");
+ if (IS_ERR(clks->ctrl_clk)) {
+ if (PTR_ERR(clks->ctrl_clk) != -ENOENT) {
+ err = PTR_ERR(clks->ctrl_clk);
+ return err;
+ }
+ clks->ctrl_clk = NULL;
+ }
+
+ clks->status_clk = devm_clk_get(&pdev->dev, "m_axis_status_aclk");
+ if (IS_ERR(clks->status_clk)) {
+ if (PTR_ERR(clks->status_clk) != -ENOENT) {
+ err = PTR_ERR(clks->status_clk);
+ return err;
+ }
+ clks->status_clk = NULL;
+ }
+
+ err = clk_prepare_enable(clks->core_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable core_clk (%d)", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(clks->axi_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable axi_clk (%d)", err);
+ goto err_disable_core_clk;
+ }
+
+ err = clk_prepare_enable(clks->din_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable din_clk (%d)", err);
+ goto err_disable_axi_clk;
+ }
+
+ err = clk_prepare_enable(clks->din_words_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable din_words_clk (%d)", err);
+ goto err_disable_din_clk;
+ }
+
+ err = clk_prepare_enable(clks->dout_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable dout_clk (%d)", err);
+ goto err_disable_din_words_clk;
+ }
+
+ err = clk_prepare_enable(clks->dout_words_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable dout_words_clk (%d)",
+ err);
+ goto err_disable_dout_clk;
+ }
+
+ err = clk_prepare_enable(clks->ctrl_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable ctrl_clk (%d)", err);
+ goto err_disable_dout_words_clk;
+ }
+
+ err = clk_prepare_enable(clks->status_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable status_clk (%d)\n", err);
+ goto err_disable_ctrl_clk;
+ }
+
+ return err;
+
+err_disable_ctrl_clk:
+ clk_disable_unprepare(clks->ctrl_clk);
+err_disable_dout_words_clk:
+ clk_disable_unprepare(clks->dout_words_clk);
+err_disable_dout_clk:
+ clk_disable_unprepare(clks->dout_clk);
+err_disable_din_words_clk:
+ clk_disable_unprepare(clks->din_words_clk);
+err_disable_din_clk:
+ clk_disable_unprepare(clks->din_clk);
+err_disable_axi_clk:
+ clk_disable_unprepare(clks->axi_clk);
+err_disable_core_clk:
+ clk_disable_unprepare(clks->core_clk);
+
+ return err;
+}
+
+static void xsdfec_disable_all_clks(struct xsdfec_clks *clks)
+{
+ clk_disable_unprepare(clks->status_clk);
+ clk_disable_unprepare(clks->ctrl_clk);
+ clk_disable_unprepare(clks->dout_words_clk);
+ clk_disable_unprepare(clks->dout_clk);
+ clk_disable_unprepare(clks->din_words_clk);
+ clk_disable_unprepare(clks->din_clk);
+ clk_disable_unprepare(clks->core_clk);
+ clk_disable_unprepare(clks->axi_clk);
+}
+
+static void xsdfec_idr_remove(struct xsdfec_dev *xsdfec)
+{
+ mutex_lock(&dev_idr_lock);
+ idr_remove(&dev_idr, xsdfec->dev_id);
+ mutex_unlock(&dev_idr_lock);
+}
+
+static int xsdfec_probe(struct platform_device *pdev)
+{
+ struct xsdfec_dev *xsdfec;
+ struct device *dev;
+ struct resource *res;
+ int err;
+
+ xsdfec = devm_kzalloc(&pdev->dev, sizeof(*xsdfec), GFP_KERNEL);
+ if (!xsdfec)
+ return -ENOMEM;
+
+ xsdfec->dev = &pdev->dev;
+ spin_lock_init(&xsdfec->error_data_lock);
+
+ err = xsdfec_clk_init(pdev, &xsdfec->clks);
+ if (err)
+ return err;
+
+ dev = xsdfec->dev;
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ xsdfec->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(xsdfec->regs)) {
+ err = PTR_ERR(xsdfec->regs);
+ goto err_xsdfec_dev;
+ }
+
+ /* Save driver private data */
+ platform_set_drvdata(pdev, xsdfec);
+
+ mutex_lock(&dev_idr_lock);
+ err = idr_alloc(&dev_idr, xsdfec->dev_name, 0, 0, GFP_KERNEL);
+ mutex_unlock(&dev_idr_lock);
+ if (err < 0)
+ goto err_xsdfec_dev;
+ xsdfec->dev_id = err;
+
+ snprintf(xsdfec->dev_name, DEV_NAME_LEN, "xsdfec%d", xsdfec->dev_id);
+ xsdfec->miscdev.minor = MISC_DYNAMIC_MINOR;
+ xsdfec->miscdev.name = xsdfec->dev_name;
+ xsdfec->miscdev.fops = &xsdfec_fops;
+ xsdfec->miscdev.parent = dev;
+ err = misc_register(&xsdfec->miscdev);
+ if (err) {
+ dev_err(dev, "error:%d. Unable to register device", err);
+ goto err_xsdfec_idr;
+ }
+ return 0;
+
+err_xsdfec_idr:
+ xsdfec_idr_remove(xsdfec);
+err_xsdfec_dev:
+ xsdfec_disable_all_clks(&xsdfec->clks);
+ return err;
+}
+
+static int xsdfec_remove(struct platform_device *pdev)
+{
+ struct xsdfec_dev *xsdfec;
+
+ xsdfec = platform_get_drvdata(pdev);
+ misc_deregister(&xsdfec->miscdev);
+ xsdfec_idr_remove(xsdfec);
+ xsdfec_disable_all_clks(&xsdfec->clks);
+ return 0;
+}
+
+static const struct of_device_id xsdfec_of_match[] = {
+ {
+ .compatible = "xlnx,sd-fec-1.1",
+ },
+ { /* end of table */ }
+};
+MODULE_DEVICE_TABLE(of, xsdfec_of_match);
+
+static struct platform_driver xsdfec_driver = {
+ .driver = {
+ .name = "xilinx-sdfec",
+ .of_match_table = xsdfec_of_match,
+ },
+ .probe = xsdfec_probe,
+ .remove = xsdfec_remove,
+};
+
+static int __init xsdfec_init(void)
+{
+ int err;
+
+ mutex_init(&dev_idr_lock);
+ idr_init(&dev_idr);
+ err = platform_driver_register(&xsdfec_driver);
+ if (err < 0) {
+ pr_err("%s Unabled to register SDFEC driver", __func__);
+ return err;
+ }
+ return 0;
+}
+
+static void __exit xsdfec_exit(void)
+{
+ platform_driver_unregister(&xsdfec_driver);
+ idr_destroy(&dev_idr);
+}
+
+module_init(xsdfec_init);
+module_exit(xsdfec_exit);
+
+MODULE_AUTHOR("Xilinx, Inc");
+MODULE_DESCRIPTION("Xilinx SD-FEC16 Driver");
+MODULE_LICENSE("GPL");