diff options
author | Oded Gabbay <ogabbay@kernel.org> | 2021-04-02 01:43:18 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2021-04-09 14:09:24 +0300 |
commit | 639781dcab8261f39c7028db4ed4fd0e760d69fa (patch) | |
tree | adaa1b4c4e4eb50a89fe3a0de2c3ddda66795287 /drivers/misc | |
parent | e65448faf4cfeddd95a0e661aabf2fae1efc9831 (diff) | |
download | linux-639781dcab8261f39c7028db4ed4fd0e760d69fa.tar.gz linux-639781dcab8261f39c7028db4ed4fd0e760d69fa.tar.bz2 linux-639781dcab8261f39c7028db4ed4fd0e760d69fa.zip |
habanalabs/gaudi: add debugfs to DMA from the device
When trying to debug program, the user often needs to
dump large parts of the device's DRAM, which can reach to tens of GBs.
Because reading from the device's internal memory through the PCI BAR
is extremely slow, the debug can take hours.
Instead, we can provide the user to copy data through one of the DMA
engines. This will make the operation much faster.
Currently, only GAUDI is supported.
In GAUDI, we need to find a PCI DMA engine that is IDLE and set the
DMA as secured to be able to bypass our MMU as we currently don't
map the temporary buffer to the MMU.
Example bash one-line to dump entire HBM to file (~2 minutes):
for (( i=0x0; i < 0x800000000; i+=0x8000000 )); do \
printf '0x%x\n' $i | sudo tee /sys/kernel/debug/habanalabs/hl0/addr ; \
echo 0x8000000 | sudo tee /sys/kernel/debug/habanalabs/hl0/dma_size ; \
sudo cat /sys/kernel/debug/habanalabs/hl0/data_dma >> hbm.txt ; done
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/habanalabs/common/debugfs.c | 119 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 17 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 159 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 8 |
4 files changed, 298 insertions, 5 deletions
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 33de54092543..8381155578a0 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -9,8 +9,8 @@ #include "../include/hw_ip/mmu/mmu_general.h" #include <linux/pci.h> -#include <linux/debugfs.h> #include <linux/uaccess.h> +#include <linux/vmalloc.h> #define MMU_ADDR_BUF_SIZE 40 #define MMU_ASID_BUF_SIZE 10 @@ -457,6 +457,34 @@ out: return false; } +static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr, + u32 size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_start_addr, dram_end_addr; + + if (!hdev->mmu_enable) + return false; + + if (prop->dram_supports_virtual_memory) { + dram_start_addr = prop->dmmu.start_addr; + dram_end_addr = prop->dmmu.end_addr; + } else { + dram_start_addr = prop->dram_base_address; + dram_end_addr = prop->dram_end_address; + } + + if (hl_mem_area_inside_range(addr, size, dram_start_addr, + dram_end_addr)) + return true; + + if (hl_mem_area_inside_range(addr, size, prop->sram_base_address, + prop->sram_end_address)) + return true; + + return false; +} + static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, u64 *phys_addr) { @@ -599,6 +627,11 @@ static ssize_t hl_data_read64(struct file *f, char __user *buf, ssize_t rc; u64 val; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + if (*ppos) return 0; @@ -630,6 +663,11 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, u64 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; @@ -651,6 +689,63 @@ static ssize_t hl_data_write64(struct file *f, const char __user *buf, return count; } +static ssize_t hl_dma_size_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + ssize_t rc; + u32 size; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &size); + if (rc) + return rc; + + if (!size) { + dev_err(hdev->dev, "DMA read failed. size can't be 0\n"); + return -EINVAL; + } + + if (size > SZ_128M) { + dev_err(hdev->dev, + "DMA read failed. size can't be larger than 128MB\n"); + return -EINVAL; + } + + if (!hl_is_device_internal_memory_va(hdev, addr, size)) { + dev_err(hdev->dev, + "DMA read failed. Invalid 0x%010llx + 0x%08x\n", + addr, size); + return -EINVAL; + } + + /* Free the previous allocation, if there was any */ + entry->blob_desc.size = 0; + vfree(entry->blob_desc.data); + + entry->blob_desc.data = vmalloc(size); + if (!entry->blob_desc.data) + return -ENOMEM; + + rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size, + entry->blob_desc.data); + if (rc) { + dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr); + vfree(entry->blob_desc.data); + entry->blob_desc.data = NULL; + return -EIO; + } + + entry->blob_desc.size = size; + + return count; +} + static ssize_t hl_get_power_state(struct file *f, char __user *buf, size_t count, loff_t *ppos) { @@ -960,6 +1055,11 @@ static const struct file_operations hl_data64b_fops = { .write = hl_data_write64 }; +static const struct file_operations hl_dma_size_fops = { + .owner = THIS_MODULE, + .write = hl_dma_size_write +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1062,6 +1162,9 @@ void hl_debugfs_add_device(struct hl_device *hdev) if (!dev_entry->entry_arr) return; + dev_entry->blob_desc.size = 0; + dev_entry->blob_desc.data = NULL; + INIT_LIST_HEAD(&dev_entry->file_list); INIT_LIST_HEAD(&dev_entry->cb_list); INIT_LIST_HEAD(&dev_entry->cs_list); @@ -1164,6 +1267,17 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_security_violations_fops); + debugfs_create_file("dma_size", + 0200, + dev_entry->root, + dev_entry, + &hl_dma_size_fops); + + debugfs_create_blob("data_dma", + 0400, + dev_entry->root, + &dev_entry->blob_desc); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, @@ -1182,6 +1296,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev) debugfs_remove_recursive(entry->root); mutex_destroy(&entry->file_mutex); + + vfree(entry->blob_desc.data); + kfree(entry->entry_arr); } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 54d7735991c7..d89ae4c3d634 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -19,6 +19,7 @@ #include <linux/dma-direction.h> #include <linux/scatterlist.h> #include <linux/hashtable.h> +#include <linux/debugfs.h> #include <linux/bitfield.h> #include <linux/genalloc.h> #include <linux/sched/signal.h> @@ -854,8 +855,12 @@ enum div_select_defs { * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. - * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. - * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM/Host memory. + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM/Host memory. + * @debugfs_read64: debug interface for reading u64 from DRAM/SRAM/Host memory. + * @debugfs_write64: debug interface for writing u64 to DRAM/SRAM/Host memory. + * @debugfs_read_dma: debug interface for reading up to 2MB from the device's + * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. * @set_pll_profile: change PLL profile (manual/automatic). @@ -979,6 +984,8 @@ struct hl_asic_funcs { bool user_address, u64 *val); int (*debugfs_write64)(struct hl_device *hdev, u64 addr, bool user_address, u64 val); + int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_attr_grp); void (*handle_eqe)(struct hl_device *hdev, @@ -1569,12 +1576,13 @@ struct hl_debugfs_entry { * @userptr_spinlock: protects userptr_list. * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. + * @blob_desc: descriptor of blob * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. * @mmu_asid: ASID to use while translating in mmu_show. * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. - * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. + * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. + * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. */ struct hl_dbg_device_entry { struct dentry *root; @@ -1592,6 +1600,7 @@ struct hl_dbg_device_entry { spinlock_t userptr_spinlock; struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; + struct debugfs_blob_wrapper blob_desc; u64 addr; u64 mmu_addr; u32 mmu_asid; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index bd711c8d3874..03d3fb643e79 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6175,6 +6175,164 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, return rc; } +static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, + u32 size_to_dma, dma_addr_t dma_addr) +{ + u32 err_cause, val; + u64 dma_offset; + int rc; + + dma_offset = dma_id * DMA_CORE_OFFSET; + + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); + WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); + WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); + WREG32(mmDMA0_CORE_COMMIT + dma_offset, + (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); + + rc = hl_poll_timeout( + hdev, + mmDMA0_CORE_STS0 + dma_offset, + val, + ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), + 0, + 1000000); + + if (rc) { + dev_err(hdev->dev, + "DMA %d timed-out during reading of 0x%llx\n", + dma_id, addr); + return -EIO; + } + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + + return -EIO; + } + + return 0; +} + +static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr) +{ + u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 dma_offset, qm_offset; + dma_addr_t dma_addr; + void *kernel_addr; + bool is_eng_idle; + int rc, dma_id; + + kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( + hdev, SZ_2M, + &dma_addr, + GFP_KERNEL | __GFP_ZERO); + + if (!kernel_addr) + return -ENOMEM; + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + hdev->asic_funcs->hw_queues_lock(hdev); + + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; + dma_offset = dma_id * DMA_CORE_OFFSET; + qm_offset = dma_id * DMA_QMAN_OFFSET; + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); + is_eng_idle = IS_DMA_IDLE(dma_core_sts0); + + if (!is_eng_idle) { + dev_err_ratelimited(hdev->dev, + "Can't read via DMA because it is BUSY\n"); + rc = -EAGAIN; + goto out; + } + } + + cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, + 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + if (err_cause) { + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); + } + + pos = 0; + size_left = size; + size_to_dma = SZ_2M; + + while (size_left > 0) { + + if (size_left < SZ_2M) + size_to_dma = size_left; + + rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, + dma_addr); + if (rc) + break; + + memcpy(blob_addr + pos, kernel_addr, size_to_dma); + + if (size_left <= SZ_2M) + break; + + pos += SZ_2M; + addr += SZ_2M; + size_left -= SZ_2M; + } + + /* TODO: remove this by mapping the DMA temporary buffer to the MMU + * using the compute ctx ASID, if exists. If not, use the kernel ctx + * ASID + */ + WREG32_AND(mmDMA0_CORE_PROT + dma_offset, + ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); + +out: + hdev->asic_funcs->hw_queues_unlock(hdev); + + hdev->asic_funcs->set_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, + dma_addr); + + return rc; +} + static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -8639,6 +8797,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_write32 = gaudi_debugfs_write32, .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, + .debugfs_read_dma = gaudi_debugfs_read_dma, .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, .set_pll_profile = gaudi_set_pll_profile, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 09b423455439..175b7b0af450 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4329,6 +4329,13 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, return rc; } +static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, + void *blob_addr) +{ + dev_err(hdev->dev, "Reading via DMA is unimplemented yet\n"); + return -EPERM; +} + static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; @@ -5521,6 +5528,7 @@ static const struct hl_asic_funcs goya_funcs = { .debugfs_write32 = goya_debugfs_write32, .debugfs_read64 = goya_debugfs_read64, .debugfs_write64 = goya_debugfs_write64, + .debugfs_read_dma = goya_debugfs_read_dma, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, .set_pll_profile = goya_set_pll_profile, |