Diffstat (limited to 'drivers/accel')
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c          |   9
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h          |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h           |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_mtl.c       |  19
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c          |   4
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.c          |  14
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.c  | 294
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.h  |   9
8 files changed, 236 insertions(+), 115 deletions(-)
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 8396db2b5203..9f2b9fdcc549 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -50,6 +50,10 @@ u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
+bool ivpu_disable_mmu_cont_pages;
+module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644);
+MODULE_PARM_DESC(disable_mmu_cont_pages, "Disable MMU contiguous pages optimization");
+
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
{
struct ivpu_device *vdev = file_priv->vdev;
@@ -372,7 +376,6 @@ static const struct drm_driver driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = ivpu_gem_prime_import,
- .gem_prime_mmap = drm_gem_prime_mmap,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@@ -427,7 +430,7 @@ static int ivpu_pci_init(struct ivpu_device *vdev)
return PTR_ERR(vdev->regb);
}
- ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38));
+ ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(vdev->hw->dma_bits));
if (ret) {
ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret);
return ret;
@@ -477,6 +480,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
return -ENOMEM;
vdev->hw->ops = &ivpu_hw_mtl_ops;
+ vdev->hw->dma_bits = 38;
+
vdev->platform = IVPU_PLATFORM_INVALID;
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index d3013fbd13b3..315180dd72ce 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -131,6 +131,7 @@ struct ivpu_file_priv {
extern int ivpu_dbg_mask;
extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
+extern bool ivpu_disable_mmu_cont_pages;
#define IVPU_TEST_MODE_DISABLED 0
#define IVPU_TEST_MODE_FW_TEST 1
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 50a9304ab09c..0393901be492 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -57,6 +57,7 @@ struct ivpu_hw_info {
u32 tile_fuse;
u32 sku;
u16 config;
+ int dma_bits;
};
extern const struct ivpu_hw_ops ivpu_hw_mtl_ops;
diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c
index fef35422c6f0..3ff60fbbc8d9 100644
--- a/drivers/accel/ivpu/ivpu_hw_mtl.c
+++ b/drivers/accel/ivpu/ivpu_hw_mtl.c
@@ -548,21 +548,10 @@ static void ivpu_boot_tbu_mmu_enable(struct ivpu_device *vdev)
{
u32 val = REGV_RD32(MTL_VPU_HOST_IF_TBU_MMUSSIDV);
- if (ivpu_is_fpga(vdev)) {
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
- } else {
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU1_ARMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_AWMMUSSIDV, val);
- val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU3_ARMMUSSIDV, val);
- }
+ val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_AWMMUSSIDV, val);
+ val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU0_ARMMUSSIDV, val);
+ val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_AWMMUSSIDV, val);
+ val = REG_SET_FLD(MTL_VPU_HOST_IF_TBU_MMUSSIDV, TBU2_ARMMUSSIDV, val);
REGV_WR32(MTL_VPU_HOST_IF_TBU_MMUSSIDV, val);
}
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index d45be0615b47..de9e69f70af7 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -289,15 +289,13 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
{
struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_job *job;
- size_t buf_size;
int ret;
ret = ivpu_rpm_get(vdev);
if (ret < 0)
return NULL;
- buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *);
- job = kzalloc(buf_size, GFP_KERNEL);
+ job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL);
if (!job)
goto err_rpm_put;
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index b8b259b3aa63..53878e77aad3 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -143,6 +143,16 @@
#define IVPU_MMU_CD_0_ASET BIT(47)
#define IVPU_MMU_CD_0_ASID GENMASK_ULL(63, 48)
+#define IVPU_MMU_T0SZ_48BIT 16
+#define IVPU_MMU_T0SZ_38BIT 26
+
+#define IVPU_MMU_IPS_48BIT 5
+#define IVPU_MMU_IPS_44BIT 4
+#define IVPU_MMU_IPS_42BIT 3
+#define IVPU_MMU_IPS_40BIT 2
+#define IVPU_MMU_IPS_36BIT 1
+#define IVPU_MMU_IPS_32BIT 0
+
#define IVPU_MMU_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
#define IVPU_MMU_STE_0_S1CDMAX GENMASK_ULL(63, 59)
@@ -617,12 +627,12 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma)
entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
if (cd_dma != 0) {
- cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, 26) |
+ cd[0] = FIELD_PREP(IVPU_MMU_CD_0_TCR_T0SZ, IVPU_MMU_T0SZ_48BIT) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_TG0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_IRGN0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_ORGN0, 0) |
FIELD_PREP(IVPU_MMU_CD_0_TCR_SH0, 0) |
- FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, 3) |
+ FIELD_PREP(IVPU_MMU_CD_0_TCR_IPS, IVPU_MMU_IPS_48BIT) |
FIELD_PREP(IVPU_MMU_CD_0_ASID, ssid) |
IVPU_MMU_CD_0_TCR_EPD1 |
IVPU_MMU_CD_0_AA64 |
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index 8ce9b12ac356..465a82298476 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -11,10 +11,12 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
-#define IVPU_MMU_PGD_INDEX_MASK GENMASK(38, 30)
+#define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39)
+#define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30)
#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
-#define IVPU_MMU_ENTRY_FLAGS_MASK GENMASK(11, 0)
+#define IVPU_MMU_ENTRY_FLAGS_MASK (BIT(52) | GENMASK(11, 0))
+#define IVPU_MMU_ENTRY_FLAG_CONT BIT(52)
#define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
#define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
@@ -22,10 +24,13 @@
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
-#define IVPU_MMU_PAGE_SIZE SZ_4K
-#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
-#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
-#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
+#define IVPU_MMU_PAGE_SIZE SZ_4K
+#define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16)
+#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
+#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
+#define IVPU_MMU_PUD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE)
+#define IVPU_MMU_PGD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE)
+#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
@@ -36,167 +41,268 @@
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
- u64 *pgd;
- pgd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, GFP_KERNEL);
- if (!pgd)
+ pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
+ GFP_KERNEL);
+ if (!pgtable->pgd_dma_ptr)
return -ENOMEM;
- pgtable->pgd = pgd;
pgtable->pgd_dma = pgd_dma;
return 0;
}
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
{
- int pgd_index, pmd_index;
+ if (cpu_addr)
+ dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
+ dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+}
+
+static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
+{
+ int pgd_idx, pud_idx, pmd_idx;
+ dma_addr_t pud_dma, pmd_dma, pte_dma;
+ u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr;
- for (pgd_index = 0; pgd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_index) {
- u64 **pmd_entries = pgtable->pgd_cpu_entries[pgd_index];
- u64 *pmd = pgtable->pgd_entries[pgd_index];
+ for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) {
+ pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+ pud_dma = pgtable->pgd_dma_ptr[pgd_idx];
- if (!pmd_entries)
+ if (!pud_dma_ptr)
continue;
- for (pmd_index = 0; pmd_index < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_index) {
- if (pmd_entries[pmd_index])
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE,
- pmd_entries[pmd_index],
- pmd[pmd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) {
+ pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
+ pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx];
+
+ if (!pmd_dma_ptr)
+ continue;
+
+ for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) {
+ pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
+ pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
+
+ ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+ }
+
+ kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
+ ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
}
- kfree(pmd_entries);
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd_entries[pgd_index],
- pgtable->pgd[pgd_index] & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ kfree(pgtable->pmd_ptrs[pgd_idx]);
+ kfree(pgtable->pte_ptrs[pgd_idx]);
+ ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
}
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pgtable->pgd,
- pgtable->pgd_dma & ~IVPU_MMU_ENTRY_FLAGS_MASK);
+ ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+}
+
+static u64*
+ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx)
+{
+ u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
+ dma_addr_t pud_dma;
+
+ if (pud_dma_ptr)
+ return pud_dma_ptr;
+
+ pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+ if (!pud_dma_ptr)
+ return NULL;
+
+ drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]);
+ pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pmd_ptrs[pgd_idx])
+ goto err_free_pud_dma_ptr;
+
+ drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]);
+ pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pte_ptrs[pgd_idx])
+ goto err_free_pmd_ptrs;
+
+ pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr;
+ pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID;
+
+ return pud_dma_ptr;
+
+err_free_pmd_ptrs:
+ kfree(pgtable->pmd_ptrs[pgd_idx]);
+
+err_free_pud_dma_ptr:
+ ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ return NULL;
}
static u64*
-ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, u64 pgd_index)
+ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx,
+ int pud_idx)
{
- u64 **pmd_entries;
+ u64 *pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
dma_addr_t pmd_dma;
- u64 *pmd;
- if (pgtable->pgd_entries[pgd_index])
- return pgtable->pgd_entries[pgd_index];
+ if (pmd_dma_ptr)
+ return pmd_dma_ptr;
- pmd = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
- if (!pmd)
+ pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+ if (!pmd_dma_ptr)
return NULL;
- pmd_entries = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
- if (!pmd_entries)
- goto err_free_pgd;
+ drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]);
+ pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
+ if (!pgtable->pte_ptrs[pgd_idx][pud_idx])
+ goto err_free_pmd_dma_ptr;
- pgtable->pgd_entries[pgd_index] = pmd;
- pgtable->pgd_cpu_entries[pgd_index] = pmd_entries;
- pgtable->pgd[pgd_index] = pmd_dma | IVPU_MMU_ENTRY_VALID;
+ pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr;
+ pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID;
- return pmd;
+ return pmd_dma_ptr;
-err_free_pgd:
- dma_free_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, pmd, pmd_dma);
+err_free_pmd_dma_ptr:
+ ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
return NULL;
}
static u64*
ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
- int pgd_index, int pmd_index)
+ int pgd_idx, int pud_idx, int pmd_idx)
{
+ u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
dma_addr_t pte_dma;
- u64 *pte;
- if (pgtable->pgd_cpu_entries[pgd_index][pmd_index])
- return pgtable->pgd_cpu_entries[pgd_index][pmd_index];
+ if (pte_dma_ptr)
+ return pte_dma_ptr;
- pte = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
- if (!pte)
+ pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+ if (!pte_dma_ptr)
return NULL;
- pgtable->pgd_cpu_entries[pgd_index][pmd_index] = pte;
- pgtable->pgd_entries[pgd_index][pmd_index] = pte_dma | IVPU_MMU_ENTRY_VALID;
+ pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr;
+ pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID;
- return pte;
+ return pte_dma_ptr;
}
static int
ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, dma_addr_t dma_addr, int prot)
+ u64 vpu_addr, dma_addr_t dma_addr, u64 prot)
{
u64 *pte;
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+
+ /* Allocate PUD - second level page table if needed */
+ if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
+ return -ENOMEM;
- /* Allocate PMD - second level page table if needed */
- if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_index))
+ /* Allocate PMD - third level page table if needed */
+ if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx))
return -ENOMEM;
- /* Allocate PTE - third level page table if needed */
- pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_index, pmd_index);
+ /* Allocate PTE - fourth level page table if needed */
+ pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_idx, pud_idx, pmd_idx);
if (!pte)
return -ENOMEM;
- /* Update PTE - third level page table with DMA address */
- pte[pte_index] = dma_addr | prot;
+ /* Update PTE */
+ pte[pte_idx] = dma_addr | prot;
+
+ return 0;
+}
+
+static int
+ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
+ dma_addr_t dma_addr, u64 prot)
+{
+ size_t size = IVPU_MMU_CONT_PAGES_SIZE;
+
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size));
+ drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size));
+
+ prot |= IVPU_MMU_ENTRY_FLAG_CONT;
+
+ while (size) {
+ int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+
+ if (ret)
+ return ret;
+
+ size -= IVPU_MMU_PAGE_SIZE;
+ vpu_addr += IVPU_MMU_PAGE_SIZE;
+ dma_addr += IVPU_MMU_PAGE_SIZE;
+ }
return 0;
}
static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
{
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- int pte_index = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+ int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Update PTE with dummy physical address and clear flags */
- ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index][pte_index] = IVPU_MMU_ENTRY_INVALID;
+ ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
}
static void
ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
+ struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
u64 end_addr = vpu_addr + size;
- u64 *pgd = ctx->pgtable.pgd;
/* Align to PMD entry (2 MB) */
vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
while (vpu_addr < end_addr) {
- int pgd_index = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- u64 pmd_end = (pgd_index + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
- u64 *pmd = ctx->pgtable.pgd_entries[pgd_index];
-
- while (vpu_addr < end_addr && vpu_addr < pmd_end) {
- int pmd_index = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
- u64 *pte = ctx->pgtable.pgd_cpu_entries[pgd_index][pmd_index];
-
- clflush_cache_range(pte, IVPU_MMU_PGTABLE_SIZE);
- vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+ int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
+ u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
+
+ while (vpu_addr < end_addr && vpu_addr < pud_end) {
+ int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
+ u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
+
+ while (vpu_addr < end_addr && vpu_addr < pmd_end) {
+ int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
+
+ clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
+ IVPU_MMU_PGTABLE_SIZE);
+ vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
+ }
+ clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
+ IVPU_MMU_PGTABLE_SIZE);
}
- clflush_cache_range(pmd, IVPU_MMU_PGTABLE_SIZE);
+ clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
}
- clflush_cache_range(pgd, IVPU_MMU_PGTABLE_SIZE);
+ clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
}
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
- u64 vpu_addr, dma_addr_t dma_addr, size_t size, int prot)
+ u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
{
+ int map_size;
+ int ret;
+
while (size) {
- int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+ if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE &&
+ IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) {
+ ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot);
+ map_size = IVPU_MMU_CONT_PAGES_SIZE;
+ } else {
+ ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
+ map_size = IVPU_MMU_PAGE_SIZE;
+ }
if (ret)
return ret;
- vpu_addr += IVPU_MMU_PAGE_SIZE;
- dma_addr += IVPU_MMU_PAGE_SIZE;
- size -= IVPU_MMU_PAGE_SIZE;
+ vpu_addr += map_size;
+ dma_addr += map_size;
+ size -= map_size;
}
return 0;
@@ -216,8 +322,8 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
{
struct scatterlist *sg;
- int prot;
int ret;
+ u64 prot;
u64 i;
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
@@ -237,7 +343,7 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
- u64 dma_addr = sg_dma_address(sg) - sg->offset;
+ dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
@@ -293,8 +399,14 @@ ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
{
lockdep_assert_held(&ctx->lock);
- return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE,
- 0, range->start, range->end, DRM_MM_INSERT_BEST);
+ if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
+ if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST))
+ return 0;
+ }
+
+ return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
}
void
@@ -334,11 +446,15 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd);
+ if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
+ return;
mutex_destroy(&ctx->lock);
- ivpu_mmu_pgtable_free(vdev, &ctx->pgtable);
+ ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
+
+ ctx->pgtable.pgd_dma_ptr = NULL;
+ ctx->pgtable.pgd_dma = 0;
}
int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index ddf11b95023a..961a0d6a6c7f 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -12,12 +12,13 @@ struct ivpu_device;
struct ivpu_file_priv;
struct ivpu_addr_range;
-#define IVPU_MMU_PGTABLE_ENTRIES 512
+#define IVPU_MMU_PGTABLE_ENTRIES 512ull
struct ivpu_mmu_pgtable {
- u64 **pgd_cpu_entries[IVPU_MMU_PGTABLE_ENTRIES];
- u64 *pgd_entries[IVPU_MMU_PGTABLE_ENTRIES];
- u64 *pgd;
+ u64 ***pte_ptrs[IVPU_MMU_PGTABLE_ENTRIES];
+ u64 **pmd_ptrs[IVPU_MMU_PGTABLE_ENTRIES];
+ u64 *pud_ptrs[IVPU_MMU_PGTABLE_ENTRIES];
+ u64 *pgd_dma_ptr;
dma_addr_t pgd_dma;
};