8 files changed, 121 insertions, 107 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 3525236ed8d9..19c84214a7ea 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref)
 
 	/* We also need to update CI for internal queues */
 	if (cs->submitted) {
+		int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
+
+		WARN_ONCE((cs_cnt < 0),
+			"hl%d: error in CS active cnt %d\n",
+			hdev->id, cs_cnt);
+
 		hl_int_hw_queue_update_ci(cs);
 
 		spin_lock(&hdev->hw_queues_mirror_lock);
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index a53c12aff6ad..974a87789bd8 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -232,6 +232,7 @@ static int vm_show(struct seq_file *s, void *data)
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
 	enum vm_type_t *vm_type;
 	bool once = true;
+	u64 j;
 	int i;
 
 	if (!dev_entry->hdev->mmu_enable)
@@ -260,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
 			} else {
 				phys_pg_pack = hnode->ptr;
 				seq_printf(s,
-					"    0x%-14llx      %-10u       %-4u\n",
+					"    0x%-14llx      %-10llu       %-4u\n",
 					hnode->vaddr, phys_pg_pack->total_size,
 					phys_pg_pack->handle);
 			}
@@ -282,9 +283,9 @@ static int vm_show(struct seq_file *s, void *data)
 						phys_pg_pack->page_size);
 			seq_puts(s, "   physical address\n");
 			seq_puts(s, "---------------------\n");
-			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+			for (j = 0 ; j < phys_pg_pack->npages ; j++) {
 				seq_printf(s, "    0x%-14llx\n",
-						phys_pg_pack->pages[i]);
+						phys_pg_pack->pages[j]);
 			}
 		}
 		spin_unlock(&vm->idr_lock);
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index de46aa6ed154..77d51be66c7e 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -11,6 +11,8 @@
 #include <linux/sched/signal.h>
 #include <linux/hwmon.h>
 
+#define HL_PLDM_PENDING_RESET_PER_SEC	(HL_PENDING_RESET_PER_SEC * 10)
+
 bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
 {
 	if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
@@ -216,6 +218,7 @@ static int device_early_init(struct hl_device *hdev)
 	spin_lock_init(&hdev->hw_queues_mirror_lock);
 	atomic_set(&hdev->in_reset, 0);
 	atomic_set(&hdev->fd_open_cnt, 0);
+	atomic_set(&hdev->cs_active_cnt, 0);
 
 	return 0;
 
@@ -413,6 +416,27 @@ int hl_device_suspend(struct hl_device *hdev)
 
 	pci_save_state(hdev->pdev);
 
+	/* Block future CS/VM/JOB completion operations */
+	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+	if (rc) {
+		dev_err(hdev->dev, "Can't suspend while in reset\n");
+		return -EIO;
+	}
+
+	/* This blocks all other stuff that is not blocked by in_reset */
+	hdev->disabled = true;
+
+	/*
+	 * Flush anyone that is inside the critical section of enqueuing
+	 * jobs to the H/W
+	 */
+	hdev->asic_funcs->hw_queues_lock(hdev);
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	/* Flush processes that are sending messages to the CPU */
+	mutex_lock(&hdev->send_cpu_message_lock);
+	mutex_unlock(&hdev->send_cpu_message_lock);
+
 	rc = hdev->asic_funcs->suspend(hdev);
 	if (rc)
 		dev_err(hdev->dev,
@@ -440,21 +464,38 @@ int hl_device_resume(struct hl_device *hdev)
 
 	pci_set_power_state(hdev->pdev, PCI_D0);
 	pci_restore_state(hdev->pdev);
-	rc = pci_enable_device(hdev->pdev);
+	rc = pci_enable_device_mem(hdev->pdev);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to enable PCI device in resume\n");
 		return rc;
 	}
 
+	pci_set_master(hdev->pdev);
+
 	rc = hdev->asic_funcs->resume(hdev);
 	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to enable PCI access from device CPU\n");
-		return rc;
+		dev_err(hdev->dev, "Failed to resume device after suspend\n");
+		goto disable_device;
+	}
+
+
+	hdev->disabled = false;
+	atomic_set(&hdev->in_reset, 0);
+
+	rc = hl_device_reset(hdev, true, false);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to reset device during resume\n");
+		goto disable_device;
 	}
 
 	return 0;
+
+disable_device:
+	pci_clear_master(hdev->pdev);
+	pci_disable_device(hdev->pdev);
+
+	return rc;
 }
 
 static void hl_device_hard_reset_pending(struct work_struct *work)
@@ -462,9 +503,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
 	struct hl_device_reset_work *device_reset_work =
 		container_of(work, struct hl_device_reset_work, reset_work);
 	struct hl_device *hdev = device_reset_work->hdev;
-	u16 pending_cnt = HL_PENDING_RESET_PER_SEC;
+	u16 pending_total, pending_cnt;
 	struct task_struct *task = NULL;
 
+	if (hdev->pldm)
+		pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
+	else
+		pending_total = HL_PENDING_RESET_PER_SEC;
+
+	pending_cnt = pending_total;
+
 	/* Flush all processes that are inside hl_open */
 	mutex_lock(&hdev->fd_open_cnt_lock);
 
@@ -489,6 +537,19 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
 		}
 	}
 
+	pending_cnt = pending_total;
+
+	while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
+
+		pending_cnt--;
+
+		ssleep(1);
+	}
+
+	if (atomic_read(&hdev->fd_open_cnt))
+		dev_crit(hdev->dev,
+			"Going to hard reset with open user contexts\n");
+
 	mutex_unlock(&hdev->fd_open_cnt_lock);
 
 	hl_device_reset(hdev, true, true);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 238dd57c541b..3c509e19d69d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1201,15 +1201,6 @@ static int goya_stop_external_queues(struct hl_device *hdev)
 	return retval;
 }
 
-static void goya_resume_external_queues(struct hl_device *hdev)
-{
-	WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
-	WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
-	WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
-	WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
-	WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
-}
-
 /*
  * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
  *
@@ -1697,12 +1688,11 @@ static void goya_init_golden_registers(struct hl_device *hdev)
 
 	/*
 	 * Workaround for H2 #HW-23 bug
-	 * Set DMA max outstanding read requests to 240 on DMA CH 1. Set it
-	 * to 16 on KMD DMA
-	 * We need to limit only these DMAs because the user can only read
+	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
+	 * This limitation is still large enough to not affect Gen4 bandwidth.
+	 * We need to only limit that DMA channel because the user can only read
 	 * from Host using DMA CH 1
 	 */
-	WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
 	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
 
 	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
@@ -2178,36 +2168,6 @@ static int goya_stop_internal_queues(struct hl_device *hdev)
 	return retval;
 }
 
-static void goya_resume_internal_queues(struct hl_device *hdev)
-{
-	WREG32(mmMME_QM_GLBL_CFG1, 0);
-	WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC0_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC1_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC2_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC3_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC4_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC5_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC6_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
-
-	WREG32(mmTPC7_QM_GLBL_CFG1, 0);
-	WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
-}
-
 static void goya_dma_stall(struct hl_device *hdev)
 {
 	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
@@ -2905,20 +2865,6 @@ int goya_suspend(struct hl_device *hdev)
 {
 	int rc;
 
-	rc = goya_stop_internal_queues(hdev);
-
-	if (rc) {
-		dev_err(hdev->dev, "failed to stop internal queues\n");
-		return rc;
-	}
-
-	rc = goya_stop_external_queues(hdev);
-
-	if (rc) {
-		dev_err(hdev->dev, "failed to stop external queues\n");
-		return rc;
-	}
-
 	rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
 	if (rc)
 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
@@ -2928,15 +2874,7 @@ int goya_suspend(struct hl_device *hdev)
 
 int goya_resume(struct hl_device *hdev)
 {
-	int rc;
-
-	goya_resume_external_queues(hdev);
-	goya_resume_internal_queues(hdev);
-
-	rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
-	if (rc)
-		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
-	return rc;
+	return goya_init_iatu(hdev);
 }
 
 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
@@ -3070,7 +3008,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
 
 	*dma_handle = hdev->asic_prop.sram_base_address;
 
-	base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
+	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
 
 	switch (queue_id) {
 	case GOYA_QUEUE_ID_MME:
@@ -3754,7 +3692,7 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 	 * WA for HW-23.
 	 * We can't allow user to read from Host using QMANs other than 1.
 	 */
-	if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
+	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
 		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
 				le32_to_cpu(user_dma_pkt->tsize),
 				hdev->asic_prop.va_space_host_start_address,
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index a7c95e9f9b9a..a8ee52c880cd 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -793,11 +793,11 @@ struct hl_vm_hash_node {
  * struct hl_vm_phys_pg_pack - physical page pack.
  * @vm_type: describes the type of the virtual area descriptor.
  * @pages: the physical page array.
+ * @npages: num physical pages in the pack.
+ * @total_size: total size of all the pages in this list.
  * @mapping_cnt: number of shared mappings.
  * @asid: the context related to this list.
- * @npages: num physical pages in the pack.
  * @page_size: size of each page in the pack.
- * @total_size: total size of all the pages in this list.
  * @flags: HL_MEM_* flags related to this list.
  * @handle: the provided handle related to this list.
  * @offset: offset from the first page.
@@ -807,11 +807,11 @@ struct hl_vm_hash_node {
 struct hl_vm_phys_pg_pack {
 	enum vm_type_t		vm_type; /* must be first */
 	u64			*pages;
+	u64			npages;
+	u64			total_size;
 	atomic_t		mapping_cnt;
 	u32			asid;
-	u32			npages;
 	u32			page_size;
-	u32			total_size;
 	u32			flags;
 	u32			handle;
 	u32			offset;
@@ -1056,13 +1056,15 @@ struct hl_device_reset_work {
  * @cb_pool_lock: protects the CB pool.
  * @user_ctx: current user context executing.
  * @dram_used_mem: current DRAM memory consumption.
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
- * @fd_open_cnt: number of open user processes.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, KMD will restore this
  *             value and update the F/W after the re-initialization
+ * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
+ * @fd_open_cnt: number of open user processes.
+ * @cs_active_cnt: number of active command submissions on this device (active
+ *                 means already in H/W queues)
  * @major: habanalabs KMD major.
  * @high_pll: high PLL profile frequency.
  * @soft_reset_cnt: number of soft reset since KMD loading.
@@ -1128,11 +1130,12 @@ struct hl_device {
 	struct hl_ctx			*user_ctx;
 
 	atomic64_t			dram_used_mem;
+	u64				timeout_jiffies;
+	u64				max_power;
 	atomic_t			in_reset;
 	atomic_t			curr_pll_profile;
 	atomic_t			fd_open_cnt;
-	u64				timeout_jiffies;
-	u64				max_power;
+	atomic_t			cs_active_cnt;
 	u32				major;
 	u32				high_pll;
 	u32				soft_reset_cnt;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 67bece26417c..ef3bb6951360 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -370,12 +370,15 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		spin_unlock(&hdev->hw_queues_mirror_lock);
 	}
 
+	/* Count the CS as active before its jobs are pushed to the H/W queues */
+	atomic_inc(&hdev->cs_active_cnt);
+
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
 		if (job->ext_queue)
 			ext_hw_queue_schedule_job(job);
 		else
 			int_hw_queue_schedule_job(job);
 	}
 
 	cs->submitted = true;
 
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 3a12fd1a5274..ce1fda40a8b8 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -56,9 +56,9 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm *vm = &hdev->vm;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
-	u64 paddr = 0;
-	u32 total_size, num_pgs, num_curr_pgs, page_size, page_shift;
-	int handle, rc, i;
+	u64 paddr = 0, total_size, num_pgs, i;
+	u32 num_curr_pgs, page_size, page_shift;
+	int handle, rc;
 	bool contiguous;
 
 	num_curr_pgs = 0;
@@ -73,7 +73,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
 		if (!paddr) {
 			dev_err(hdev->dev,
-				"failed to allocate %u huge contiguous pages\n",
+				"failed to allocate %llu huge contiguous pages\n",
 				num_pgs);
 			return -ENOMEM;
 		}
@@ -93,7 +93,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 	phys_pg_pack->flags = args->flags;
 	phys_pg_pack->contiguous = contiguous;
 
-	phys_pg_pack->pages = kcalloc(num_pgs, sizeof(u64), GFP_KERNEL);
+	phys_pg_pack->pages = kvcalloc(num_pgs, sizeof(u64), GFP_KERNEL);
 	if (!phys_pg_pack->pages) {
 		rc = -ENOMEM;
 		goto pages_arr_err;
@@ -148,7 +148,7 @@ page_err:
 			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
 					page_size);
 
-	kfree(phys_pg_pack->pages);
+	kvfree(phys_pg_pack->pages);
 pages_arr_err:
 	kfree(phys_pg_pack);
 pages_pack_err:
@@ -267,7 +267,7 @@ static void free_phys_pg_pack(struct hl_device *hdev,
 		struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_vm *vm = &hdev->vm;
-	int i;
+	u64 i;
 
 	if (!phys_pg_pack->created_from_userptr) {
 		if (phys_pg_pack->contiguous) {
@@ -288,7 +288,7 @@ static void free_phys_pg_pack(struct hl_device *hdev,
 		}
 	}
 
-	kfree(phys_pg_pack->pages);
+	kvfree(phys_pg_pack->pages);
 	kfree(phys_pg_pack);
 }
 
@@ -519,7 +519,7 @@ static inline int add_va_block(struct hl_device *hdev,
  * - Return the start address of the virtual block
  */
 static u64 get_va_block(struct hl_device *hdev,
-		struct hl_va_range *va_range, u32 size, u64 hint_addr,
+		struct hl_va_range *va_range, u64 size, u64 hint_addr,
 		bool is_userptr)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
@@ -577,7 +577,8 @@ static u64 get_va_block(struct hl_device *hdev,
 	}
 
 	if (!new_va_block) {
-		dev_err(hdev->dev, "no available va block for size %u\n", size);
+		dev_err(hdev->dev, "no available va block for size %llu\n",
+				size);
 		goto out;
 	}
 
@@ -648,8 +649,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct scatterlist *sg;
 	dma_addr_t dma_addr;
-	u64 page_mask;
-	u32 npages, total_npages, page_size = PAGE_SIZE;
+	u64 page_mask, total_npages;
+	u32 npages, page_size = PAGE_SIZE;
 	bool first = true, is_huge_page_opt = true;
 	int rc, i, j;
 
@@ -691,7 +692,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 
 	page_mask = ~(((u64) page_size) - 1);
 
-	phys_pg_pack->pages = kcalloc(total_npages, sizeof(u64), GFP_KERNEL);
+	phys_pg_pack->pages = kvcalloc(total_npages, sizeof(u64),
+						GFP_KERNEL);
 	if (!phys_pg_pack->pages) {
 		rc = -ENOMEM;
 		goto page_pack_arr_mem_err;
@@ -750,9 +752,9 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
 		struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_device *hdev = ctx->hdev;
-	u64 next_vaddr = vaddr, paddr;
+	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
 	u32 page_size = phys_pg_pack->page_size;
-	int i, rc = 0, mapped_pg_cnt = 0;
+	int rc = 0;
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
@@ -764,7 +766,7 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
 		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
 		if (rc) {
 			dev_err(hdev->dev,
-				"map failed for handle %u, npages: %d, mapped: %d",
+				"map failed for handle %u, npages: %llu, mapped: %llu",
 				phys_pg_pack->handle, phys_pg_pack->npages,
 				mapped_pg_cnt);
 			goto err;
@@ -985,10 +987,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
 	struct hl_vm_hash_node *hnode = NULL;
 	struct hl_userptr *userptr = NULL;
 	enum vm_type_t *vm_type;
-	u64 next_vaddr;
+	u64 next_vaddr, i;
 	u32 page_size;
 	bool is_userptr;
-	int i, rc;
+	int rc;
 
 	/* protect from double entrance */
 	mutex_lock(&ctx->mem_hash_lock);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 2f2e99cb2743..3a5a2cec8305 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -832,7 +832,7 @@ err:
 int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 {
 	struct hl_device *hdev = ctx->hdev;
-	u64 real_virt_addr;
+	u64 real_virt_addr, real_phys_addr;
 	u32 real_page_size, npages;
 	int i, rc, mapped_cnt = 0;
 
@@ -857,14 +857,16 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 
 	npages = page_size / real_page_size;
 	real_virt_addr = virt_addr;
+	real_phys_addr = phys_addr;
 
 	for (i = 0 ; i < npages ; i++) {
-		rc = _hl_mmu_map(ctx, real_virt_addr, phys_addr,
+		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
 				real_page_size);
 		if (rc)
 			goto err;
 
 		real_virt_addr += real_page_size;
+		real_phys_addr += real_page_size;
 		mapped_cnt++;
 	}