54 files changed, 2482 insertions, 3713 deletions
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 4a9baf02439e..294b572a9c33 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
 static struct idr accel_minors_idr;
 
 static struct dentry *accel_debugfs_root;
-static struct class *accel_class;
 
 static struct device_type accel_sysfs_device_minor = {
 	.name = "accel_minor"
@@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
 	return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
 }
 
+static const struct class accel_class = {	/* registered in accel_sysfs_init() */
+	.name = "accel",
+	.devnode = accel_devnode,	/* yields "accel/<device name>" */
+};
+
 static int accel_sysfs_init(void)
 {
-	accel_class = class_create("accel");
-	if (IS_ERR(accel_class))
-		return PTR_ERR(accel_class);
-
-	accel_class->devnode = accel_devnode;
-
-	return 0;
+	return class_register(&accel_class);
 }
 
 static void accel_sysfs_destroy(void)
 {
-	if (IS_ERR_OR_NULL(accel_class))
-		return;
-	class_destroy(accel_class);
-	accel_class = NULL;
+	class_unregister(&accel_class);
 }
 
 static int accel_name_info(struct seq_file *m, void *data)
@@ -79,29 +74,30 @@ static const struct drm_info_list accel_debugfs_list[] = {
 #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list)
 
 /**
- * accel_debugfs_init() - Initialize debugfs for accel minor
- * @minor: Pointer to the drm_minor instance.
- * @minor_id: The minor's id
+ * accel_debugfs_init() - Initialize debugfs for device
+ * @dev: Pointer to the device instance.
  *
- * This function initializes the drm minor's debugfs members and creates
- * a root directory for the minor in debugfs. It also creates common files
- * for accelerators and calls the driver's debugfs init callback.
+ * This function creates a root directory for the device in debugfs.
  */
-void accel_debugfs_init(struct drm_minor *minor, int minor_id)
+void accel_debugfs_init(struct drm_device *dev)
 {
-	struct drm_device *dev = minor->dev;
-	char name[64];
+	drm_debugfs_dev_init(dev, accel_debugfs_root);
+}
 
-	INIT_LIST_HEAD(&minor->debugfs_list);
-	mutex_init(&minor->debugfs_lock);
-	sprintf(name, "%d", minor_id);
-	minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root);
+/**
+ * accel_debugfs_register() - Register debugfs for device
+ * @dev: Pointer to the device instance.
+ *
+ * Reuses the device debugfs root for the accel minor and creates the common accel files in it.
+ */
+void accel_debugfs_register(struct drm_device *dev)
+{
+	struct drm_minor *minor = dev->accel;
 
-	drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
-				 minor->debugfs_root, minor);
+	minor->debugfs_root = dev->debugfs_root;
 
-	if (dev->driver->debugfs_init)
-		dev->driver->debugfs_init(minor);
+	drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES,
+				 dev->debugfs_root, minor);
 }
 
 /**
@@ -116,7 +112,7 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id)
 void accel_set_device_instance_params(struct device *kdev, int index)
 {
 	kdev->devt = MKDEV(ACCEL_MAJOR, index);
-	kdev->class = accel_class;
+	kdev->class = &accel_class;
 	kdev->type = &accel_sysfs_device_minor;
 }
 
diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c
index 08f7aee42624..0f0d295116e7 100644
--- a/drivers/accel/habanalabs/common/command_buffer.c
+++ b/drivers/accel/habanalabs/common/command_buffer.c
@@ -361,10 +361,11 @@ out:
 	return rc;
 }
 
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
-	union hl_cb_args *args = data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
+	union hl_cb_args *args = data;
 	u64 handle = 0, device_va = 0;
 	enum hl_device_status status;
 	u32 usage_cnt = 0;
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index c23829dab97a..3aa6eeef443b 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -31,6 +31,24 @@ enum hl_cs_wait_status {
 	CS_WAIT_STATUS_GONE
 };
 
+/*
+ * Data used while handling wait/timestamp nodes.
+ * The purpose of this struct is to store the needed data for both operations
+ * in one variable instead of passing a large number of arguments to functions.
+ */
+struct wait_interrupt_data {
+	struct hl_user_interrupt *interrupt;	/* interrupt whose list the node joins */
+	struct hl_mmap_mem_buf *buf;		/* ts buffer, looked up from ts_handle */
+	struct hl_mem_mgr *mmg;			/* memory manager used for both lookups */
+	struct hl_cb *cq_cb;			/* CQ counters CB, looked up from cq_handle */
+	u64 ts_handle;
+	u64 ts_offset;				/* record index inside the ts buffer */
+	u64 cq_handle;
+	u64 cq_offset;				/* u64 index inside the CQ counters CB */
+	u64 target_value;			/* CQ counter value to wait for */
+	u64 intr_timeout_us;			/* wait timeout; 0 means check once, no wait */
+};
+};
+
 static void job_wq_completion(struct work_struct *work);
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
 				enum hl_cs_wait_status *status, s64 *timestamp);
@@ -1079,19 +1097,22 @@ static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
 	struct hl_user_pending_interrupt *pend, *temp;
+	unsigned long flags;
 
-	spin_lock(&interrupt->wait_list_lock);
-	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
-		if (pend->ts_reg_info.buf) {
-			list_del(&pend->wait_list_node);
-			hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
-			hl_cb_put(pend->ts_reg_info.cq_cb);
-		} else {
-			pend->fence.error = -EIO;
-			complete_all(&pend->fence.completion);
-		}
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
+		pend->fence.error = -EIO;
+		complete_all(&pend->fence.completion);
 	}
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+	spin_lock_irqsave(&interrupt->ts_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
+		list_del(&pend->list_node);
+		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+		hl_cb_put(pend->ts_reg_info.cq_cb);
+	}
+	spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 		/* Need to wait for restore completion before execution phase */
 		if (num_chunks) {
 			enum hl_cs_wait_status status;
-wait_again:
+
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
 					*cs_seq, &status, NULL);
 			if (ret) {
-				if (ret == -ERESTARTSYS) {
-					usleep_range(100, 200);
-					goto wait_again;
-				}
-
 				dev_err(hdev->dev,
 					"Restore CS for context %d failed to complete %d\n",
 					ctx->asid, ret);
@@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
 	return 0;
 }
 
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	union hl_cs_args *args = data;
 	enum hl_cs_type cs_type = 0;
 	u64 cs_seq = ULONG_MAX;
@@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
-					struct hl_cb *cq_cb,
-					u64 ts_offset, u64 cq_offset, u64 target_value,
-					spinlock_t *wait_list_lock,
-					struct hl_user_pending_interrupt **pend)
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+					struct hl_cb *cq_cb, u64 cq_offset, u64 target_value)
 {
-	struct hl_ts_buff *ts_buff = buf->private;
-	struct hl_user_pending_interrupt *requested_offset_record =
-				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-				ts_offset;
-	struct hl_user_pending_interrupt *cb_last =
-			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+	record->ts_reg_info.cq_cb = cq_cb;	/* put again when the record is unregistered */
+	record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+	record->cq_target_value = target_value;	/* u64: callers pass 64-bit targets */
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+					struct hl_ts_buff *ts_buff, u64 ts_offset,
+					struct hl_user_pending_interrupt **req_event_record)
+{
+	struct hl_user_pending_interrupt *ts_cb_last;	/* one past the last record */
+
+	*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+				ts_offset; /* not dereferenced before the index check */
+	ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
 			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-	unsigned long iter_counter = 0;
-	u64 current_cq_counter;
-	ktime_t timestamp;
 
 	/* Validate ts_offset not exceeding last max */
-	if (requested_offset_record >= cb_last) {
-		dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
-								(u64)(uintptr_t)cb_last);
+	if (ts_offset >= ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)) {
+		dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
+				ts_offset, (u64)(uintptr_t)ts_cb_last);
 		return -EINVAL;
 	}
 
-	timestamp = ktime_get();
+	return 0;
+}
 
-start_over:
-	spin_lock(wait_list_lock);
+static void unregister_timestamp_node(struct hl_device *hdev,
+			struct hl_user_pending_interrupt *record, bool need_lock)
+{
+	struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
+	bool ts_rec_found = false;
+	unsigned long flags;	/* only written and read when need_lock is set */
 
-	/* Unregister only if we didn't reach the target value
-	 * since in this case there will be no handling in irq context
-	 * and then it's safe to delete the node out of the interrupt list
-	 * then re-use it on other interrupt
-	 */
-	if (requested_offset_record->ts_reg_info.in_use) {
-		current_cq_counter = *requested_offset_record->cq_kernel_addr;
-		if (current_cq_counter < requested_offset_record->cq_target_value) {
-			list_del(&requested_offset_record->wait_list_node);
-			spin_unlock(wait_list_lock);
+	if (need_lock)	/* NOTE(review): when false, caller presumably holds this lock - confirm */
+		spin_lock_irqsave(&interrupt->ts_list_lock, flags);
 
-			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
-			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
+	if (record->ts_reg_info.in_use) {
+		record->ts_reg_info.in_use = false;
+		list_del(&record->list_node);
+		ts_rec_found = true;	/* remember to drop the references below */
+	}
 
-			dev_dbg(buf->mmg->dev,
-				"ts node removed from interrupt list now can re-use\n");
-		} else {
-			dev_dbg(buf->mmg->dev,
-				"ts node in middle of irq handling\n");
-
-			/* irq thread handling in the middle give it time to finish */
-			spin_unlock(wait_list_lock);
-			usleep_range(100, 1000);
-			if (++iter_counter == MAX_TS_ITER_NUM) {
-				dev_err(buf->mmg->dev,
-					"Timestamp offset processing reached timeout of %lld ms\n",
-					ktime_ms_delta(ktime_get(), timestamp));
-				return -EAGAIN;
-			}
+	if (need_lock)
+		spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 
-			goto start_over;
+	/* Put the refcounts that were taken when the event was registered */
+	if (ts_rec_found) {
+		hl_mmap_mem_buf_put(record->ts_reg_info.buf);
+		hl_cb_put(record->ts_reg_info.cq_cb);
+	}
+}
+
+static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
+					struct wait_interrupt_data *data, unsigned long *flags,
+					struct hl_user_pending_interrupt **pend)
+{
+	struct hl_user_pending_interrupt *req_offset_record;
+	struct hl_ts_buff *ts_buff = data->buf->private;
+	bool need_lock = false;
+	int rc;
+
+	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
+									&req_offset_record);
+	if (rc)
+		return rc;
+
+	/* If the node is already registered, unregister it first and then re-use it */
+	if (req_offset_record->ts_reg_info.in_use) {
+		dev_dbg(data->buf->mmg->dev,
+				"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
+				req_offset_record,
+				req_offset_record->ts_reg_info.interrupt->interrupt_id,
+				req_offset_record->ts_reg_info.timestamp_kernel_addr,
+				data->interrupt->interrupt_id);
+		/*
+		 * The record may currently be registered on a different interrupt than the
+		 * new one. To avoid holding two list locks during unregister, release the
+		 * new interrupt's ts list lock here and re-acquire it once unregister is done.
+		 */
+		if (data->interrupt->interrupt_id !=
+				req_offset_record->ts_reg_info.interrupt->interrupt_id) {
+
+			need_lock = true;
+			spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
 		}
-	} else {
-		/* Fill up the new registration node info */
-		requested_offset_record->ts_reg_info.buf = buf;
-		requested_offset_record->ts_reg_info.cq_cb = cq_cb;
-		requested_offset_record->ts_reg_info.timestamp_kernel_addr =
-				(u64 *) ts_buff->user_buff_address + ts_offset;
-		requested_offset_record->cq_kernel_addr =
-				(u64 *) cq_cb->kernel_address + cq_offset;
-		requested_offset_record->cq_target_value = target_value;
 
-		spin_unlock(wait_list_lock);
+		unregister_timestamp_node(hdev, req_offset_record, need_lock);
+
+		if (need_lock)
+			spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
 	}
 
-	*pend = requested_offset_record;
+	/* Fill up the new registration node info; the caller adds it to the ts list */
+	req_offset_record->ts_reg_info.in_use = true;
+	req_offset_record->ts_reg_info.buf = data->buf;
+	req_offset_record->ts_reg_info.timestamp_kernel_addr =
+			(u64 *) ts_buff->user_buff_address + data->ts_offset;
+	req_offset_record->ts_reg_info.interrupt = data->interrupt;
+	set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
+						data->target_value);
 
-	dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
-		requested_offset_record);
-	return 0;
+	*pend = req_offset_record;
+
+	return rc;
+}
+
+static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+				struct wait_interrupt_data *data,
+				u32 *status, u64 *timestamp)
+{
+	struct hl_user_pending_interrupt *pend;
+	unsigned long flags;
+	int rc = 0;
+
+	hl_ctx_get(ctx);
+
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
+		rc = -EINVAL;
+		goto put_ctx;
+	}
+
+	/* Validate the cq offset by index; a pointer sum with a huge offset may wrap */
+	if (data->cq_offset >=
+			data->cq_cb->size / sizeof(u64)) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
+					data->interrupt->interrupt_id, data->ts_handle,
+					data->ts_offset, data->cq_offset);
+
+	data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
+	if (!data->buf) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
+
+	/* get ts buffer record */
+	rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
+	if (rc) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+		goto put_ts_buff;
+	}
+
+	/* We check for completion value as interrupt could have been received
+	 * before we add the timestamp node to the ts list.
+	 */
+	if (*pend->cq_kernel_addr >= data->target_value) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+		dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
+				pend, data->ts_offset, data->interrupt->interrupt_id);
+
+		pend->ts_reg_info.in_use = false;
+		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+
+		goto put_ts_buff;	/* record was never listed, so drop its references now */
+	}
+
+	list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
+	spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+	rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+
+	hl_ctx_put(ctx);
+
+	return rc;	/* buf and cq_cb refs stay with the registered record */
+
+put_ts_buff:
+	hl_mmap_mem_buf_put(data->buf);
+put_cq_cb:
+	hl_cb_put(data->cq_cb);
+put_ctx:
+	hl_ctx_put(ctx);
+
+	return rc;
 }
 
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-				struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
-				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
-				u64 target_value, struct hl_user_interrupt *interrupt,
-				bool register_ts_record, u64 ts_handle, u64 ts_offset,
+				struct wait_interrupt_data *data,
 				u32 *status, u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	struct hl_mmap_mem_buf *buf;
-	struct hl_cb *cq_cb;
-	unsigned long timeout;
+	unsigned long timeout, flags;
 	long completion_rc;
 	int rc = 0;
 
-	timeout = hl_usecs64_to_jiffies(timeout_us);
+	timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
 
 	hl_ctx_get(ctx);
 
-	cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
-	if (!cq_cb) {
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
 		rc = -EINVAL;
 		goto put_ctx;
 	}
 
 	/* Validate the cq offset */
-	if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
-			((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
+	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
 		rc = -EINVAL;
 		goto put_cq_cb;
 	}
 
-	if (register_ts_record) {
-		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
-					interrupt->interrupt_id, ts_offset, cq_counters_offset);
-		buf = hl_mmap_mem_buf_get(mmg, ts_handle);
-		if (!buf) {
-			rc = -EINVAL;
-			goto put_cq_cb;
-		}
-
-		/* get ts buffer record */
-		rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
-						cq_counters_offset, target_value,
-						&interrupt->wait_list_lock, &pend);
-		if (rc)
-			goto put_ts_buff;
-	} else {
-		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-		if (!pend) {
-			rc = -ENOMEM;
-			goto put_cq_cb;
-		}
-		hl_fence_init(&pend->fence, ULONG_MAX);
-		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
-		pend->cq_target_value = target_value;
+	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+	if (!pend) {
+		rc = -ENOMEM;
+		goto put_cq_cb;
 	}
 
-	spin_lock(&interrupt->wait_list_lock);
+	hl_fence_init(&pend->fence, ULONG_MAX);
+	pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
+	pend->cq_target_value = data->target_value;
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+
 
 	/* We check for completion value as interrupt could have been received
-	 * before we added the node to the wait list
+	 * before we add the wait node to the wait list.
 	 */
-	if (*pend->cq_kernel_addr >= target_value) {
-		if (register_ts_record)
-			pend->ts_reg_info.in_use = 0;
-		spin_unlock(&interrupt->wait_list_lock);
+	if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		if (*pend->cq_kernel_addr >= data->target_value)
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		else
+			*status = HL_WAIT_CS_STATUS_BUSY;
 
-		if (register_ts_record) {
-			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
-			goto put_ts_buff;
-		} else {
-			pend->fence.timestamp = ktime_get();
-			goto set_timestamp;
-		}
-	} else if (!timeout_us) {
-		spin_unlock(&interrupt->wait_list_lock);
-		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
 	}
@@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * Note that we cannot have sorted list by target value,
 	 * in order to shorten the list pass loop, since
 	 * same list could have nodes for different cq counter handle.
-	 * Note:
-	 * Mark ts buff offset as in use here in the spinlock protection area
-	 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
-	 * before adding the node to the list. this scenario might happen when
-	 * multiple threads are racing on same offset and one thread could
-	 * set the ts buff in ts_buff_get_kernel_ts_record then the other thread
-	 * takes over and get to ts_buff_get_kernel_ts_record and then we will try
-	 * to re-use the same ts buff offset, and will try to delete a non existing
-	 * node from the list.
 	 */
-	if (register_ts_record)
-		pend->ts_reg_info.in_use = 1;
-
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
-
-	if (register_ts_record) {
-		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
-		goto ts_registration_exit;
-	}
+	list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 	/* Wait for interrupt handler to signal completion */
 	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
 								timeout);
 	if (completion_rc > 0) {
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		if (pend->fence.error == -EIO) {
+			dev_err_ratelimited(hdev->dev,
+					"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+					pend->fence.error);
+			rc = -EIO;
+			*status = HL_WAIT_CS_STATUS_ABORTED;
+		} else {
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		}
 	} else {
 		if (completion_rc == -ERESTARTSYS) {
 			dev_err_ratelimited(hdev->dev,
 					"user process got signal while waiting for interrupt ID %d\n",
-					interrupt->interrupt_id);
+					data->interrupt->interrupt_id);
 			rc = -EINTR;
 			*status = HL_WAIT_CS_STATUS_ABORTED;
 		} else {
-			if (pend->fence.error == -EIO) {
-				dev_err_ratelimited(hdev->dev,
-						"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
-						pend->fence.error);
-				rc = -EIO;
-				*status = HL_WAIT_CS_STATUS_ABORTED;
-			} else {
-				/* The wait has timed-out. We don't know anything beyond that
-				 * because the workload wasn't submitted through the driver.
-				 * Therefore, from driver's perspective, the workload is still
-				 * executing.
-				 */
-				rc = 0;
-				*status = HL_WAIT_CS_STATUS_BUSY;
-			}
+			/* The wait has timed-out. We don't know anything beyond that
+			 * because the workload was not submitted through the driver.
+			 * Therefore, from driver's perspective, the workload is still
+			 * executing.
+			 */
+			rc = 0;
+			*status = HL_WAIT_CS_STATUS_BUSY;
 		}
 	}
 
@@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * for ts record, the node will be deleted in the irq handler after
 	 * we reach the target value.
 	 */
-	spin_lock(&interrupt->wait_list_lock);
-	list_del(&pend->wait_list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+	list_del(&pend->list_node);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 	kfree(pend);
-	hl_cb_put(cq_cb);
-ts_registration_exit:
+	hl_cb_put(data->cq_cb);
 	hl_ctx_put(ctx);
 
 	return rc;
 
-put_ts_buff:
-	hl_mmap_mem_buf_put(buf);
 put_cq_cb:
-	hl_cb_put(cq_cb);
+	hl_cb_put(data->cq_cb);
 put_ctx:
 	hl_ctx_put(ctx);
 
@@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 				u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	unsigned long timeout;
+	unsigned long timeout, flags;
 	u64 completion_value;
 	long completion_rc;
 	int rc = 0;
@@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
-	spin_lock(&interrupt->wait_list_lock);
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_add_tail(&pend->list_node, &interrupt->wait_list_head);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
@@ -3507,14 +3579,14 @@ wait_again:
 	 * If comparison fails, keep waiting until timeout expires
 	 */
 	if (completion_rc > 0) {
-		spin_lock(&interrupt->wait_list_lock);
+		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 		/* reinit_completion must be called before we check for user
 		 * completion value, otherwise, if interrupt is received after
 		 * the comparison and before the next wait_for_completion,
 		 * we will reach timeout and fail
 		 */
 		reinit_completion(&pend->fence.completion);
-		spin_unlock(&interrupt->wait_list_lock);
+		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 			dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3551,9 +3623,9 @@ wait_again:
 	}
 
 remove_pending_user_interrupt:
-	spin_lock(&interrupt->wait_list_lock);
-	list_del(&pend->wait_list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_del(&pend->list_node);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 
@@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return -EINVAL;
 	}
 
-	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
-				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
-				args->in.cq_counters_offset,
-				args->in.target, interrupt,
-				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
-				args->in.timestamp_handle, args->in.timestamp_offset,
-				&status, &timestamp);
-	else
+	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
+		struct wait_interrupt_data wait_intr_data = {0};
+
+		wait_intr_data.interrupt = interrupt;
+		wait_intr_data.mmg = &hpriv->mem_mgr;
+		wait_intr_data.cq_handle = args->in.cq_counters_handle;
+		wait_intr_data.cq_offset = args->in.cq_counters_offset;
+		wait_intr_data.ts_handle = args->in.timestamp_handle;
+		wait_intr_data.ts_offset = args->in.timestamp_offset;
+		wait_intr_data.target_value = args->in.target;
+		wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
+
+		if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
+			/*
+			 * Allow only one registration at a time. This is needed in order to prevent
+			 * issues while handling the flow of re-use of the same offset.
+			 * Since the registration flow is protected only by the interrupt lock,
+			 * re-use flow might request to move ts node to another interrupt list,
+			 * and in such case we're not protected.
+			 */
+			mutex_lock(&hpriv->ctx->ts_reg_lock);
+
+			rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+						&status, &timestamp);
+
+			mutex_unlock(&hpriv->ctx->ts_reg_lock);
+		} else
+			rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+						&status, &timestamp);
+	} else {
 		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
 				args->in.interrupt_timeout_us, args->in.addr,
 				args->in.target, interrupt, &status,
 				&timestamp);
+	}
+
 	if (rc)
 		return rc;
 
@@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
 	union hl_wait_cs_args *args = data;
 	u32 flags = args->in.flags;
diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c
index 9c8b1b37b510..b83141f58319 100644
--- a/drivers/accel/habanalabs/common/context.c
+++ b/drivers/accel/habanalabs/common/context.c
@@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	kfree(ctx->cs_pending);
 
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
-		dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
+		dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
 
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
 		hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
+		mutex_destroy(&ctx->ts_reg_lock);
 	} else {
 		dev_dbg(hdev->dev, "closing kernel context\n");
 		hdev->asic_funcs->ctx_fini(ctx);
@@ -198,6 +199,7 @@ out_err:
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 {
+	char task_comm[TASK_COMM_LEN];
 	int rc = 0, i;
 
 	ctx->hdev = hdev;
@@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
 		hl_encaps_sig_mgr_init(&ctx->sig_mgr);
 
-		dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
+		mutex_init(&ctx->ts_reg_lock);
+
+		dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
+			get_task_comm(task_comm, current), ctx->asid);
 	}
 
 	return 0;
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index 9e84a47a21dc..01f071d52570 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -18,8 +18,6 @@
 #define MMU_KBUF_SIZE		(MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
 #define I2C_MAX_TRANSACTION_LEN	8
 
-static struct dentry *hl_debug_root;
-
 static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 				u8 i2c_reg, u8 i2c_len, u64 *val)
 {
@@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 {
 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 
-	dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
+	dev_entry->root = hdev->drm.accel->debugfs_root;
 
 	add_files_to_device(hdev, dev_entry, dev_entry->root);
+
 	if (!hdev->asic_prop.fw_security_enabled)
 		add_secured_nodes(dev_entry, dev_entry->root);
 }
 
-void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
-
-	debugfs_remove_recursive(entry->root);
-}
-
 void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 {
 	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
@@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
 
 	up_write(&dev_entry->state_dump_sem);
 }
-
-void __init hl_debugfs_init(void)
-{
-	hl_debug_root = debugfs_create_dir("habanalabs", NULL);
-}
-
-void hl_debugfs_fini(void)
-{
-	debugfs_remove_recursive(hl_debug_root);
-}
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index b97339d1f7c6..9711e8fc979d 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -14,11 +14,14 @@
 #include <linux/hwmon.h>
 #include <linux/vmalloc.h>
 
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+
 #include <trace/events/habanalabs.h>
 
 #define HL_RESET_DELAY_USEC			10000	/* 10ms */
 
-#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	5
+#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	30
 
 enum dma_alloc_type {
 	DMA_ALLOC_COHERENT,
@@ -185,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir, const char *caller)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct scatterlist *sg;
+	int rc, i;
+
+	rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);	/* ASIC-specific mapping */
+	if (rc)
+		return rc;
+
+	if (!trace_habanalabs_dma_map_page_enabled())
+		return 0;	/* mapped; tracepoint is off, so skip the segment walk */
+
+	for_each_sgtable_dma_sg(sgt, sg, i)	/* one trace event per DMA segment */
+		trace_habanalabs_dma_map_page(hdev->dev,
+				page_to_phys(sg_page(sg)),
+				sg->dma_address - prop->device_dma_offset_for_host_access,
+#ifdef CONFIG_NEED_SG_DMA_LENGTH	/* NOTE(review): sg_dma_len(sg) looks equivalent - confirm */
+				sg->dma_length,
+#else
+				sg->length,
+#endif
+				dir, caller);	/* caller: string forwarded to the trace event */
+
+	return 0;
+}
+
+int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
@@ -203,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da
 	return 0;
 }
 
-void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+					enum dma_data_direction dir, const char *caller)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct scatterlist *sg;
+	int i;
+
+	hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);
+
+	if (trace_habanalabs_dma_unmap_page_enabled()) {
+		for_each_sgtable_dma_sg(sgt, sg, i)
+			trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)),
+					sg->dma_address - prop->device_dma_offset_for_host_access,
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+					sg->dma_length,
+#else
+					sg->length,
+#endif
+					dir, caller);
+	}
+}
+
+void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
@@ -315,7 +370,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
 	enum hl_device_status status;
 
-	if (hdev->reset_info.in_reset) {
+	if (hdev->device_fini_pending) {
+		status = HL_DEVICE_STATUS_MALFUNCTION;
+	} else if (hdev->reset_info.in_reset) {
 		if (hdev->reset_info.in_compute_reset)
 			status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
 		else
@@ -343,9 +400,9 @@ bool hl_device_operational(struct hl_device *hdev,
 		*status = current_status;
 
 	switch (current_status) {
+	case HL_DEVICE_STATUS_MALFUNCTION:
 	case HL_DEVICE_STATUS_IN_RESET:
 	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
-	case HL_DEVICE_STATUS_MALFUNCTION:
 	case HL_DEVICE_STATUS_NEEDS_RESET:
 		return false;
 	case HL_DEVICE_STATUS_OPERATIONAL:
@@ -406,8 +463,6 @@ static void hpriv_release(struct kref *ref)
 
 	hdev->asic_funcs->send_device_activity(hdev, false);
 
-	put_pid(hpriv->taskpid);
-
 	hl_debugfs_remove_file(hpriv);
 
 	mutex_destroy(&hpriv->ctx_lock);
@@ -424,7 +479,7 @@ static void hpriv_release(struct kref *ref)
 	/* Check the device idle status and reset if not idle.
 	 * Skip it if already in reset, or if device is going to be reset in any case.
 	 */
-	if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
+	if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
 		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 	if (!device_is_idle) {
@@ -446,14 +501,18 @@ static void hpriv_release(struct kref *ref)
 	list_del(&hpriv->dev_node);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
+	put_pid(hpriv->taskpid);
+
 	if (reset_device) {
 		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
 	} else {
 		/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
 		int rc = hdev->asic_funcs->scrub_device_mem(hdev);
 
-		if (rc)
+		if (rc) {
 			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
+			hl_device_reset(hdev, HL_DRV_RESET_HARD);
+		}
 	}
 
 	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
@@ -516,24 +575,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message
 }
 
 /*
- * hl_device_release - release function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
+ * hl_device_release() - release function for habanalabs device.
+ * @ddev: pointer to DRM device structure.
+ * @file_priv: pointer to DRM file private data structure.
  *
  * Called when process closes an habanalabs device
  */
-static int hl_device_release(struct inode *inode, struct file *filp)
+void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
 {
-	struct hl_fpriv *hpriv = filp->private_data;
-	struct hl_device *hdev = hpriv->hdev;
-
-	filp->private_data = NULL;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+	struct hl_device *hdev = to_hl_device(ddev);
 
 	if (!hdev) {
 		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
 		put_pid(hpriv->taskpid);
-		return 0;
 	}
 
 	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
@@ -551,8 +606,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	}
 
 	hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
-
-	return 0;
 }
 
 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
@@ -571,11 +624,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 	list_del(&hpriv->dev_node);
 	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
 out:
-	/* release the eventfd */
-	if (hpriv->notifier_event.eventfd)
-		eventfd_ctx_put(hpriv->notifier_event.eventfd);
-
-	mutex_destroy(&hpriv->notifier_event.lock);
 	put_pid(hpriv->taskpid);
 
 	kfree(hpriv);
@@ -583,18 +631,8 @@ out:
 	return 0;
 }
 
-/*
- * hl_mmap - mmap function for habanalabs device
- *
- * @*filp: pointer to file structure
- * @*vma: pointer to vm_area_struct of the process
- *
- * Called when process does an mmap on habanalabs device. Call the relevant mmap
- * function at the end of the common code.
- */
-static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 {
-	struct hl_fpriv *hpriv = filp->private_data;
 	struct hl_device *hdev = hpriv->hdev;
 	unsigned long vm_pgoff;
 
@@ -617,14 +655,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 	return -EINVAL;
 }
 
-static const struct file_operations hl_ops = {
-	.owner = THIS_MODULE,
-	.open = hl_device_open,
-	.release = hl_device_release,
-	.mmap = hl_mmap,
-	.unlocked_ioctl = hl_ioctl,
-	.compat_ioctl = hl_ioctl
-};
+/*
+ * hl_mmap - mmap function for habanalabs device
+ *
+ * @filp: pointer to file structure
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * Called when process does an mmap on habanalabs device. Call the relevant mmap
+ * function at the end of the common code.
+ */
+int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+
+	return __hl_mmap(hpriv, vma);
+}
 
 static const struct file_operations hl_ctrl_ops = {
 	.owner = THIS_MODULE,
@@ -645,14 +691,14 @@ static void device_release_func(struct device *dev)
  * @hdev: pointer to habanalabs device structure
  * @class: pointer to the class object of the device
  * @minor: minor number of the specific device
- * @fpos: file operations to install for this device
+ * @fops: file operations to install for this device
  * @name: name of the device as it will appear in the filesystem
  * @cdev: pointer to the char device object that will be initialized
  * @dev: pointer to the device object that will be initialized
  *
  * Initialize a cdev and a Linux device for habanalabs's device.
  */
-static int device_init_cdev(struct hl_device *hdev, struct class *class,
+static int device_init_cdev(struct hl_device *hdev, const struct class *class,
 				int minor, const struct file_operations *fops,
 				char *name, struct cdev *cdev,
 				struct device **dev)
@@ -676,23 +722,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class,
 
 static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
 {
+	const struct class *accel_class = hdev->drm.accel->kdev->class;
+	char name[32];
 	int rc;
 
-	rc = cdev_device_add(&hdev->cdev, hdev->dev);
-	if (rc) {
-		dev_err(hdev->dev,
-			"failed to add a char device to the system\n");
+	hdev->cdev_idx = hdev->drm.accel->index;
+
+	/* Initialize cdev and device structures for the control device */
+	snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx);
+	rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name,
+				&hdev->cdev_ctrl, &hdev->dev_ctrl);
+	if (rc)
 		return rc;
-	}
 
 	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
 	if (rc) {
-		dev_err(hdev->dev,
-			"failed to add a control char device to the system\n");
-		goto delete_cdev_device;
+		dev_err(hdev->dev_ctrl,
+			"failed to add an accel control char device to the system\n");
+		goto free_ctrl_device;
 	}
 
-	/* hl_sysfs_init() must be done after adding the device to the system */
 	rc = hl_sysfs_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize sysfs\n");
@@ -707,23 +756,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
 
 delete_ctrl_cdev_device:
 	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-delete_cdev_device:
-	cdev_device_del(&hdev->cdev, hdev->dev);
+free_ctrl_device:
+	put_device(hdev->dev_ctrl);
 	return rc;
 }
 
 static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
 {
 	if (!hdev->cdev_sysfs_debugfs_created)
-		goto put_devices;
+		return;
 
-	hl_debugfs_remove_device(hdev);
 	hl_sysfs_fini(hdev);
-	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-	cdev_device_del(&hdev->cdev, hdev->dev);
 
-put_devices:
-	put_device(hdev->dev);
+	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 	put_device(hdev->dev_ctrl);
 }
 
@@ -996,6 +1041,20 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 	return (vendor_id == PCI_VENDOR_ID_HABANALABS);
 }
 
+static void hl_device_eq_heartbeat(struct hl_device *hdev)
+{
+	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	if (!prop->cpucp_info.eq_health_check_supported)
+		return;
+
+	if (hdev->eq_heartbeat_received)
+		hdev->eq_heartbeat_received = false;
+	else
+		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
+}
+
 static void hl_device_heartbeat(struct work_struct *work)
 {
 	struct hl_device *hdev = container_of(work, struct hl_device,
@@ -1003,9 +1062,16 @@ static void hl_device_heartbeat(struct work_struct *work)
 	struct hl_info_fw_err_info info = {0};
 	u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 
-	if (!hl_device_operational(hdev, NULL))
+	/* Start heartbeat checks only after driver has enabled events from FW */
+	if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
 		goto reschedule;
 
+	/*
+	 * For the EQ health check, verify that the driver received the heartbeat EQ event
+	 * in order to validate that the EQ is working.
+	 */
+	hl_device_eq_heartbeat(hdev);
+
 	if (!hdev->asic_funcs->send_heartbeat(hdev))
 		goto reschedule;
 
@@ -1062,7 +1128,15 @@ static int device_late_init(struct hl_device *hdev)
 	hdev->high_pll = hdev->asic_prop.high_pll;
 
 	if (hdev->heartbeat) {
+		/*
+		 * Before sending a heartbeat, the driver checks if an EQ event was received.
+		 * For the first schedule we need to set the indication to true; for the next
+		 * ones, this indication will be true only if an EQ event was sent by FW.
+		 */
+		hdev->eq_heartbeat_received = true;
+
 		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+
 		schedule_delayed_work(&hdev->work_heartbeat,
 				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 	}
@@ -1302,18 +1376,18 @@ disable_device:
 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
 {
 	struct task_struct *task = NULL;
-	struct list_head *fd_list;
-	struct hl_fpriv	*hpriv;
-	struct mutex *fd_lock;
+	struct list_head *hpriv_list;
+	struct hl_fpriv *hpriv;
+	struct mutex *hpriv_lock;
 	u32 pending_cnt;
 
-	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
-	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+	hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
 
 	/* Giving time for user to close FD, and for processes that are inside
 	 * hl_device_open to finish
 	 */
-	if (!list_empty(fd_list))
+	if (!list_empty(hpriv_list))
 		ssleep(1);
 
 	if (timeout) {
@@ -1329,12 +1403,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 		}
 	}
 
-	mutex_lock(fd_lock);
+	mutex_lock(hpriv_lock);
 
 	/* This section must be protected because we are dereferencing
 	 * pointers that are freed if the process exits
 	 */
-	list_for_each_entry(hpriv, fd_list, dev_node) {
+	list_for_each_entry(hpriv, hpriv_list, dev_node) {
 		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
 		if (task) {
 			dev_info(hdev->dev, "Killing user process pid=%d\n",
@@ -1344,17 +1418,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 
 			put_task_struct(task);
 		} else {
-			/*
-			 * If we got here, it means that process was killed from outside the driver
-			 * right after it started looping on fd_list and before get_pid_task, thus
-			 * we don't need to kill it.
-			 */
 			dev_dbg(hdev->dev,
-				"Can't get task struct for user process, assuming process was killed from outside the driver\n");
+				"Can't get task struct for user process %d, process was killed from outside the driver\n",
+				pid_nr(hpriv->taskpid));
 		}
 	}
 
-	mutex_unlock(fd_lock);
+	mutex_unlock(hpriv_lock);
 
 	/*
 	 * We killed the open users, but that doesn't mean they are closed.
@@ -1366,7 +1436,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool
 	 */
 
 wait_for_processes:
-	while ((!list_empty(fd_list)) && (pending_cnt)) {
+	while ((!list_empty(hpriv_list)) && (pending_cnt)) {
 		dev_dbg(hdev->dev,
 			"Waiting for all unmap operations to finish before hard reset\n");
 
@@ -1376,7 +1446,7 @@ wait_for_processes:
 	}
 
 	/* All processes exited successfully */
-	if (list_empty(fd_list))
+	if (list_empty(hpriv_list))
 		return 0;
 
 	/* Give up waiting for processes to exit */
@@ -1390,17 +1460,17 @@ wait_for_processes:
 
 static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
 {
-	struct list_head *fd_list;
+	struct list_head *hpriv_list;
 	struct hl_fpriv *hpriv;
-	struct mutex *fd_lock;
+	struct mutex *hpriv_lock;
 
-	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
-	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
+	hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
+	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
 
-	mutex_lock(fd_lock);
-	list_for_each_entry(hpriv, fd_list, dev_node)
+	mutex_lock(hpriv_lock);
+	list_for_each_entry(hpriv, hpriv_list, dev_node)
 		hpriv->hdev = NULL;
-	mutex_unlock(fd_lock);
+	mutex_unlock(hpriv_lock);
 }
 
 static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
@@ -1916,7 +1986,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
 	}
 
 	ctx = hl_get_compute_ctx(hdev);
-	if (!ctx || !ctx->hpriv->notifier_event.eventfd)
+	if (!ctx)
+		goto device_reset;
+
+	/*
+	 * There is no point in postponing the reset if user is not registered for events.
+	 * However if no eventfd_ctx exists but the device release watchdog is already scheduled, it
+	 * just implies that user has unregistered as part of handling a previous event. In this
+	 * case an immediate reset is not required.
+	 */
+	if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
 		goto device_reset;
 
 	/* Schedule the device release watchdog work unless reset is already in progress or if the
@@ -1928,8 +2007,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
 		goto device_reset;
 	}
 
-	if (hdev->reset_info.watchdog_active)
+	if (hdev->reset_info.watchdog_active) {
+		hdev->device_release_watchdog_work.flags |= flags;
 		goto out;
+	}
 
 	hdev->device_release_watchdog_work.flags = flags;
 	dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
@@ -1990,59 +2071,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
 		hl_notifier_event_send(&hpriv->notifier_event, event_mask);
 
 	mutex_unlock(&hdev->fpriv_list_lock);
-
-	/* control device */
-	mutex_lock(&hdev->fpriv_ctrl_list_lock);
-
-	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
-		hl_notifier_event_send(&hpriv->notifier_event, event_mask);
-
-	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
-}
-
-static int create_cdev(struct hl_device *hdev)
-{
-	char *name;
-	int rc;
-
-	hdev->cdev_idx = hdev->id / 2;
-
-	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
-	if (!name) {
-		rc = -ENOMEM;
-		goto out_err;
-	}
-
-	/* Initialize cdev and device structures */
-	rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
-				&hdev->cdev, &hdev->dev);
-
-	kfree(name);
-
-	if (rc)
-		goto out_err;
-
-	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
-	if (!name) {
-		rc = -ENOMEM;
-		goto free_dev;
-	}
-
-	/* Initialize cdev and device structures for control device */
-	rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
-				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
-
-	kfree(name);
-
-	if (rc)
-		goto free_dev;
-
-	return 0;
-
-free_dev:
-	put_device(hdev->dev);
-out_err:
-	return rc;
 }
 
 /*
@@ -2057,16 +2085,14 @@ out_err:
 int hl_device_init(struct hl_device *hdev)
 {
 	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
+	struct hl_ts_free_jobs *free_jobs_data;
 	bool expose_interfaces_on_err = false;
-
-	rc = create_cdev(hdev);
-	if (rc)
-		goto out_disabled;
+	void *p;
 
 	/* Initialize ASIC function pointers and perform early init */
 	rc = device_early_init(hdev);
 	if (rc)
-		goto free_dev;
+		goto out_disabled;
 
 	user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
 				hdev->asic_prop.user_interrupt_count;
@@ -2078,15 +2104,43 @@ int hl_device_init(struct hl_device *hdev)
 			rc = -ENOMEM;
 			goto early_fini;
 		}
+
+		/* Timestamp records supported only if CQ supported in device */
+		if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
+			for (i = 0 ; i < user_interrupt_cnt ; i++) {
+				p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
+						sizeof(struct timestamp_reg_free_node));
+				if (!p) {
+					rc = -ENOMEM;
+					goto free_usr_intr_mem;
+				}
+				free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
+				free_jobs_data->free_nodes_pool = p;
+				free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
+				free_jobs_data->next_avail_free_node_idx = 0;
+			}
+		}
+	}
+
+	free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
+	p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
+				sizeof(struct timestamp_reg_free_node));
+	if (!p) {
+		rc = -ENOMEM;
+		goto free_usr_intr_mem;
 	}
 
+	free_jobs_data->free_nodes_pool = p;
+	free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
+	free_jobs_data->next_avail_free_node_idx = 0;
+
 	/*
 	 * Start calling ASIC initialization. First S/W then H/W and finally
 	 * late init
 	 */
 	rc = hdev->asic_funcs->sw_init(hdev);
 	if (rc)
-		goto free_usr_intr_mem;
+		goto free_common_usr_intr_mem;
 
 
 	/* initialize completion structure for multi CS wait */
@@ -2253,6 +2307,14 @@ int hl_device_init(struct hl_device *hdev)
 	 * From here there is no need to expose them in case of an error.
 	 */
 	expose_interfaces_on_err = false;
+
+	rc = drm_dev_register(&hdev->drm, 0);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
+		rc = 0;
+		goto out_disabled;
+	}
+
 	rc = cdev_sysfs_debugfs_add(hdev);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
@@ -2284,8 +2346,6 @@ int hl_device_init(struct hl_device *hdev)
 		"Successfully added device %s to habanalabs driver\n",
 		dev_name(&(hdev)->pdev->dev));
 
-	hdev->init_done = true;
-
 	/* After initialization is done, we are ready to receive events from
 	 * the F/W. We can't do it before because we will ignore events and if
 	 * those events are fatal, we won't know about it and the device will
@@ -2293,6 +2353,8 @@ int hl_device_init(struct hl_device *hdev)
 	 */
 	hdev->asic_funcs->enable_events_from_fw(hdev);
 
+	hdev->init_done = true;
+
 	return 0;
 
 cb_pool_fini:
@@ -2317,19 +2379,27 @@ hw_queues_destroy:
 	hl_hw_queues_destroy(hdev);
 sw_fini:
 	hdev->asic_funcs->sw_fini(hdev);
+free_common_usr_intr_mem:
+	vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
 free_usr_intr_mem:
-	kfree(hdev->user_interrupt);
+	if (user_interrupt_cnt) {
+		for (i = 0 ; i < user_interrupt_cnt ; i++) {
+			if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
+				break;
+			vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
+		}
+		kfree(hdev->user_interrupt);
+	}
 early_fini:
 	device_early_fini(hdev);
-free_dev:
-	put_device(hdev->dev_ctrl);
-	put_device(hdev->dev);
 out_disabled:
 	hdev->disabled = true;
-	if (expose_interfaces_on_err)
+	if (expose_interfaces_on_err) {
+		drm_dev_register(&hdev->drm, 0);
 		cdev_sysfs_debugfs_add(hdev);
-	dev_err(&hdev->pdev->dev,
-		"Failed to initialize hl%d. Device %s is NOT usable !\n",
+	}
+
+	pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
 		hdev->cdev_idx, dev_name(&hdev->pdev->dev));
 
 	return rc;
@@ -2344,12 +2414,13 @@ out_disabled:
  */
 void hl_device_fini(struct hl_device *hdev)
 {
+	u32 user_interrupt_cnt;
 	bool device_in_reset;
 	ktime_t timeout;
 	u64 reset_sec;
 	int i, rc;
 
-	dev_info(hdev->dev, "Removing device\n");
+	dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
 
 	hdev->device_fini_pending = 1;
 	flush_delayed_work(&hdev->device_reset_work.reset_work);
@@ -2425,14 +2496,14 @@ void hl_device_fini(struct hl_device *hdev)
 	hdev->process_kill_trial_cnt = 0;
 	rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
 	if (rc) {
-		dev_crit(hdev->dev, "Failed to kill all open processes\n");
+		dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
 		device_disable_open_processes(hdev, false);
 	}
 
 	hdev->process_kill_trial_cnt = 0;
 	rc = device_kill_open_processes(hdev, 0, true);
 	if (rc) {
-		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
+		dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
 		device_disable_open_processes(hdev, true);
 	}
 
@@ -2464,7 +2535,20 @@ void hl_device_fini(struct hl_device *hdev)
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		hl_cq_fini(hdev, &hdev->completion_queue[i]);
 	kfree(hdev->completion_queue);
-	kfree(hdev->user_interrupt);
+
+	user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
+					hdev->asic_prop.user_interrupt_count;
+
+	if (user_interrupt_cnt) {
+		if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
+			for (i = 0 ; i < user_interrupt_cnt ; i++)
+				vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
+		}
+
+		kfree(hdev->user_interrupt);
+	}
+
+	vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
 
 	hl_hw_queues_destroy(hdev);
 
@@ -2475,6 +2559,7 @@ void hl_device_fini(struct hl_device *hdev)
 
 	/* Hide devices and sysfs/debugfs files from user */
 	cdev_sysfs_debugfs_remove(hdev);
+	drm_dev_unregister(&hdev->drm);
 
 	hl_debugfs_device_fini(hdev);
 
@@ -2690,6 +2775,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
 		*info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
 }
 
+void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
+{
+	struct engine_err_info *info = &hdev->captured_err_info.engine_err;
+
+	/* Capture only the first engine error */
+	if (atomic_cmpxchg(&info->event_detected, 0, 1))
+		return;
+
+	info->event.timestamp = ktime_to_ns(ktime_get());
+	info->event.engine_id = engine_id;
+	info->event.error_count = error_count;
+	info->event_info_available = true;
+}
+
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
 {
 	vfree(captured_err_info->page_fault_info.user_mappings);
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index acbc1a6b5cb1..47e8384134aa 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 
 #include <linux/firmware.h>
 #include <linux/crc32.h>
@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		err_exists = true;
 	}
 
+	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+		dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
+		err_exists = true;
+	}
+
 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
 		/* Ignore this bit, don't prevent driver loading */
 		dev_dbg(hdev->dev, "device unusable status is set\n");
@@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 		dev_err(hdev->dev,
 			"Device boot progress - Stuck in preboot after security initialization\n");
 		break;
+	case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
+		dev_err(hdev->dev,
+			"Device boot progress - Stuck in preparation for shutdown\n");
+		break;
 	default:
 		dev_err(hdev->dev,
 			"Device boot progress - Invalid or unexpected status code %d\n", status);
@@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 {
 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
-	u32 status;
-	int rc;
+	u32 status = 0, timeout;
+	int rc, tries = 1;
+	bool preboot_still_runs;
 
 	/* Need to check two possible scenarios:
 	 *
@@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 	 * All other status values - for older firmwares where the uboot was
 	 * loaded from the FLASH
 	 */
+	timeout = pre_fw_load->wait_for_preboot_timeout;
+retry:
 	rc = hl_poll_timeout(
 		hdev,
 		pre_fw_load->cpu_boot_status_reg,
@@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 		(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
 		(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
 		hdev->fw_poll_interval_usec,
-		pre_fw_load->wait_for_preboot_timeout);
+		timeout);
+	/*
+	 * If F/W reports that preboot still runs (e.g. "security-ready"), it might take longer.
+	 * If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
+	 * with that timeout.
+	 */
+	preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
+				status == CPU_BOOT_STATUS_IN_PREBOOT ||
+				status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
+				status == CPU_BOOT_STATUS_DRAM_RDY);
+
+	if (rc && tries && preboot_still_runs) {
+		tries--;
+		if (pre_fw_load->wait_for_preboot_extended_timeout) {
+			timeout = pre_fw_load->wait_for_preboot_extended_timeout;
+			goto retry;
+		}
+	}
 
 	if (rc) {
 		detect_cpu_boot_status(hdev, status);
@@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
 		struct lkd_fw_binning_info *binning_info;
 
-		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
+		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
+							sizeof(struct lkd_msg_comms));
 		if (rc)
 			goto protocol_err;
 
@@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 				hdev->decoder_binning, hdev->rotator_binning);
 		}
 
+		if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
+			hdev->asic_prop.reserved_fw_mem_size =
+				le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
+		}
+
 		return 0;
 	}
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 2f027d5a8206..1655c101c705 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2016-2022 HabanaLabs, Ltd.
+ * Copyright 2016-2023 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -8,7 +8,7 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "../include/common/cpucp_if.h"
+#include <linux/habanalabs/cpucp_if.h>
 #include "../include/common/qman_if.h"
 #include "../include/hw_ip/mmu/mmu_general.h"
 #include <uapi/drm/habanalabs_accel.h>
@@ -29,6 +29,9 @@
 #include <linux/coresight.h>
 #include <linux/dma-buf.h>
 
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
 #include "security.h"
 
 #define HL_NAME				"habanalabs"
@@ -82,8 +85,6 @@ struct hl_fpriv;
 
 #define HL_PCI_ELBI_TIMEOUT_MSEC	10 /* 10ms */
 
-#define HL_SIM_MAX_TIMEOUT_US		100000000 /* 100s */
-
 #define HL_INVALID_QUEUE		UINT_MAX
 
 #define HL_COMMON_USER_CQ_INTERRUPT_ID	0xFFF
@@ -103,6 +104,8 @@ struct hl_fpriv;
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+#define TIMESTAMP_FREE_NODES_NUM	512
+
 /**
  * enum hl_mmu_page_table_location - mmu page table location
  * @MMU_DR_PGT: page-table is located on device DRAM.
@@ -154,6 +157,11 @@ enum hl_mmu_page_table_location {
 #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
 	hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
 
+#define hl_dma_map_sgtable(hdev, sgt, dir) \
+	hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__)
+#define hl_dma_unmap_sgtable(hdev, sgt, dir) \
+	hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__)
+
 /*
  * Reset Flags
  *
@@ -545,8 +553,7 @@ struct hl_hints_range {
  *              allocated with huge pages.
  * @hints_dram_reserved_va_range: dram hint addresses reserved range.
  * @hints_host_reserved_va_range: host hint addresses reserved range.
- * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
- *                                      range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -585,7 +592,7 @@ struct hl_hints_range {
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
  * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
- * @dram_page_size: page size for MMU DRAM allocation.
+ * @dram_page_size: The DRAM physical page size.
  * @cfg_size: configuration space size on SRAM.
  * @sram_size: total size of SRAM.
  * @max_asid: maximum number of open contexts (ASIDs).
@@ -641,6 +648,7 @@ struct hl_hints_range {
  * @glbl_err_cause_num: global err cause number.
  * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
  *                 not supported.
+ * @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
@@ -686,9 +694,10 @@ struct hl_hints_range {
  * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
  * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
  * @supports_user_set_page_size: true if user can set the allocation page size.
- * @dma_mask: the dma mask to be set for this device
+ * @dma_mask: the dma mask to be set for this device.
  * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
  * @supports_engine_modes: true if changing engines/engine_cores modes is supported.
+ * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	*hw_queues_props;
@@ -772,6 +781,7 @@ struct asic_fixed_properties {
 	u32				num_of_special_blocks;
 	u32				glbl_err_cause_num;
 	u32				hbw_flush_reg;
+	u32				reserved_fw_mem_size;
 	u16				collective_first_sob;
 	u16				collective_first_mon;
 	u16				sync_stream_first_sob;
@@ -808,6 +818,7 @@ struct asic_fixed_properties {
 	u8				dma_mask;
 	u8				supports_advanced_cpucp_rc;
 	u8				supports_engine_modes;
+	u8				support_dynamic_resereved_fw_size;
 };
 
 /**
@@ -1098,19 +1109,41 @@ enum hl_user_interrupt_type {
 };
 
 /**
+ * struct hl_ts_free_jobs - holds user interrupt ts free nodes related data
+ * @free_nodes_pool: pool of nodes to be used for free timestamp jobs
+ * @free_nodes_length: number of nodes in free_nodes_pool
+ * @next_avail_free_node_idx: index of the next free node in the pool
+ *
+ * the free nodes pool must be protected by the user interrupt lock
+ * to avoid race between different interrupts which are using the same
+ * ts buffer with different offsets.
+ */
+struct hl_ts_free_jobs {
+	struct timestamp_reg_free_node *free_nodes_pool;
+	u32				free_nodes_length;
+	u32				next_avail_free_node_idx;
+};
+
+/**
  * struct hl_user_interrupt - holds user interrupt information
  * @hdev: pointer to the device structure
+ * @ts_free_jobs_data: timestamp free jobs related data
  * @type: user interrupt type
  * @wait_list_head: head to the list of user threads pending on this interrupt
+ * @ts_list_head: head to the list of timestamp records
  * @wait_list_lock: protects wait_list_head
+ * @ts_list_lock: protects ts_list_head
  * @timestamp: last timestamp taken upon interrupt
  * @interrupt_id: msix interrupt id
  */
 struct hl_user_interrupt {
 	struct hl_device		*hdev;
+	struct hl_ts_free_jobs		ts_free_jobs_data;
 	enum hl_user_interrupt_type	type;
 	struct list_head		wait_list_head;
+	struct list_head		ts_list_head;
 	spinlock_t			wait_list_lock;
+	spinlock_t			ts_list_lock;
 	ktime_t				timestamp;
 	u32				interrupt_id;
 };
@@ -1120,11 +1153,15 @@ struct hl_user_interrupt {
  * @free_objects_node: node in the list free_obj_jobs
  * @cq_cb: pointer to cq command buffer to be freed
  * @buf: pointer to timestamp buffer to be freed
+ * @in_use: indicates whether the node is still in use in the workqueue thread.
+ * @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler
  */
 struct timestamp_reg_free_node {
 	struct list_head	free_objects_node;
 	struct hl_cb		*cq_cb;
 	struct hl_mmap_mem_buf	*buf;
+	atomic_t		in_use;
+	u8			dynamic_alloc;
 };
 
 /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
@@ -1133,17 +1170,21 @@ struct timestamp_reg_free_node {
  * @free_obj: workqueue object to free timestamp registration node objects
  * @hdev: pointer to the device structure
  * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
+ * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the
+ *                               interrupt handler.
  */
 struct timestamp_reg_work_obj {
 	struct work_struct	free_obj;
 	struct hl_device	*hdev;
 	struct list_head	*free_obj_head;
+	struct list_head	*dynamic_alloc_free_obj_head;
 };
 
 /* struct timestamp_reg_info - holds the timestamp registration related data.
  * @buf: pointer to the timestamp buffer which include both user/kernel buffers.
  *       relevant only when doing timestamps records registration.
  * @cq_cb: pointer to CQ counter CB.
+ * @interrupt: interrupt on whose wait list the node is hung.
  * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
  *                         relevant only when doing timestamps records
  *                         registration.
@@ -1153,17 +1194,18 @@ struct timestamp_reg_work_obj {
  *          allocating records dynamically.
  */
 struct timestamp_reg_info {
-	struct hl_mmap_mem_buf	*buf;
-	struct hl_cb		*cq_cb;
-	u64			*timestamp_kernel_addr;
-	u8			in_use;
+	struct hl_mmap_mem_buf		*buf;
+	struct hl_cb			*cq_cb;
+	struct hl_user_interrupt	*interrupt;
+	u64				*timestamp_kernel_addr;
+	bool				in_use;
 };
 
 /**
  * struct hl_user_pending_interrupt - holds a context to a user thread
  *                                    pending on an interrupt
  * @ts_reg_info: holds the timestamps registration nodes info
- * @wait_list_node: node in the list of user threads pending on an interrupt
+ * @list_node: node in the list of user threads pending on an interrupt or timestamp
  * @fence: hl fence object for interrupt completion
  * @cq_target_value: CQ target value
  * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
@@ -1171,7 +1213,7 @@ struct timestamp_reg_info {
  */
 struct hl_user_pending_interrupt {
 	struct timestamp_reg_info	ts_reg_info;
-	struct list_head		wait_list_node;
+	struct list_head		list_node;
 	struct hl_fence			fence;
 	u64				cq_target_value;
 	u64				*cq_kernel_addr;
@@ -1370,6 +1412,8 @@ struct dynamic_fw_load_mgr {
  * @boot_err0_reg: boot_err0 register address
  * @boot_err1_reg: boot_err1 register address
  * @wait_for_preboot_timeout: timeout to poll for preboot ready
+ * @wait_for_preboot_extended_timeout: timeout to poll for preboot ready in case where we know
+ *		preboot needs longer time.
  */
 struct pre_fw_load_props {
 	u32 cpu_boot_status_reg;
@@ -1378,6 +1422,7 @@ struct pre_fw_load_props {
 	u32 boot_err0_reg;
 	u32 boot_err1_reg;
 	u32 wait_for_preboot_timeout;
+	u32 wait_for_preboot_extended_timeout;
 };
 
 /**
@@ -1477,11 +1522,9 @@ struct engines_data {
  * @asic_dma_pool_free: free small DMA allocation from pool.
  * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
  * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @asic_dma_unmap_single: unmap a single DMA buffer
- * @asic_dma_map_single: map a single buffer to a DMA
- * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
+ * @dma_unmap_sgtable: DMA unmap scatter-gather table.
+ * @dma_map_sgtable: DMA map scatter-gather table.
  * @cs_parser: parse Command Submission.
- * @asic_dma_map_sgtable: DMA map scatter-gather table.
  * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
  * @update_eq_ci: update event queue CI.
  * @context_switch: called upon ASID context switch.
@@ -1602,18 +1645,11 @@ struct hl_asic_funcs {
 				size_t size, dma_addr_t *dma_handle);
 	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
 				size_t size, void *vaddr);
-	void (*asic_dma_unmap_single)(struct hl_device *hdev,
-				dma_addr_t dma_addr, int len,
-				enum dma_data_direction dir);
-	dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
-				void *addr, int len,
+	void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
-	void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
-				struct sg_table *sgt,
+	int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
 	int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
-	int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
-				enum dma_data_direction dir);
 	void (*add_end_of_cb_packets)(struct hl_device *hdev,
 					void *kernel_address, u32 len,
 					u32 original_len,
@@ -1771,16 +1807,19 @@ struct hl_cs_counters_atomic {
  * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported
  *                where virtual memory is supported.
  * @memhash_hnode: pointer to the memhash node. this object holds the export count.
- * @device_address: physical address of the device's memory. Relevant only
- *                  if phys_pg_pack is NULL (dma-buf was exported from address).
- *                  The total size can be taken from the dmabuf object.
+ * @offset: the offset into the buffer from which the memory is exported.
+ *          Relevant only if virtual memory is supported and phys_pg_pack is being used.
+ * @device_phys_addr: physical address of the device's memory. Relevant only
+ *                    if phys_pg_pack is NULL (dma-buf was exported from address).
+ *                    The total size can be taken from the dmabuf object.
  */
 struct hl_dmabuf_priv {
 	struct dma_buf			*dmabuf;
 	struct hl_ctx			*ctx;
 	struct hl_vm_phys_pg_pack	*phys_pg_pack;
 	struct hl_vm_hash_node		*memhash_hnode;
-	uint64_t			device_address;
+	u64				offset;
+	u64				device_phys_addr;
 };
 
 #define HL_CS_OUTCOME_HISTORY_LEN 256
@@ -1835,6 +1874,7 @@ struct hl_cs_outcome_store {
  * @va_range: holds available virtual addresses for host and dram mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @hw_block_list_lock: protects the HW block memory list.
+ * @ts_reg_lock: timestamp registration ioctls lock.
  * @debugfs_list: node in debugfs list of contexts.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
@@ -1871,6 +1911,7 @@ struct hl_ctx {
 	struct hl_va_range		*va_range[HL_VA_RANGE_TYPE_MAX];
 	struct mutex			mem_hash_lock;
 	struct mutex			hw_block_list_lock;
+	struct mutex			ts_reg_lock;
 	struct list_head		debugfs_list;
 	struct list_head		hw_block_mem_list;
 	struct hl_cs_counters_atomic	cs_counters;
@@ -1917,17 +1958,17 @@ struct hl_ctx_mgr {
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
 struct hl_userptr {
-	enum vm_type		vm_type; /* must be first */
-	struct list_head	job_node;
-	struct page		**pages;
-	unsigned int		npages;
-	struct sg_table		*sgt;
-	enum dma_data_direction dir;
-	struct list_head	debugfs_list;
-	pid_t			pid;
-	u64			addr;
-	u64			size;
-	u8			dma_mapped;
+	enum vm_type			vm_type; /* must be first */
+	struct list_head		job_node;
+	struct page			**pages;
+	unsigned int			npages;
+	struct sg_table			*sgt;
+	enum dma_data_direction		dir;
+	struct list_head		debugfs_list;
+	pid_t				pid;
+	u64				addr;
+	u64				size;
+	u8				dma_mapped;
 };
 
 /**
@@ -2148,7 +2189,6 @@ struct hl_vm_hw_block_list_node {
  * @pages: the physical page array.
  * @npages: num physical pages in the pack.
  * @total_size: total size of all the pages in this list.
- * @exported_size: buffer exported size.
  * @node: used to attach to deletion list that is used when all the allocations are cleared
  *        at the teardown of the context.
  * @mapping_cnt: number of shared mappings.
@@ -2165,7 +2205,6 @@ struct hl_vm_phys_pg_pack {
 	u64			*pages;
 	u64			npages;
 	u64			total_size;
-	u64			exported_size;
 	struct list_head	node;
 	atomic_t		mapping_cnt;
 	u32			asid;
@@ -2250,7 +2289,7 @@ struct hl_notifier_event {
 /**
  * struct hl_fpriv - process information stored in FD private data.
  * @hdev: habanalabs device structure.
- * @filp: pointer to the given file structure.
+ * @file_priv: pointer to the DRM file private data structure.
  * @taskpid: current process ID.
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.
@@ -2265,7 +2304,7 @@ struct hl_notifier_event {
  */
 struct hl_fpriv {
 	struct hl_device		*hdev;
-	struct file			*filp;
+	struct drm_file			*file_priv;
 	struct pid			*taskpid;
 	struct hl_ctx			*ctx;
 	struct hl_ctx_mgr		ctx_mgr;
@@ -2706,6 +2745,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 	usr_intr.type = intr_type; \
 	INIT_LIST_HEAD(&usr_intr.wait_list_head); \
 	spin_lock_init(&usr_intr.wait_list_lock); \
+	INIT_LIST_HEAD(&usr_intr.ts_list_head); \
+	spin_lock_init(&usr_intr.ts_list_lock); \
 })
 
 struct hwmon_chip_info;
@@ -3055,6 +3096,20 @@ struct fw_err_info {
 };
 
 /**
+ * struct engine_err_info - engine error information.
+ * @event: holds information on the event.
+ * @event_detected: if set as 1, then an engine event was discovered for the
+ *                  first time after the driver has finished booting-up.
+ * @event_info_available: indicates that an engine event info is now available.
+ */
+struct engine_err_info {
+	struct hl_info_engine_err_event	event;
+	atomic_t			event_detected;
+	bool				event_info_available;
+};
+
+
+/**
  * struct hl_error_info - holds information collected during an error.
  * @cs_timeout: CS timeout error information.
  * @razwi_info: RAZWI information.
@@ -3062,6 +3117,7 @@ struct fw_err_info {
  * @page_fault_info: page fault information.
  * @hw_err: (fatal) hardware error information.
  * @fw_err: firmware error information.
+ * @engine_err: engine error information.
  */
 struct hl_error_info {
 	struct cs_timeout_info		cs_timeout;
@@ -3070,6 +3126,7 @@ struct hl_error_info {
 	struct page_fault_info		page_fault_info;
 	struct hw_err_info		hw_err;
 	struct fw_err_info		fw_err;
+	struct engine_err_info		engine_err;
 };
 
 /**
@@ -3117,8 +3174,7 @@ struct hl_reset_info {
  *		   (required only for PCI address match mode)
  * @pcie_bar: array of available PCIe bars virtual addresses.
  * @rmmio: configuration area address on SRAM.
- * @hclass: pointer to the habanalabs class.
- * @cdev: related char device.
+ * @drm: related DRM device.
  * @cdev_ctrl: char device for control operations only (INFO IOCTL)
  * @dev: related kernel basic device structure.
  * @dev_ctrl: related kernel device structure for the control device
@@ -3245,8 +3301,7 @@ struct hl_reset_info {
  * @rotator_binning: contains mask of rotators engines that is received from the f/w
  *			which indicates which rotator engines are binned-out(Gaudi3 and above).
  * @id: device minor.
- * @id_control: minor of the control device.
- * @cdev_idx: char device index. Used for setting its name.
+ * @cdev_idx: char device index.
  * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
  *                    addresses.
  * @is_in_dram_scrub: true if dram scrub operation is on going.
@@ -3289,6 +3344,7 @@ struct hl_reset_info {
  *                             device.
  * @supports_ctx_switch: true if a ctx switch is required upon first submission.
  * @support_preboot_binning: true if we support read binning info from preboot.
+ * @eq_heartbeat_received: indication that eq heartbeat event has been received from FW.
  * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing.
  * @fw_components: Controls which f/w components to load to the device. There are multiple f/w
  *                 stages and sometimes we want to stop at a certain stage. Used only for testing.
@@ -3308,8 +3364,7 @@ struct hl_device {
 	u64				pcie_bar_phys[HL_PCI_NUM_BARS];
 	void __iomem			*pcie_bar[HL_PCI_NUM_BARS];
 	void __iomem			*rmmio;
-	struct class			*hclass;
-	struct cdev			cdev;
+	struct drm_device		drm;
 	struct cdev			cdev_ctrl;
 	struct device			*dev;
 	struct device			*dev_ctrl;
@@ -3418,7 +3473,6 @@ struct hl_device {
 	u32				device_release_watchdog_timeout_sec;
 	u32				rotator_binning;
 	u16				id;
-	u16				id_control;
 	u16				cdev_idx;
 	u16				cpu_pci_msb_addr;
 	u8				is_in_dram_scrub;
@@ -3451,6 +3505,7 @@ struct hl_device {
 	u8				reset_upon_device_release;
 	u8				supports_ctx_switch;
 	u8				support_preboot_binning;
+	u8				eq_heartbeat_received;
 
 	/* Parameters for bring-up to be upstreamed */
 	u64				nic_ports_mask;
@@ -3582,6 +3637,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size,
 	return false;
 }
 
+static inline struct hl_device *to_hl_device(struct drm_device *ddev)
+{
+	return container_of(ddev, struct hl_device, drm);
+}
+
 /**
  * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
  * @address: The start address of the area we want to validate.
@@ -3611,8 +3671,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t
 					dma_addr_t *dma_handle, const char *caller);
 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
 					const char *caller);
-int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
-void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir, const char *caller);
+void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
+					enum dma_data_direction dir, const char *caller);
+int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+				enum dma_data_direction dir);
+void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
 int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
@@ -3620,7 +3685,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type);
 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
 			u64 addr, u64 *val, enum debugfs_access_type acc_type);
-int hl_device_open(struct inode *inode, struct file *filp);
+
+int hl_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int hl_device_open(struct drm_device *drm, struct drm_file *file_priv);
+void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv);
+
 int hl_device_open_ctrl(struct inode *inode, struct file *filp);
 bool hl_device_operational(struct hl_device *hdev,
 		enum hl_device_status *status);
@@ -3652,8 +3722,9 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 irqreturn_t hl_irq_handler_eq(int irq, void *arg);
 irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
-irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
+irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg);
 irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg);
+irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg);
 u32 hl_cq_inc_ptr(u32 ptr);
 
 int hl_asid_init(struct hl_device *hdev);
@@ -3944,16 +4015,14 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_
 				u64 *event_mask);
 void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask);
 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
+void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
 
 #ifdef CONFIG_DEBUG_FS
 
-void hl_debugfs_init(void);
-void hl_debugfs_fini(void);
 int hl_debugfs_device_init(struct hl_device *hdev);
 void hl_debugfs_device_fini(struct hl_device *hdev);
 void hl_debugfs_add_device(struct hl_device *hdev);
-void hl_debugfs_remove_device(struct hl_device *hdev);
 void hl_debugfs_add_file(struct hl_fpriv *hpriv);
 void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
 void hl_debugfs_add_cb(struct hl_cb *cb);
@@ -3972,14 +4041,6 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
 
 #else
 
-static inline void __init hl_debugfs_init(void)
-{
-}
-
-static inline void hl_debugfs_fini(void)
-{
-}
-
 static inline int hl_debugfs_device_init(struct hl_device *hdev)
 {
 	return 0;
@@ -3993,10 +4054,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev)
 {
 }
 
-static inline void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-}
-
 static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 {
 }
@@ -4108,11 +4165,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset,
 		const u32 pb_blocks[], u32 blocks_array_size);
 
 /* IOCTLs */
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
+int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv);
 
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 7263e84c1a4d..306a5bc9bf89 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -14,6 +14,11 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
+#include <linux/version.h>
+
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_ioctl.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/habanalabs.h>
@@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
 MODULE_LICENSE("GPL v2");
 
 static int hl_major;
-static struct class *hl_class;
 static DEFINE_IDR(hl_devs_idr);
 static DEFINE_MUTEX(hl_devs_idr_lock);
 
@@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
 };
 MODULE_DEVICE_TABLE(pci, ids);
 
+static const struct drm_ioctl_desc hl_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
+};
+
+static const struct file_operations hl_fops = {
+	.owner = THIS_MODULE,
+	.open = accel_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+	.mmap = hl_mmap
+};
+
+static const struct drm_driver hl_driver = {
+	.driver_features = DRIVER_COMPUTE_ACCEL,
+
+	.name = HL_NAME,
+	.desc = HL_DRIVER_DESC,
+	.major = LINUX_VERSION_MAJOR,
+	.minor = LINUX_VERSION_PATCHLEVEL,
+	.patchlevel = LINUX_VERSION_SUBLEVEL,
+	.date = "20190505",
+
+	.fops = &hl_fops,
+	.open = hl_device_open,
+	.postclose = hl_device_release,
+	.ioctls = hl_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
+};
+
 /*
  * get_asic_type - translate device id to asic type
  *
@@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
 }
 
 /*
- * hl_device_open - open function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
+ * hl_device_open() - open function for habanalabs device.
+ * @ddev: pointer to DRM device structure.
+ * @file_priv: pointer to DRM file private data structure.
  *
  * Called when process opens an habanalabs device.
  */
-int hl_device_open(struct inode *inode, struct file *filp)
+int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
 {
+	struct hl_device *hdev = to_hl_device(ddev);
 	enum hl_device_status status;
-	struct hl_device *hdev;
 	struct hl_fpriv *hpriv;
 	int rc;
 
-	mutex_lock(&hl_devs_idr_lock);
-	hdev = idr_find(&hl_devs_idr, iminor(inode));
-	mutex_unlock(&hl_devs_idr_lock);
-
-	if (!hdev) {
-		pr_err("Couldn't find device %d:%d\n",
-			imajor(inode), iminor(inode));
-		return -ENXIO;
-	}
-
 	hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 	if (!hpriv)
 		return -ENOMEM;
 
 	hpriv->hdev = hdev;
-	filp->private_data = hpriv;
-	hpriv->filp = filp;
-
 	mutex_init(&hpriv->notifier_event.lock);
 	mutex_init(&hpriv->restore_phase_mutex);
 	mutex_init(&hpriv->ctx_lock);
 	kref_init(&hpriv->refcount);
-	nonseekable_open(inode, filp);
 
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
 	hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
@@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hdev->last_successful_open_jif = jiffies;
 	hdev->last_successful_open_ktime = ktime_get();
 
+	file_priv->driver_priv = hpriv;
+	hpriv->file_priv = file_priv;
+
 	return 0;
 
 out_err:
@@ -232,7 +260,6 @@ out_err:
 	hl_mem_mgr_fini(&hpriv->mem_mgr);
 	hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
-	filp->private_data = NULL;
 	mutex_destroy(&hpriv->ctx_lock);
 	mutex_destroy(&hpriv->restore_phase_mutex);
 	mutex_destroy(&hpriv->notifier_event.lock);
@@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	 */
 	hpriv->hdev = hdev;
 	filp->private_data = hpriv;
-	hpriv->filp = filp;
 
-	mutex_init(&hpriv->notifier_event.lock);
 	nonseekable_open(inode, filp);
 
 	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
@@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
 	hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
 
 	hdev->major = hl_major;
-	hdev->hclass = hl_class;
 	hdev->memory_scrub = memory_scrub;
 	hdev->reset_on_lockup = reset_on_lockup;
 	hdev->boot_error_status_mask = boot_error_status_mask;
@@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
 	return 0;
 }
 
+static int allocate_device_id(struct hl_device *hdev)
+{
+	int id;
+
+	mutex_lock(&hl_devs_idr_lock);
+	id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
+	mutex_unlock(&hl_devs_idr_lock);
+
+	if (id < 0) {
+		if (id == -ENOSPC)
+			pr_err("too many devices in the system\n");
+		return -EBUSY;
+	}
+
+	hdev->id = id;
+
+	/*
+	 * Firstly initialized with the internal device ID.
+	 * Will be updated later after the DRM device registration to hold the minor ID.
+	 */
+	hdev->cdev_idx = hdev->id;
+
+	return 0;
+}
+
 /**
  * create_hdev - create habanalabs device instance
  *
@@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev)
  */
 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 {
-	int main_id, ctrl_id = 0, rc = 0;
 	struct hl_device *hdev;
+	int rc;
 
 	*dev = NULL;
 
-	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
-	if (!hdev)
-		return -ENOMEM;
+	hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
+	if (IS_ERR(hdev))
+		return PTR_ERR(hdev);
+
+	hdev->dev = hdev->drm.dev;
 
 	/* Will be NULL in case of simulator device */
 	hdev->pdev = pdev;
 
 	/* Assign status description string */
-	strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
-					"in device creation", HL_STR_MAX);
-	strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
-					"in reset after device release", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+				"in device creation", HL_STR_MAX);
+	strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
+				"in reset after device release", HL_STR_MAX);
 
 
 	/* First, we must find out which ASIC are we handling. This is needed
@@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 	if (hdev->asic_type == ASIC_INVALID) {
 		dev_err(&pdev->dev, "Unsupported ASIC\n");
 		rc = -ENODEV;
-		goto free_hdev;
+		goto out_err;
 	}
 
 	copy_kernel_module_params_to_device(hdev);
@@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 
 	fixup_device_params(hdev);
 
-	mutex_lock(&hl_devs_idr_lock);
-
-	/* Always save 2 numbers, 1 for main device and 1 for control.
-	 * They must be consecutive
-	 */
-	main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
-
-	if (main_id >= 0)
-		ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
-					main_id + 2, GFP_KERNEL);
-
-	mutex_unlock(&hl_devs_idr_lock);
-
-	if ((main_id < 0) || (ctrl_id < 0)) {
-		if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
-			pr_err("too many devices in the system\n");
-
-		if (main_id >= 0) {
-			mutex_lock(&hl_devs_idr_lock);
-			idr_remove(&hl_devs_idr, main_id);
-			mutex_unlock(&hl_devs_idr_lock);
-		}
-
-		rc = -EBUSY;
-		goto free_hdev;
-	}
-
-	hdev->id = main_id;
-	hdev->id_control = ctrl_id;
+	rc = allocate_device_id(hdev);
+	if (rc)
+		goto out_err;
 
 	*dev = hdev;
 
 	return 0;
 
-free_hdev:
-	kfree(hdev);
+out_err:
 	return rc;
 }
 
@@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
 	/* Remove device from the device list */
 	mutex_lock(&hl_devs_idr_lock);
 	idr_remove(&hl_devs_idr, hdev->id);
-	idr_remove(&hl_devs_idr, hdev->id_control);
 	mutex_unlock(&hl_devs_idr_lock);
 
-	kfree(hdev);
 }
 
 static int hl_pmops_suspend(struct device *dev)
@@ -691,28 +713,16 @@ static int __init hl_init(void)
 
 	hl_major = MAJOR(dev);
 
-	hl_class = class_create(HL_NAME);
-	if (IS_ERR(hl_class)) {
-		pr_err("failed to allocate class\n");
-		rc = PTR_ERR(hl_class);
-		goto remove_major;
-	}
-
-	hl_debugfs_init();
-
 	rc = pci_register_driver(&hl_pci_driver);
 	if (rc) {
 		pr_err("failed to register pci device\n");
-		goto remove_debugfs;
+		goto remove_major;
 	}
 
 	pr_debug("driver loaded\n");
 
 	return 0;
 
-remove_debugfs:
-	hl_debugfs_fini();
-	class_destroy(hl_class);
 remove_major:
 	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 	return rc;
@@ -725,14 +735,6 @@ static void __exit hl_exit(void)
 {
 	pci_unregister_driver(&hl_pci_driver);
 
-	/*
-	 * Removing debugfs must be after all devices or simulator devices
-	 * have been removed because otherwise we get a bug in the
-	 * debugfs module for referencing NULL objects
-	 */
-	hl_debugfs_fini();
-
-	class_destroy(hl_class);
 	unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 
 	idr_destroy(&hl_devs_idr);
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index 6a45a92344e9..8ef36effb95b 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -17,6 +17,8 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
+#include <asm/msr.h>
+
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
 	[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 
 	time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
 	time_sync.host_time = ktime_get_raw_ns();
+	time_sync.tsc_time = rdtsc();
 
 	return copy_to_user(out, &time_sync,
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
@@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	return rc ? -EFAULT : 0;
 }
 
+static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
+	struct hl_device *hdev = hpriv->hdev;
+	u32 user_buf_size = args->return_size;
+	struct engine_err_info *info;
+	int rc;
+
+	if (!user_buf)
+		return -EINVAL;
+
+	info = &hdev->captured_err_info.engine_err;
+	if (!info->event_info_available)
+		return 0;
+
+	if (user_buf_size < sizeof(struct hl_info_engine_err_event))
+		return -ENOMEM;
+
+	rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
+	return rc ? -EFAULT : 0;
+}
+
 static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
 {
 	void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
@@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_FW_ERR_EVENT:
 		return fw_err_info(hpriv, args);
 
+	case HL_INFO_USER_ENGINE_ERR_EVENT:
+		return engine_err_info(hpriv, args);
+
 	case HL_INFO_DRAM_USAGE:
 		return dram_usage_info(hpriv, args);
 	default:
@@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	return rc;
 }
 
-static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
+
 	return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
 }
 
 static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
 {
+	struct hl_info_args *args = data;
+
+	switch (args->op) {
+	case HL_INFO_GET_EVENTS:
+	case HL_INFO_UNREGISTER_EVENTFD:
+	case HL_INFO_REGISTER_EVENTFD:
+		return -EOPNOTSUPP;
+	default:
+		break;
+	}
+
 	return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
 }
 
-static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
-	struct hl_debug_args *args = data;
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
+	struct hl_debug_args *args = data;
 	enum hl_device_status status;
 
 	int rc = 0;
@@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
 }
 
 #define HL_IOCTL_DEF(ioctl, _func) \
-	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
-
-static const struct hl_ioctl_desc hl_ioctls[] = {
-	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
-};
+	[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
 
 static const struct hl_ioctl_desc hl_ioctls_control[] = {
-	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
+	HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
 };
 
-static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
-		const struct hl_ioctl_desc *ioctl, struct device *dev)
+static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
+			const struct hl_ioctl_desc *ioctl, struct device *dev)
 {
-	struct hl_fpriv *hpriv = filep->private_data;
 	unsigned int nr = _IOC_NR(cmd);
 	char stack_kdata[128] = {0};
 	char *kdata = NULL;
@@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
 		retcode = -EFAULT;
 
 out_err:
-	if (retcode)
-		dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
-			  task_pid_nr(current), cmd, nr);
+	if (retcode) {
+		char task_comm[TASK_COMM_LEN];
+
+		dev_dbg_ratelimited(dev,
+				"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
+				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
+	}
 
 	if (kdata != stack_kdata)
 		kfree(kdata);
@@ -1204,29 +1240,6 @@ out_err:
 	return retcode;
 }
 
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
-{
-	struct hl_fpriv *hpriv = filep->private_data;
-	struct hl_device *hdev = hpriv->hdev;
-	const struct hl_ioctl_desc *ioctl = NULL;
-	unsigned int nr = _IOC_NR(cmd);
-
-	if (!hdev) {
-		pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
-		return -ENODEV;
-	}
-
-	if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
-		ioctl = &hl_ioctls[nr];
-	} else {
-		dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
-			task_pid_nr(current), nr);
-		return -ENOTTY;
-	}
-
-	return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
-}
-
 long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 {
 	struct hl_fpriv *hpriv = filep->private_data;
@@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
 		return -ENODEV;
 	}
 
-	if (nr == _IOC_NR(HL_IOCTL_INFO)) {
-		ioctl = &hl_ioctls_control[nr];
+	if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
+		ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
 	} else {
-		dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
-			task_pid_nr(current), nr);
+		char task_comm[TASK_COMM_LEN];
+
+		dev_dbg_ratelimited(hdev->dev_ctrl,
+				"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
+				task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
 		return -ENOTTY;
 	}
 
-	return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
+	return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
 }
diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c
index b1010d206c2e..978b7f4d5eeb 100644
--- a/drivers/accel/habanalabs/common/irq.c
+++ b/drivers/accel/habanalabs/common/irq.c
@@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
 {
 	struct timestamp_reg_work_obj *job =
 			container_of(work, struct timestamp_reg_work_obj, free_obj);
+	struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
 	struct timestamp_reg_free_node *free_obj, *temp_free_obj;
 	struct list_head *free_list_head = job->free_obj_head;
+
 	struct hl_device *hdev = job->hdev;
 
 	list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
@@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
 
 		hl_mmap_mem_buf_put(free_obj->buf);
 		hl_cb_put(free_obj->cq_cb);
-		kfree(free_obj);
+		atomic_set(&free_obj->in_use, 0);
 	}
 
 	kfree(free_list_head);
+
+	if (dynamic_alloc_free_list_head) {
+		list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
+								free_objects_node) {
+			dev_dbg(hdev->dev,
+				"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
+						free_obj->buf,
+						free_obj->cq_cb);
+
+			hl_mmap_mem_buf_put(free_obj->buf);
+			hl_cb_put(free_obj->cq_cb);
+			list_del(&free_obj->free_objects_node);
+			kfree(free_obj);
+		}
+
+		kfree(dynamic_alloc_free_list_head);
+	}
+
 	kfree(job);
 }
 
@@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
  * list to a dedicated workqueue to do the actual put.
  */
 static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
-						struct list_head **free_list, ktime_t now)
+						struct list_head **free_list,
+						struct list_head **dynamic_alloc_list,
+						struct hl_user_interrupt *intr)
 {
+	struct hl_ts_free_jobs *ts_free_jobs_data;
 	struct timestamp_reg_free_node *free_node;
+	u32 free_node_index;
 	u64 timestamp;
 
+	ts_free_jobs_data = &intr->ts_free_jobs_data;
+	free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
+
 	if (!(*free_list)) {
 		/* Alloc/Init the timestamp registration free objects list */
 		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
@@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
 		INIT_LIST_HEAD(*free_list);
 	}
 
-	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
-	if (!free_node)
-		return -ENOMEM;
+	free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
+	if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
+		dev_dbg(hdev->dev,
+			"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
+				pend->ts_reg_info.buf,
+				pend,
+				intr->interrupt_id);
 
-	timestamp = ktime_to_ns(now);
+		if (!(*dynamic_alloc_list)) {
+			*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+			if (!(*dynamic_alloc_list))
+				return -ENOMEM;
 
-	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
+			INIT_LIST_HEAD(*dynamic_alloc_list);
+		}
+
+		free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
+		if (!free_node)
+			return -ENOMEM;
+
+		free_node->dynamic_alloc = 1;
+	}
 
-	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
-			pend->ts_reg_info.timestamp_kernel_addr,
-			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
+	timestamp = ktime_to_ns(intr->timestamp);
 
-	list_del(&pend->wait_list_node);
+	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
+
+	dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
+			pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
 
-	/* Mark kernel CB node as free */
-	pend->ts_reg_info.in_use = 0;
+	list_del(&pend->list_node);
 
 	/* Putting the refcount for ts_buff and cq_cb objects will be handled
 	 * in workqueue context, just add job to free_list.
 	 */
 	free_node->buf = pend->ts_reg_info.buf;
 	free_node->cq_cb = pend->ts_reg_info.cq_cb;
-	list_add(&free_node->free_objects_node, *free_list);
+
+	if (free_node->dynamic_alloc) {
+		list_add(&free_node->free_objects_node, *dynamic_alloc_list);
+	} else {
+		ts_free_jobs_data->next_avail_free_node_idx =
+				(++free_node_index) % ts_free_jobs_data->free_nodes_length;
+		list_add(&free_node->free_objects_node, *free_list);
+	}
+
+	/* Mark TS record as free */
+	pend->ts_reg_info.in_use = false;
 
 	return 0;
 }
 
-static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
+static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
 {
+	struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
 	struct hl_user_pending_interrupt *pend, *temp_pend;
-	struct list_head *ts_reg_free_list_head = NULL;
 	struct timestamp_reg_work_obj *job;
 	bool reg_node_handle_fail = false;
+	unsigned long flags;
 	int rc;
 
 	/* For registration nodes:
@@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 	 * or in irq handler context at all (since release functions are long and
 	 * might sleep), so we will need to handle that part in workqueue context.
 	 * To avoid handling kmalloc failure which compels us rolling back actions
-	 * and move nodes hanged on the free list back to the interrupt wait list
+	 * and move nodes hung on the free list back to the interrupt ts list
 	 * we always alloc the job of the WQ at the beginning.
 	 */
 	job = kmalloc(sizeof(*job), GFP_ATOMIC);
 	if (!job)
 		return;
 
-	spin_lock(&intr->wait_list_lock);
-	list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
+	spin_lock_irqsave(&intr->ts_list_lock, flags);
+	list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
 		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
 				!pend->cq_kernel_addr) {
-			if (pend->ts_reg_info.buf) {
-				if (!reg_node_handle_fail) {
-					rc = handle_registration_node(hdev, pend,
-							&ts_reg_free_list_head, intr->timestamp);
-					if (rc)
-						reg_node_handle_fail = true;
-				}
-			} else {
-				/* Handle wait target value node */
-				pend->fence.timestamp = intr->timestamp;
-				complete_all(&pend->fence.completion);
+			if (!reg_node_handle_fail) {
+				rc = handle_registration_node(hdev, pend,
+						&ts_reg_free_list_head,
+						&dynamic_alloc_list_head, intr);
+				if (rc)
+					reg_node_handle_fail = true;
 			}
 		}
 	}
-	spin_unlock(&intr->wait_list_lock);
+	spin_unlock_irqrestore(&intr->ts_list_lock, flags);
 
 	if (ts_reg_free_list_head) {
 		INIT_WORK(&job->free_obj, hl_ts_free_objects);
 		job->free_obj_head = ts_reg_free_list_head;
+		job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
 		job->hdev = hdev;
 		queue_work(hdev->ts_free_obj_wq, &job->free_obj);
 	} else {
@@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
 	}
 }
 
+/* Complete waiters on @intr's wait list that have no CQ counter or whose counter hit target */
+static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
+{
+	struct hl_user_pending_interrupt *waiter, *next_waiter;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&intr->wait_list_lock, irq_flags);
+	list_for_each_entry_safe(waiter, next_waiter, &intr->wait_list_head, list_node) {
+		/* Skip a waiter whose CQ counter exists but is still below its target */
+		if (waiter->cq_kernel_addr && *(waiter->cq_kernel_addr) < waiter->cq_target_value)
+			continue;
+		waiter->fence.timestamp = intr->timestamp;
+		complete_all(&waiter->fence.completion);
+	}
+	spin_unlock_irqrestore(&intr->wait_list_lock, irq_flags);
+}
+
 static void handle_tpc_interrupt(struct hl_device *hdev)
 {
 	u64 event_mask;
@@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
 }
 
 /**
- * hl_irq_handler_user_interrupt - irq handler for user interrupts
+ * hl_irq_user_interrupt_handler - irq handler for user interrupts.
  *
  * @irq: irq number
  * @arg: pointer to user interrupt structure
- *
  */
-irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
+irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
 {
 	struct hl_user_interrupt *user_int = arg;
+	struct hl_device *hdev = user_int->hdev;
 
 	user_int->timestamp = ktime_get();
+	switch (user_int->type) {
+	case HL_USR_INTERRUPT_CQ:
+		/* First, handle user waiter threads */
+		handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
+		handle_user_interrupt_wait_list(hdev, user_int);
 
-	return IRQ_WAKE_THREAD;
+		/* Second, handle user timestamp registrations */
+		handle_user_interrupt_ts_list(hdev,  &hdev->common_user_cq_interrupt);
+		handle_user_interrupt_ts_list(hdev, user_int);
+		break;
+	case HL_USR_INTERRUPT_DECODER:
+		handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
+
+		/* Handle decoder interrupt registered on this specific irq */
+		handle_user_interrupt_wait_list(hdev, user_int);
+		break;
+	default:
+		break;
+	}
+
+	return IRQ_HANDLED;
 }
 
 /**
@@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
 	struct hl_user_interrupt *user_int = arg;
 	struct hl_device *hdev = user_int->hdev;
 
+	user_int->timestamp = ktime_get();
 	switch (user_int->type) {
-	case HL_USR_INTERRUPT_CQ:
-		handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
-
-		/* Handle user cq interrupt registered on this specific irq */
-		handle_user_interrupt(hdev, user_int);
-		break;
-	case HL_USR_INTERRUPT_DECODER:
-		handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
-
-		/* Handle decoder interrupt registered on this specific irq */
-		handle_user_interrupt(hdev, user_int);
-		break;
 	case HL_USR_INTERRUPT_TPC:
 		handle_tpc_interrupt(hdev);
 		break;
@@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+/* EQ error interrupt: log the event and call hl_device_cond_reset() asking for a hard reset */
+irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
+{
+	struct hl_device *hdev = arg;
+
+	dev_err(hdev->dev, "EQ error interrupt received\n");
+	hl_device_cond_reset(hdev, HL_DRV_RESET_HARD,
+			HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * hl_irq_handler_eq - irq handler for event queue
  *
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 4fc72a07d2f5..0b8689fe0b64 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
 
 	*p_userptr = userptr;
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto dma_map_err;
@@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
  *                      physical pages
  *
  * This function does the following:
- * - Pin the physical pages related to the given virtual block.
  * - Create a physical page pack from the physical pages related to the given
  *   virtual block.
  */
@@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
 }
 
 static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
-						u64 page_size, u64 exported_size,
+						u64 page_size, u64 exported_size, u64 offset,
 						struct device *dev, enum dma_data_direction dir)
 {
-	u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages;
-	struct asic_fixed_properties *prop;
-	int rc, i, j, nents, cur_page;
+	u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page,
+		left_size_in_dma_seg, device_address, bar_address, start_page;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct scatterlist *sg;
+	unsigned int nents, i;
 	struct sg_table *sgt;
+	bool next_sg_entry;
+	int rc;
 
-	prop = &hdev->asic_prop;
-
-	dma_max_seg_size = dma_get_max_seg_size(dev);
-
-	/* We would like to align the max segment size to PAGE_SIZE, so the
-	 * SGL will contain aligned addresses that can be easily mapped to
-	 * an MMU
-	 */
-	dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+	/* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */
+	dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE);
 	if (dma_max_seg_size < PAGE_SIZE) {
 		dev_err_ratelimited(hdev->dev,
 				"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
@@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
 
-	/* remove export size restrictions in case not explicitly defined */
-	cur_size_to_export = exported_size ? exported_size : (npages * page_size);
-
-	/* If the size of each page is larger than the dma max segment size,
-	 * then we can't combine pages and the number of entries in the SGL
-	 * will just be the
-	 * <number of pages> * <chunks of max segment size in each page>
-	 */
-	if (page_size > dma_max_seg_size) {
-		/* we should limit number of pages according to the exported size */
-		cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size);
-		nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size);
-	} else {
-		cur_npages = npages;
-
-		/* Get number of non-contiguous chunks */
-		for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) {
-			if (pages[i - 1] + page_size != pages[i] ||
-					chunk_size + page_size > dma_max_seg_size) {
-				nents++;
-				chunk_size = page_size;
-				continue;
-			}
+	/* Use the offset to move to the actual first page that is exported */
+	for (start_page = 0 ; start_page < npages ; ++start_page) {
+		if (offset < page_size)
+			break;
 
-			chunk_size += page_size;
-		}
+		/* The offset value was validated so there can't be an underflow */
+		offset -= page_size;
 	}
 
-	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
-	if (rc)
-		goto error_free;
+	/* Calculate the required number of entries for the SG table */
+	curr_page = start_page;
+	nents = 1;
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size - offset;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-	cur_page = 0;
+	while (true) {
+		size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+		left_size_to_export -= size;
+		left_size_in_page -= size;
+		left_size_in_dma_seg -= size;
 
-	if (page_size > dma_max_seg_size) {
-		u64 size_left, cur_device_address = 0;
+		if (!left_size_to_export)
+			break;
 
-		size_left = page_size;
+		if (!left_size_in_page) {
+			/* left_size_to_export is not zero so there must be another page */
+			if (pages[curr_page] + page_size != pages[curr_page + 1])
+				next_sg_entry = true;
 
-		/* Need to split each page into the number of chunks of
-		 * dma_max_seg_size
-		 */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			if (size_left == page_size)
-				cur_device_address =
-					pages[cur_page] - prop->dram_base_address;
-			else
-				cur_device_address += dma_max_seg_size;
+			++curr_page;
+			left_size_in_page = page_size;
+		}
 
-			/* make sure not to export over exported size */
-			chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export);
+		if (!left_size_in_dma_seg) {
+			next_sg_entry = true;
+			left_size_in_dma_seg = dma_max_seg_size;
+		}
 
-			bar_address = hdev->dram_pci_bar_start + cur_device_address;
+		if (next_sg_entry) {
+			++nents;
+			next_sg_entry = false;
+		}
+	}
 
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
+	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
+	if (rc)
+		goto err_free_sgt;
 
-			cur_size_to_export -= chunk_size;
+	/* Prepare the SG table entries */
+	curr_page = start_page;
+	device_address = pages[curr_page] + offset;
+	left_size_to_export = exported_size;
+	left_size_in_page = page_size - offset;
+	left_size_in_dma_seg = dma_max_seg_size;
+	next_sg_entry = false;
 
-			if (size_left > dma_max_seg_size) {
-				size_left -= dma_max_seg_size;
-			} else {
-				cur_page++;
-				size_left = page_size;
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address);
+		chunk_size = 0;
+
+		for ( ; curr_page < npages ; ++curr_page) {
+			size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg);
+			chunk_size += size;
+			left_size_to_export -= size;
+			left_size_in_page -= size;
+			left_size_in_dma_seg -= size;
+
+			if (!left_size_to_export)
+				break;
+
+			if (!left_size_in_page) {
+				/* left_size_to_export is not zero so there must be another page */
+				if (pages[curr_page] + page_size != pages[curr_page + 1]) {
+					device_address = pages[curr_page + 1];
+					next_sg_entry = true;
+				}
+
+				left_size_in_page = page_size;
 			}
-		}
-	} else {
-		/* Merge pages and put them into the scatterlist */
-		for_each_sgtable_dma_sg(sgt, sg, i) {
-			chunk_size = page_size;
-			for (j = cur_page + 1 ; j < cur_npages ; j++) {
-				if (pages[j - 1] + page_size != pages[j] ||
-						chunk_size + page_size > dma_max_seg_size)
-					break;
-
-				chunk_size += page_size;
+
+			if (!left_size_in_dma_seg) {
+				/*
+				 * Skip setting a new device address if already moving to a page
+				 * which is not contiguous with the current page.
+				 */
+				if (!next_sg_entry) {
+					device_address += chunk_size;
+					next_sg_entry = true;
+				}
+
+				left_size_in_dma_seg = dma_max_seg_size;
 			}
 
-			bar_address = hdev->dram_pci_bar_start +
-					(pages[cur_page] - prop->dram_base_address);
+			if (next_sg_entry) {
+				next_sg_entry = false;
+				break;
+			}
+		}
 
-			/* make sure not to export over exported size */
-			chunk_size = min(chunk_size, cur_size_to_export);
-			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
-			if (rc)
-				goto error_unmap;
+		rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+		if (rc)
+			goto err_unmap;
+	}
 
-			cur_size_to_export -= chunk_size;
-			cur_page = j;
-		}
+	/* After looping over all SG elements there should be nothing left to export */
+	if (left_size_to_export) {
+		dev_err(hdev->dev,
+			"left size to export %#llx after initializing %u SG elements\n",
+			left_size_to_export, sgt->nents);
+		rc = -ENOMEM;
+		goto err_unmap;
 	}
 
-	/* Because we are not going to include a CPU list we want to have some
-	 * chance that other users will detect this by setting the orig_nents
-	 * to 0 and using only nents (length of DMA list) when going over the
-	 * sgl
+	/*
+	 * Because we are not going to include a CPU list, we want to have some chance that other
+	 * users will detect this when going over SG table, by setting the orig_nents to 0 and using
+	 * only nents (length of DMA list).
 	 */
 	sgt->orig_nents = 0;
 
+	dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n",
+		nents, dev_name(dev));
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		dev_dbg(hdev->dev,
+			"SG entry %d: address %#llx, length %#x\n",
+			i, sg_dma_address(sg), sg_dma_len(sg));
+
 	return sgt;
 
-error_unmap:
+err_unmap:
 	for_each_sgtable_dma_sg(sgt, sg, i) {
 		if (!sg_dma_len(sg))
 			continue;
 
-		dma_unmap_resource(dev, sg_dma_address(sg),
-					sg_dma_len(sg), dir,
+		dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir,
 					DMA_ATTR_SKIP_CPU_SYNC);
 	}
 
 	sg_free_table(sgt);
 
-error_free:
+err_free_sgt:
 	kfree(sgt);
 	return ERR_PTR(rc);
 }
@@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
 static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 					enum dma_data_direction dir)
 {
+	u64 *pages, npages, page_size, exported_size, offset;
 	struct dma_buf *dma_buf = attachment->dmabuf;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct hl_dmabuf_priv *hl_dmabuf;
@@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
 
 	hl_dmabuf = dma_buf->priv;
 	hdev = hl_dmabuf->ctx->hdev;
-	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
 	if (!attachment->peer2peer) {
 		dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
 		return ERR_PTR(-EPERM);
 	}
 
-	if (phys_pg_pack)
-		sgt = alloc_sgt_from_device_pages(hdev,
-						phys_pg_pack->pages,
-						phys_pg_pack->npages,
-						phys_pg_pack->page_size,
-						phys_pg_pack->exported_size,
-						attachment->dev,
-						dir);
-	else
-		sgt = alloc_sgt_from_device_pages(hdev,
-						&hl_dmabuf->device_address,
-						1,
-						hl_dmabuf->dmabuf->size,
-						0,
-						attachment->dev,
-						dir);
+	exported_size = hl_dmabuf->dmabuf->size;
+	offset = hl_dmabuf->offset;
+	phys_pg_pack = hl_dmabuf->phys_pg_pack;
 
+	if (phys_pg_pack) {
+		pages = phys_pg_pack->pages;
+		npages = phys_pg_pack->npages;
+		page_size = phys_pg_pack->page_size;
+	} else {
+		pages = &hl_dmabuf->device_phys_addr;
+		npages = 1;
+		page_size = hl_dmabuf->dmabuf->size;
+	}
+
+	sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset,
+						attachment->dev, dir);
 	if (IS_ERR(sgt))
 		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
 
@@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
 	hl_ctx_put(ctx);
 
 	/* Paired with get_file() in export_dmabuf() */
-	fput(ctx->hpriv->filp);
+	fput(ctx->hpriv->file_priv->filp);
 
 	kfree(hl_dmabuf);
 }
@@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx,
 	 * released first and only then the compute device.
 	 * Paired with fput() in hl_release_dmabuf().
 	 */
-	get_file(ctx->hpriv->filp);
+	get_file(ctx->hpriv->file_priv->filp);
 
 	*dmabuf_fd = fd;
 
@@ -1876,22 +1898,29 @@ err_dma_buf_put:
 	return rc;
 }
 
-static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size)
+static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset)
 {
-	if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+	if (!PAGE_ALIGNED(addr)) {
 		dev_dbg(hdev->dev,
-			"exported device memory address 0x%llx should be aligned to 0x%lx\n",
-			device_addr, PAGE_SIZE);
+			"exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n",
+			addr, PAGE_SIZE);
 		return -EINVAL;
 	}
 
-	if (size < PAGE_SIZE) {
+	if (!size || !PAGE_ALIGNED(size)) {
 		dev_dbg(hdev->dev,
-			"exported device memory size %llu should be equal to or greater than %lu\n",
+			"exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n",
 			size, PAGE_SIZE);
 		return -EINVAL;
 	}
 
+	if (!PAGE_ALIGNED(offset)) {
+		dev_dbg(hdev->dev,
+			"exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n",
+			offset, PAGE_SIZE);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr
 	u64 bar_address;
 	int rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, 0);
 	if (rc)
 		return rc;
 
 	if (device_addr < prop->dram_user_base_address ||
-				(device_addr + size) > prop->dram_end_address ||
-				(device_addr + size) < device_addr) {
+			(device_addr + size) > prop->dram_end_address ||
+			(device_addr + size) < device_addr) {
 		dev_dbg(hdev->dev,
 			"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
 			device_addr, size);
@@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s
 	u64 bar_address;
 	int i, rc;
 
-	rc = validate_export_params_common(hdev, device_addr, size);
+	rc = validate_export_params_common(hdev, device_addr, size, offset);
 	if (rc)
 		return rc;
 
 	if ((offset + size) > phys_pg_pack->total_size) {
 		dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n",
-				offset, size, phys_pg_pack->total_size);
+			offset, size, phys_pg_pack->total_size);
 		return -EINVAL;
 	}
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-
 		bar_address = hdev->dram_pci_bar_start +
-					(phys_pg_pack->pages[i] - prop->dram_base_address);
+				(phys_pg_pack->pages[i] - prop->dram_base_address);
 
 		if ((bar_address + phys_pg_pack->page_size) >
 				(hdev->dram_pci_bar_start + prop->dram_pci_bar_size) ||
 				(bar_address + phys_pg_pack->page_size) < bar_address) {
 			dev_dbg(hdev->dev,
 				"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
-					phys_pg_pack->pages[i],
-					phys_pg_pack->page_size);
-
+				phys_pg_pack->pages[i], phys_pg_pack->page_size);
 			return -EINVAL;
 		}
 	}
@@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 	struct asic_fixed_properties *prop;
 	struct hl_dmabuf_priv *hl_dmabuf;
 	struct hl_device *hdev;
-	u64 export_addr;
 	int rc;
 
 	hdev = ctx->hdev;
@@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 		return -EINVAL;
 	}
 
-	export_addr = addr + offset;
-
 	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
 	if (!hl_dmabuf)
 		return -ENOMEM;
@@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o
 			rc = PTR_ERR(phys_pg_pack);
 			goto dec_memhash_export_cnt;
 		}
-		rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack);
+		rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack);
 		if (rc)
 			goto dec_memhash_export_cnt;
 
-		phys_pg_pack->exported_size = size;
 		hl_dmabuf->phys_pg_pack = phys_pg_pack;
 		hl_dmabuf->memhash_hnode = hnode;
+		hl_dmabuf->offset = offset;
 	} else {
-		rc = validate_export_params_no_mmu(hdev, export_addr, size);
+		rc = validate_export_params_no_mmu(hdev, addr, size);
 		if (rc)
 			goto err_free_dmabuf_wrapper;
-	}
 
-	hl_dmabuf->device_address = export_addr;
+		hl_dmabuf->device_phys_addr = addr;
+	}
 
 	rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd);
 	if (rc)
@@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in
 	return 0;
 }
 
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	enum hl_device_status status;
 	union hl_mem_args *args = data;
 	struct hl_device *hdev = hpriv->hdev;
@@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 	hl_debugfs_remove_userptr(hdev, userptr);
 
 	if (userptr->dma_mapped)
-		hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
+		hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
 
 	unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
 	kvfree(userptr->pages);
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index 056e2ef44afb..53292d4c15c8 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -63,6 +63,10 @@
 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
 
+MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
+MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
+MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
+
 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
 
 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
@@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 					CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
@@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
 static int gaudi_scrub_device_mem(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
-			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
+	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
 	u64 addr, size, val = hdev->memory_scrub_val;
 	ktime_t timeout;
 	int rc = 0;
@@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 
 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto unpin_memory;
@@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 		return rc;
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
@@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.asic_dma_pool_free = gaudi_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = gaudi_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
 	.update_eq_ci = gaudi_update_eq_ci,
 	.context_switch = gaudi_context_switch,
diff --git a/drivers/accel/habanalabs/gaudi/gaudiP.h b/drivers/accel/habanalabs/gaudi/gaudiP.h
index b8fa724be5a1..831be53bb9d7 100644
--- a/drivers/accel/habanalabs/gaudi/gaudiP.h
+++ b/drivers/accel/habanalabs/gaudi/gaudiP.h
@@ -10,7 +10,7 @@
 
 #include <uapi/drm/habanalabs_accel.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../include/gaudi/gaudi_packets.h"
 #include "../include/gaudi/gaudi.h"
 #include "../include/gaudi/gaudi_async_events.h"
diff --git a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
index 3455b14554c6..1168fefa33f4 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c
@@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
 
 	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
 
+	val = RREG32(base_reg + 0x20);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + 0x304);
 	val |= 0x1000;
 	WREG32(base_reg + 0x304, val);
@@ -580,6 +585,11 @@ static int gaudi_config_etr(struct hl_device *hdev,
 
 	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 20c4583f12b0..819660c684cf 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -66,7 +66,6 @@
 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
-#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
@@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] =
 	"sbte_prtn_intr_4",
 };
 
-static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
-	"i0",
-	"i1",
-	"i2",
-	"i3",
-	"i4",
-};
-
 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
 	"WBC ERR RESP_0",
 	"WBC ERR RESP_1",
@@ -993,6 +984,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
 	"TLP is blocked by RR"
 };
 
+static const int gaudi2_queue_id_to_engine_id[] = {
+	[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
+	[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
+							GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
+							GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
+							GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
+	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
+							GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
+	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_MME,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
+							GAUDI2_DCORE0_ENGINE_ID_TPC_6,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
+							GAUDI2_DCORE1_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
+							GAUDI2_DCORE2_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_0,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_1,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_2,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_3,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_4,
+	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
+							GAUDI2_DCORE3_ENGINE_ID_TPC_5,
+	[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
+	[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
+	[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
+	[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
+	[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
+	[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
+	[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
+	[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
+	[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
+	[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
+	[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
+	[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
+	[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
+	[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
+	[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
+	[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
+	[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
+	[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
+	[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
+	[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
+	[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
+	[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
+	[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
+	[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
+	[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
+	[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
+};
+
 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
@@ -2001,7 +2097,8 @@ enum razwi_event_sources {
 	RAZWI_PDMA,
 	RAZWI_NIC,
 	RAZWI_DEC,
-	RAZWI_ROT
+	RAZWI_ROT,
+	RAZWI_ARC_FARM
 };
 
 struct hbm_mc_error_causes {
@@ -2431,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+	strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
 
 	prop->mme_master_slave_mode = 1;
 
@@ -2884,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
 	}
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
+		strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
+				CARD_NAME_MAX_LEN);
 
 	/* Overwrite binning masks with the actual binning values from F/W */
 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
@@ -4077,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number)
 		return "gaudi2 unexpected error";
 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
 		return "gaudi2 user completion";
+	case GAUDI2_IRQ_NUM_EQ_ERROR:
+		return "gaudi2 eq error";
 	default:
 		return "invalid";
 	}
@@ -4127,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev)
 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
 						gaudi2_irq_name(i), (void *) dec);
 		} else {
-			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-					gaudi2_irq_name(i),
+			rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
 					(void *) &hdev->user_interrupt[dec->core_id]);
 		}
 
@@ -4187,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
-	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
+	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+					gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
+					&hdev->tpc_interrupt);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
 		goto free_dec_irq;
 	}
 
 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
-	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
-			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
+	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
+					gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
 					&hdev->unexpected_error_interrupt);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@@ -4209,16 +4307,23 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
 			i++, j++, user_irq_init_cnt++) {
 
 		irq = pci_irq_vector(hdev->pdev, i);
-		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
-						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
-						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
-
+		rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
+				&hdev->user_interrupt[j]);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
 			goto free_user_irq;
 		}
 	}
 
+	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+	rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
+					IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
+					hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
+		goto free_user_irq;
+	}
+
 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
 
 	return 0;
@@ -4278,6 +4383,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev)
 	}
 
 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
+	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
 }
 
 static void gaudi2_disable_msix(struct hl_device *hdev)
@@ -4314,6 +4420,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
 	free_irq(irq, cq);
 
+	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
+	free_irq(irq, hdev);
+
 	pci_free_irq_vectors(hdev->pdev);
 
 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
@@ -4716,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
+	pre_fw_load->wait_for_preboot_extended_timeout =
+		GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
 }
 
 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
@@ -6157,17 +6268,14 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_
 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
 						u32 poll_timeout_us)
 {
-	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
-	int rc = 0;
+	int rc;
 
 	if (!driver_performs_reset) {
 		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
 			/* set SP to indicate reset request sent to FW */
-			if (dyn_regs->cpu_rst_status)
-				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
-			else
-				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
-			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
+			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
+
+			WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG,
 				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
 
 			/* wait for f/w response */
@@ -6623,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s
 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
-					enum dma_data_direction dir)
-{
-	dma_addr_t dma_addr;
-
-	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
-	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
-		return 0;
-
-	return dma_addr;
-}
-
-static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
-					enum dma_data_direction dir)
-{
-	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
-}
-
 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
 {
 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
@@ -7703,11 +7793,13 @@ static inline bool is_info_event(u32 event)
 	switch (event) {
 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
+	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
 
 	/* return in case of NIC status event - these events are received periodically and not as
 	 * an indication to an error.
 	 */
 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
+	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
 		return true;
 	default:
 		return false;
@@ -7739,21 +7831,34 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 		struct hl_eq_ecc_data *ecc_data)
 {
-	u64 ecc_address = 0, ecc_syndrom = 0;
+	u64 ecc_address = 0, ecc_syndrome = 0;
 	u8 memory_wrapper_idx = 0;
+	bool has_block_id = false;
+	u16 block_id;
+
+	if (!hl_is_fw_sw_ver_below(hdev, 1, 12))
+		has_block_id = true;
 
 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
-	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
+	ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
 
-	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
-		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
-		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
+	if (has_block_id) {
+		block_id = le16_to_cpu(ecc_data->block_id);
+		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
+			ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
+			ecc_data->is_critical);
+	} else {
+		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
+			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
+			ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
+	}
 
 	return !!ecc_data->is_critical;
 }
 
-static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
 {
 	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
 	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
@@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base)
 	dev_info(hdev->dev,
 		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
 		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+
+	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
+		if (arc_cq_ptr) {
+			hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
+			hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
+		} else {
+			hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
+			hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
+		}
+
+		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	}
 }
 
 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
-							u64 qman_base, u32 qid_base)
+						u64 qman_base, u32 qid_base, u64 *event_mask)
 {
 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
 	u64 glbl_sts_addr, arb_err_addr;
@@ -7812,8 +7929,22 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		if (i == QMAN_STREAMS)
-			print_lower_qman_data_on_err(hdev, qman_base);
+		if (i == QMAN_STREAMS && error_count) {
+			/* check for undefined opcode */
+			if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK &&
+					hdev->captured_err_info.undef_opcode.write_enable) {
+				memset(&hdev->captured_err_info.undef_opcode, 0,
+						sizeof(hdev->captured_err_info.undef_opcode));
+
+				hdev->captured_err_info.undef_opcode.write_enable = false;
+				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
+				hdev->captured_err_info.undef_opcode.engine_id =
+							gaudi2_queue_id_to_engine_id[qid_base];
+				*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
+			}
+
+			handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
+		}
 	}
 
 	arb_err_val = RREG32(arb_err_addr);
@@ -7927,6 +8058,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
 	case RAZWI_ROT:
 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
 
+	case RAZWI_ARC_FARM:
+		return GAUDI2_ENGINE_ID_ARC_FARM;
+
 	default:
 		return GAUDI2_ENGINE_ID_SIZE;
 	}
@@ -8036,6 +8170,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
 		sprintf(initiator_name, "ROT_%u", module_idx);
 		break;
+	case RAZWI_ARC_FARM:
+		lbw_rtr_id = DCORE1_RTR5;
+		hbw_rtr_id = DCORE1_RTR7;
+		sprintf(initiator_name, "ARC_FARM_%u", module_idx);
+		break;
 	default:
 		return;
 	}
@@ -8149,11 +8288,11 @@ static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u
 		eng_id[num_of_eng] = razwi_info[i].eng_id;
 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
 		if (!num_of_eng)
-			str_size += snprintf(eng_name + str_size,
+			str_size += scnprintf(eng_name + str_size,
 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
 						razwi_info[i].eng_name);
 		else
-			str_size += snprintf(eng_name + str_size,
+			str_size += scnprintf(eng_name + str_size,
 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
 						razwi_info[i].eng_name);
 		num_of_eng++;
@@ -8475,7 +8614,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 		return 0;
 	}
 
-	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
+	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
+								qid_base, event_mask);
 
 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
@@ -8488,7 +8628,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 	return error_count;
 }
 
-static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
+static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
 {
 	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
 
@@ -8510,6 +8650,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type
 				sts_clr_val);
 	}
 
+	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
 	hl_check_for_glbl_errors(hdev);
 
 	return error_count;
@@ -8649,21 +8790,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event
 	return error_count;
 }
 
-static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
-					u64 intr_cause_data)
+static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
 {
-	int i, error_count = 0;
-
-	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
-		if (intr_cause_data & BIT(i)) {
-			gaudi2_print_event(hdev, event_type, true,
-				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
-			error_count++;
-		}
-
+	/*
+	 * Only a single error cause exists here, and the mechanism that
+	 * reports it is buggy, so there is no point in fetching the cause.
+	 * Just check for global errors and return.
+	 */
 	hl_check_for_glbl_errors(hdev);
 
-	return error_count;
+	return GAUDI2_NA_EVENT_CAUSE;
 }
 
 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
@@ -9460,6 +9596,176 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
 	}
 }
 
+static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
+{
+	enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
+	u16 index;
+
+	switch (event_type) {
+	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
+		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
+		index = event_type - GAUDI2_EVENT_TPC0_QM;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME0_QM:
+		index = 0;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME1_QM:
+		index = 1;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME2_QM:
+		index = 2;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
+	case GAUDI2_EVENT_MME3_QM:
+		index = 3;
+		type = GAUDI2_BLOCK_TYPE_MME;
+		break;
+	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
+	case GAUDI2_EVENT_KDMA_BM_SPMU:
+	case GAUDI2_EVENT_KDMA0_CORE:
+		return GAUDI2_ENGINE_ID_KDMA;
+	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
+	case GAUDI2_EVENT_PDMA0_CORE:
+	case GAUDI2_EVENT_PDMA0_BM_SPMU:
+	case GAUDI2_EVENT_PDMA0_QM:
+		return GAUDI2_ENGINE_ID_PDMA_0;
+	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
+	case GAUDI2_EVENT_PDMA1_CORE:
+	case GAUDI2_EVENT_PDMA1_BM_SPMU:
+	case GAUDI2_EVENT_PDMA1_QM:
+		return GAUDI2_ENGINE_ID_PDMA_1;
+	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
+		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
+		type = GAUDI2_BLOCK_TYPE_DEC;
+		break;
+	case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
+		index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
+		type = GAUDI2_BLOCK_TYPE_DEC;
+		break;
+	case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
+		index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
+		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
+		index = event_type - GAUDI2_EVENT_NIC0_QM0;
+		return GAUDI2_ENGINE_ID_NIC0_0 + index;
+	case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
+		index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
+		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
+	case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
+		index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
+		type = GAUDI2_BLOCK_TYPE_TPC;
+		break;
+	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
+	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
+		return GAUDI2_ENGINE_ID_ROT_0;
+	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
+	case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
+	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
+		return GAUDI2_ENGINE_ID_ROT_1;
+	case GAUDI2_EVENT_HDMA0_BM_SPMU:
+	case GAUDI2_EVENT_HDMA0_QM:
+	case GAUDI2_EVENT_HDMA0_CORE:
+		return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA1_BM_SPMU:
+	case GAUDI2_EVENT_HDMA1_QM:
+	case GAUDI2_EVENT_HDMA1_CORE:
+		return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA2_BM_SPMU:
+	case GAUDI2_EVENT_HDMA2_QM:
+	case GAUDI2_EVENT_HDMA2_CORE:
+		return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA3_BM_SPMU:
+	case GAUDI2_EVENT_HDMA3_QM:
+	case GAUDI2_EVENT_HDMA3_CORE:
+		return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA4_BM_SPMU:
+	case GAUDI2_EVENT_HDMA4_QM:
+	case GAUDI2_EVENT_HDMA4_CORE:
+		return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA5_BM_SPMU:
+	case GAUDI2_EVENT_HDMA5_QM:
+	case GAUDI2_EVENT_HDMA5_CORE:
+		return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
+	case GAUDI2_EVENT_HDMA6_BM_SPMU:
+	case GAUDI2_EVENT_HDMA6_QM:
+	case GAUDI2_EVENT_HDMA6_CORE:
+		return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
+	case GAUDI2_EVENT_HDMA7_BM_SPMU:
+	case GAUDI2_EVENT_HDMA7_QM:
+	case GAUDI2_EVENT_HDMA7_CORE:
+		return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
+	default:
+		break;
+	}
+
+	switch (type) {
+	case GAUDI2_BLOCK_TYPE_TPC:
+		switch (index) {
+		case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
+			return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
+		case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
+			return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
+		case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
+			return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
+		case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
+			return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
+		default:
+			break;
+		}
+		break;
+	case GAUDI2_BLOCK_TYPE_MME:
+		switch (index) {
+		case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
+		case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
+		case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
+		case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
+		default:
+			break;
+		}
+		break;
+	case GAUDI2_BLOCK_TYPE_DEC:
+		switch (index) {
+		case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
+		case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
+		case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
+		case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
+		case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return U16_MAX;
+}
+
+static void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
+{
+	hdev->eq_heartbeat_received = true;
+}
+
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
@@ -9501,7 +9807,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		break;
 
 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
-		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
+		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -9724,8 +10030,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
-		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
-						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
@@ -9875,6 +10180,21 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		is_critical = true;
 		break;
 
+	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
+	case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
+	case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
+		error_count = GAUDI2_NA_EVENT_CAUSE;
+		dev_info_ratelimited(hdev->dev, "%s event received\n",
+					gaudi2_irq_map_table[event_type].name);
+		break;
+
+	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
+		hl_eq_heartbeat_event_handle(hdev);
+		error_count = GAUDI2_NA_EVENT_CAUSE;
+		break;
 	default:
 		if (gaudi2_irq_map_table[event_type].valid) {
 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
@@ -9883,6 +10203,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		}
 	}
 
+	if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
+		hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
+
 	/* Make sure to dump an error in case no error cause was printed so far.
 	 * Note that although we have counted the errors, we use this number as
 	 * a boolean.
@@ -10523,6 +10846,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx)
 {
 	int rc;
 
+	if (ctx->asid == HL_KERNEL_ASID_ID)
+		return 0;
+
 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
 	if (rc)
 		return rc;
@@ -11014,6 +11340,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64
 static void gaudi2_get_msi_info(__le32 *table)
 {
 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
+	table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
 }
 
 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
@@ -11170,11 +11497,9 @@ static const struct hl_asic_funcs gaudi2_funcs = {
 	.asic_dma_pool_free = gaudi2_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
-	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
-	.asic_dma_map_single = gaudi2_dma_map_single,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = gaudi2_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = NULL,
 	.update_eq_ci = gaudi2_update_eq_ci,
 	.context_switch = gaudi2_context_switch,
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index 5f3ce086928e..9b9eef0d97d6 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -10,7 +10,7 @@
 
 #include <uapi/drm/habanalabs_accel.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../include/gaudi2/gaudi2.h"
 #include "../include/gaudi2/gaudi2_packets.h"
 #include "../include/gaudi2/gaudi2_fw_if.h"
@@ -84,6 +84,7 @@
 #define CORESIGHT_TIMEOUT_USEC			100000		/* 100 ms */
 
 #define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC		25000000	/* 25s */
+#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000	/* 85s */
 
 #define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC	10000000	/* 10s */
 
@@ -419,6 +420,7 @@ enum gaudi2_irq_num {
 	GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
 	GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
 	GAUDI2_IRQ_NUM_TPC_ASSERT,
+	GAUDI2_IRQ_NUM_EQ_ERROR,
 	GAUDI2_IRQ_NUM_RESERVED_FIRST,
 	GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
 	GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
index 25b5368f37dd..2423620ff358 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
@@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
 	[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
 	[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
 	[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
-	[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
-	[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
+	[GAUDI2_STM_PSOC_ARC0_CS] = 0,
+	[GAUDI2_STM_PSOC_ARC1_CS] = 0,
 	[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
 	[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
 	[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
@@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
 	[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
 	[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
 	[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
-	[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
-	[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
+	[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
+	[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
 	[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
 	[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
 	[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
@@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
 	[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
-	[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
-	[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
+	[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
+	[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
 	[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
 	[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
@@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
 	[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
 	[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
 	[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
-	[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
-	[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
-	[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
-	[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
+	[GAUDI2_BMON_PSOC_ARC0_0] = 0,
+	[GAUDI2_BMON_PSOC_ARC0_1] = 0,
+	[GAUDI2_BMON_PSOC_ARC1_0] = 0,
+	[GAUDI2_BMON_PSOC_ARC1_1] = 0,
 	[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
 	[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
 	[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
@@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
 	[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
 	[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
-	[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
-	[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
+	[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
+	[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
 	[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
 	[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
@@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
 	if (rc)
 		return -EIO;
 
+	val = RREG32(base_reg + mmETF_CTL_OFFSET);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + mmETF_FFCR_OFFSET);
 	val |= 0x1000;
 	WREG32(base_reg + mmETF_FFCR_OFFSET, val);
@@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
 		if (!input)
 			return -EINVAL;
 
+		val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
+		if (val) {
+			val = ffs(val);
+			WREG32(base_reg + mmETF_PSCR_OFFSET, val);
+		} else {
+			WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
+		}
+
 		WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
 		WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
 		WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
-		WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
 		WREG32(base_reg + mmETF_CTL_OFFSET, 1);
 	} else {
 		WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
@@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
 	if (rc)
 		return -EIO;
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);
@@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
 		 * set enabled events mask based on input->event_types_num
 		 */
 		event_mask = 0x80000000;
-		event_mask |= GENMASK(input->event_types_num, 0);
+		if (input->event_types_num)
+			event_mask |= GENMASK(input->event_types_num - 1, 0);
 
 		WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
 	} else {
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
index 2742b1f801eb..34bf80c5a44b 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c
@@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
 	mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
 	mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
 	mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
+	mmDCORE0_TPC0_CFG_TPC_COUNT,
 	mmDCORE0_TPC0_CFG_TPC_ID,
 	mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
 	mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
@@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
 	 * - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
 	 *
 	 * If F/W security is not enabled:
-	 * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
+	 * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
 	 */
 	u64 lbw_range_min_short[] = {
 		mmNIC0_TX_AXUSER_BASE,
@@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
 		mmNIC10_TX_AXUSER_BASE,
 		mmNIC11_TX_AXUSER_BASE,
 		mmPSOC_I2C_M0_BASE,
-		mmPSOC_EFUSE_BASE
+		mmPSOC_GPIO0_BASE
 	};
 	u64 lbw_range_max_short[] = {
 		mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
@@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
  */
 static int gaudi2_init_protection_bits(struct hl_device *hdev)
 {
+	u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u32 instance_offset;
 	int rc = 0;
@@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
 	/* PSOC.
 	 * Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
 	 * protected by privileged RR.
+	 * For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
+	 * cores to raise events towards F/W.
 	 */
+	engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
+	if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
+			engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
+		user_regs_array = &engine_core_intr_reg;
+		user_regs_array_size = 1;
+	} else {
+		dev_err(hdev->dev,
+			"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
+			engine_core_intr_reg);
+	}
+
 	rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
 			HL_PB_SINGLE_INSTANCE, HL_PB_NA,
 			gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
-			NULL, HL_PB_NA);
+			user_regs_array, user_regs_array_size);
 
 	if (!hdev->asic_prop.fw_security_enabled)
 		rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index 7c685e6075f6..1322cb330c57 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
 
-	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+	strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 		CARD_NAME_MAX_LEN);
 
 	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
@@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 
 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
+	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
 	if (rc) {
 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
 		goto unpin_memory;
@@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	}
 
 	if (!strlen(prop->cpucp_info.card_name))
-		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+		strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 				CARD_NAME_MAX_LEN);
 
 	return 0;
@@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
 	.asic_dma_pool_free = goya_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
-	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
+	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
 	.cs_parser = goya_cs_parser,
-	.asic_dma_map_sgtable = hl_dma_map_sgtable,
+	.dma_map_sgtable = hl_asic_dma_map_sgtable,
 	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
 	.update_eq_ci = goya_update_eq_ci,
 	.context_switch = goya_context_switch,
diff --git a/drivers/accel/habanalabs/goya/goyaP.h b/drivers/accel/habanalabs/goya/goyaP.h
index 5df3d30b91fd..194c2ae157cd 100644
--- a/drivers/accel/habanalabs/goya/goyaP.h
+++ b/drivers/accel/habanalabs/goya/goyaP.h
@@ -9,8 +9,8 @@
 #define GOYAP_H_
 
 #include <uapi/drm/habanalabs_accel.h>
+#include <linux/habanalabs/hl_boot_if.h>
 #include "../common/habanalabs.h"
-#include "../include/common/hl_boot_if.h"
 #include "../include/goya/goya_packets.h"
 #include "../include/goya/goya.h"
 #include "../include/goya/goya_async_events.h"
diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c
index a6d6cc38bcd8..41cae5fd843b 100644
--- a/drivers/accel/habanalabs/goya/goya_coresight.c
+++ b/drivers/accel/habanalabs/goya/goya_coresight.c
@@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
 
 	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
 
+	val = RREG32(base_reg + 0x20);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(base_reg + 0x304);
 	val |= 0x1000;
 	WREG32(base_reg + 0x304, val);
@@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
 
 	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
+	val = RREG32(mmPSOC_ETR_CTL);
+
+	if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
+		return 0;
+
 	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
 	WREG32(mmPSOC_ETR_FFCR, val);
diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h
deleted file mode 100644
index 33807b839c37..000000000000
--- a/drivers/accel/habanalabs/include/common/cpucp_if.h
+++ /dev/null
@@ -1,1401 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2020-2022 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef CPUCP_IF_H
-#define CPUCP_IF_H
-
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-#include "hl_boot_if.h"
-
-#define NUM_HBM_PSEUDO_CH				2
-#define NUM_HBM_CH_PER_DEV				8
-#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT		0
-#define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK		0x00000001
-#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_SHIFT		1
-#define CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK		0x00000002
-#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_SHIFT		2
-#define CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK		0x00000004
-#define CPUCP_PKT_HBM_ECC_INFO_DERR_SHIFT		3
-#define CPUCP_PKT_HBM_ECC_INFO_DERR_MASK		0x00000008
-#define CPUCP_PKT_HBM_ECC_INFO_SERR_SHIFT		4
-#define CPUCP_PKT_HBM_ECC_INFO_SERR_MASK		0x00000010
-#define CPUCP_PKT_HBM_ECC_INFO_TYPE_SHIFT		5
-#define CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK		0x00000020
-#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT		6
-#define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK		0x000007C0
-
-#define PLL_MAP_MAX_BITS	128
-#define PLL_MAP_LEN		(PLL_MAP_MAX_BITS / 8)
-
-/*
- * info of the pkt queue pointers in the first async occurrence
- */
-struct cpucp_pkt_sync_err {
-	__le32 pi;
-	__le32 ci;
-};
-
-struct hl_eq_hbm_ecc_data {
-	/* SERR counter */
-	__le32 sec_cnt;
-	/* DERR counter */
-	__le32 dec_cnt;
-	/* Supplemental Information according to the mask bits */
-	__le32 hbm_ecc_info;
-	/* Address in hbm where the ecc happened */
-	__le32 first_addr;
-	/* SERR continuous address counter */
-	__le32 sec_cont_cnt;
-	__le32 pad;
-};
-
-/*
- * EVENT QUEUE
- */
-
-struct hl_eq_header {
-	__le32 reserved;
-	__le32 ctl;
-};
-
-struct hl_eq_ecc_data {
-	__le64 ecc_address;
-	__le64 ecc_syndrom;
-	__u8 memory_wrapper_idx;
-	__u8 is_critical;
-	__u8 pad[6];
-};
-
-enum hl_sm_sei_cause {
-	SM_SEI_SO_OVERFLOW,
-	SM_SEI_LBW_4B_UNALIGNED,
-	SM_SEI_AXI_RESPONSE_ERR
-};
-
-struct hl_eq_sm_sei_data {
-	__le32 sei_log;
-	/* enum hl_sm_sei_cause */
-	__u8 sei_cause;
-	__u8 pad[3];
-};
-
-enum hl_fw_alive_severity {
-	FW_ALIVE_SEVERITY_MINOR,
-	FW_ALIVE_SEVERITY_CRITICAL
-};
-
-struct hl_eq_fw_alive {
-	__le64 uptime_seconds;
-	__le32 process_id;
-	__le32 thread_id;
-	/* enum hl_fw_alive_severity */
-	__u8 severity;
-	__u8 pad[7];
-};
-
-struct hl_eq_intr_cause {
-	__le64 intr_cause_data;
-};
-
-struct hl_eq_pcie_drain_ind_data {
-	struct hl_eq_intr_cause intr_cause;
-	__le64 drain_wr_addr_lbw;
-	__le64 drain_rd_addr_lbw;
-	__le64 drain_wr_addr_hbw;
-	__le64 drain_rd_addr_hbw;
-};
-
-struct hl_eq_razwi_lbw_info_regs {
-	__le32 rr_aw_razwi_reg;
-	__le32 rr_aw_razwi_id_reg;
-	__le32 rr_ar_razwi_reg;
-	__le32 rr_ar_razwi_id_reg;
-};
-
-struct hl_eq_razwi_hbw_info_regs {
-	__le32 rr_aw_razwi_hi_reg;
-	__le32 rr_aw_razwi_lo_reg;
-	__le32 rr_aw_razwi_id_reg;
-	__le32 rr_ar_razwi_hi_reg;
-	__le32 rr_ar_razwi_lo_reg;
-	__le32 rr_ar_razwi_id_reg;
-};
-
-/* razwi_happened masks */
-#define RAZWI_HAPPENED_HBW	0x1
-#define RAZWI_HAPPENED_LBW	0x2
-#define RAZWI_HAPPENED_AW	0x4
-#define RAZWI_HAPPENED_AR	0x8
-
-struct hl_eq_razwi_info {
-	__le32 razwi_happened_mask;
-	union {
-		struct hl_eq_razwi_lbw_info_regs lbw;
-		struct hl_eq_razwi_hbw_info_regs hbw;
-	};
-	__le32 pad;
-};
-
-struct hl_eq_razwi_with_intr_cause {
-	struct hl_eq_razwi_info razwi_info;
-	struct hl_eq_intr_cause intr_cause;
-};
-
-#define HBM_CA_ERR_CMD_LIFO_LEN		8
-#define HBM_RD_ERR_DATA_LIFO_LEN	8
-#define HBM_WR_PAR_CMD_LIFO_LEN		11
-
-enum hl_hbm_sei_cause {
-	/* Command/address parity error event is split into 2 events due to
-	 * size limitation: ODD suffix for odd HBM CK_t cycles and EVEN  suffix
-	 * for even HBM CK_t cycles
-	 */
-	HBM_SEI_CMD_PARITY_EVEN,
-	HBM_SEI_CMD_PARITY_ODD,
-	/* Read errors can be reflected as a combination of SERR/DERR/parity
-	 * errors. Therefore, we define one event for all read error types.
-	 * LKD will perform further proccessing.
-	 */
-	HBM_SEI_READ_ERR,
-	HBM_SEI_WRITE_DATA_PARITY_ERR,
-	HBM_SEI_CATTRIP,
-	HBM_SEI_MEM_BIST_FAIL,
-	HBM_SEI_DFI,
-	HBM_SEI_INV_TEMP_READ_OUT,
-	HBM_SEI_BIST_FAIL,
-};
-
-/* Masks for parsing hl_hbm_sei_headr fields */
-#define HBM_ECC_SERR_CNTR_MASK		0xFF
-#define HBM_ECC_DERR_CNTR_MASK		0xFF00
-#define HBM_RD_PARITY_CNTR_MASK		0xFF0000
-
-/* HBM index and MC index are known by the event_id */
-struct hl_hbm_sei_header {
-	union {
-		/* relevant only in case of HBM read error */
-		struct {
-			__u8 ecc_serr_cnt;
-			__u8 ecc_derr_cnt;
-			__u8 read_par_cnt;
-			__u8 reserved;
-		};
-		/* All other cases */
-		__le32 cnt;
-	};
-	__u8 sei_cause;		/* enum hl_hbm_sei_cause */
-	__u8 mc_channel;		/* range: 0-3 */
-	__u8 mc_pseudo_channel;	/* range: 0-7 */
-	__u8 is_critical;
-};
-
-#define HBM_RD_ADDR_SID_SHIFT		0
-#define HBM_RD_ADDR_SID_MASK		0x1
-#define HBM_RD_ADDR_BG_SHIFT		1
-#define HBM_RD_ADDR_BG_MASK		0x6
-#define HBM_RD_ADDR_BA_SHIFT		3
-#define HBM_RD_ADDR_BA_MASK		0x18
-#define HBM_RD_ADDR_COL_SHIFT		5
-#define HBM_RD_ADDR_COL_MASK		0x7E0
-#define HBM_RD_ADDR_ROW_SHIFT		11
-#define HBM_RD_ADDR_ROW_MASK		0x3FFF800
-
-struct hbm_rd_addr {
-	union {
-		/* bit fields are only for FW use */
-		struct {
-			u32 dbg_rd_err_addr_sid:1;
-			u32 dbg_rd_err_addr_bg:2;
-			u32 dbg_rd_err_addr_ba:2;
-			u32 dbg_rd_err_addr_col:6;
-			u32 dbg_rd_err_addr_row:15;
-			u32 reserved:6;
-		};
-		__le32 rd_addr_val;
-	};
-};
-
-#define HBM_RD_ERR_BEAT_SHIFT		2
-/* dbg_rd_err_misc fields: */
-/* Read parity is calculated per DW on every beat */
-#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT	0
-#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK	0x3
-#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT	8
-#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK	0x300
-/* ECC is calculated per PC on every beat */
-#define HBM_RD_ERR_SERR_BEAT0_SHIFT	16
-#define HBM_RD_ERR_SERR_BEAT0_MASK	0x10000
-#define HBM_RD_ERR_DERR_BEAT0_SHIFT	24
-#define HBM_RD_ERR_DERR_BEAT0_MASK	0x100000
-
-struct hl_eq_hbm_sei_read_err_intr_info {
-	/* DFI_RD_ERR_REP_ADDR */
-	struct hbm_rd_addr dbg_rd_err_addr;
-	/* DFI_RD_ERR_REP_ERR */
-	union {
-		struct {
-			/* bit fields are only for FW use */
-			u32 dbg_rd_err_par:8;
-			u32 dbg_rd_err_par_data:8;
-			u32 dbg_rd_err_serr:4;
-			u32 dbg_rd_err_derr:4;
-			u32 reserved:8;
-		};
-		__le32 dbg_rd_err_misc;
-	};
-	/* DFI_RD_ERR_REP_DM */
-	__le32 dbg_rd_err_dm;
-	/* DFI_RD_ERR_REP_SYNDROME */
-	__le32 dbg_rd_err_syndrome;
-	/* DFI_RD_ERR_REP_DATA */
-	__le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN];
-};
-
-struct hl_eq_hbm_sei_ca_par_intr_info {
-	/* 14 LSBs */
-	__le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN];
-	/* 18 LSBs */
-	__le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN];
-};
-
-#define WR_PAR_LAST_CMD_COL_SHIFT	0
-#define WR_PAR_LAST_CMD_COL_MASK	0x3F
-#define WR_PAR_LAST_CMD_BG_SHIFT	6
-#define WR_PAR_LAST_CMD_BG_MASK		0xC0
-#define WR_PAR_LAST_CMD_BA_SHIFT	8
-#define WR_PAR_LAST_CMD_BA_MASK		0x300
-#define WR_PAR_LAST_CMD_SID_SHIFT	10
-#define WR_PAR_LAST_CMD_SID_MASK	0x400
-
-/* Row address isn't latched */
-struct hbm_sei_wr_cmd_address {
-	/* DFI_DERR_LAST_CMD */
-	union {
-		struct {
-			/* bit fields are only for FW use */
-			u32 col:6;
-			u32 bg:2;
-			u32 ba:2;
-			u32 sid:1;
-			u32 reserved:21;
-		};
-		__le32 dbg_wr_cmd_addr;
-	};
-};
-
-struct hl_eq_hbm_sei_wr_par_intr_info {
-	/* entry 0: WR command address from the 1st cycle prior to the error
-	 * entry 1: WR command address from the 2nd cycle prior to the error
-	 * and so on...
-	 */
-	struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN];
-	/* derr[0:1] - 1st HBM cycle DERR output
-	 * derr[2:3] - 2nd HBM cycle DERR output
-	 */
-	__u8 dbg_derr;
-	/* extend to reach 8B */
-	__u8 pad[3];
-};
-
-/*
- * this struct represents the following sei causes:
- * command parity, ECC double error, ECC single error, dfi error, cattrip,
- * temperature read-out, read parity error and write parity error.
- * some only use the header while some have extra data.
- */
-struct hl_eq_hbm_sei_data {
-	struct hl_hbm_sei_header hdr;
-	union {
-		struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info;
-		struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info;
-		struct hl_eq_hbm_sei_read_err_intr_info read_err_info;
-		struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info;
-	};
-};
-
-/* Engine/farm arc interrupt type */
-enum hl_engine_arc_interrupt_type {
-	/* Qman/farm ARC DCCM QUEUE FULL interrupt type */
-	ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1
-};
-
-/* Data structure specifies details of payload of DCCM QUEUE FULL interrupt */
-struct hl_engine_arc_dccm_queue_full_irq {
-	/* Queue index value which caused DCCM QUEUE FULL */
-	__le32 queue_index;
-	__le32 pad;
-};
-
-/* Data structure specifies details of QM/FARM ARC interrupt */
-struct hl_eq_engine_arc_intr_data {
-	/* ARC engine id e.g.  DCORE0_TPC0_QM_ARC, DCORE0_TCP1_QM_ARC */
-	__le32 engine_id;
-	__le32 intr_type; /* enum hl_engine_arc_interrupt_type */
-	/* More info related to the interrupt e.g. queue index
-	 * incase of DCCM_QUEUE_FULL interrupt.
-	 */
-	__le64 payload;
-	__le64 pad[5];
-};
-
-#define ADDR_DEC_ADDRESS_COUNT_MAX 4
-
-/* Data structure specifies details of ADDR_DEC interrupt */
-struct hl_eq_addr_dec_intr_data {
-	struct hl_eq_intr_cause intr_cause;
-	__le64 addr[ADDR_DEC_ADDRESS_COUNT_MAX];
-	__u8 addr_cnt;
-	__u8 pad[7];
-};
-
-struct hl_eq_entry {
-	struct hl_eq_header hdr;
-	union {
-		__le64 data_placeholder;
-		struct hl_eq_ecc_data ecc_data;
-		struct hl_eq_hbm_ecc_data hbm_ecc_data;	/* Obsolete */
-		struct hl_eq_sm_sei_data sm_sei_data;
-		struct cpucp_pkt_sync_err pkt_sync_err;
-		struct hl_eq_fw_alive fw_alive;
-		struct hl_eq_intr_cause intr_cause;
-		struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data;
-		struct hl_eq_razwi_info razwi_info;
-		struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause;
-		struct hl_eq_hbm_sei_data sei_data;	/* Gaudi2 HBM */
-		struct hl_eq_engine_arc_intr_data arc_data;
-		struct hl_eq_addr_dec_intr_data addr_dec;
-		__le64 data[7];
-	};
-};
-
-#define HL_EQ_ENTRY_SIZE		sizeof(struct hl_eq_entry)
-
-#define EQ_CTL_READY_SHIFT		31
-#define EQ_CTL_READY_MASK		0x80000000
-
-#define EQ_CTL_EVENT_TYPE_SHIFT		16
-#define EQ_CTL_EVENT_TYPE_MASK		0x0FFF0000
-
-#define EQ_CTL_INDEX_SHIFT		0
-#define EQ_CTL_INDEX_MASK		0x0000FFFF
-
-enum pq_init_status {
-	PQ_INIT_STATUS_NA = 0,
-	PQ_INIT_STATUS_READY_FOR_CP,
-	PQ_INIT_STATUS_READY_FOR_HOST,
-	PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI,
-	PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR,
-	PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR
-};
-
-/*
- * CpuCP Primary Queue Packets
- *
- * During normal operation, the host's kernel driver needs to send various
- * messages to CpuCP, usually either to SET some value into a H/W periphery or
- * to GET the current value of some H/W periphery. For example, SET the
- * frequency of MME/TPC and GET the value of the thermal sensor.
- *
- * These messages can be initiated either by the User application or by the
- * host's driver itself, e.g. power management code. In either case, the
- * communication from the host's driver to CpuCP will *always* be in
- * synchronous mode, meaning that the host will send a single message and poll
- * until the message was acknowledged and the results are ready (if results are
- * needed).
- *
- * This means that only a single message can be sent at a time and the host's
- * driver must wait for its result before sending the next message. Having said
- * that, because these are control messages which are sent in a relatively low
- * frequency, this limitation seems acceptable. It's important to note that
- * in case of multiple devices, messages to different devices *can* be sent
- * at the same time.
- *
- * The message, inputs/outputs (if relevant) and fence object will be located
- * on the device DDR at an address that will be determined by the host's driver.
- * During device initialization phase, the host will pass to CpuCP that address.
- * Most of the message types will contain inputs/outputs inside the message
- * itself. The common part of each message will contain the opcode of the
- * message (its type) and a field representing a fence object.
- *
- * When the host's driver wishes to send a message to CPU CP, it will write the
- * message contents to the device DDR, clear the fence object and then write to
- * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU.
- *
- * Upon receiving the interrupt (#121), CpuCP will read the message from the
- * DDR. In case the message is a SET operation, CpuCP will first perform the
- * operation and then write to the fence object on the device DDR. In case the
- * message is a GET operation, CpuCP will first fill the results section on the
- * device DDR and then write to the fence object. If an error occurred, CpuCP
- * will fill the rc field with the right error code.
- *
- * In the meantime, the host's driver will poll on the fence object. Once the
- * host sees that the fence object is signaled, it will read the results from
- * the device DDR (if relevant) and resume the code execution in the host's
- * driver.
- *
- * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
- * so the value being put by the host's driver matches the value read by CpuCP
- *
- * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
- *
- * Detailed description:
- *
- * CPUCP_PACKET_DISABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU must NOT issue PCI
- *       transactions (read/write) towards the Host CPU. This also include
- *       sending MSI-X interrupts.
- *       This packet is usually sent before the device is moved to D3Hot state.
- *
- * CPUCP_PACKET_ENABLE_PCI_ACCESS -
- *       After receiving this packet the embedded CPU is allowed to issue PCI
- *       transactions towards the Host CPU, including sending MSI-X interrupts.
- *       This packet is usually send after the device is moved to D0 state.
- *
- * CPUCP_PACKET_TEMPERATURE_GET -
- *       Fetch the current temperature / Max / Max Hyst / Critical /
- *       Critical Hyst of a specified thermal sensor. The packet's
- *       arguments specify the desired sensor and the field to get.
- *
- * CPUCP_PACKET_VOLTAGE_GET -
- *       Fetch the voltage / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * CPUCP_PACKET_CURRENT_GET -
- *       Fetch the current / Max / Min of a specified sensor. The packet's
- *       arguments specify the sensor and type.
- *
- * CPUCP_PACKET_FAN_SPEED_GET -
- *       Fetch the speed / Max / Min of a specified fan. The packet's
- *       arguments specify the sensor and type.
- *
- * CPUCP_PACKET_PWM_GET -
- *       Fetch the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor and type.
- *
- * CPUCP_PACKET_PWM_SET -
- *       Set the pwm value / mode of a specified pwm. The packet's
- *       arguments specify the sensor, type and value.
- *
- * CPUCP_PACKET_FREQUENCY_SET -
- *       Set the frequency of a specified PLL. The packet's arguments specify
- *       the PLL and the desired frequency. The actual frequency in the device
- *       might differ from the requested frequency.
- *
- * CPUCP_PACKET_FREQUENCY_GET -
- *       Fetch the frequency of a specified PLL. The packet's arguments specify
- *       the PLL.
- *
- * CPUCP_PACKET_LED_SET -
- *       Set the state of a specified led. The packet's arguments
- *       specify the led and the desired state.
- *
- * CPUCP_PACKET_I2C_WR -
- *       Write 32-bit value to I2C device. The packet's arguments specify the
- *       I2C bus, address and value.
- *
- * CPUCP_PACKET_I2C_RD -
- *       Read 32-bit value from I2C device. The packet's arguments specify the
- *       I2C bus and address.
- *
- * CPUCP_PACKET_INFO_GET -
- *       Fetch information from the device as specified in the packet's
- *       structure. The host's driver passes the max size it allows the CpuCP to
- *       write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
- *
- * CPUCP_PACKET_UNMASK_RAZWI_IRQ -
- *       Unmask the given IRQ. The IRQ number is specified in the value field.
- *       The packet is sent after receiving an interrupt and printing its
- *       relevant information.
- *
- * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
- *       Unmask the given IRQs. The IRQs numbers are specified in an array right
- *       after the cpucp_packet structure, where its first element is the array
- *       length. The packet is sent after a soft reset was done in order to
- *       handle any interrupts that were sent during the reset process.
- *
- * CPUCP_PACKET_TEST -
- *       Test packet for CpuCP connectivity. The CPU will put the fence value
- *       in the result field.
- *
- * CPUCP_PACKET_FREQUENCY_CURR_GET -
- *       Fetch the current frequency of a specified PLL. The packet's arguments
- *       specify the PLL.
- *
- * CPUCP_PACKET_MAX_POWER_GET -
- *       Fetch the maximal power of the device.
- *
- * CPUCP_PACKET_MAX_POWER_SET -
- *       Set the maximal power of the device. The packet's arguments specify
- *       the power.
- *
- * CPUCP_PACKET_EEPROM_DATA_GET -
- *       Get EEPROM data from the CpuCP kernel. The buffer is specified in the
- *       addr field. The CPU will put the returned data size in the result
- *       field. In addition, the host's driver passes the max size it allows the
- *       CpuCP to write to the structure, to prevent data corruption in case of
- *       mismatched driver/FW versions.
- *
- * CPUCP_PACKET_NIC_INFO_GET -
- *       Fetch information from the device regarding the NIC. the host's driver
- *       passes the max size it allows the CpuCP to write to the structure, to
- *       prevent data corruption in case of mismatched driver/FW versions.
- *
- * CPUCP_PACKET_TEMPERATURE_SET -
- *       Set the value of the offset property of a specified thermal sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * CPUCP_PACKET_VOLTAGE_SET -
- *       Trigger the reset_history property of a specified voltage sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * CPUCP_PACKET_CURRENT_SET -
- *       Trigger the reset_history property of a specified current sensor.
- *       The packet's arguments specify the desired sensor and the field to
- *       set.
- *
- * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
- *       Get throughput of PCIe.
- *       The packet's arguments specify the transaction direction (TX/RX).
- *       The window measurement is 10[msec], and the return value is in KB/sec.
- *
- * CPUCP_PACKET_PCIE_REPLAY_CNT_GET
- *       Replay count measures number of "replay" events, which is basicly
- *       number of retries done by PCIe.
- *
- * CPUCP_PACKET_TOTAL_ENERGY_GET -
- *       Total Energy is measurement of energy from the time FW Linux
- *       is loaded. It is calculated by multiplying the average power
- *       by time (passed from armcp start). The units are in MilliJouls.
- *
- * CPUCP_PACKET_PLL_INFO_GET -
- *       Fetch frequencies of PLL from the required PLL IP.
- *       The packet's arguments specify the device PLL type
- *       Pll type is the PLL from device pll_index enum.
- *       The result is composed of 4 outputs, each is 16-bit
- *       frequency in MHz.
- *
- * CPUCP_PACKET_POWER_GET -
- *       Fetch the present power consumption of the device (Current * Voltage).
- *
- * CPUCP_PACKET_NIC_PFC_SET -
- *       Enable/Disable the NIC PFC feature. The packet's arguments specify the
- *       NIC port, relevant lanes to configure and one bit indication for
- *       enable/disable.
- *
- * CPUCP_PACKET_NIC_FAULT_GET -
- *       Fetch the current indication for local/remote faults from the NIC MAC.
- *       The result is 32-bit value of the relevant register.
- *
- * CPUCP_PACKET_NIC_LPBK_SET -
- *       Enable/Disable the MAC loopback feature. The packet's arguments specify
- *       the NIC port, relevant lanes to configure and one bit indication for
- *       enable/disable.
- *
- * CPUCP_PACKET_NIC_MAC_INIT -
- *       Configure the NIC MAC channels. The packet's arguments specify the
- *       NIC port and the speed.
- *
- * CPUCP_PACKET_MSI_INFO_SET -
- *       set the index number for each supported msi type going from
- *       host to device
- *
- * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
- *       Fetch the un/correctable counters values from the NIC MAC.
- *
- * CPUCP_PACKET_NIC_STAT_REGS_GET -
- *       Fetch various NIC MAC counters from the NIC STAT.
- *
- * CPUCP_PACKET_NIC_STAT_REGS_CLR -
- *       Clear the various NIC MAC counters in the NIC STAT.
- *
- * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
- *       Fetch all NIC MAC counters from the NIC STAT.
- *
- * CPUCP_PACKET_IS_IDLE_CHECK -
- *       Check if the device is IDLE in regard to the DMA/compute engines
- *       and QMANs. The f/w will return a bitmask where each bit represents
- *       a different engine or QMAN according to enum cpucp_idle_mask.
- *       The bit will be 1 if the engine is NOT idle.
- *
- * CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET -
- *       Fetch all HBM replaced-rows and prending to be replaced rows data.
- *
- * CPUCP_PACKET_HBM_PENDING_ROWS_STATUS -
- *       Fetch status of HBM rows pending replacement and need a reboot to
- *       be replaced.
- *
- * CPUCP_PACKET_POWER_SET -
- *       Resets power history of device to 0
- *
- * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
- *       Packet to perform engine core ASID configuration
- *
- * CPUCP_PACKET_SEC_ATTEST_GET -
- *       Get the attestaion data that is collected during various stages of the
- *       boot sequence. the attestation data is also hashed with some unique
- *       number (nonce) provided by the host to prevent replay attacks.
- *       public key and certificate also provided as part of the FW response.
- *
- * CPUCP_PACKET_MONITOR_DUMP_GET -
- *       Get monitors registers dump from the CpuCP kernel.
- *       The CPU will put the registers dump in the a buffer allocated by the driver
- *       which address is passed via the CpuCp packet. In addition, the host's driver
- *       passes the max size it allows the CpuCP to write to the structure, to prevent
- *       data corruption in case of mismatched driver/FW versions.
- *       Obsolete.
- *
- * CPUCP_PACKET_GENERIC_PASSTHROUGH -
- *      Generic opcode for all firmware info that is only passed to host
- *      through the LKD, without getting parsed there.
- *
- * CPUCP_PACKET_ACTIVE_STATUS_SET -
- *       LKD sends FW indication whether device is free or in use, this indication is reported
- *       also to the BMC.
- *
- * CPUCP_PACKET_REGISTER_INTERRUPTS -
- *       Packet to register interrupts indicating LKD is ready to receive events from FW.
- *
- * CPUCP_PACKET_SOFT_RESET -
- *	 Packet to perform soft-reset.
- */
-
-enum cpucp_packet_id {
-	CPUCP_PACKET_DISABLE_PCI_ACCESS = 1,	/* internal */
-	CPUCP_PACKET_ENABLE_PCI_ACCESS,		/* internal */
-	CPUCP_PACKET_TEMPERATURE_GET,		/* sysfs */
-	CPUCP_PACKET_VOLTAGE_GET,		/* sysfs */
-	CPUCP_PACKET_CURRENT_GET,		/* sysfs */
-	CPUCP_PACKET_FAN_SPEED_GET,		/* sysfs */
-	CPUCP_PACKET_PWM_GET,			/* sysfs */
-	CPUCP_PACKET_PWM_SET,			/* sysfs */
-	CPUCP_PACKET_FREQUENCY_SET,		/* sysfs */
-	CPUCP_PACKET_FREQUENCY_GET,		/* sysfs */
-	CPUCP_PACKET_LED_SET,			/* debugfs */
-	CPUCP_PACKET_I2C_WR,			/* debugfs */
-	CPUCP_PACKET_I2C_RD,			/* debugfs */
-	CPUCP_PACKET_INFO_GET,			/* IOCTL */
-	CPUCP_PACKET_FLASH_PROGRAM_REMOVED,
-	CPUCP_PACKET_UNMASK_RAZWI_IRQ,		/* internal */
-	CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,	/* internal */
-	CPUCP_PACKET_TEST,			/* internal */
-	CPUCP_PACKET_FREQUENCY_CURR_GET,	/* sysfs */
-	CPUCP_PACKET_MAX_POWER_GET,		/* sysfs */
-	CPUCP_PACKET_MAX_POWER_SET,		/* sysfs */
-	CPUCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
-	CPUCP_PACKET_NIC_INFO_GET,		/* internal */
-	CPUCP_PACKET_TEMPERATURE_SET,		/* sysfs */
-	CPUCP_PACKET_VOLTAGE_SET,		/* sysfs */
-	CPUCP_PACKET_CURRENT_SET,		/* sysfs */
-	CPUCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
-	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
-	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
-	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
-	CPUCP_PACKET_NIC_STATUS,		/* internal */
-	CPUCP_PACKET_POWER_GET,			/* internal */
-	CPUCP_PACKET_NIC_PFC_SET,		/* internal */
-	CPUCP_PACKET_NIC_FAULT_GET,		/* internal */
-	CPUCP_PACKET_NIC_LPBK_SET,		/* internal */
-	CPUCP_PACKET_NIC_MAC_CFG,		/* internal */
-	CPUCP_PACKET_MSI_INFO_SET,		/* internal */
-	CPUCP_PACKET_NIC_XPCS91_REGS_GET,	/* internal */
-	CPUCP_PACKET_NIC_STAT_REGS_GET,		/* internal */
-	CPUCP_PACKET_NIC_STAT_REGS_CLR,		/* internal */
-	CPUCP_PACKET_NIC_STAT_REGS_ALL_GET,	/* internal */
-	CPUCP_PACKET_IS_IDLE_CHECK,		/* internal */
-	CPUCP_PACKET_HBM_REPLACED_ROWS_INFO_GET,/* internal */
-	CPUCP_PACKET_HBM_PENDING_ROWS_STATUS,	/* internal */
-	CPUCP_PACKET_POWER_SET,			/* internal */
-	CPUCP_PACKET_RESERVED,			/* not used */
-	CPUCP_PACKET_ENGINE_CORE_ASID_SET,	/* internal */
-	CPUCP_PACKET_RESERVED2,			/* not used */
-	CPUCP_PACKET_SEC_ATTEST_GET,		/* internal */
-	CPUCP_PACKET_RESERVED3,			/* not used */
-	CPUCP_PACKET_RESERVED4,			/* not used */
-	CPUCP_PACKET_MONITOR_DUMP_GET,		/* debugfs */
-	CPUCP_PACKET_RESERVED5,			/* not used */
-	CPUCP_PACKET_RESERVED6,			/* not used */
-	CPUCP_PACKET_RESERVED7,			/* not used */
-	CPUCP_PACKET_GENERIC_PASSTHROUGH,	/* IOCTL */
-	CPUCP_PACKET_RESERVED8,			/* not used */
-	CPUCP_PACKET_ACTIVE_STATUS_SET,		/* internal */
-	CPUCP_PACKET_RESERVED9,			/* not used */
-	CPUCP_PACKET_RESERVED10,		/* not used */
-	CPUCP_PACKET_RESERVED11,		/* not used */
-	CPUCP_PACKET_RESERVED12,		/* internal */
-	CPUCP_PACKET_REGISTER_INTERRUPTS,	/* internal */
-	CPUCP_PACKET_SOFT_RESET,		/* internal */
-	CPUCP_PACKET_ID_MAX			/* must be last */
-};
-
-#define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
-
-#define CPUCP_PKT_CTL_RC_SHIFT		12
-#define CPUCP_PKT_CTL_RC_MASK		0x0000F000
-
-#define CPUCP_PKT_CTL_OPCODE_SHIFT	16
-#define CPUCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
-
-#define CPUCP_PKT_RES_PLL_OUT0_SHIFT	0
-#define CPUCP_PKT_RES_PLL_OUT0_MASK	0x000000000000FFFFull
-#define CPUCP_PKT_RES_PLL_OUT1_SHIFT	16
-#define CPUCP_PKT_RES_PLL_OUT1_MASK	0x00000000FFFF0000ull
-#define CPUCP_PKT_RES_PLL_OUT2_SHIFT	32
-#define CPUCP_PKT_RES_PLL_OUT2_MASK	0x0000FFFF00000000ull
-#define CPUCP_PKT_RES_PLL_OUT3_SHIFT	48
-#define CPUCP_PKT_RES_PLL_OUT3_MASK	0xFFFF000000000000ull
-
-#define CPUCP_PKT_RES_EEPROM_OUT0_SHIFT	0
-#define CPUCP_PKT_RES_EEPROM_OUT0_MASK	0x000000000000FFFFull
-#define CPUCP_PKT_RES_EEPROM_OUT1_SHIFT	16
-#define CPUCP_PKT_RES_EEPROM_OUT1_MASK	0x0000000000FF0000ull
-
-#define CPUCP_PKT_VAL_PFC_IN1_SHIFT	0
-#define CPUCP_PKT_VAL_PFC_IN1_MASK	0x0000000000000001ull
-#define CPUCP_PKT_VAL_PFC_IN2_SHIFT	1
-#define CPUCP_PKT_VAL_PFC_IN2_MASK	0x000000000000001Eull
-
-#define CPUCP_PKT_VAL_LPBK_IN1_SHIFT	0
-#define CPUCP_PKT_VAL_LPBK_IN1_MASK	0x0000000000000001ull
-#define CPUCP_PKT_VAL_LPBK_IN2_SHIFT	1
-#define CPUCP_PKT_VAL_LPBK_IN2_MASK	0x000000000000001Eull
-
-#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT	0
-#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK	0x0000000000000001ull
-#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT	1
-#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK	0x00000000FFFFFFFEull
-
-/* heartbeat status bits */
-#define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT		0
-#define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK		0x00000001
-
-struct cpucp_packet {
-	union {
-		__le64 value;	/* For SET packets */
-		__le64 result;	/* For GET packets */
-		__le64 addr;	/* For PQ */
-	};
-
-	__le32 ctl;
-
-	__le32 fence;		/* Signal to host that message is completed */
-
-	union {
-		struct {/* For temperature/current/voltage/fan/pwm get/set */
-			__le16 sensor_index;
-			__le16 type;
-		};
-
-		struct {	/* For I2C read/write */
-			__u8 i2c_bus;
-			__u8 i2c_addr;
-			__u8 i2c_reg;
-			/*
-			 * In legacy implemetations, i2c_len was not present,
-			 * was unused and just added as pad.
-			 * So if i2c_len is 0, it is treated as legacy
-			 * and r/w 1 Byte, else if i2c_len is specified,
-			 * its treated as new multibyte r/w support.
-			 */
-			__u8 i2c_len;
-		};
-
-		struct {/* For PLL info fetch */
-			__le16 pll_type;
-			/* TODO pll_reg is kept temporary before removal */
-			__le16 pll_reg;
-		};
-
-		/* For any general request */
-		__le32 index;
-
-		/* For frequency get/set */
-		__le32 pll_index;
-
-		/* For led set */
-		__le32 led_index;
-
-		/* For get CpuCP info/EEPROM data/NIC info */
-		__le32 data_max_size;
-
-		/*
-		 * For any general status bitmask. Shall be used whenever the
-		 * result cannot be used to hold general purpose data.
-		 */
-		__le32 status_mask;
-
-		/* random, used once number, for security packets */
-		__le32 nonce;
-	};
-
-	union {
-		/* For NIC requests */
-		__le32 port_index;
-
-		/* For Generic packet sub index */
-		__le32 pkt_subidx;
-	};
-};
-
-struct cpucp_unmask_irq_arr_packet {
-	struct cpucp_packet cpucp_pkt;
-	__le32 length;
-	__le32 irqs[];
-};
-
-struct cpucp_nic_status_packet {
-	struct cpucp_packet cpucp_pkt;
-	__le32 length;
-	__le32 data[];
-};
-
-struct cpucp_array_data_packet {
-	struct cpucp_packet cpucp_pkt;
-	__le32 length;
-	__le32 data[];
-};
-
-enum cpucp_led_index {
-	CPUCP_LED0_INDEX = 0,
-	CPUCP_LED1_INDEX,
-	CPUCP_LED2_INDEX,
-	CPUCP_LED_MAX_INDEX = CPUCP_LED2_INDEX
-};
-
-/*
- * enum cpucp_packet_rc - Error return code
- * @cpucp_packet_success	-> in case of success.
- * @cpucp_packet_invalid	-> this is to support first generation platforms.
- * @cpucp_packet_fault		-> in case of processing error like failing to
- *                                 get device binding or semaphore etc.
- * @cpucp_packet_invalid_pkt	-> when cpucp packet is un-supported.
- * @cpucp_packet_invalid_params	-> when checking parameter like length of buffer
- *				   or attribute value etc.
- * @cpucp_packet_rc_max		-> It indicates size of enum so should be at last.
- */
-enum cpucp_packet_rc {
-	cpucp_packet_success,
-	cpucp_packet_invalid,
-	cpucp_packet_fault,
-	cpucp_packet_invalid_pkt,
-	cpucp_packet_invalid_params,
-	cpucp_packet_rc_max
-};
-
-/*
- * cpucp_temp_type should adhere to hwmon_temp_attributes
- * defined in Linux kernel hwmon.h file
- */
-enum cpucp_temp_type {
-	cpucp_temp_input,
-	cpucp_temp_min = 4,
-	cpucp_temp_min_hyst,
-	cpucp_temp_max = 6,
-	cpucp_temp_max_hyst,
-	cpucp_temp_crit,
-	cpucp_temp_crit_hyst,
-	cpucp_temp_offset = 19,
-	cpucp_temp_lowest = 21,
-	cpucp_temp_highest = 22,
-	cpucp_temp_reset_history = 23,
-	cpucp_temp_warn = 24,
-	cpucp_temp_max_crit = 25,
-	cpucp_temp_max_warn = 26,
-};
-
-enum cpucp_in_attributes {
-	cpucp_in_input,
-	cpucp_in_min,
-	cpucp_in_max,
-	cpucp_in_lowest = 6,
-	cpucp_in_highest = 7,
-	cpucp_in_reset_history,
-	cpucp_in_intr_alarm_a,
-	cpucp_in_intr_alarm_b,
-};
-
-enum cpucp_curr_attributes {
-	cpucp_curr_input,
-	cpucp_curr_min,
-	cpucp_curr_max,
-	cpucp_curr_lowest = 6,
-	cpucp_curr_highest = 7,
-	cpucp_curr_reset_history
-};
-
-enum cpucp_fan_attributes {
-	cpucp_fan_input,
-	cpucp_fan_min = 2,
-	cpucp_fan_max
-};
-
-enum cpucp_pwm_attributes {
-	cpucp_pwm_input,
-	cpucp_pwm_enable
-};
-
-enum cpucp_pcie_throughput_attributes {
-	cpucp_pcie_throughput_tx,
-	cpucp_pcie_throughput_rx
-};
-
-/* TODO temporary kept before removal */
-enum cpucp_pll_reg_attributes {
-	cpucp_pll_nr_reg,
-	cpucp_pll_nf_reg,
-	cpucp_pll_od_reg,
-	cpucp_pll_div_factor_reg,
-	cpucp_pll_div_sel_reg
-};
-
-/* TODO temporary kept before removal */
-enum cpucp_pll_type_attributes {
-	cpucp_pll_cpu,
-	cpucp_pll_pci,
-};
-
-/*
- * cpucp_power_type aligns with hwmon_power_attributes
- * defined in Linux kernel hwmon.h file
- */
-enum cpucp_power_type {
-	CPUCP_POWER_INPUT = 8,
-	CPUCP_POWER_INPUT_HIGHEST = 9,
-	CPUCP_POWER_RESET_INPUT_HISTORY = 11
-};
-
-/*
- * MSI type enumeration table for all ASICs and future SW versions.
- * For future ASIC-LKD compatibility, we can only add new enumerations.
- * at the end of the table (before CPUCP_NUM_OF_MSI_TYPES).
- * Changing the order of entries or removing entries is not allowed.
- */
-enum cpucp_msi_type {
-	CPUCP_EVENT_QUEUE_MSI_TYPE,
-	CPUCP_NIC_PORT1_MSI_TYPE,
-	CPUCP_NIC_PORT3_MSI_TYPE,
-	CPUCP_NIC_PORT5_MSI_TYPE,
-	CPUCP_NIC_PORT7_MSI_TYPE,
-	CPUCP_NIC_PORT9_MSI_TYPE,
-	CPUCP_NUM_OF_MSI_TYPES
-};
-
-/*
- * PLL enumeration table used for all ASICs and future SW versions.
- * For future ASIC-LKD compatibility, we can only add new enumerations.
- * at the end of the table.
- * Changing the order of entries or removing entries is not allowed.
- */
-enum pll_index {
-	CPU_PLL = 0,
-	PCI_PLL = 1,
-	NIC_PLL = 2,
-	DMA_PLL = 3,
-	MESH_PLL = 4,
-	MME_PLL = 5,
-	TPC_PLL = 6,
-	IF_PLL = 7,
-	SRAM_PLL = 8,
-	NS_PLL = 9,
-	HBM_PLL = 10,
-	MSS_PLL = 11,
-	DDR_PLL = 12,
-	VID_PLL = 13,
-	BANK_PLL = 14,
-	MMU_PLL = 15,
-	IC_PLL = 16,
-	MC_PLL = 17,
-	EMMC_PLL = 18,
-	D2D_PLL = 19,
-	CS_PLL = 20,
-	C2C_PLL = 21,
-	NCH_PLL = 22,
-	C2M_PLL = 23,
-	PLL_MAX
-};
-
-enum rl_index {
-	TPC_RL = 0,
-	MME_RL,
-	EDMA_RL,
-};
-
-enum pvt_index {
-	PVT_SW,
-	PVT_SE,
-	PVT_NW,
-	PVT_NE
-};
-
-/* Event Queue Packets */
-
-struct eq_generic_event {
-	__le64 data[7];
-};
-
-/*
- * CpuCP info
- */
-
-#define CARD_NAME_MAX_LEN		16
-#define CPUCP_MAX_SENSORS		128
-#define CPUCP_MAX_NICS			128
-#define CPUCP_LANES_PER_NIC		4
-#define CPUCP_NIC_QSFP_EEPROM_MAX_LEN	1024
-#define CPUCP_MAX_NIC_LANES		(CPUCP_MAX_NICS * CPUCP_LANES_PER_NIC)
-#define CPUCP_NIC_MASK_ARR_LEN		((CPUCP_MAX_NICS + 63) / 64)
-#define CPUCP_NIC_POLARITY_ARR_LEN	((CPUCP_MAX_NIC_LANES + 63) / 64)
-#define CPUCP_HBM_ROW_REPLACE_MAX	32
-
-struct cpucp_sensor {
-	__le32 type;
-	__le32 flags;
-};
-
-/**
- * struct cpucp_card_types - ASIC card type.
- * @cpucp_card_type_pci: PCI card.
- * @cpucp_card_type_pmc: PCI Mezzanine Card.
- */
-enum cpucp_card_types {
-	cpucp_card_type_pci,
-	cpucp_card_type_pmc
-};
-
-#define CPUCP_SEC_CONF_ENABLED_SHIFT	0
-#define CPUCP_SEC_CONF_ENABLED_MASK	0x00000001
-
-#define CPUCP_SEC_CONF_FLASH_WP_SHIFT	1
-#define CPUCP_SEC_CONF_FLASH_WP_MASK	0x00000002
-
-#define CPUCP_SEC_CONF_EEPROM_WP_SHIFT	2
-#define CPUCP_SEC_CONF_EEPROM_WP_MASK	0x00000004
-
-/**
- * struct cpucp_security_info - Security information.
- * @config: configuration bit field
- * @keys_num: number of stored keys
- * @revoked_keys: revoked keys bit field
- * @min_svn: minimal security version
- */
-struct cpucp_security_info {
-	__u8 config;
-	__u8 keys_num;
-	__u8 revoked_keys;
-	__u8 min_svn;
-};
-
-/**
- * struct cpucp_info - Info from CpuCP that is necessary to the host's driver
- * @sensors: available sensors description.
- * @kernel_version: CpuCP linux kernel version.
- * @reserved: reserved field.
- * @card_type: card configuration type.
- * @card_location: in a server, each card has different connections topology
- *                 depending on its location (relevant for PMC card type)
- * @cpld_version: CPLD programmed F/W version.
- * @infineon_version: Infineon main DC-DC version.
- * @fuse_version: silicon production FUSE information.
- * @thermal_version: thermald S/W version.
- * @cpucp_version: CpuCP S/W version.
- * @infineon_second_stage_version: Infineon 2nd stage DC-DC version.
- * @dram_size: available DRAM size.
- * @card_name: card name that will be displayed in HWMON subsystem on the host
- * @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance
- *                    (0 = functional, 1 = binned)
- * @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance
- *                        (0 = functional, 1 = binned), maximum 1 per dcore
- * @sram_binning: Categorize SRAM functionality
- *                (0 = fully functional, 1 = lower-half is not functional,
- *                 2 = upper-half is not functional)
- * @sec_info: security information
- * @pll_map: Bit map of supported PLLs for current ASIC version.
- * @mme_binning_mask: MME binning mask,
- *                    bits [0:6]   <==> dcore0 mme fma
- *                    bits [7:13]  <==> dcore1 mme fma
- *                    bits [14:20] <==> dcore0 mme ima
- *                    bits [21:27] <==> dcore1 mme ima
- *                    For each group, if the 6th bit is set then first 5 bits
- *                    represent the col's idx [0-31], otherwise these bits are
- *                    ignored, and col idx 32 is binned. 7th bit is don't care.
- * @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
- *                     (0 = functional 1 = binned)
- * @memory_repair_flag: eFuse flag indicating memory repair
- * @edma_binning_mask: EDMA binning mask, 1 bit per EDMA instance
- *                     (0 = functional 1 = binned)
- * @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
- *                     (0 = functional 1 = binned)
- * @interposer_version: Interposer version programmed in eFuse
- * @substrate_version: Substrate version programmed in eFuse
- * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM.
- * @fw_os_version: Firmware OS Version
- */
-struct cpucp_info {
-	struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
-	__u8 kernel_version[VERSION_MAX_LEN];
-	__le32 reserved;
-	__le32 card_type;
-	__le32 card_location;
-	__le32 cpld_version;
-	__le32 infineon_version;
-	__u8 fuse_version[VERSION_MAX_LEN];
-	__u8 thermal_version[VERSION_MAX_LEN];
-	__u8 cpucp_version[VERSION_MAX_LEN];
-	__le32 infineon_second_stage_version;
-	__le64 dram_size;
-	char card_name[CARD_NAME_MAX_LEN];
-	__le64 tpc_binning_mask;
-	__le64 decoder_binning_mask;
-	__u8 sram_binning;
-	__u8 dram_binning_mask;
-	__u8 memory_repair_flag;
-	__u8 edma_binning_mask;
-	__u8 xbar_binning_mask;
-	__u8 interposer_version;
-	__u8 substrate_version;
-	__u8 reserved2;
-	struct cpucp_security_info sec_info;
-	__le32 fw_hbm_region_size;
-	__u8 pll_map[PLL_MAP_LEN];
-	__le64 mme_binning_mask;
-	__u8 fw_os_version[VERSION_MAX_LEN];
-};
-
-struct cpucp_mac_addr {
-	__u8 mac_addr[ETH_ALEN];
-};
-
-enum cpucp_serdes_type {
-	TYPE_1_SERDES_TYPE,
-	TYPE_2_SERDES_TYPE,
-	HLS1_SERDES_TYPE,
-	HLS1H_SERDES_TYPE,
-	HLS2_SERDES_TYPE,
-	HLS2_TYPE_1_SERDES_TYPE,
-	MAX_NUM_SERDES_TYPE,		/* number of types */
-	UNKNOWN_SERDES_TYPE = 0xFFFF	/* serdes_type is u16 */
-};
-
-struct cpucp_nic_info {
-	struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
-	__le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
-	__le64 pol_tx_mask[CPUCP_NIC_POLARITY_ARR_LEN];
-	__le64 pol_rx_mask[CPUCP_NIC_POLARITY_ARR_LEN];
-	__le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
-	__u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
-	__le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
-	__le16 serdes_type; /* enum cpucp_serdes_type */
-	__le16 tx_swap_map[CPUCP_MAX_NICS];
-	__u8 reserved[6];
-};
-
-#define PAGE_DISCARD_MAX	64
-
-struct page_discard_info {
-	__u8 num_entries;
-	__u8 reserved[7];
-	__le32 mmu_page_idx[PAGE_DISCARD_MAX];
-};
-
-/*
- * struct frac_val - fracture value represented by "integer.frac".
- * @integer: the integer part of the fracture value;
- * @frac: the fracture part of the fracture value.
- */
-struct frac_val {
-	union {
-		struct {
-			__le16 integer;
-			__le16 frac;
-		};
-		__le32 val;
-	};
-};
-
-/*
- * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp".
- * @integer: the integer part of the SER value;
- * @exp: the exponent part of the SER value.
- */
-struct ser_val {
-	__le16 integer;
-	__le16 exp;
-};
-
-/*
- * struct cpucp_nic_status - describes the status of a NIC port.
- * @port: NIC port index.
- * @bad_format_cnt: e.g. CRC.
- * @responder_out_of_sequence_psn_cnt: e.g NAK.
- * @high_ber_reinit_cnt: link reinit due to high BER.
- * @correctable_err_cnt: e.g. bit-flip.
- * @uncorrectable_err_cnt: e.g. MAC errors.
- * @retraining_cnt: re-training counter.
- * @up: is port up.
- * @pcs_link: has PCS link.
- * @phy_ready: is PHY ready.
- * @auto_neg: is Autoneg enabled.
- * @timeout_retransmission_cnt: timeout retransmission events.
- * @high_ber_cnt: high ber events.
- * @pre_fec_ser: pre FEC SER value.
- * @post_fec_ser: post FEC SER value.
- * @throughput: measured throughput.
- * @latency: measured latency.
- */
-struct cpucp_nic_status {
-	__le32 port;
-	__le32 bad_format_cnt;
-	__le32 responder_out_of_sequence_psn_cnt;
-	__le32 high_ber_reinit;
-	__le32 correctable_err_cnt;
-	__le32 uncorrectable_err_cnt;
-	__le32 retraining_cnt;
-	__u8 up;
-	__u8 pcs_link;
-	__u8 phy_ready;
-	__u8 auto_neg;
-	__le32 timeout_retransmission_cnt;
-	__le32 high_ber_cnt;
-	struct ser_val pre_fec_ser;
-	struct ser_val post_fec_ser;
-	struct frac_val bandwidth;
-	struct frac_val lat;
-};
-
-enum cpucp_hbm_row_replace_cause {
-	REPLACE_CAUSE_DOUBLE_ECC_ERR,
-	REPLACE_CAUSE_MULTI_SINGLE_ECC_ERR,
-};
-
-struct cpucp_hbm_row_info {
-	__u8 hbm_idx;
-	__u8 pc;
-	__u8 sid;
-	__u8 bank_idx;
-	__le16 row_addr;
-	__u8 replaced_row_cause; /* enum cpucp_hbm_row_replace_cause */
-	__u8 pad;
-};
-
-struct cpucp_hbm_row_replaced_rows_info {
-	__le16 num_replaced_rows;
-	__u8 pad[6];
-	struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
-};
-
-enum cpu_reset_status {
-	CPU_RST_STATUS_NA = 0,
-	CPU_RST_STATUS_SOFT_RST_DONE = 1,
-};
-
-#define SEC_PCR_DATA_BUF_SZ	256
-#define SEC_PCR_QUOTE_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
-#define SEC_SIGNATURE_BUF_SZ	255	/* (256 - 1) 1 byte used for size */
-#define SEC_PUB_DATA_BUF_SZ	510	/* (512 - 2) 2 bytes used for size */
-#define SEC_CERTIFICATE_BUF_SZ	2046	/* (2048 - 2) 2 bytes used for size */
-
-/*
- * struct cpucp_sec_attest_info - attestation report of the boot
- * @pcr_data: raw values of the PCR registers
- * @pcr_num_reg: number of PCR registers in the pcr_data array
- * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
- * @nonce: number only used once. random number provided by host. this also
- *	    passed to the quote command as a qualifying data.
- * @pcr_quote_len: length of the attestation quote data (bytes)
- * @pcr_quote: attestation report data structure
- * @quote_sig_len: length of the attestation report signature (bytes)
- * @quote_sig: signature structure of the attestation report
- * @pub_data_len: length of the public data (bytes)
- * @public_data: public key for the signed attestation
- *		 (outPublic + name + qualifiedName)
- * @certificate_len: length of the certificate (bytes)
- * @certificate: certificate for the attestation signing key
- */
-struct cpucp_sec_attest_info {
-	__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
-	__u8 pcr_num_reg;
-	__u8 pcr_reg_len;
-	__le16 pad0;
-	__le32 nonce;
-	__le16 pcr_quote_len;
-	__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
-	__u8 quote_sig_len;
-	__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
-	__le16 pub_data_len;
-	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
-	__le16 certificate_len;
-	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
-};
-
-/*
- * struct cpucp_dev_info_signed - device information signed by a secured device
- * @info: device information structure as defined above
- * @nonce: number only used once. random number provided by host. this number is
- *	   hashed and signed along with the device information.
- * @info_sig_len: length of the attestation signature (bytes)
- * @info_sig: signature of the info + nonce data.
- * @pub_data_len: length of the public data (bytes)
- * @public_data: public key info signed info data
- *		 (outPublic + name + qualifiedName)
- * @certificate_len: length of the certificate (bytes)
- * @certificate: certificate for the signing key
- */
-struct cpucp_dev_info_signed {
-	struct cpucp_info info;	/* assumed to be 64bit aligned */
-	__le32 nonce;
-	__le32 pad0;
-	__u8 info_sig_len;
-	__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
-	__le16 pub_data_len;
-	__u8 public_data[SEC_PUB_DATA_BUF_SZ];
-	__le16 certificate_len;
-	__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
-};
-
-#define DCORE_MON_REGS_SZ	512
-/*
- * struct dcore_monitor_regs_data - DCORE monitor regs data.
- * the structure follows sync manager block layout. Obsolete.
- * @mon_pay_addrl: array of payload address low bits.
- * @mon_pay_addrh: array of payload address high bits.
- * @mon_pay_data: array of payload data.
- * @mon_arm: array of monitor arm.
- * @mon_status: array of monitor status.
- */
-struct dcore_monitor_regs_data {
-	__le32 mon_pay_addrl[DCORE_MON_REGS_SZ];
-	__le32 mon_pay_addrh[DCORE_MON_REGS_SZ];
-	__le32 mon_pay_data[DCORE_MON_REGS_SZ];
-	__le32 mon_arm[DCORE_MON_REGS_SZ];
-	__le32 mon_status[DCORE_MON_REGS_SZ];
-};
-
-/* contains SM data for each SYNC_MNGR (Obsolete) */
-struct cpucp_monitor_dump {
-	struct dcore_monitor_regs_data sync_mngr_w_s;
-	struct dcore_monitor_regs_data sync_mngr_e_s;
-	struct dcore_monitor_regs_data sync_mngr_w_n;
-	struct dcore_monitor_regs_data sync_mngr_e_n;
-};
-
-/*
- * The Type of the generic request (and other input arguments) will be fetched from user by reading
- * from "pkt_subidx" field in struct cpucp_packet.
- *
- * HL_PASSTHROUGHT_VERSIONS	- Fetch all firmware versions.
- */
-enum hl_passthrough_type {
-	HL_PASSTHROUGH_VERSIONS,
-};
-
-#endif /* CPUCP_IF_H */
diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/drivers/accel/habanalabs/include/common/hl_boot_if.h
deleted file mode 100644
index cff79f7f9f75..000000000000
--- a/drivers/accel/habanalabs/include/common/hl_boot_if.h
+++ /dev/null
@@ -1,785 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2018-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef HL_BOOT_IF_H
-#define HL_BOOT_IF_H
-
-#define LKD_HARD_RESET_MAGIC		0xED7BD694 /* deprecated - do not use */
-#define HL_POWER9_HOST_MAGIC		0x1DA30009
-
-#define BOOT_FIT_SRAM_OFFSET		0x200000
-
-#define VERSION_MAX_LEN			128
-
-enum cpu_boot_err {
-	CPU_BOOT_ERR_DRAM_INIT_FAIL = 0,
-	CPU_BOOT_ERR_FIT_CORRUPTED = 1,
-	CPU_BOOT_ERR_TS_INIT_FAIL = 2,
-	CPU_BOOT_ERR_DRAM_SKIPPED = 3,
-	CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4,
-	CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5,
-	CPU_BOOT_ERR_NIC_FW_FAIL = 6,
-	CPU_BOOT_ERR_SECURITY_NOT_RDY = 7,
-	CPU_BOOT_ERR_SECURITY_FAIL = 8,
-	CPU_BOOT_ERR_EFUSE_FAIL = 9,
-	CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10,
-	CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11,
-	CPU_BOOT_ERR_PLL_FAIL = 12,
-	CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
-	CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
-	CPU_BOOT_ERR_BINNING_FAIL = 19,
-	CPU_BOOT_ERR_TPM_FAIL = 20,
-	CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
-	CPU_BOOT_ERR_EEPROM_FAIL = 22,
-	CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL = 23,
-	CPU_BOOT_ERR_ENABLED = 31,
-	CPU_BOOT_ERR_SCND_EN = 63,
-	CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
-};
-
-/*
- * Mask for fatal failures
- * This mask contains all possible fatal failures, and a dynamic code
- * will clear the non-relevant ones.
- */
-#define CPU_BOOT_ERR_FATAL_MASK					\
-		((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) |		\
-		 (1 << CPU_BOOT_ERR_PLL_FAIL) |			\
-		 (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) |	\
-		 (1 << CPU_BOOT_ERR_BINNING_FAIL) |		\
-		 (1 << CPU_BOOT_ERR_DRAM_SKIPPED) |		\
-		 (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) |	\
-		 (1 << CPU_BOOT_ERR_EEPROM_FAIL))
-
-/*
- * CPU error bits in BOOT_ERROR registers
- *
- * CPU_BOOT_ERR0_DRAM_INIT_FAIL		DRAM initialization failed.
- *					DRAM is not reliable to use.
- *
- * CPU_BOOT_ERR0_FIT_CORRUPTED		FIT data integrity verification of the
- *					image provided by the host has failed.
- *
- * CPU_BOOT_ERR0_TS_INIT_FAIL		Thermal Sensor initialization failed.
- *					Boot continues as usual, but keep in
- *					mind this is a warning.
- *
- * CPU_BOOT_ERR0_DRAM_SKIPPED		DRAM initialization has been skipped.
- *					Skipping DRAM initialization has been
- *					requested (e.g. strap, command, etc.)
- *					and FW skipped the DRAM initialization.
- *					Host can initialize the DRAM.
- *
- * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED	Waiting for BMC data will be skipped.
- *					Meaning the BMC data might not be
- *					available until reset.
- *
- * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY	NIC data from BMC is not ready.
- *					BMC has not provided the NIC data yet.
- *					Once provided this bit will be cleared.
- *
- * CPU_BOOT_ERR0_NIC_FW_FAIL		NIC FW loading failed.
- *					The NIC FW loading and initialization
- *					failed. This means NICs are not usable.
- *
- * CPU_BOOT_ERR0_SECURITY_NOT_RDY	Chip security initialization has been
- *					started, but is not ready yet - chip
- *					cannot be accessed.
- *
- * CPU_BOOT_ERR0_SECURITY_FAIL		Security related tasks have failed.
- *					The tasks are security init (root of
- *					trust), boot authentication (chain of
- *					trust), data packets authentication.
- *
- * CPU_BOOT_ERR0_EFUSE_FAIL		Reading from eFuse failed.
- *					The PCI device ID might be wrong.
- *
- * CPU_BOOT_ERR0_PRI_IMG_VER_FAIL	Verification of primary image failed.
- *					It mean that ppboot checksum
- *					verification for the preboot primary
- *					image has failed to match expected
- *					checksum. Trying to program image again
- *					might solve this.
- *
- * CPU_BOOT_ERR0_SEC_IMG_VER_FAIL	Verification of secondary image failed.
- *					It mean that ppboot checksum
- *					verification for the preboot secondary
- *					image has failed to match expected
- *					checksum. Trying to program image again
- *					might solve this.
- *
- * CPU_BOOT_ERR0_PLL_FAIL		PLL settings failed, meaning that one
- *					of the PLLs remains in REF_CLK
- *
- * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	Device is unusable and customer support
- *					should be contacted.
- *
- * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR	Critical error was detected during
- *					the execution of ppboot or preboot.
- *					for example: stack overflow.
- *
- * CPU_BOOT_ERR0_BINNING_FAIL		Binning settings failed, meaning
- *					malfunctioning components might still be
- *					in use.
- *
- * CPU_BOOT_ERR0_TPM_FAIL		TPM verification flow failed.
- *
- * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	Failed to set threshold for tmperature
- *					sensor.
- *
- * CPU_BOOT_ERR_EEPROM_FAIL		Failed reading EEPROM data. Defaults
- *					are used.
- *
- * CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL	Failed scrubbing the Engines/ARCFarm
- *					memories. Boot disabled until reset.
- *
- * CPU_BOOT_ERR0_ENABLED		Error registers enabled.
- *					This is a main indication that the
- *					running FW populates the error
- *					registers. Meaning the error bits are
- *					not garbage, but actual error statuses.
- */
-#define CPU_BOOT_ERR0_DRAM_INIT_FAIL		(1 << CPU_BOOT_ERR_DRAM_INIT_FAIL)
-#define CPU_BOOT_ERR0_FIT_CORRUPTED		(1 << CPU_BOOT_ERR_FIT_CORRUPTED)
-#define CPU_BOOT_ERR0_TS_INIT_FAIL		(1 << CPU_BOOT_ERR_TS_INIT_FAIL)
-#define CPU_BOOT_ERR0_DRAM_SKIPPED		(1 << CPU_BOOT_ERR_DRAM_SKIPPED)
-#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED		(1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED)
-#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY		(1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY)
-#define CPU_BOOT_ERR0_NIC_FW_FAIL		(1 << CPU_BOOT_ERR_NIC_FW_FAIL)
-#define CPU_BOOT_ERR0_SECURITY_NOT_RDY		(1 << CPU_BOOT_ERR_SECURITY_NOT_RDY)
-#define CPU_BOOT_ERR0_SECURITY_FAIL		(1 << CPU_BOOT_ERR_SECURITY_FAIL)
-#define CPU_BOOT_ERR0_EFUSE_FAIL		(1 << CPU_BOOT_ERR_EFUSE_FAIL)
-#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL		(1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL)
-#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL		(1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL)
-#define CPU_BOOT_ERR0_PLL_FAIL			(1 << CPU_BOOT_ERR_PLL_FAIL)
-#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL	(1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
-#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR		(1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
-#define CPU_BOOT_ERR0_BINNING_FAIL		(1 << CPU_BOOT_ERR_BINNING_FAIL)
-#define CPU_BOOT_ERR0_TPM_FAIL			(1 << CPU_BOOT_ERR_TPM_FAIL)
-#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL	(1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
-#define CPU_BOOT_ERR0_EEPROM_FAIL		(1 << CPU_BOOT_ERR_EEPROM_FAIL)
-#define CPU_BOOT_ERR0_ENG_ARC_MEM_SCRUB_FAIL	(1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
-#define CPU_BOOT_ERR0_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
-#define CPU_BOOT_ERR1_ENABLED			(1 << CPU_BOOT_ERR_ENABLED)
-
-enum cpu_boot_dev_sts {
-	CPU_BOOT_DEV_STS_SECURITY_EN = 0,
-	CPU_BOOT_DEV_STS_DEBUG_EN = 1,
-	CPU_BOOT_DEV_STS_WATCHDOG_EN = 2,
-	CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3,
-	CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4,
-	CPU_BOOT_DEV_STS_E2E_CRED_EN = 5,
-	CPU_BOOT_DEV_STS_HBM_CRED_EN = 6,
-	CPU_BOOT_DEV_STS_RL_EN = 7,
-	CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8,
-	CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9,
-	CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10,
-	CPU_BOOT_DEV_STS_PLL_INFO_EN = 11,
-	CPU_BOOT_DEV_STS_SP_SRAM_EN = 12,
-	CPU_BOOT_DEV_STS_CLK_GATE_EN = 13,
-	CPU_BOOT_DEV_STS_HBM_ECC_EN = 14,
-	CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15,
-	CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16,
-	CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17,
-	CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18,
-	CPU_BOOT_DEV_STS_DYN_PLL_EN = 19,
-	CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20,
-	CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21,
-	CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22,
-	CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23,
-	CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24,
-	CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25,
-	CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26,
-	CPU_BOOT_DEV_STS_ENABLED = 31,
-	CPU_BOOT_DEV_STS_SCND_EN = 63,
-	CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */
-};
-
-/*
- * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers
- *
- * CPU_BOOT_DEV_STS0_SECURITY_EN	Security is Enabled.
- *					This is an indication for security
- *					enabled in FW, which means that
- *					all conditions for security are met:
- *					device is indicated as security enabled,
- *					registers are protected, and device
- *					uses keys for image verification.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_DEBUG_EN		Debug is enabled.
- *					Enabled when JTAG or DEBUG is enabled
- *					in FW.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_WATCHDOG_EN	Watchdog is enabled.
- *					Watchdog is enabled in FW.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_DRAM_INIT_EN	DRAM initialization is enabled.
- *					DRAM initialization has been done in FW.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_BMC_WAIT_EN	Waiting for BMC data enabled.
- *					If set, it means that during boot,
- *					FW waited for BMC data.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_E2E_CRED_EN	E2E credits initialized.
- *					FW initialized E2E credits.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_HBM_CRED_EN	HBM credits initialized.
- *					FW initialized HBM credits.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_RL_EN		Rate limiter initialized.
- *					FW initialized rate limiter.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_SRAM_SCR_EN	SRAM scrambler enabled.
- *					FW initialized SRAM scrambler.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_DRAM_SCR_EN	DRAM scrambler enabled.
- *					FW initialized DRAM scrambler.
- *					Initialized in: u-boot
- *
- * CPU_BOOT_DEV_STS0_FW_HARD_RST_EN	FW hard reset procedure is enabled.
- *					FW has the hard reset procedure
- *					implemented. This means that FW will
- *					perform hard reset procedure on
- *					receiving the halt-machine event.
- *					Initialized in: preboot, u-boot, linux
- *
- * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_SP_SRAM_EN		SP SRAM is initialized and available
- *					for use.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_CLK_GATE_EN	Clock Gating enabled.
- *					FW initialized Clock Gating.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_HBM_ECC_EN		HBM ECC handling Enabled.
- *					FW handles HBM ECC indications.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN	Packets ack value used in the armcpd
- *					is set to the PI counter.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_FW_LD_COM_EN	Flexible FW loading communication
- *					protocol is enabled.
- *					Initialized in: preboot
- *
- * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN	FW iATU configuration is enabled.
- *					This bit if set, means the iATU has been
- *					configured and is ready for use.
- *					Initialized in: ppboot
- *
- * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN	NIC MAC channels init is done by FW and
- *					any access to them is done via the FW.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_DYN_PLL_EN		Dynamic PLL configuration is enabled.
- *					FW sends to host a bitmap of supported
- *					PLLs.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN	GIC access permission only from
- *					previleged entity. FW sets this status
- *					bit for host. If this bit is set then
- *					GIC can not be accessed from host.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_EQ_INDEX_EN	Event Queue (EQ) index is a running
- *					index for each new event sent to host.
- *					This is used as a method in host to
- *					identify that the waiting event in
- *					queue is actually a new event which
- *					was not served before.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	Use multiple scratchpad interfaces to
- *					prevent IRQs overriding each other.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
- *					NIC STAT and XPCS91 access is restricted
- *					and is done via FW only.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
- *					NIC STAT get all is supported.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
- *					F/W checks if the device is idle by reading defined set
- *					of registers. It returns a bitmask of all the engines,
- *					where a bit is set if the engine is not idle.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_MAP_HWMON_EN
- *					If set, means f/w supports proprietary
- *					HWMON enum mapping to cpucp enums.
- *					Initialized in: linux
- *
- * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
- *					This is a main indication that the
- *					running FW populates the device status
- *					register. Meaning the device status
- *					bits are not garbage, but actual
- *					statuses.
- *					Initialized in: preboot
- *
- */
-#define CPU_BOOT_DEV_STS0_SECURITY_EN		(1 << CPU_BOOT_DEV_STS_SECURITY_EN)
-#define CPU_BOOT_DEV_STS0_DEBUG_EN		(1 << CPU_BOOT_DEV_STS_DEBUG_EN)
-#define CPU_BOOT_DEV_STS0_WATCHDOG_EN		(1 << CPU_BOOT_DEV_STS_WATCHDOG_EN)
-#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN		(1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN)
-#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN		(1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN)
-#define CPU_BOOT_DEV_STS0_E2E_CRED_EN		(1 << CPU_BOOT_DEV_STS_E2E_CRED_EN)
-#define CPU_BOOT_DEV_STS0_HBM_CRED_EN		(1 << CPU_BOOT_DEV_STS_HBM_CRED_EN)
-#define CPU_BOOT_DEV_STS0_RL_EN			(1 << CPU_BOOT_DEV_STS_RL_EN)
-#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN		(1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN)
-#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN		(1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN)
-#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN	(1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN)
-#define CPU_BOOT_DEV_STS0_PLL_INFO_EN		(1 << CPU_BOOT_DEV_STS_PLL_INFO_EN)
-#define CPU_BOOT_DEV_STS0_SP_SRAM_EN		(1 << CPU_BOOT_DEV_STS_SP_SRAM_EN)
-#define CPU_BOOT_DEV_STS0_CLK_GATE_EN		(1 << CPU_BOOT_DEV_STS_CLK_GATE_EN)
-#define CPU_BOOT_DEV_STS0_HBM_ECC_EN		(1 << CPU_BOOT_DEV_STS_HBM_ECC_EN)
-#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN		(1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN)
-#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN		(1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN)
-#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN	(1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN)
-#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN		(1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN)
-#define CPU_BOOT_DEV_STS0_DYN_PLL_EN		(1 << CPU_BOOT_DEV_STS_DYN_PLL_EN)
-#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN	(1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN)
-#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN		(1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN)
-#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN	(1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN)
-#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN	(1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN)
-#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN	(1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN)
-#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN	(1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN)
-#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN		(1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN)
-#define CPU_BOOT_DEV_STS0_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
-#define CPU_BOOT_DEV_STS1_ENABLED		(1 << CPU_BOOT_DEV_STS_ENABLED)
-
-enum cpu_boot_status {
-	CPU_BOOT_STATUS_NA = 0,		/* Default value after reset of chip */
-	CPU_BOOT_STATUS_IN_WFE = 1,
-	CPU_BOOT_STATUS_DRAM_RDY = 2,
-	CPU_BOOT_STATUS_SRAM_AVAIL = 3,
-	CPU_BOOT_STATUS_IN_BTL = 4,	/* BTL is H/W FSM */
-	CPU_BOOT_STATUS_IN_PREBOOT = 5,
-	CPU_BOOT_STATUS_IN_SPL,		/* deprecated - not reported */
-	CPU_BOOT_STATUS_IN_UBOOT = 7,
-	CPU_BOOT_STATUS_DRAM_INIT_FAIL,	/* deprecated - will be removed */
-	CPU_BOOT_STATUS_FIT_CORRUPTED,	/* deprecated - will be removed */
-	/* U-Boot console prompt activated, commands are not processed */
-	CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
-	/* Finished NICs init, reported after DRAM and NICs */
-	CPU_BOOT_STATUS_NIC_FW_RDY = 11,
-	CPU_BOOT_STATUS_TS_INIT_FAIL,	/* deprecated - will be removed */
-	CPU_BOOT_STATUS_DRAM_SKIPPED,	/* deprecated - will be removed */
-	CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
-	/* Last boot loader progress status, ready to receive commands */
-	CPU_BOOT_STATUS_READY_TO_BOOT = 15,
-	/* Internal Boot finished, ready for boot-fit */
-	CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
-	/* Internal Security has been initialized, device can be accessed */
-	CPU_BOOT_STATUS_SECURITY_READY = 17,
-};
-
-enum kmd_msg {
-	KMD_MSG_NA = 0,
-	KMD_MSG_GOTO_WFE,
-	KMD_MSG_FIT_RDY,
-	KMD_MSG_SKIP_BMC,
-	RESERVED,
-	KMD_MSG_RST_DEV,
-	KMD_MSG_LAST
-};
-
-enum cpu_msg_status {
-	CPU_MSG_CLR = 0,
-	CPU_MSG_OK,
-	CPU_MSG_ERR,
-};
-
-/* communication registers mapping - consider ABI when changing */
-struct cpu_dyn_regs {
-	__le32 cpu_pq_base_addr_low;
-	__le32 cpu_pq_base_addr_high;
-	__le32 cpu_pq_length;
-	__le32 cpu_pq_init_status;
-	__le32 cpu_eq_base_addr_low;
-	__le32 cpu_eq_base_addr_high;
-	__le32 cpu_eq_length;
-	__le32 cpu_eq_ci;
-	__le32 cpu_cq_base_addr_low;
-	__le32 cpu_cq_base_addr_high;
-	__le32 cpu_cq_length;
-	__le32 cpu_pf_pq_pi;
-	__le32 cpu_boot_dev_sts0;
-	__le32 cpu_boot_dev_sts1;
-	__le32 cpu_boot_err0;
-	__le32 cpu_boot_err1;
-	__le32 cpu_boot_status;
-	__le32 fw_upd_sts;
-	__le32 fw_upd_cmd;
-	__le32 fw_upd_pending_sts;
-	__le32 fuse_ver_offset;
-	__le32 preboot_ver_offset;
-	__le32 uboot_ver_offset;
-	__le32 hw_state;
-	__le32 kmd_msg_to_cpu;
-	__le32 cpu_cmd_status_to_host;
-	__le32 gic_host_pi_upd_irq;
-	__le32 gic_tpc_qm_irq_ctrl;
-	__le32 gic_mme_qm_irq_ctrl;
-	__le32 gic_dma_qm_irq_ctrl;
-	__le32 gic_nic_qm_irq_ctrl;
-	__le32 gic_dma_core_irq_ctrl;
-	__le32 gic_host_halt_irq;
-	__le32 gic_host_ints_irq;
-	__le32 gic_host_soft_rst_irq;
-	__le32 gic_rot_qm_irq_ctrl;
-	__le32 cpu_rst_status;
-	__le32 eng_arc_irq_ctrl;
-	__le32 reserved1[20];		/* reserve for future use */
-};
-
-/* TODO: remove the desc magic after the code is updated to use message */
-/* HCDM - Habana Communications Descriptor Magic */
-#define HL_COMMS_DESC_MAGIC	0x4843444D
-#define HL_COMMS_DESC_VER	3
-
-/* HCMv - Habana Communications Message + header version */
-#define HL_COMMS_MSG_MAGIC_VALUE	0x48434D00
-#define HL_COMMS_MSG_MAGIC_MASK		0xFFFFFF00
-#define HL_COMMS_MSG_MAGIC_VER_MASK	0xFF
-
-#define HL_COMMS_MSG_MAGIC_VER(ver)	(HL_COMMS_MSG_MAGIC_VALUE |	\
-					((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
-#define HL_COMMS_MSG_MAGIC_V0		HL_COMMS_DESC_MAGIC
-#define HL_COMMS_MSG_MAGIC_V1		HL_COMMS_MSG_MAGIC_VER(1)
-#define HL_COMMS_MSG_MAGIC_V2		HL_COMMS_MSG_MAGIC_VER(2)
-#define HL_COMMS_MSG_MAGIC_V3		HL_COMMS_MSG_MAGIC_VER(3)
-
-#define HL_COMMS_MSG_MAGIC		HL_COMMS_MSG_MAGIC_V3
-
-#define HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC(magic)			\
-		(((magic) & HL_COMMS_MSG_MAGIC_MASK) ==			\
-		HL_COMMS_MSG_MAGIC_VALUE)
-
-#define HL_COMMS_MSG_MAGIC_VALIDATE_VERSION(magic, ver)			\
-		(((magic) & HL_COMMS_MSG_MAGIC_VER_MASK) >=		\
-		((ver) & HL_COMMS_MSG_MAGIC_VER_MASK))
-
-#define HL_COMMS_MSG_MAGIC_VALIDATE(magic, ver)				\
-		(HL_COMMS_MSG_MAGIC_VALIDATE_MAGIC((magic)) &&		\
-		HL_COMMS_MSG_MAGIC_VALIDATE_VERSION((magic), (ver)))
-
-enum comms_msg_type {
-	HL_COMMS_DESC_TYPE = 0,
-	HL_COMMS_RESET_CAUSE_TYPE = 1,
-	HL_COMMS_FW_CFG_SKIP_TYPE = 2,
-	HL_COMMS_BINNING_CONF_TYPE = 3,
-};
-
-/*
- * Binning information shared between LKD and FW
- * @tpc_mask_l - TPC binning information lower 64 bit
- * @dec_mask - Decoder binning information
- * @dram_mask - DRAM binning information
- * @edma_mask - EDMA binning information
- * @mme_mask_l - MME binning information lower 32
- * @mme_mask_h - MME binning information upper 32
- * @rot_mask - Rotator binning information
- * @xbar_mask - xBAR binning information
- * @reserved - reserved field for future binning info w/o ABI change
- * @tpc_mask_h - TPC binning information upper 64 bit
- * @nic_mask - NIC binning information
- */
-struct lkd_fw_binning_info {
-	__le64 tpc_mask_l;
-	__le32 dec_mask;
-	__le32 dram_mask;
-	__le32 edma_mask;
-	__le32 mme_mask_l;
-	__le32 mme_mask_h;
-	__le32 rot_mask;
-	__le32 xbar_mask;
-	__le32 reserved0;
-	__le64 tpc_mask_h;
-	__le64 nic_mask;
-	__le32 reserved1[8];
-};
-
-/* TODO: remove this struct after the code is updated to use message */
-/* this is the comms descriptor header - meta data */
-struct comms_desc_header {
-	__le32 magic;		/* magic for validation */
-	__le32 crc32;		/* CRC32 of the descriptor w/o header */
-	__le16 size;		/* size of the descriptor w/o header */
-	__u8 version;	/* descriptor version */
-	__u8 reserved[5];	/* pad to 64 bit */
-};
-
-/* this is the comms message header - meta data */
-struct comms_msg_header {
-	__le32 magic;		/* magic for validation */
-	__le32 crc32;		/* CRC32 of the message w/o header */
-	__le16 size;		/* size of the message w/o header */
-	__u8 version;	/* message payload version */
-	__u8 type;		/* message type */
-	__u8 reserved[4];	/* pad to 64 bit */
-};
-
-enum lkd_fw_ascii_msg_lvls {
-	LKD_FW_ASCII_MSG_ERR = 0,
-	LKD_FW_ASCII_MSG_WRN = 1,
-	LKD_FW_ASCII_MSG_INF = 2,
-	LKD_FW_ASCII_MSG_DBG = 3,
-};
-
-#define LKD_FW_ASCII_MSG_MAX_LEN	128
-#define LKD_FW_ASCII_MSG_MAX		4	/* consider ABI when changing */
-
-struct lkd_fw_ascii_msg {
-	__u8 valid;
-	__u8 msg_lvl;
-	__u8 reserved[6];
-	char msg[LKD_FW_ASCII_MSG_MAX_LEN];
-};
-
-/* this is the main FW descriptor - consider ABI when changing */
-struct lkd_fw_comms_desc {
-	struct comms_desc_header header;
-	struct cpu_dyn_regs cpu_dyn_regs;
-	char fuse_ver[VERSION_MAX_LEN];
-	char cur_fw_ver[VERSION_MAX_LEN];
-	/* can be used for 1 more version w/o ABI change */
-	char reserved0[VERSION_MAX_LEN];
-	__le64 img_addr;	/* address for next FW component load */
-	struct lkd_fw_binning_info binning_info;
-	struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
-};
-
-enum comms_reset_cause {
-	HL_RESET_CAUSE_UNKNOWN = 0,
-	HL_RESET_CAUSE_HEARTBEAT = 1,
-	HL_RESET_CAUSE_TDR = 2,
-};
-
-/* TODO: remove define after struct name is aligned on all projects */
-#define lkd_msg_comms lkd_fw_comms_msg
-
-/* this is the comms message descriptor */
-struct lkd_fw_comms_msg {
-	struct comms_msg_header header;
-	/* union for future expantions of new messages */
-	union {
-		struct {
-			struct cpu_dyn_regs cpu_dyn_regs;
-			char fuse_ver[VERSION_MAX_LEN];
-			char cur_fw_ver[VERSION_MAX_LEN];
-			/* can be used for 1 more version w/o ABI change */
-			char reserved0[VERSION_MAX_LEN];
-			/* address for next FW component load */
-			__le64 img_addr;
-			struct lkd_fw_binning_info binning_info;
-			struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX];
-		};
-		struct {
-			__u8 reset_cause;
-		};
-		struct {
-			__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
-		};
-		struct lkd_fw_binning_info binning_conf;
-	};
-};
-
-/*
- * LKD commands:
- *
- * COMMS_NOOP			Used to clear the command register and no actual
- *				command is send.
- *
- * COMMS_CLR_STS		Clear status command - FW should clear the
- *				status register. Used for synchronization
- *				between the commands as part of the race free
- *				protocol.
- *
- * COMMS_RST_STATE		Reset the current communication state which is
- *				kept by FW for proper responses.
- *				Should be used in the beginning of the
- *				communication cycle to clean any leftovers from
- *				previous communication attempts.
- *
- * COMMS_PREP_DESC		Prepare descriptor for setting up the
- *				communication and other dynamic data:
- *				struct lkd_fw_comms_desc.
- *				This command has a parameter stating the next FW
- *				component size, so the FW can actually prepare a
- *				space for it and in the status response provide
- *				the descriptor offset. The Offset of the next FW
- *				data component is a part of the descriptor
- *				structure.
- *
- * COMMS_DATA_RDY		The FW data has been uploaded and is ready for
- *				validation.
- *
- * COMMS_EXEC			Execute the next FW component.
- *
- * COMMS_RST_DEV		Reset the device.
- *
- * COMMS_GOTO_WFE		Execute WFE command. Allowed only on non-secure
- *				devices.
- *
- * COMMS_SKIP_BMC		Perform actions required for BMC-less servers.
- *				Do not wait for BMC response.
- *
- * COMMS_PREP_DESC_ELBI		Same as COMMS_PREP_DESC only that the memory
- *				space is allocated in a ELBI access only
- *				address range.
- *
- */
-enum comms_cmd {
-	COMMS_NOOP = 0,
-	COMMS_CLR_STS = 1,
-	COMMS_RST_STATE = 2,
-	COMMS_PREP_DESC = 3,
-	COMMS_DATA_RDY = 4,
-	COMMS_EXEC = 5,
-	COMMS_RST_DEV = 6,
-	COMMS_GOTO_WFE = 7,
-	COMMS_SKIP_BMC = 8,
-	COMMS_PREP_DESC_ELBI = 10,
-	COMMS_INVLD_LAST
-};
-
-#define COMMS_COMMAND_SIZE_SHIFT	0
-#define COMMS_COMMAND_SIZE_MASK		0x1FFFFFF
-#define COMMS_COMMAND_CMD_SHIFT		27
-#define COMMS_COMMAND_CMD_MASK		0xF8000000
-
-/*
- * LKD command to FW register structure
- * @size	- FW component size
- * @cmd		- command from enum comms_cmd
- */
-struct comms_command {
-	union {		/* bit fields are only for FW use */
-		struct {
-			u32 size :25;		/* 32MB max. */
-			u32 reserved :2;
-			enum comms_cmd cmd :5;		/* 32 commands */
-		};
-		__le32 val;
-	};
-};
-
-/*
- * FW status
- *
- * COMMS_STS_NOOP		Used to clear the status register and no actual
- *				status is provided.
- *
- * COMMS_STS_ACK		Command has been received and recognized.
- *
- * COMMS_STS_OK			Command execution has finished successfully.
- *
- * COMMS_STS_ERR		Command execution was unsuccessful and resulted
- *				in error.
- *
- * COMMS_STS_VALID_ERR		FW validation has failed.
- *
- * COMMS_STS_TIMEOUT_ERR	Command execution has timed out.
- */
-enum comms_sts {
-	COMMS_STS_NOOP = 0,
-	COMMS_STS_ACK = 1,
-	COMMS_STS_OK = 2,
-	COMMS_STS_ERR = 3,
-	COMMS_STS_VALID_ERR = 4,
-	COMMS_STS_TIMEOUT_ERR = 5,
-	COMMS_STS_INVLD_LAST
-};
-
-/* RAM types for FW components loading - defines the base address */
-enum comms_ram_types {
-	COMMS_SRAM = 0,
-	COMMS_DRAM = 1,
-};
-
-#define COMMS_STATUS_OFFSET_SHIFT	0
-#define COMMS_STATUS_OFFSET_MASK	0x03FFFFFF
-#define COMMS_STATUS_OFFSET_ALIGN_SHIFT	2
-#define COMMS_STATUS_RAM_TYPE_SHIFT	26
-#define COMMS_STATUS_RAM_TYPE_MASK	0x0C000000
-#define COMMS_STATUS_STATUS_SHIFT	28
-#define COMMS_STATUS_STATUS_MASK	0xF0000000
-
-/*
- * FW status to LKD register structure
- * @offset	- an offset from the base of the ram_type shifted right by
- *		  2 bits (always aligned to 32 bits).
- *		  Allows a maximum addressable offset of 256MB from RAM base.
- *		  Example: for real offset in RAM of 0x800000 (8MB), the value
- *		  in offset field is (0x800000 >> 2) = 0x200000.
- * @ram_type	- the RAM type that should be used for offset from
- *		  enum comms_ram_types
- * @status	- status from enum comms_sts
- */
-struct comms_status {
-	union {		/* bit fields are only for FW use */
-		struct {
-			u32 offset :26;
-			enum comms_ram_types ram_type :2;
-			enum comms_sts status :4;	/* 16 statuses */
-		};
-		__le32 val;
-	};
-};
-
-#define NAME_MAX_LEN	32 /* bytes */
-struct hl_module_data {
-	__u8 name[NAME_MAX_LEN];
-	__u8 version[VERSION_MAX_LEN];
-};
-
-/**
- * struct hl_component_versions - versions associated with hl component.
- * @struct_size: size of all the struct (including dynamic size of modules).
- * @modules_offset: offset of the modules field in this struct.
- * @component: version of the component itself.
- * @fw_os: Firmware OS Version.
- * @comp_name: Name of the component.
- * @modules_counter: number of set bits in modules_mask.
- * @reserved: reserved for future use.
- * @modules: versions of the component's modules. Elborated explanation in
- *              struct cpucp_versions.
- */
-struct hl_component_versions {
-	__le16 struct_size;
-	__le16 modules_offset;
-	__u8 component[VERSION_MAX_LEN];
-	__u8 fw_os[VERSION_MAX_LEN];
-	__u8 comp_name[NAME_MAX_LEN];
-	__u8 modules_counter;
-	__u8 reserved[3];
-	struct hl_module_data modules[];
-};
-
-/* Max size of fit size */
-#define HL_FW_VERSIONS_FIT_SIZE	4096
-
-#endif /* HL_BOOT_IF_H */
diff --git a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
index 2dba02757d37..a2547f306750 100644
--- a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
+++ b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h
@@ -44,38 +44,6 @@ struct eq_nic_sei_event {
 	__u8 pad[6];
 };
 
-/*
- * struct gaudi_nic_status - describes the status of a NIC port.
- * @port: NIC port index.
- * @bad_format_cnt: e.g. CRC.
- * @responder_out_of_sequence_psn_cnt: e.g NAK.
- * @high_ber_reinit_cnt: link reinit due to high BER.
- * @correctable_err_cnt: e.g. bit-flip.
- * @uncorrectable_err_cnt: e.g. MAC errors.
- * @retraining_cnt: re-training counter.
- * @up: is port up.
- * @pcs_link: has PCS link.
- * @phy_ready: is PHY ready.
- * @auto_neg: is Autoneg enabled.
- * @timeout_retransmission_cnt: timeout retransmission events
- * @high_ber_cnt: high ber events
- */
-struct gaudi_nic_status {
-	__u32 port;
-	__u32 bad_format_cnt;
-	__u32 responder_out_of_sequence_psn_cnt;
-	__u32 high_ber_reinit;
-	__u32 correctable_err_cnt;
-	__u32 uncorrectable_err_cnt;
-	__u32 retraining_cnt;
-	__u8 up;
-	__u8 pcs_link;
-	__u8 phy_ready;
-	__u8 auto_neg;
-	__u32 timeout_retransmission_cnt;
-	__u32 high_ber_cnt;
-};
-
 struct gaudi_cold_rst_data {
 	union {
 		struct {
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
index f661068d0c5f..a426410139af 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h
@@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
 	GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
 	GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
 	GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
+	GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
+	GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
+	GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
+	GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
 	GAUDI2_EVENT_SIZE,
 };
 
diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index ad01fc4e9940..b2dbe1f64430 100644
--- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 		 .name = "" },
 	{ .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
 		 .name = "PCIE_P2P_MSIX" },
-	{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
+	{ .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD,
 		 .name = "PCIE_DRAIN_COMPLETE" },
 	{ .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE,
 		 .name = "TPC0_BMON_SPMU" },
@@ -2673,6 +2673,20 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 		 .name = "FP32_NOT_SUPPORTED" },
 	{ .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD,
 		 .name = "DEV_RESET_REQ" },
+	{ .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_BRK_ENTRY" },
+	{ .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_BRK_EXT" },
+	{ .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_RD_MODE0" },
+	{ .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_RD_MODE1" },
+	{ .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_RD_MODE2" },
+	{ .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_PWR_RD_MODE3" },
+	{ .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE,
+		 .name = "ARC_EQ_HEARTBEAT" },
 };
 
 #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index e4328b430564..95ff7ad16338 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -2,7 +2,6 @@
 # Copyright (C) 2023 Intel Corporation
 
 intel_vpu-y := \
-	ivpu_debugfs.o \
 	ivpu_drv.o \
 	ivpu_fw.o \
 	ivpu_fw_log.o \
@@ -16,4 +15,6 @@ intel_vpu-y := \
 	ivpu_mmu_context.o \
 	ivpu_pm.o
 
+intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o
+
 obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO
deleted file mode 100644
index 9077217ae10f..000000000000
--- a/drivers/accel/ivpu/TODO
+++ /dev/null
@@ -1,11 +0,0 @@
-- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler()
-- Implement support for BLOB IDs
-- Add debugfs support to improve debugging and testing
-- Add tracing events for performance debugging
-- Implement HW based scheduling support
-- Use syncobjs for submit/sync
-- Refactor IPC protocol to improve message latency
-- Implement BO cache and MADVISE IOCTL
-- Add support for user allocated buffers using prime import and dma-buf heaps
-- Refactor struct ivpu_bo to use struct drm_gem_shmem_object
-- Add driver/device documentation
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 5e5996fd4f9f..ea453b985b49 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -17,20 +17,26 @@
 #include "ivpu_jsm_msg.h"
 #include "ivpu_pm.h"
 
+static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s)
+{
+	struct drm_debugfs_entry *entry = s->private;
+
+	return to_ivpu_device(entry->dev);
+}
+
 static int bo_list_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
 	struct drm_printer p = drm_seq_file_printer(s);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 
-	ivpu_bo_list(node->minor->dev, &p);
+	ivpu_bo_list(&vdev->drm, &p);
 
 	return 0;
 }
 
 static int fw_name_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 
 	seq_printf(s, "%s\n", vdev->fw->name);
 	return 0;
@@ -38,8 +44,7 @@ static int fw_name_show(struct seq_file *s, void *v)
 
 static int fw_trace_capability_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 	u64 trace_hw_component_mask;
 	u32 trace_destination_mask;
 	int ret;
@@ -57,8 +62,7 @@ static int fw_trace_capability_show(struct seq_file *s, void *v)
 
 static int fw_trace_config_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 	/**
 	 * WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet,
 	 * so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config()
@@ -78,8 +82,7 @@ static int fw_trace_config_show(struct seq_file *s, void *v)
 
 static int last_bootmode_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 
 	seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot");
 
@@ -88,8 +91,7 @@ static int last_bootmode_show(struct seq_file *s, void *v)
 
 static int reset_counter_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 
 	seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter));
 	return 0;
@@ -97,14 +99,13 @@ static int reset_counter_show(struct seq_file *s, void *v)
 
 static int reset_pending_show(struct seq_file *s, void *v)
 {
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct ivpu_device *vdev = to_ivpu_device(node->minor->dev);
+	struct ivpu_device *vdev = seq_to_ivpu(s);
 
 	seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
 	return 0;
 }
 
-static const struct drm_info_list vdev_debugfs_list[] = {
+static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"bo_list", bo_list_show, 0},
 	{"fw_name", fw_name_show, 0},
 	{"fw_trace_capability", fw_trace_capability_show, 0},
@@ -270,25 +271,24 @@ static const struct file_operations ivpu_reset_engine_fops = {
 	.write = ivpu_reset_engine_fn,
 };
 
-void ivpu_debugfs_init(struct drm_minor *minor)
+void ivpu_debugfs_init(struct ivpu_device *vdev)
 {
-	struct ivpu_device *vdev = to_ivpu_device(minor->dev);
+	struct dentry *debugfs_root = vdev->drm.debugfs_root;
 
-	drm_debugfs_create_files(vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list),
-				 minor->debugfs_root, minor);
+	drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list));
 
-	debugfs_create_file("force_recovery", 0200, minor->debugfs_root, vdev,
+	debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
 			    &ivpu_force_recovery_fops);
 
-	debugfs_create_file("fw_log", 0644, minor->debugfs_root, vdev,
+	debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
 			    &fw_log_fops);
-	debugfs_create_file("fw_trace_destination_mask", 0200, minor->debugfs_root, vdev,
+	debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
 			    &fw_trace_destination_mask_fops);
-	debugfs_create_file("fw_trace_hw_comp_mask", 0200, minor->debugfs_root, vdev,
+	debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev,
 			    &fw_trace_hw_comp_mask_fops);
-	debugfs_create_file("fw_trace_level", 0200, minor->debugfs_root, vdev,
+	debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev,
 			    &fw_trace_level_fops);
 
-	debugfs_create_file("reset_engine", 0200, minor->debugfs_root, vdev,
+	debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
 			    &ivpu_reset_engine_fops);
 }
diff --git a/drivers/accel/ivpu/ivpu_debugfs.h b/drivers/accel/ivpu/ivpu_debugfs.h
index 78f80c1e00e4..49ae9ea78287 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.h
+++ b/drivers/accel/ivpu/ivpu_debugfs.h
@@ -6,8 +6,12 @@
 #ifndef __IVPU_DEBUGFS_H__
 #define __IVPU_DEBUGFS_H__
 
-struct drm_minor;
+struct ivpu_device;
 
-void ivpu_debugfs_init(struct drm_minor *minor);
+#if defined(CONFIG_DEBUG_FS)
+void ivpu_debugfs_init(struct ivpu_device *vdev);
+#else
+static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { }
+#endif
 
 #endif /* __IVPU_DEBUGFS_H__ */
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 7e9359611d69..790603017653 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -131,6 +131,22 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
 	return 0;
 }
 
+static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
+{
+	int ret;
+
+	ret = ivpu_rpm_get_if_active(vdev);
+	if (ret < 0)
+		return ret;
+
+	*clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
+
+	if (ret)
+		ivpu_rpm_put(vdev);
+
+	return 0;
+}
+
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -154,7 +170,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 		args->value = vdev->platform;
 		break;
 	case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-		args->value = ivpu_hw_reg_pll_freq_get(vdev);
+		ret = ivpu_get_core_clock_rate(vdev, &args->value);
 		break;
 	case DRM_IVPU_PARAM_NUM_CONTEXTS:
 		args->value = ivpu_get_context_count(vdev);
@@ -400,10 +416,6 @@ static const struct drm_driver driver = {
 	.postclose = ivpu_postclose,
 	.gem_prime_import = ivpu_gem_prime_import,
 
-#if defined(CONFIG_DEBUG_FS)
-	.debugfs_init = ivpu_debugfs_init,
-#endif
-
 	.ioctls = ivpu_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
 	.fops = &ivpu_fops,
@@ -523,78 +535,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
 
 	ret = ivpu_pci_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}
 
 	ret = ivpu_irq_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}
 
 	/* Init basic HW info based on buttress registers which are accessible before power up */
 	ret = ivpu_hw_info_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}
 
 	/* Power up early so the rest of init code can access VPU registers */
 	ret = ivpu_hw_power_up(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
+	if (ret)
 		goto err_xa_destroy;
-	}
 
 	ret = ivpu_mmu_global_context_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret);
+	if (ret)
 		goto err_power_down;
-	}
 
 	ret = ivpu_mmu_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret);
+	if (ret)
 		goto err_mmu_gctx_fini;
-	}
 
-	ret = ivpu_fw_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret);
+	ret = ivpu_mmu_reserved_context_init(vdev);
+	if (ret)
 		goto err_mmu_gctx_fini;
-	}
+
+	ret = ivpu_fw_init(vdev);
+	if (ret)
+		goto err_mmu_rctx_fini;
 
 	ret = ivpu_ipc_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret);
+	if (ret)
 		goto err_fw_fini;
-	}
 
-	ret = ivpu_pm_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize PM: %d\n", ret);
-		goto err_ipc_fini;
-	}
+	ivpu_pm_init(vdev);
 
 	ret = ivpu_job_done_thread_init(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
+	if (ret)
 		goto err_ipc_fini;
-	}
-
-	ret = ivpu_fw_load(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
-		goto err_job_done_thread_fini;
-	}
 
 	ret = ivpu_boot(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to boot: %d\n", ret);
+	if (ret)
 		goto err_job_done_thread_fini;
-	}
 
 	ivpu_pm_enable(vdev);
 
@@ -606,6 +592,8 @@ err_ipc_fini:
 	ivpu_ipc_fini(vdev);
 err_fw_fini:
 	ivpu_fw_fini(vdev);
+err_mmu_rctx_fini:
+	ivpu_mmu_reserved_context_fini(vdev);
 err_mmu_gctx_fini:
 	ivpu_mmu_global_context_fini(vdev);
 err_power_down:
@@ -629,6 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
 
 	ivpu_ipc_fini(vdev);
 	ivpu_fw_fini(vdev);
+	ivpu_mmu_reserved_context_fini(vdev);
 	ivpu_mmu_global_context_fini(vdev);
 
 	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
@@ -657,10 +646,10 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, vdev);
 
 	ret = ivpu_dev_init(vdev);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret);
+	if (ret)
 		return ret;
-	}
+
+	ivpu_debugfs_init(vdev);
 
 	ret = drm_dev_register(&vdev->drm, 0);
 	if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 2adc349126bb..417ddeca8517 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -29,12 +29,13 @@
 #define IVPU_HW_37XX	37
 #define IVPU_HW_40XX	40
 
-#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0
-/* SSID 1 is used by the VPU to represent invalid context */
-#define IVPU_USER_CONTEXT_MIN_SSID   2
-#define IVPU_USER_CONTEXT_MAX_SSID   (IVPU_USER_CONTEXT_MIN_SSID + 63)
+#define IVPU_GLOBAL_CONTEXT_MMU_SSID   0
+/* SSID 1 is used by the VPU to represent reserved context */
+#define IVPU_RESERVED_CONTEXT_MMU_SSID 1
+#define IVPU_USER_CONTEXT_MIN_SSID     2
+#define IVPU_USER_CONTEXT_MAX_SSID     (IVPU_USER_CONTEXT_MIN_SSID + 63)
 
-#define IVPU_NUM_ENGINES	     2
+#define IVPU_NUM_ENGINES 2
 
 #define IVPU_PLATFORM_SILICON 0
 #define IVPU_PLATFORM_SIMICS  2
@@ -76,6 +77,11 @@
 
 #define IVPU_WA(wa_name) (vdev->wa.wa_name)
 
+#define IVPU_PRINT_WA(wa_name) do {					\
+	if (IVPU_WA(wa_name))						\
+		ivpu_dbg(vdev, MISC, "Using WA: " #wa_name "\n");	\
+} while (0)
+
 struct ivpu_wa_table {
 	bool punit_disabled;
 	bool clear_runtime_mem;
@@ -105,6 +111,7 @@ struct ivpu_device {
 	struct ivpu_pm_info *pm;
 
 	struct ivpu_mmu_context gctx;
+	struct ivpu_mmu_context rctx;
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
 
@@ -118,6 +125,7 @@ struct ivpu_device {
 		int jsm;
 		int tdr;
 		int reschedule_suspend;
+		int autosuspend;
 	} timeout;
 };
 
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index a277bbae78fc..691da521dde5 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
 	if (ret)
 		goto err_fw_release;
 
+	ivpu_fw_load(vdev);
+
 	return 0;
 
 err_fw_release:
@@ -314,25 +316,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev)
 	ivpu_fw_release(vdev);
 }
 
-int ivpu_fw_load(struct ivpu_device *vdev)
+void ivpu_fw_load(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
 	u64 image_end_offset = fw->image_load_offset + fw->image_size;
 
-	memset(fw->mem->kvaddr, 0, fw->image_load_offset);
-	memcpy(fw->mem->kvaddr + fw->image_load_offset,
+	memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset);
+	memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset,
 	       fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size);
 
 	if (IVPU_WA(clear_runtime_mem)) {
-		u8 *start = fw->mem->kvaddr + image_end_offset;
-		u64 size = fw->mem->base.size - image_end_offset;
+		u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset;
+		u64 size = ivpu_bo_size(fw->mem) - image_end_offset;
 
 		memset(start, 0, size);
 	}
 
 	wmb(); /* Flush WC buffers after writing fw->mem */
-
-	return 0;
 }
 
 static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -451,10 +451,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 					  vdev->hw->ranges.global.start;
 
 	boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr;
-	boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2;
+	boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
 
-	boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2;
-	boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2;
+	boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2;
+	boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2;
 
 	boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start;
 	boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user);
@@ -486,9 +486,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->trace_destination_mask = vdev->fw->trace_destination_mask;
 	boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask;
 	boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr;
-	boot_params->crit_tracing_buff_size = vdev->fw->mem_log_crit->base.size;
+	boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit);
 	boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr;
-	boot_params->verbose_tracing_buff_size = vdev->fw->mem_log_verb->base.size;
+	boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb);
 
 	boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 8567fdf925fe..10ae2847f0ef 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -31,7 +31,7 @@ struct ivpu_fw_info {
 
 int ivpu_fw_init(struct ivpu_device *vdev);
 void ivpu_fw_fini(struct ivpu_device *vdev);
-int ivpu_fw_load(struct ivpu_device *vdev);
+void ivpu_fw_load(struct ivpu_device *vdev);
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
 
 static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index 95065cac9fbd..f6770f5e82a2 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -31,10 +31,10 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
 {
 	struct vpu_tracing_buffer_header *log;
 
-	if ((*offset + sizeof(*log)) > bo->base.size)
+	if ((*offset + sizeof(*log)) > ivpu_bo_size(bo))
 		return -EINVAL;
 
-	log = bo->kvaddr + *offset;
+	log = ivpu_bo_vaddr(bo) + *offset;
 
 	if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY)
 		return -EINVAL;
@@ -43,7 +43,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset,
 		ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size);
 		return -EINVAL;
 	}
-	if ((char *)log + log->size > (char *)bo->kvaddr + bo->base.size) {
+	if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) {
 		ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size);
 		return -EINVAL;
 	}
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index d09f13b35902..c91852f2edc8 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -69,7 +69,7 @@ static const struct ivpu_bo_ops prime_ops = {
 
 static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
 {
-	int npages = bo->base.size >> PAGE_SHIFT;
+	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
 	struct page **pages;
 
 	pages = drm_gem_get_pages(&bo->base);
@@ -88,7 +88,7 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
 static void shmem_free_pages_locked(struct ivpu_bo *bo)
 {
 	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
 
 	drm_gem_put_pages(&bo->base, bo->pages, true, false);
 	bo->pages = NULL;
@@ -96,7 +96,7 @@ static void shmem_free_pages_locked(struct ivpu_bo *bo)
 
 static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
 {
-	int npages = bo->base.size >> PAGE_SHIFT;
+	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 	struct sg_table *sgt;
 	int ret;
@@ -142,7 +142,7 @@ static const struct ivpu_bo_ops shmem_ops = {
 
 static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
 {
-	unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
 	struct page **pages;
 	int ret;
 
@@ -171,10 +171,10 @@ err_free_pages:
 
 static void internal_free_pages_locked(struct ivpu_bo *bo)
 {
-	unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
 
 	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
 
 	for (i = 0; i < npages; i++)
 		put_page(bo->pages[i]);
@@ -291,7 +291,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 	}
 
 	mutex_lock(&ctx->lock);
-	ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node);
+	ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
 	if (!ret) {
 		bo->ctx = ctx;
 		bo->vpu_addr = bo->mm_node.start;
@@ -438,7 +438,7 @@ static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
 
 	ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
-		 bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name);
+		 bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
 
 	if (obj->import_attach) {
 		/* Drop the reference drm_gem_mmap_obj() acquired.*/
@@ -553,7 +553,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	drm_gem_object_put(&bo->base);
 
 	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags);
+		 file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
 
 	return ret;
 }
@@ -590,22 +590,22 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		goto err_put;
 
 	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
+		drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
 
 	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
+		set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
 	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
+		set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
 
 	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
-	bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
+	bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
 	if (!bo->kvaddr) {
 		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
 		goto err_put;
 	}
 
 	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 bo->vpu_addr, bo->base.size, flags);
+		 bo->vpu_addr, ivpu_bo_size(bo), flags);
 
 	return bo;
 
@@ -718,7 +718,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
 
 	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
-		   bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size,
+		   bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
 		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
 }
 
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index 6b0ceda5f253..a0b4d4a32b3b 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -68,9 +68,19 @@ static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
 	return container_of(obj, struct ivpu_bo, base);
 }
 
+static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
+{
+	return bo->kvaddr;
+}
+
+static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
+{
+	return bo->base.size;
+}
+
 static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
 {
-	if (offset > bo->base.size || !bo->pages)
+	if (offset >= ivpu_bo_size(bo) || !bo->pages)
 		return NULL;
 
 	return bo->pages[offset / PAGE_SIZE];
@@ -107,21 +117,21 @@ static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
 	if (vpu_addr < bo->vpu_addr)
 		return NULL;
 
-	if (vpu_addr >= (bo->vpu_addr + bo->base.size))
+	if (vpu_addr >= (bo->vpu_addr + ivpu_bo_size(bo)))
 		return NULL;
 
-	return bo->kvaddr + (vpu_addr - bo->vpu_addr);
+	return ivpu_bo_vaddr(bo) + (vpu_addr - bo->vpu_addr);
 }
 
 static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
 {
-	if (cpu_addr < bo->kvaddr)
+	if (cpu_addr < ivpu_bo_vaddr(bo))
 		return 0;
 
-	if (cpu_addr >= (bo->kvaddr + bo->base.size))
+	if (cpu_addr >= (ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)))
 		return 0;
 
-	return bo->vpu_addr + (cpu_addr - bo->kvaddr);
+	return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo));
 }
 
 #endif /* __IVPU_GEM_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 18be8b98e9a8..5c0246b9e522 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -68,57 +68,28 @@
 				     (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \
 				     (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX)))
 
-static char *ivpu_platform_to_str(u32 platform)
-{
-	switch (platform) {
-	case IVPU_PLATFORM_SILICON:
-		return "IVPU_PLATFORM_SILICON";
-	case IVPU_PLATFORM_SIMICS:
-		return "IVPU_PLATFORM_SIMICS";
-	case IVPU_PLATFORM_FPGA:
-		return "IVPU_PLATFORM_FPGA";
-	default:
-		return "Invalid platform";
-	}
-}
-
-static void ivpu_hw_read_platform(struct ivpu_device *vdev)
-{
-	u32 gen_ctrl = REGV_RD32(VPU_37XX_HOST_SS_GEN_CTRL);
-	u32 platform = REG_GET_FLD(VPU_37XX_HOST_SS_GEN_CTRL, PS, gen_ctrl);
-
-	if  (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA)
-		vdev->platform = platform;
-	else
-		vdev->platform = IVPU_PLATFORM_SILICON;
-
-	ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n",
-		 ivpu_platform_to_str(vdev->platform), vdev->platform);
-}
-
 static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 {
-	vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
+	vdev->wa.punit_disabled = false;
 	vdev->wa.clear_runtime_mem = false;
 	vdev->wa.d3hot_after_power_off = true;
 
 	if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4)
 		vdev->wa.interrupt_clear_with_0 = true;
+
+	IVPU_PRINT_WA(punit_disabled);
+	IVPU_PRINT_WA(clear_runtime_mem);
+	IVPU_PRINT_WA(d3hot_after_power_off);
+	IVPU_PRINT_WA(interrupt_clear_with_0);
 }
 
 static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 {
-	if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) {
-		vdev->timeout.boot = 100000;
-		vdev->timeout.jsm = 50000;
-		vdev->timeout.tdr = 2000000;
-		vdev->timeout.reschedule_suspend = 1000;
-	} else {
-		vdev->timeout.boot = 1000;
-		vdev->timeout.jsm = 500;
-		vdev->timeout.tdr = 2000;
-		vdev->timeout.reschedule_suspend = 10;
-	}
+	vdev->timeout.boot = 1000;
+	vdev->timeout.jsm = 500;
+	vdev->timeout.tdr = 2000;
+	vdev->timeout.reschedule_suspend = 10;
+	vdev->timeout.autosuspend = 10;
 }
 
 static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -213,8 +184,7 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
 	int ret;
 
 	if (IVPU_WA(punit_disabled)) {
-		ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n",
-			 ivpu_platform_to_str(vdev->platform));
+		ivpu_dbg(vdev, PM, "Skipping PLL request\n");
 		return 0;
 	}
 
@@ -345,10 +315,10 @@ static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
 
 static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
 
-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;
 
 	return 0;
@@ -356,10 +326,10 @@ static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val)
 
 static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN);
 
-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;
 
 	return 0;
@@ -367,10 +337,10 @@ static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_va
 
 static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val)
 {
-	u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY);
+	u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY);
 
-	if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
-	    !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
+	if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) ||
+	    !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val))
 		return -EIO;
 
 	return 0;
@@ -423,15 +393,15 @@ static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable
 	int ret;
 	u32 val;
 
-	val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN);
+	val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN);
 	if (enable) {
-		val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
-		val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+		val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+		val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
 	} else {
-		val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val);
-		val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
+		val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val);
+		val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val);
 	}
-	REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val);
+	REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val);
 
 	ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0);
 	if (ret) {
@@ -477,10 +447,6 @@ static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable)
 
 static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val)
 {
-	/* FPGA model (UPF) is not power aware, skipped Power Island polling */
-	if (ivpu_is_fpga(vdev))
-		return 0;
-
 	return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU,
 			     exp_val, PWR_ISLAND_STATUS_TIMEOUT_US);
 }
@@ -563,17 +529,17 @@ static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev)
 {
 	u32 val;
 
-	val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
-	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
+	val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC);
+	val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val);
 
-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
 
-	val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
 
-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
-	REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val);
+	REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val);
 
 	val = vdev->fw->entry_point >> 9;
 	REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val);
@@ -625,6 +591,10 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 	ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G);
 	ivpu_hw_init_range(&hw->ranges.dma,   0x200000000, SZ_8G);
 
+	vdev->platform = IVPU_PLATFORM_SILICON;
+	ivpu_hw_wa_init(vdev);
+	ivpu_hw_timeouts_init(vdev);
+
 	return 0;
 }
 
@@ -681,10 +651,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ivpu_hw_read_platform(vdev);
-	ivpu_hw_wa_init(vdev);
-	ivpu_hw_timeouts_init(vdev);
-
 	ret = ivpu_hw_37xx_reset(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
@@ -777,17 +743,17 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
 	u32 val;
 
 	/* Enable writing and set non-zero WDT value */
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE);
 
 	/* Enable writing and disable watchdog timer */
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0);
 
 	/* Now clear the timeout interrupt */
-	val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG);
-	val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val);
+	val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG);
+	val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
 }
 
 static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
@@ -834,10 +800,10 @@ static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev)
 
 static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id)
 {
-	u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0;
-	u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET);
+	u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0;
+	u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET);
 
-	REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
+	REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val);
 }
 
 static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev)
@@ -854,7 +820,7 @@ static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev)
 
 static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr)
 {
-	REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr);
+	REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr);
 }
 
 static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index 6e4e915948f9..4083beb5e9db 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -3,70 +3,70 @@
  * Copyright (C) 2020-2023 Intel Corporation
  */
 
-#ifndef __IVPU_HW_MTL_REG_H__
-#define __IVPU_HW_MTL_REG_H__
+#ifndef __IVPU_HW_37XX_REG_H__
+#define __IVPU_HW_37XX_REG_H__
 
 #include <linux/bits.h>
 
-#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE					0x00000000u
+#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE				0x00000000u
 
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT					0x00000004u
-#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK			BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_INTERRUPT_STAT				0x00000004u
+#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK		BIT_MASK(0)
 #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK			BIT_MASK(1)
 #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK			BIT_MASK(2)
 
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0					0x00000008u
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK			GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK			GENMASK(31, 16)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0				0x00000008u
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK		GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK		GENMASK(31, 16)
 
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1					0x0000000cu
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK			GENMASK(15, 0)
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK				GENMASK(31, 16)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1				0x0000000cu
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK		GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK			GENMASK(31, 16)
 
-#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2					0x00000010u
+#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2				0x00000010u
 #define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK			GENMASK(15, 0)
 
-#define VPU_37XX_BUTTRESS_WP_REQ_CMD						0x00000014u
+#define VPU_37XX_BUTTRESS_WP_REQ_CMD					0x00000014u
 #define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK				BIT_MASK(0)
 
 #define VPU_37XX_BUTTRESS_WP_DOWNLOAD					0x00000018u
 #define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK			GENMASK(15, 0)
 
 #define VPU_37XX_BUTTRESS_CURRENT_PLL					0x0000001cu
-#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK				GENMASK(15, 0)
+#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK			GENMASK(15, 0)
 
-#define VPU_37XX_BUTTRESS_PLL_ENABLE						0x00000020u
+#define VPU_37XX_BUTTRESS_PLL_ENABLE					0x00000020u
 
-#define VPU_37XX_BUTTRESS_FMIN_FUSE						0x00000024u
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK				GENMASK(7, 0)
-#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK				GENMASK(15, 8)
+#define VPU_37XX_BUTTRESS_FMIN_FUSE					0x00000024u
+#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK			GENMASK(7, 0)
+#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK			GENMASK(15, 8)
 
-#define VPU_37XX_BUTTRESS_FMAX_FUSE						0x00000028u
-#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK				GENMASK(7, 0)
+#define VPU_37XX_BUTTRESS_FMAX_FUSE					0x00000028u
+#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK			GENMASK(7, 0)
 
-#define VPU_37XX_BUTTRESS_TILE_FUSE						0x0000002cu
+#define VPU_37XX_BUTTRESS_TILE_FUSE					0x0000002cu
 #define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK				BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK					GENMASK(3, 2)
+#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK				GENMASK(3, 2)
 
-#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK					0x00000030u
-#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK					0x00000034u
+#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK				0x00000030u
+#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK				0x00000034u
 
-#define VPU_37XX_BUTTRESS_PLL_STATUS						0x00000040u
+#define VPU_37XX_BUTTRESS_PLL_STATUS					0x00000040u
 #define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK				BIT_MASK(1)
 
-#define VPU_37XX_BUTTRESS_VPU_STATUS						0x00000044u
+#define VPU_37XX_BUTTRESS_VPU_STATUS					0x00000044u
 #define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK				BIT_MASK(0)
 #define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK				BIT_MASK(1)
 
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL					0x00000060u
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK			BIT_MASK(0)
-#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK				BIT_MASK(2)
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL				0x00000060u
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK		BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK			BIT_MASK(2)
 
 #define VPU_37XX_BUTTRESS_VPU_IP_RESET					0x00000050u
-#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK				BIT_MASK(0)
+#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK			BIT_MASK(0)
 
 #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET				0x00000080u
-#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE					0x00000084u
+#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE				0x00000084u
 #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE				0x00000088u
 
 #define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0					0x000000a0u
@@ -74,9 +74,9 @@
 #define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR					0x000000a8u
 
 #define VPU_37XX_BUTTRESS_UFI_ERR_LOG					0x000000b0u
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK				GENMASK(11, 0)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK				GENMASK(19, 12)
-#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK				GENMASK(24, 20)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK			GENMASK(11, 0)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK			GENMASK(19, 12)
+#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK			GENMASK(24, 20)
 
 #define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR					0x000000b4u
 
@@ -113,17 +113,17 @@
 #define VPU_37XX_HOST_SS_NOC_QDENY					0x0000015cu
 #define VPU_37XX_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK			BIT_MASK(0)
 
-#define MTL_VPU_TOP_NOC_QREQN						0x00000160u
-#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QREQN						0x00000160u
+#define VPU_37XX_TOP_NOC_QREQN_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
 
-#define MTL_VPU_TOP_NOC_QACCEPTN					0x00000164u
-#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QACCEPTN					0x00000164u
+#define VPU_37XX_TOP_NOC_QACCEPTN_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
 
-#define MTL_VPU_TOP_NOC_QDENY						0x00000168u
-#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK				BIT_MASK(0)
-#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
+#define VPU_37XX_TOP_NOC_QDENY						0x00000168u
+#define VPU_37XX_TOP_NOC_QDENY_CPU_CTRL_MASK				BIT_MASK(0)
+#define VPU_37XX_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK			BIT_MASK(1)
 
 #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN					0x00000170u
 #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK			BIT_MASK(0)
@@ -140,9 +140,9 @@
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK			BIT_MASK(2)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK			BIT_MASK(3)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK		BIT_MASK(4)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK			BIT_MASK(5)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK			BIT_MASK(6)
-#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK			BIT_MASK(7)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK		BIT_MASK(5)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK		BIT_MASK(6)
+#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK		BIT_MASK(7)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK		BIT_MASK(8)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK	BIT_MASK(30)
 #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK	BIT_MASK(31)
@@ -164,14 +164,14 @@
 #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK		GENMASK(23, 16)
 #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK			GENMASK(31, 24)
 
-#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0					0x00030020u
+#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0				0x00030020u
 #define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK			BIT_MASK(3)
 
 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0				0x00030024u
-#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK			BIT_MASK(3)
+#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK		BIT_MASK(3)
 
 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0			0x00030028u
-#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK		BIT_MASK(3)
+#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK	BIT_MASK(3)
 
 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0				0x0003002cu
 #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK		BIT_MASK(3)
@@ -187,47 +187,14 @@
 #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK		GENMASK(2, 1)
 #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK		GENMASK(31, 3)
 
-#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR				0x00082020u
+#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR			0x00082020u
 #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK	GENMASK(15, 0)
 #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK		GENMASK(31, 16)
 
-#define VPU_37XX_HOST_MMU_IDR0						0x00200000u
-#define VPU_37XX_HOST_MMU_IDR1						0x00200004u
-#define VPU_37XX_HOST_MMU_IDR3						0x0020000cu
-#define VPU_37XX_HOST_MMU_IDR5						0x00200014u
-#define VPU_37XX_HOST_MMU_CR0						0x00200020u
-#define VPU_37XX_HOST_MMU_CR0ACK						0x00200024u
-#define VPU_37XX_HOST_MMU_CR1						0x00200028u
-#define VPU_37XX_HOST_MMU_CR2						0x0020002cu
-#define VPU_37XX_HOST_MMU_IRQ_CTRL					0x00200050u
-#define VPU_37XX_HOST_MMU_IRQ_CTRLACK					0x00200054u
-
-#define VPU_37XX_HOST_MMU_GERROR						0x00200060u
-#define VPU_37XX_HOST_MMU_GERROR_CMDQ_MASK				BIT_MASK(0)
-#define VPU_37XX_HOST_MMU_GERROR_EVTQ_ABT_MASK				BIT_MASK(2)
-#define VPU_37XX_HOST_MMU_GERROR_PRIQ_ABT_MASK				BIT_MASK(3)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK			BIT_MASK(4)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK			BIT_MASK(5)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK			BIT_MASK(6)
-#define VPU_37XX_HOST_MMU_GERROR_MSI_ABT_MASK				BIT_MASK(7)
-
-#define VPU_37XX_HOST_MMU_GERRORN					0x00200064u
-
-#define VPU_37XX_HOST_MMU_STRTAB_BASE					0x00200080u
-#define VPU_37XX_HOST_MMU_STRTAB_BASE_CFG				0x00200088u
-#define VPU_37XX_HOST_MMU_CMDQ_BASE					0x00200090u
-#define VPU_37XX_HOST_MMU_CMDQ_PROD					0x00200098u
-#define VPU_37XX_HOST_MMU_CMDQ_CONS					0x0020009cu
-#define VPU_37XX_HOST_MMU_EVTQ_BASE					0x002000a0u
-#define VPU_37XX_HOST_MMU_EVTQ_PROD					0x002000a8u
-#define VPU_37XX_HOST_MMU_EVTQ_CONS					0x002000acu
-#define VPU_37XX_HOST_MMU_EVTQ_PROD_SEC					(0x002000a8u + SZ_64K)
-#define VPU_37XX_HOST_MMU_EVTQ_CONS_SEC					(0x002000acu + SZ_64K)
-
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES				0x00360000u
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK	BIT_MASK(0)
-#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK		BIT_MASK(1)
-#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK		BIT_MASK(2)
+#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK	BIT_MASK(1)
+#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK	BIT_MASK(2)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK	BIT_MASK(3)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK	BIT_MASK(4)
 #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK	BIT_MASK(5)
@@ -246,36 +213,36 @@
 #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK		BIT_MASK(8)
 #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK		BIT_MASK(9)
 
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE					0x04000000u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL				0x04000000u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG				0x04400010u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG				0x04400014u
-#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG				0x04400020u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_BASE				0x04000000u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_CTRL				0x04000000u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_PC_REG				0x04400010u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_NPC_REG				0x04400014u
+#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG			0x04400020u
 
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET				0x06010004u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK			BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET				0x06010004u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK			BIT_MASK(1)
 
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR				0x06010018u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK			BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR				0x06010018u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK			BIT_MASK(1)
 
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC				0x06010040u
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK		BIT_MASK(0)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK		BIT_MASK(1)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK		BIT_MASK(2)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK		BIT_MASK(3)
-#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK		GENMASK(31, 4)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC				0x06010040u
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK	BIT_MASK(0)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK	BIT_MASK(1)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK	BIT_MASK(2)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK	BIT_MASK(3)
+#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK		GENMASK(31, 4)
 
-#define MTL_VPU_CPU_SS_TIM_WATCHDOG					0x0602009cu
-#define MTL_VPU_CPU_SS_TIM_WDOG_EN					0x060200a4u
-#define MTL_VPU_CPU_SS_TIM_SAFE						0x060200a8u
-#define MTL_VPU_CPU_SS_TIM_IPC_FIFO					0x060200f0u
+#define VPU_37XX_CPU_SS_TIM_WATCHDOG					0x0602009cu
+#define VPU_37XX_CPU_SS_TIM_WDOG_EN					0x060200a4u
+#define VPU_37XX_CPU_SS_TIM_SAFE					0x060200a8u
+#define VPU_37XX_CPU_SS_TIM_IPC_FIFO					0x060200f0u
 
-#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG					0x06021008u
-#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
+#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG					0x06021008u
+#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
 
-#define MTL_VPU_CPU_SS_DOORBELL_0					0x06300000u
-#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
+#define VPU_37XX_CPU_SS_DOORBELL_0					0x06300000u
+#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
 
-#define MTL_VPU_CPU_SS_DOORBELL_1					0x06301000u
+#define VPU_37XX_CPU_SS_DOORBELL_1					0x06301000u
 
-#endif /* __IVPU_HW_MTL_REG_H__ */
+#endif /* __IVPU_HW_37XX_REG_H__ */
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 85171a408363..e691c49c9841 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -125,6 +125,10 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 
 	if (ivpu_hw_gen(vdev) == IVPU_HW_40XX)
 		vdev->wa.disable_clock_relinquish = true;
+
+	IVPU_PRINT_WA(punit_disabled);
+	IVPU_PRINT_WA(clear_runtime_mem);
+	IVPU_PRINT_WA(disable_clock_relinquish);
 }
 
 static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
@@ -134,16 +138,19 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.jsm = 50000;
 		vdev->timeout.tdr = 2000000;
 		vdev->timeout.reschedule_suspend = 1000;
+		vdev->timeout.autosuspend = -1;
 	} else if (ivpu_is_simics(vdev)) {
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 10000;
 		vdev->timeout.reschedule_suspend = 10;
+		vdev->timeout.autosuspend = -1;
 	} else {
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
 		vdev->timeout.reschedule_suspend = 10;
+		vdev->timeout.autosuspend = 10;
 	}
 }
 
@@ -728,6 +735,10 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
 	ivpu_hw_init_range(&vdev->hw->ranges.shave,  0x80000000 + SZ_256M, SZ_2G - SZ_256M);
 	ivpu_hw_init_range(&vdev->hw->ranges.dma,   0x200000000, SZ_8G);
 
+	ivpu_hw_read_platform(vdev);
+	ivpu_hw_wa_init(vdev);
+	ivpu_hw_timeouts_init(vdev);
+
 	return 0;
 }
 
@@ -819,10 +830,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
 		return ret;
 	}
 
-	ivpu_hw_read_platform(vdev);
-	ivpu_hw_wa_init(vdev);
-	ivpu_hw_timeouts_init(vdev);
-
 	ret = ivpu_hw_40xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h
index 43c2c0c2d050..79b3f441eac4 100644
--- a/drivers/accel/ivpu/ivpu_hw_reg_io.h
+++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h
@@ -47,22 +47,30 @@
 #define REG_TEST_FLD_NUM(REG, FLD, num, val) \
 	((num) == FIELD_GET(REG##_##FLD##_MASK, val))
 
-#define REGB_POLL(reg, var, cond, timeout_us) \
-	read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
-
-#define REGV_POLL(reg, var, cond, timeout_us) \
-	read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg)
-
 #define REGB_POLL_FLD(reg, fld, val, timeout_us) \
 ({ \
 	u32 var; \
-	REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
+	int r; \
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
+		 __func__, #reg, reg, #fld, val); \
+	r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
+			      REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
+		 __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
+	r; \
 })
 
 #define REGV_POLL_FLD(reg, fld, val, timeout_us) \
 ({ \
 	u32 var; \
-	REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \
+	int r; \
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \
+		 __func__, #reg, reg, #fld, val); \
+	r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\
+			      REG_POLL_SLEEP_US, timeout_us, false, (reg)); \
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \
+		 __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \
+	r; \
 })
 
 static inline u32
@@ -71,7 +79,7 @@ ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg,
 {
 	u32 val = readl(base + reg);
 
-	ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val);
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%08x\n", func, name, reg, val);
 	return val;
 }
 
@@ -81,7 +89,7 @@ ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg,
 {
 	u64 val = readq(base + reg);
 
-	ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val);
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%016llx\n", func, name, reg, val);
 	return val;
 }
 
@@ -89,7 +97,7 @@ static inline void
 ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val,
 		 const char *name, const char *func)
 {
-	ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val);
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%08x\n", func, name, reg, val);
 	writel(val, base + reg);
 }
 
@@ -97,7 +105,7 @@ static inline void
 ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val,
 		 const char *name, const char *func)
 {
-	ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val);
+	ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%016llx\n", func, name, reg, val);
 	writeq(val, base + reg);
 }
 
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 295c0d7b5039..a4ca40b184d4 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -45,8 +45,9 @@ static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c,
 	u32 *payload = (u32 *)&jsm_msg->payload;
 
 	ivpu_dbg(vdev, JSM,
-		 "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n",
-		 c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result,
+		 "%s: vpu:0x%08x (type:%s, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n",
+		 c, vpu_addr, ivpu_jsm_msg_type_to_str(jsm_msg->type),
+		 jsm_msg->status, jsm_msg->request_id, jsm_msg->result,
 		 payload[0], payload[1], payload[2], payload[3], payload[4]);
 }
 
@@ -79,8 +80,8 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 
 	tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf));
 	if (!tx_buf_vpu_addr) {
-		ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n",
-			 sizeof(*tx_buf));
+		ivpu_err_ratelimited(vdev, "Failed to reserve IPC buffer, size %zu\n",
+				     sizeof(*tx_buf));
 		return -ENOMEM;
 	}
 
@@ -93,12 +94,12 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm);
 
 	if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE)
-		ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n",
-			  tx_buf_vpu_addr);
+		ivpu_warn_ratelimited(vdev, "IPC message vpu:0x%x not released by firmware\n",
+				      tx_buf_vpu_addr);
 
 	if (tx_buf->jsm.status != VPU_JSM_MSG_FREE)
-		ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n",
-			  jsm_vpu_addr);
+		ivpu_warn_ratelimited(vdev, "JSM message vpu:0x%x not released by firmware\n",
+				      jsm_vpu_addr);
 
 	memset(tx_buf, 0, sizeof(*tx_buf));
 	tx_buf->ipc.data_addr = jsm_vpu_addr;
@@ -263,18 +264,19 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
 
 	ret = ivpu_ipc_send(vdev, &cons, req);
 	if (ret) {
-		ivpu_warn(vdev, "IPC send failed: %d\n", ret);
+		ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret);
 		goto consumer_del;
 	}
 
 	ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms);
 	if (ret) {
-		ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret);
+		ivpu_warn_ratelimited(vdev, "IPC receive failed: type %s, ret %d\n",
+				      ivpu_jsm_msg_type_to_str(req->type), ret);
 		goto consumer_del;
 	}
 
 	if (resp->type != expected_resp_type) {
-		ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type);
+		ivpu_warn_ratelimited(vdev, "Invalid JSM response type: 0x%x\n", resp->type);
 		ret = -EBADE;
 	}
 
@@ -372,13 +374,13 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 	while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
 		vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
 		if (vpu_addr == REG_IO_ERROR) {
-			ivpu_err(vdev, "Failed to read IPC rx addr register\n");
+			ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
 			return -EIO;
 		}
 
 		ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
 		if (!ipc_hdr) {
-			ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr);
+			ivpu_warn_ratelimited(vdev, "IPC msg 0x%x out of range\n", vpu_addr);
 			continue;
 		}
 		ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr);
@@ -387,7 +389,8 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) {
 			jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr);
 			if (!jsm_msg) {
-				ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr);
+				ivpu_warn_ratelimited(vdev, "JSM msg 0x%x out of range\n",
+						      ipc_hdr->data_addr);
 				ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL);
 				continue;
 			}
@@ -395,7 +398,8 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		}
 
 		if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) {
-			ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG);
+			ivpu_warn_ratelimited(vdev, "IPC RX msg dropped, msg count %d\n",
+					      IPC_MAX_RX_MSG);
 			ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
 			continue;
 		}
@@ -423,15 +427,20 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 int ivpu_ipc_init(struct ivpu_device *vdev)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
-	int ret = -ENOMEM;
+	int ret;
 
 	ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
-	if (!ipc->mem_tx)
-		return ret;
+	if (!ipc->mem_tx) {
+		ivpu_err(vdev, "Failed to allocate mem_tx\n");
+		return -ENOMEM;
+	}
 
 	ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
-	if (!ipc->mem_rx)
+	if (!ipc->mem_rx) {
+		ivpu_err(vdev, "Failed to allocate mem_rx\n");
+		ret = -ENOMEM;
 		goto err_free_tx;
+	}
 
 	ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT),
 					  -1, "TX_IPC_JSM");
@@ -441,7 +450,7 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
 		goto err_free_rx;
 	}
 
-	ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1);
+	ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ivpu_bo_size(ipc->mem_tx), -1);
 	if (ret) {
 		ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret);
 		goto err_free_rx;
@@ -497,8 +506,8 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
 
 	mutex_lock(&ipc->lock);
 
-	memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size);
-	memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size);
+	memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
+	memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));
 	wmb(); /* Flush WC buffers for TX and RX rings */
 
 	mutex_unlock(&ipc->lock);
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index de9e69f70af7..689dc0d13b8f 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -48,10 +48,10 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 e
 		goto cmdq_free;
 
 	cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
-	cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) /
+	cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) /
 				  sizeof(struct vpu_job_queue_entry));
 
-	cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr;
+	cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem);
 	jobq_header = &cmdq->jobq->header;
 	jobq_header->engine_idx = engine;
 	jobq_header->head = 0;
@@ -93,7 +93,7 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16
 		return cmdq;
 
 	ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
-				   cmdq->mem->vpu_addr, cmdq->mem->base.size);
+				   cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem));
 	if (ret)
 		return NULL;
 
@@ -453,7 +453,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 		return -EBUSY;
 	}
 
-	if (commands_offset >= bo->base.size) {
+	if (commands_offset >= ivpu_bo_size(bo)) {
 		ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
 		return -EINVAL;
 	}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index bdddef2c59ee..0c2fe7142024 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -7,6 +7,70 @@
 #include "ivpu_ipc.h"
 #include "ivpu_jsm_msg.h"
 
+const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
+{
+	/* Each case returns its enumerator's own spelling; unlisted values fall out of the switch. */
+#define JSM_MSG_NAME(id) case id: return #id
+	switch (type) {
+	JSM_MSG_NAME(VPU_JSM_MSG_UNKNOWN);
+	JSM_MSG_NAME(VPU_JSM_MSG_ENGINE_RESET);
+	JSM_MSG_NAME(VPU_JSM_MSG_ENGINE_PREEMPT);
+	JSM_MSG_NAME(VPU_JSM_MSG_REGISTER_DB);
+	JSM_MSG_NAME(VPU_JSM_MSG_UNREGISTER_DB);
+	JSM_MSG_NAME(VPU_JSM_MSG_QUERY_ENGINE_HB);
+	JSM_MSG_NAME(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT);
+	JSM_MSG_NAME(VPU_JSM_MSG_GET_POWER_LEVEL);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_POWER_LEVEL);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_OPEN);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_CLOSE);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_SET_CONFIG);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_CONFIG);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_CAPABILITY);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_NAME);
+	JSM_MSG_NAME(VPU_JSM_MSG_SSID_RELEASE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_START);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_STOP);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_UPDATE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_INFO);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP);
+	JSM_MSG_NAME(VPU_JSM_MSG_CREATE_CMD_QUEUE);
+	JSM_MSG_NAME(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
+	JSM_MSG_NAME(VPU_JSM_MSG_HWS_REGISTER_DB);
+	JSM_MSG_NAME(VPU_JSM_MSG_BLOB_DEINIT);
+	JSM_MSG_NAME(VPU_JSM_MSG_DYNDBG_CONTROL);
+	JSM_MSG_NAME(VPU_JSM_MSG_JOB_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_ENGINE_RESET_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_ENGINE_PREEMPT_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_REGISTER_DB_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_UNREGISTER_DB_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_QUERY_ENGINE_HB_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_GET_POWER_LEVEL_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_POWER_LEVEL_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_SET_CONFIG_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_CONFIG_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_TRACE_GET_NAME_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_SSID_RELEASE_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_START_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
+	JSM_MSG_NAME(VPU_JSM_MSG_BLOB_DEINIT_DONE);
+	JSM_MSG_NAME(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
+	}
+#undef JSM_MSG_NAME
+	return "Unknown JSM message type";
+}
+
 int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
 			 u64 jobq_base, u32 jobq_size)
 {
@@ -22,7 +86,7 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret) {
-		ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret);
+		ivpu_err_ratelimited(vdev, "Failed to register doorbell %d: %d\n", db_id, ret);
 		return ret;
 	}
 
@@ -42,7 +106,7 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id)
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret) {
-		ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret);
+		ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret);
 		return ret;
 	}
 
@@ -65,7 +129,8 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat)
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret) {
-		ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret);
+		ivpu_err_ratelimited(vdev, "Failed to get heartbeat from engine %d: %d\n",
+				     engine, ret);
 		return ret;
 	}
 
@@ -87,7 +152,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine)
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret)
-		ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret);
+		ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret);
 
 	return ret;
 }
@@ -107,7 +172,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret)
-		ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret);
+		ivpu_err_ratelimited(vdev, "Failed to preempt engine %d: %d\n", engine, ret);
 
 	return ret;
 }
@@ -123,7 +188,8 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret)
-		ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret);
+		ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n",
+				      command, ret);
 
 	return ret;
 }
@@ -138,7 +204,7 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret) {
-		ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret);
+		ivpu_warn_ratelimited(vdev, "Failed to get trace capability: %d\n", ret);
 		return ret;
 	}
 
@@ -162,7 +228,7 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra
 	ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp,
 				    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 	if (ret)
-		ivpu_warn(vdev, "Failed to set config: %d\n", ret);
+		ivpu_warn_ratelimited(vdev, "Failed to set config: %d\n", ret);
 
 	return ret;
 }
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index ab50d7b017c1..66979a948c7c 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -8,6 +8,8 @@
 
 #include "vpu_jsm_api.h"
 
+const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type);
+
 int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id,
 			 u64 jobq_base, u32 jobq_size);
 int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id);
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index baefaf7bb3cb..2538c78fbebe 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -7,12 +7,48 @@
 #include <linux/highmem.h>
 
 #include "ivpu_drv.h"
-#include "ivpu_hw_37xx_reg.h"
 #include "ivpu_hw_reg_io.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
 #include "ivpu_pm.h"
 
+#define IVPU_MMU_REG_IDR0		      0x00200000u
+#define IVPU_MMU_REG_IDR1		      0x00200004u
+#define IVPU_MMU_REG_IDR3		      0x0020000cu
+#define IVPU_MMU_REG_IDR5		      0x00200014u
+#define IVPU_MMU_REG_CR0		      0x00200020u
+#define IVPU_MMU_REG_CR0ACK		      0x00200024u
+#define IVPU_MMU_REG_CR0ACK_VAL_MASK	      GENMASK(31, 0)
+#define IVPU_MMU_REG_CR1		      0x00200028u
+#define IVPU_MMU_REG_CR2		      0x0020002cu
+#define IVPU_MMU_REG_IRQ_CTRL		      0x00200050u
+#define IVPU_MMU_REG_IRQ_CTRLACK	      0x00200054u
+#define IVPU_MMU_REG_IRQ_CTRLACK_VAL_MASK     GENMASK(31, 0)
+
+#define IVPU_MMU_REG_GERROR		      0x00200060u
+#define IVPU_MMU_REG_GERROR_CMDQ_MASK	      BIT_MASK(0)
+#define IVPU_MMU_REG_GERROR_EVTQ_ABT_MASK     BIT_MASK(2)
+#define IVPU_MMU_REG_GERROR_PRIQ_ABT_MASK     BIT_MASK(3)
+#define IVPU_MMU_REG_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4)
+#define IVPU_MMU_REG_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5)
+#define IVPU_MMU_REG_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6)
+#define IVPU_MMU_REG_GERROR_MSI_ABT_MASK      BIT_MASK(7)
+
+#define IVPU_MMU_REG_GERRORN		      0x00200064u
+
+#define IVPU_MMU_REG_STRTAB_BASE	      0x00200080u
+#define IVPU_MMU_REG_STRTAB_BASE_CFG	      0x00200088u
+#define IVPU_MMU_REG_CMDQ_BASE		      0x00200090u
+#define IVPU_MMU_REG_CMDQ_PROD		      0x00200098u
+#define IVPU_MMU_REG_CMDQ_CONS		      0x0020009cu
+#define IVPU_MMU_REG_CMDQ_CONS_VAL_MASK	      GENMASK(23, 0)
+#define IVPU_MMU_REG_CMDQ_CONS_ERR_MASK	      GENMASK(30, 24)
+#define IVPU_MMU_REG_EVTQ_BASE		      0x002000a0u
+#define IVPU_MMU_REG_EVTQ_PROD		      0x002000a8u
+#define IVPU_MMU_REG_EVTQ_CONS		      0x002000acu
+#define IVPU_MMU_REG_EVTQ_PROD_SEC	      (0x002000a8u + SZ_64K)
+#define IVPU_MMU_REG_EVTQ_CONS_SEC	      (0x002000acu + SZ_64K)
+
 #define IVPU_MMU_IDR0_REF		0x080f3e0f
 #define IVPU_MMU_IDR0_REF_SIMICS	0x080f3e1f
 #define IVPU_MMU_IDR1_REF		0x0e739d18
@@ -186,13 +222,13 @@
 #define IVPU_MMU_REG_TIMEOUT_US		(10 * USEC_PER_MSEC)
 #define IVPU_MMU_QUEUE_TIMEOUT_US	(100 * USEC_PER_MSEC)
 
-#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \
-				  (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT)))
+#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(IVPU_MMU_REG_GERROR, CMDQ)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
+				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))
 
 static char *ivpu_mmu_event_to_str(u32 cmd)
 {
@@ -250,15 +286,15 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 	else
 		val_ref = IVPU_MMU_IDR0_REF;
 
-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR0);
+	val = REGV_RD32(IVPU_MMU_REG_IDR0);
 	if (val != val_ref)
 		ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref);
 
-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR1);
+	val = REGV_RD32(IVPU_MMU_REG_IDR1);
 	if (val != IVPU_MMU_IDR1_REF)
 		ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF);
 
-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR3);
+	val = REGV_RD32(IVPU_MMU_REG_IDR3);
 	if (val != IVPU_MMU_IDR3_REF)
 		ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF);
 
@@ -269,7 +305,7 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 	else
 		val_ref = IVPU_MMU_IDR5_REF;
 
-	val = REGV_RD32(VPU_37XX_HOST_MMU_IDR5);
+	val = REGV_RD32(IVPU_MMU_REG_IDR5);
 	if (val != val_ref)
 		ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref);
 }
@@ -376,19 +412,18 @@ static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev)
 	return ret;
 }
 
-static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val)
+static int ivpu_mmu_reg_write_cr0(struct ivpu_device *vdev, u32 val)
 {
-	u32 reg_ack = reg + 4; /* ACK register is 4B after base register */
-	u32 val_ack;
-	int ret;
+	REGV_WR32(IVPU_MMU_REG_CR0, val);
 
-	REGV_WR32(reg, val);
+	return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US);
+}
 
-	ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US);
-	if (ret)
-		ivpu_err(vdev, "Failed to write register 0x%x\n", reg);
+static int ivpu_mmu_reg_write_irq_ctrl(struct ivpu_device *vdev, u32 val)
+{
+	REGV_WR32(IVPU_MMU_REG_IRQ_CTRL, val);
 
-	return ret;
+	return REGV_POLL_FLD(IVPU_MMU_REG_IRQ_CTRLACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US);
 }
 
 static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev)
@@ -396,19 +431,26 @@ static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev)
 	u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN;
 	int ret;
 
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, 0);
+	ret = ivpu_mmu_reg_write_irq_ctrl(vdev, 0);
 	if (ret)
 		return ret;
 
-	return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, irq_ctrl);
+	return ivpu_mmu_reg_write_irq_ctrl(vdev, irq_ctrl);
 }
 
 static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
 {
 	struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+	int ret;
+
+	ret = REGV_POLL_FLD(IVPU_MMU_REG_CMDQ_CONS, VAL, cmdq->prod,
+			    IVPU_MMU_QUEUE_TIMEOUT_US);
+	if (ret)
+		return ret;
+
+	cmdq->cons = cmdq->prod;
 
-	return REGV_POLL(VPU_37XX_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons),
-			 IVPU_MMU_QUEUE_TIMEOUT_US);
+	return 0;
 }
 
 static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
@@ -447,7 +489,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 		return ret;
 
 	clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, q->prod);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
 
 	ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
 	if (ret)
@@ -495,7 +537,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	mmu->evtq.prod = 0;
 	mmu->evtq.cons = 0;
 
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, 0);
+	ret = ivpu_mmu_reg_write_cr0(vdev, 0);
 	if (ret)
 		return ret;
 
@@ -505,17 +547,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) |
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) |
 	      FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB);
-	REGV_WR32(VPU_37XX_HOST_MMU_CR1, val);
+	REGV_WR32(IVPU_MMU_REG_CR1, val);
 
-	REGV_WR64(VPU_37XX_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg);
+	REGV_WR64(IVPU_MMU_REG_STRTAB_BASE, mmu->strtab.dma_q);
+	REGV_WR32(IVPU_MMU_REG_STRTAB_BASE_CFG, mmu->strtab.base_cfg);
 
-	REGV_WR64(VPU_37XX_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, 0);
-	REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_CONS, 0);
+	REGV_WR64(IVPU_MMU_REG_CMDQ_BASE, mmu->cmdq.dma_q);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0);
+	REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0);
 
 	val = IVPU_MMU_CR0_CMDQEN;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write_cr0(vdev, val);
 	if (ret)
 		return ret;
 
@@ -531,17 +573,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 	if (ret)
 		return ret;
 
-	REGV_WR64(VPU_37XX_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q);
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC, 0);
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, 0);
+	REGV_WR64(IVPU_MMU_REG_EVTQ_BASE, mmu->evtq.dma_q);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0);
 
 	val |= IVPU_MMU_CR0_EVTQEN;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write_cr0(vdev, val);
 	if (ret)
 		return ret;
 
 	val |= IVPU_MMU_CR0_ATSCHK;
-	ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	ret = ivpu_mmu_reg_write_cr0(vdev, val);
 	if (ret)
 		return ret;
 
@@ -550,7 +592,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev)
 		return ret;
 
 	val |= IVPU_MMU_CR0_SMMUEN;
-	return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val);
+	return ivpu_mmu_reg_write_cr0(vdev, val);
 }
 
 static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid)
@@ -801,14 +843,14 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
 	u32 idx = IVPU_MMU_Q_IDX(evtq->cons);
 	u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
 
-	evtq->prod = REGV_RD32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC);
+	evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
 	if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
 		return NULL;
 
 	clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE);
 
 	evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
-	REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, evtq->cons);
+	REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);
 
 	return evt;
 }
@@ -841,35 +883,35 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
 
 	ivpu_dbg(vdev, IRQ, "MMU error\n");
 
-	gerror_val = REGV_RD32(VPU_37XX_HOST_MMU_GERROR);
-	gerrorn_val = REGV_RD32(VPU_37XX_HOST_MMU_GERRORN);
+	gerror_val = REGV_RD32(IVPU_MMU_REG_GERROR);
+	gerrorn_val = REGV_RD32(IVPU_MMU_REG_GERRORN);
 
 	active = gerror_val ^ gerrorn_val;
 	if (!(active & IVPU_MMU_GERROR_ERR_MASK))
 		return;
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT, active))
 		ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT, active))
 		ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT, active))
 		ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n");
 
-	if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ, active))
+	if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, CMDQ, active))
 		ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n");
 
-	REGV_WR32(VPU_37XX_HOST_MMU_GERRORN, gerror_val);
+	REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val);
 }
 
 int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable)
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index ce94f4029127..c1050a2df954 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -424,8 +424,10 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 	INIT_LIST_HEAD(&ctx->bo_list);
 
 	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
-	if (ret)
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
 		return ret;
+	}
 
 	if (!context_id) {
 		start = vdev->hw->ranges.global.start;
@@ -464,6 +466,16 @@ void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
 	return ivpu_mmu_context_fini(vdev, &vdev->gctx);
 }
 
+int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
+{
+	return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
+}
+
+void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
+{
+	ivpu_mmu_user_context_fini(vdev, &vdev->rctx); /* undoes reserved_context_init() */
+}
+
 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
 {
 	struct ivpu_file_priv *file_priv;
@@ -485,13 +497,13 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context
 
 	ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
 	if (ret) {
-		ivpu_err(vdev, "Failed to initialize context: %d\n", ret);
+		ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret);
 		return ret;
 	}
 
 	ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
 	if (ret) {
-		ivpu_err(vdev, "Failed to set page table: %d\n", ret);
+		ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret);
 		goto err_context_fini;
 	}
 
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index 961a0d6a6c7f..f15d8c630d8a 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -32,6 +32,8 @@ struct ivpu_mmu_context {
 
 int ivpu_mmu_global_context_init(struct ivpu_device *vdev);
 void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
+int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
+void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);
 
 int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id);
 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index ffff2496e8e8..0ace218783c8 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -37,7 +37,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
 static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
-	struct vpu_boot_params *bp = fw->mem->kvaddr;
+	struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem);
 
 	if (!bp->save_restore_ret_address) {
 		ivpu_pm_prepare_cold_boot(vdev);
@@ -246,6 +246,19 @@ int ivpu_rpm_get(struct ivpu_device *vdev)
 	return ret;
 }
 
+/* Thin wrapper: logs the current usage count, then defers to pm_runtime_get_if_active(). */
+int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
+{
+	struct device *dev = vdev->drm.dev;
+	int ret;
+
+	ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n", atomic_read(&dev->power.usage_count));
+	ret = pm_runtime_get_if_active(dev, false);
+	drm_WARN_ON(&vdev->drm, ret < 0); /* flag a negative (error) result */
+
+	return ret;
+}
+
 void ivpu_rpm_put(struct ivpu_device *vdev)
 {
 	pm_runtime_mark_last_busy(vdev->drm.dev);
@@ -283,10 +296,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
 	pm_runtime_put_autosuspend(vdev->drm.dev);
 }
 
-int ivpu_pm_init(struct ivpu_device *vdev)
+void ivpu_pm_init(struct ivpu_device *vdev)
 {
 	struct device *dev = vdev->drm.dev;
 	struct ivpu_pm_info *pm = vdev->pm;
+	int delay;
 
 	pm->vdev = vdev;
 	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
@@ -294,16 +308,15 @@ int ivpu_pm_init(struct ivpu_device *vdev)
 	atomic_set(&pm->in_reset, 0);
 	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
 
-	pm_runtime_use_autosuspend(dev);
-
 	if (ivpu_disable_recovery)
-		pm_runtime_set_autosuspend_delay(dev, -1);
-	else if (ivpu_is_silicon(vdev))
-		pm_runtime_set_autosuspend_delay(dev, 100);
+		delay = -1;
 	else
-		pm_runtime_set_autosuspend_delay(dev, 60000);
+		delay = vdev->timeout.autosuspend;
 
-	return 0;
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, delay);
+
+	ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay);
 }
 
 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index fd4eada1290f..044db150be07 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -19,7 +19,7 @@ struct ivpu_pm_info {
 	u32 suspend_reschedule_counter;
 };
 
-int ivpu_pm_init(struct ivpu_device *vdev);
+void ivpu_pm_init(struct ivpu_device *vdev);
 void ivpu_pm_enable(struct ivpu_device *vdev);
 void ivpu_pm_disable(struct ivpu_device *vdev);
 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev);
@@ -33,6 +33,7 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev);
 void ivpu_pm_reset_done_cb(struct pci_dev *pdev);
 
 int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
+int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
 void ivpu_rpm_put(struct ivpu_device *vdev);
 
 void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index f2bd637a0d4e..e3f4c30f3ffd 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -27,6 +27,9 @@
 #define QAIC_DBC_OFF(i)		((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE)
 
 #define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base)
+#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm)
+#define to_drm(qddev) (&(qddev)->drm)
+#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
 
 extern bool datapath_polling;
 
@@ -137,6 +140,8 @@ struct qaic_device {
 };
 
 struct qaic_drm_device {
+	/* The drm device struct of this drm device */
+	struct drm_device	drm;
 	/* Pointer to the root device struct driven by this driver */
 	struct qaic_device	*qdev;
 	/*
@@ -146,8 +151,6 @@ struct qaic_drm_device {
 	 * device is the actual physical device
 	 */
 	s32			partition_id;
-	/* Pointer to the drm device struct of this drm device */
-	struct drm_device	*ddev;
 	/* Head in list of users who have opened this drm device */
 	struct list_head	users;
 	/* Synchronizes access to users list */
@@ -158,8 +161,6 @@ struct qaic_bo {
 	struct drm_gem_object	base;
 	/* Scatter/gather table for allocate/imported BO */
 	struct sg_table		*sgt;
-	/* BO size requested by user. GEM object might be bigger in size. */
-	u64			size;
 	/* Head in list of slices of this BO */
 	struct list_head	slices;
 	/* Total nents, for all slices of this BO */
@@ -221,7 +222,8 @@ struct qaic_bo {
 		 */
 		u32		queue_level_before;
 	} perf_stats;
-
+	/* Synchronizes BO operations */
+	struct mutex		lock;
 };
 
 struct bo_slice {
@@ -277,6 +279,7 @@ int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *f
 int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
 void irq_polling_work(struct work_struct *work);
 
 #endif /* _QAIC_H_ */
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index f4b06792c6f1..4a8e43a7a6a4 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -154,6 +154,7 @@ static void free_slice(struct kref *kref)
 {
 	struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);
 
+	slice->bo->total_slice_nents -= slice->nents;
 	list_del(&slice->slice);
 	drm_gem_object_put(&slice->bo->base);
 	sg_free_table(slice->sgt);
@@ -579,7 +580,7 @@ static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
 {
 	struct qaic_bo *bo = to_qaic_bo(obj);
 
-	drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size);
+	drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir);
 }
 
 static const struct vm_operations_struct drm_vm_ops = {
@@ -623,6 +624,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
 		qaic_free_sgt(bo->sgt);
 	}
 
+	mutex_destroy(&bo->lock);
 	drm_gem_object_release(obj);
 	kfree(bo);
 }
@@ -634,6 +636,19 @@ static const struct drm_gem_object_funcs qaic_gem_funcs = {
 	.vm_ops = &drm_vm_ops,
 };
 
+static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
+{
+	if (!reinit) {		/* fresh BO: create the lock and the completion */
+		mutex_init(&bo->lock);
+		init_completion(&bo->xfer_done);
+	} else {		/* recycled BO: clear sliced flag, rearm the completion */
+		bo->sliced = false;
+		reinit_completion(&bo->xfer_done);
+	}
+	complete_all(&bo->xfer_done);	/* start out in the completed state */
+	INIT_LIST_HEAD(&bo->slices);	/* either way the slice list begins empty */
+}
+
 static struct qaic_bo *qaic_alloc_init_bo(void)
 {
 	struct qaic_bo *bo;
@@ -642,9 +657,7 @@ static struct qaic_bo *qaic_alloc_init_bo(void)
 	if (!bo)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&bo->slices);
-	init_completion(&bo->xfer_done);
-	complete_all(&bo->xfer_done);
+	qaic_init_bo(bo, false);
 
 	return bo;
 }
@@ -695,8 +708,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 	if (ret)
 		goto free_bo;
 
-	bo->size = args->size;
-
 	ret = drm_gem_handle_create(file_priv, obj, &args->handle);
 	if (ret)
 		goto free_sgt;
@@ -828,7 +839,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h
 	}
 
 	bo->sgt = sgt;
-	bo->size = hdr->size;
 
 	return 0;
 }
@@ -838,7 +848,7 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 {
 	int ret;
 
-	if (bo->size != hdr->size)
+	if (bo->base.size < hdr->size)
 		return -EINVAL;
 
 	ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
@@ -857,9 +867,9 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 		ret = qaic_prepare_import_bo(bo, hdr);
 	else
 		ret = qaic_prepare_export_bo(qdev, bo, hdr);
-
-	if (ret == 0)
-		bo->dir = hdr->dir;
+	bo->dir = hdr->dir;
+	bo->dbc = &qdev->dbc[hdr->dbc_id];
+	bo->nr_slice = hdr->count;
 
 	return ret;
 }
@@ -868,7 +878,6 @@ static void qaic_unprepare_import_bo(struct qaic_bo *bo)
 {
 	dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
 	bo->sgt = NULL;
-	bo->size = 0;
 }
 
 static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
@@ -884,6 +893,8 @@ static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
 		qaic_unprepare_export_bo(qdev, bo);
 
 	bo->dir = 0;
+	bo->dbc = NULL;
+	bo->nr_slice = 0;
 }
 
 static void qaic_free_slices_bo(struct qaic_bo *bo)
@@ -892,6 +903,9 @@ static void qaic_free_slices_bo(struct qaic_bo *bo)
 
 	list_for_each_entry_safe(slice, temp, &bo->slices, slice)
 		kref_put(&slice->ref_count, free_slice);
+	if (WARN_ON_ONCE(bo->total_slice_nents != 0))
+		bo->total_slice_nents = 0;
+	bo->nr_slice = 0;
 }
 
 static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
@@ -908,15 +922,11 @@ static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
 		}
 	}
 
-	if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) {
+	if (bo->total_slice_nents > bo->dbc->nelem) {
 		qaic_free_slices_bo(bo);
 		return -ENOSPC;
 	}
 
-	bo->sliced = true;
-	bo->nr_slice = hdr->count;
-	list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists);
-
 	return 0;
 }
 
@@ -994,10 +1004,13 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 	}
 
 	bo = to_qaic_bo(obj);
+	ret = mutex_lock_interruptible(&bo->lock);
+	if (ret)
+		goto put_bo;
 
 	if (bo->sliced) {
 		ret = -EINVAL;
-		goto put_bo;
+		goto unlock_bo;
 	}
 
 	dbc = &qdev->dbc[args->hdr.dbc_id];
@@ -1018,9 +1031,10 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 	if (args->hdr.dir == DMA_TO_DEVICE)
 		dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
 
-	bo->dbc = dbc;
+	bo->sliced = true;
+	list_add_tail(&bo->bo_list, &bo->dbc->bo_lists);
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
-	drm_gem_object_put(obj);
+	mutex_unlock(&bo->lock);
 	kfree(slice_ent);
 	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
 	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
@@ -1031,6 +1045,8 @@ unprepare_bo:
 	qaic_unprepare_bo(qdev, bo);
 unlock_ch_srcu:
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_bo:
+	mutex_unlock(&bo->lock);
 put_bo:
 	drm_gem_object_put(obj);
 free_slice_ent:
@@ -1185,15 +1201,18 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		}
 
 		bo = to_qaic_bo(obj);
+		ret = mutex_lock_interruptible(&bo->lock);
+		if (ret)
+			goto failed_to_send_bo;
 
 		if (!bo->sliced) {
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}
 
-		if (is_partial && pexec[i].resize > bo->size) {
+		if (is_partial && pexec[i].resize > bo->base.size) {
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}
 
 		spin_lock_irqsave(&dbc->xfer_lock, flags);
@@ -1202,7 +1221,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		if (queued) {
 			spin_unlock_irqrestore(&dbc->xfer_lock, flags);
 			ret = -EINVAL;
-			goto failed_to_send_bo;
+			goto unlock_bo;
 		}
 
 		bo->req_id = dbc->next_req_id++;
@@ -1233,17 +1252,20 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 			if (ret) {
 				bo->queued = false;
 				spin_unlock_irqrestore(&dbc->xfer_lock, flags);
-				goto failed_to_send_bo;
+				goto unlock_bo;
 			}
 		}
 		reinit_completion(&bo->xfer_done);
 		list_add_tail(&bo->xfer_list, &dbc->xfer_list);
 		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
 		dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
+		mutex_unlock(&bo->lock);
 	}
 
 	return 0;
 
+unlock_bo:
+	mutex_unlock(&bo->lock);
 failed_to_send_bo:
 	if (likely(obj))
 		drm_gem_object_put(obj);
@@ -1799,6 +1821,91 @@ unlock_usr_srcu:
 	return ret;
 }
 
+static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo)
+{
+	qaic_free_slices_bo(bo);
+	qaic_unprepare_bo(qdev, bo); /* also clears bo->dir, bo->dbc and bo->nr_slice */
+	qaic_init_bo(bo, true); /* reinit: sliced = false, completion rearmed and completed */
+	list_del(&bo->bo_list); /* unlink from the channel's bo_lists */
+	drm_gem_object_put(&bo->base); /* drop the reference kept by a successful attach */
+}
+
+int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+	struct qaic_detach_slice *args = data;
+	int rcu_id, usr_rcu_id, qdev_rcu_id;
+	struct dma_bridge_chan *dbc;
+	struct drm_gem_object *obj;
+	struct qaic_device *qdev;
+	struct qaic_user *usr;
+	unsigned long flags;
+	struct qaic_bo *bo;
+	int ret;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	usr = file_priv->driver_priv;
+	usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+	if (!usr->qddev) {
+		ret = -ENODEV;
+		goto unlock_usr_srcu;
+	}
+
+	qdev = usr->qddev->qdev;
+	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+	if (qdev->in_reset) {
+		ret = -ENODEV;
+		goto unlock_dev_srcu;
+	}
+
+	obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!obj) {
+		ret = -ENOENT;
+		goto unlock_dev_srcu;
+	}
+
+	bo = to_qaic_bo(obj);
+	ret = mutex_lock_interruptible(&bo->lock);
+	if (ret)
+		goto put_bo;
+
+	if (!bo->sliced) {
+		ret = -EINVAL;
+		goto unlock_bo;
+	}
+
+	dbc = bo->dbc; /* local copy: detach_slice_bo() clears bo->dbc */
+	rcu_id = srcu_read_lock(&dbc->ch_lock);
+	if (dbc->usr != usr) { /* channel is assigned to a different user */
+		ret = -EINVAL;
+		goto unlock_ch_srcu;
+	}
+
+	/* Check if BO is committed to H/W for DMA */
+	spin_lock_irqsave(&dbc->xfer_lock, flags);
+	if (bo->queued) {
+		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+		ret = -EBUSY;
+		goto unlock_ch_srcu;
+	}
+	spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+
+	detach_slice_bo(qdev, bo); /* success: ret is still 0 from mutex_lock_interruptible() */
+
+unlock_ch_srcu: /* reached on success too; the labels below unwind in reverse order */
+	srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_bo:
+	mutex_unlock(&bo->lock);
+put_bo:
+	drm_gem_object_put(obj); /* pairs with drm_gem_object_lookup() above */
+unlock_dev_srcu:
+	srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+	srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+	return ret;
+}
+
 static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
 {
 	unsigned long flags;
@@ -1810,6 +1917,12 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
 		bo->queued = false;
 		list_del(&bo->xfer_list);
 		spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+		bo->nr_slice_xfer_done = 0;
+		bo->req_id = 0;
+		bo->perf_stats.req_received_ts = 0;
+		bo->perf_stats.req_submit_ts = 0;
+		bo->perf_stats.req_processed_ts = 0;
+		bo->perf_stats.queue_level_before = 0;
 		dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
 		complete_all(&bo->xfer_done);
 		drm_gem_object_put(&bo->base);
@@ -1857,7 +1970,6 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
 
 void release_dbc(struct qaic_device *qdev, u32 dbc_id)
 {
-	struct bo_slice *slice, *slice_temp;
 	struct qaic_bo *bo, *bo_temp;
 	struct dma_bridge_chan *dbc;
 
@@ -1875,24 +1987,11 @@ void release_dbc(struct qaic_device *qdev, u32 dbc_id)
 	dbc->usr = NULL;
 
 	list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
-		list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice)
-			kref_put(&slice->ref_count, free_slice);
-		bo->sliced = false;
-		INIT_LIST_HEAD(&bo->slices);
-		bo->total_slice_nents = 0;
-		bo->dir = 0;
-		bo->dbc = NULL;
-		bo->nr_slice = 0;
-		bo->nr_slice_xfer_done = 0;
-		bo->queued = false;
-		bo->req_id = 0;
-		init_completion(&bo->xfer_done);
-		complete_all(&bo->xfer_done);
-		list_del(&bo->bo_list);
-		bo->perf_stats.req_received_ts = 0;
-		bo->perf_stats.req_submit_ts = 0;
-		bo->perf_stats.req_processed_ts = 0;
-		bo->perf_stats.queue_level_before = 0;
+		drm_gem_object_get(&bo->base);
+		mutex_lock(&bo->lock);
+		detach_slice_bo(qdev, bo);
+		mutex_unlock(&bo->lock);
+		drm_gem_object_put(&bo->base);
 	}
 
 	dbc->in_use = false;
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index b5de82e6eb4d..6f58095767df 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -22,6 +22,7 @@
 #include <drm/drm_file.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_managed.h>
 #include <uapi/drm/qaic_accel.h>
 
 #include "mhi_controller.h"
@@ -55,7 +56,7 @@ static void free_usr(struct kref *kref)
 
 static int qaic_open(struct drm_device *dev, struct drm_file *file)
 {
-	struct qaic_drm_device *qddev = dev->dev_private;
+	struct qaic_drm_device *qddev = to_qaic_drm_device(dev);
 	struct qaic_device *qdev = qddev->qdev;
 	struct qaic_user *usr;
 	int rcu_id;
@@ -150,6 +151,7 @@ static const struct drm_ioctl_desc qaic_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(QAIC_DETACH_SLICE_BO, qaic_detach_slice_bo_ioctl, 0),
 };
 
 static const struct drm_driver qaic_accel_driver = {
@@ -170,64 +172,39 @@ static const struct drm_driver qaic_accel_driver = {
 
 static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
 {
-	struct qaic_drm_device *qddev;
-	struct drm_device *ddev;
-	struct device *pdev;
+	struct qaic_drm_device *qddev = qdev->qddev;
+	struct drm_device *drm = to_drm(qddev);
 	int ret;
 
 	/* Hold off implementing partitions until the uapi is determined */
 	if (partition_id != QAIC_NO_PARTITION)
 		return -EINVAL;
 
-	pdev = &qdev->pdev->dev;
-
-	qddev = kzalloc(sizeof(*qddev), GFP_KERNEL);
-	if (!qddev)
-		return -ENOMEM;
-
-	ddev = drm_dev_alloc(&qaic_accel_driver, pdev);
-	if (IS_ERR(ddev)) {
-		ret = PTR_ERR(ddev);
-		goto ddev_fail;
-	}
-
-	ddev->dev_private = qddev;
-	qddev->ddev = ddev;
-
-	qddev->qdev = qdev;
 	qddev->partition_id = partition_id;
-	INIT_LIST_HEAD(&qddev->users);
-	mutex_init(&qddev->users_mutex);
-
-	qdev->qddev = qddev;
-
-	ret = drm_dev_register(ddev, 0);
-	if (ret) {
-		pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", __func__, ret);
-		goto drm_reg_fail;
-	}
 
-	return 0;
+	/*
+	 * drm_dev_unregister() sets the driver data to NULL and
+	 * drm_dev_register() does not update the driver data. During a SOC
+	 * reset the drm dev is unregistered and registered again, which leaves
+	 * the driver data NULL unless it is set again before registering.
+	 */
+	dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
 
-drm_reg_fail:
-	mutex_destroy(&qddev->users_mutex);
-	qdev->qddev = NULL;
-	drm_dev_put(ddev);
-ddev_fail:
-	kfree(qddev);
 	return ret;
 }
 
 static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 {
-	struct qaic_drm_device *qddev;
+	struct qaic_drm_device *qddev = qdev->qddev;
+	struct drm_device *drm = to_drm(qddev);
 	struct qaic_user *usr;
 
-	qddev = qdev->qddev;
-	qdev->qddev = NULL;
-	if (!qddev)
-		return;
-
+	drm_dev_get(drm);
+	drm_dev_unregister(drm);
+	qddev->partition_id = 0;
 	/*
 	 * Existing users get unresolvable errors till they close FDs.
 	 * Need to sync carefully with users calling close(). The
@@ -254,13 +231,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 		mutex_lock(&qddev->users_mutex);
 	}
 	mutex_unlock(&qddev->users_mutex);
-
-	if (qddev->ddev) {
-		drm_dev_unregister(qddev->ddev);
-		drm_dev_put(qddev->ddev);
-	}
-
-	kfree(qddev);
+	drm_dev_put(drm);
 }
 
 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
@@ -344,8 +315,20 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
 		qdev->in_reset = false;
 }
 
+static void cleanup_qdev(struct qaic_device *qdev)
+{
+	int i;
+
+	for (i = 0; i < qdev->num_dbc; ++i)
+		cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
+	cleanup_srcu_struct(&qdev->dev_lock);
+	pci_set_drvdata(qdev->pdev, NULL);
+	destroy_workqueue(qdev->cntl_wq);
+}
+
 static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	struct qaic_drm_device *qddev;
 	struct qaic_device *qdev;
 	int i;
 
@@ -381,18 +364,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 		INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
 	}
 
-	return qdev;
-}
+	qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
+	if (IS_ERR(qddev)) {
+		cleanup_qdev(qdev);
+		return NULL;
+	}
 
-static void cleanup_qdev(struct qaic_device *qdev)
-{
-	int i;
+	drmm_mutex_init(to_drm(qddev), &qddev->users_mutex);
+	INIT_LIST_HEAD(&qddev->users);
+	qddev->qdev = qdev;
+	qdev->qddev = qddev;
 
-	for (i = 0; i < qdev->num_dbc; ++i)
-		cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
-	cleanup_srcu_struct(&qdev->dev_lock);
-	pci_set_drvdata(qdev->pdev, NULL);
-	destroy_workqueue(qdev->cntl_wq);
+	return qdev;
 }
 
 static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
@@ -591,22 +574,22 @@ static int __init qaic_init(void)
 {
 	int ret;
 
-	ret = mhi_driver_register(&qaic_mhi_driver);
+	ret = pci_register_driver(&qaic_pci_driver);
 	if (ret) {
-		pr_debug("qaic: mhi_driver_register failed %d\n", ret);
+		pr_debug("qaic: pci_register_driver failed %d\n", ret);
 		return ret;
 	}
 
-	ret = pci_register_driver(&qaic_pci_driver);
+	ret = mhi_driver_register(&qaic_mhi_driver);
 	if (ret) {
-		pr_debug("qaic: pci_register_driver failed %d\n", ret);
-		goto free_mhi;
+		pr_debug("qaic: mhi_driver_register failed %d\n", ret);
+		goto free_pci;
 	}
 
 	return 0;
 
-free_mhi:
-	mhi_driver_unregister(&qaic_mhi_driver);
+free_pci:
+	pci_unregister_driver(&qaic_pci_driver);
 	return ret;
 }
 
@@ -628,8 +611,8 @@ static void __exit qaic_exit(void)
 	 * reinitializing the link_up state after the cleanup is done.
 	 */
 	link_up = true;
-	pci_unregister_driver(&qaic_pci_driver);
 	mhi_driver_unregister(&qaic_mhi_driver);
+	pci_unregister_driver(&qaic_pci_driver);
 }
 
 module_init(qaic_init);