summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/firmware/qcom_scm-32.c5
-rw-r--r--drivers/firmware/qcom_scm-64.c153
-rw-r--r--drivers/firmware/qcom_scm.c6
-rw-r--r--drivers/firmware/qcom_scm.h5
-rw-r--r--drivers/iommu/Makefile2
-rw-r--r--drivers/iommu/arm-smmu-impl.c5
-rw-r--r--drivers/iommu/arm-smmu-qcom.c51
-rw-r--r--drivers/iommu/arm-smmu-v3.c2
-rw-r--r--drivers/iommu/arm-smmu.c218
-rw-r--r--drivers/iommu/arm-smmu.h16
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c15
-rw-r--r--drivers/iommu/io-pgtable-arm.c180
-rw-r--r--drivers/iommu/ipmmu-vmsa.c2
-rw-r--r--drivers/iommu/qcom_iommu.c4
-rw-r--r--include/linux/io-pgtable.h2
-rw-r--r--include/linux/iommu.h8
-rw-r--r--include/linux/qcom_scm.h2
17 files changed, 439 insertions, 237 deletions
diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c
index 215061c581e1..bee8729525ec 100644
--- a/drivers/firmware/qcom_scm-32.c
+++ b/drivers/firmware/qcom_scm-32.c
@@ -614,3 +614,8 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val)
return qcom_scm_call_atomic2(QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
addr, val);
}
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool enable)
+{
+ return -ENODEV;
+}
diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c
index 91d5ad7cf58b..e1cd933ea9ae 100644
--- a/drivers/firmware/qcom_scm-64.c
+++ b/drivers/firmware/qcom_scm-64.c
@@ -62,32 +62,72 @@ static DEFINE_MUTEX(qcom_scm_lock);
#define FIRST_EXT_ARG_IDX 3
#define N_REGISTER_ARGS (MAX_QCOM_SCM_ARGS - N_EXT_QCOM_SCM_ARGS + 1)
-/**
- * qcom_scm_call() - Invoke a syscall in the secure world
- * @dev: device
- * @svc_id: service identifier
- * @cmd_id: command identifier
- * @desc: Descriptor structure containing arguments and return values
- *
- * Sends a command to the SCM and waits for the command to finish processing.
- * This should *only* be called in pre-emptible context.
-*/
-static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
- const struct qcom_scm_desc *desc,
- struct arm_smccc_res *res)
+static void __qcom_scm_call_do(const struct qcom_scm_desc *desc,
+ struct arm_smccc_res *res, u32 fn_id,
+ u64 x5, u32 type)
+{
+ u64 cmd;
+ struct arm_smccc_quirk quirk = { .id = ARM_SMCCC_QUIRK_QCOM_A6 };
+
+ cmd = ARM_SMCCC_CALL_VAL(type, qcom_smccc_convention,
+ ARM_SMCCC_OWNER_SIP, fn_id);
+
+ quirk.state.a6 = 0;
+
+ do {
+ arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
+ desc->args[1], desc->args[2], x5,
+ quirk.state.a6, 0, res, &quirk);
+
+ if (res->a0 == QCOM_SCM_INTERRUPTED)
+ cmd = res->a0;
+
+ } while (res->a0 == QCOM_SCM_INTERRUPTED);
+}
+
+static void qcom_scm_call_do(const struct qcom_scm_desc *desc,
+ struct arm_smccc_res *res, u32 fn_id,
+ u64 x5, bool atomic)
+{
+ int retry_count = 0;
+
+ if (atomic) {
+ __qcom_scm_call_do(desc, res, fn_id, x5, ARM_SMCCC_FAST_CALL);
+ return;
+ }
+
+ do {
+ mutex_lock(&qcom_scm_lock);
+
+ __qcom_scm_call_do(desc, res, fn_id, x5,
+ ARM_SMCCC_STD_CALL);
+
+ mutex_unlock(&qcom_scm_lock);
+
+ if (res->a0 == QCOM_SCM_V2_EBUSY) {
+ if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
+ break;
+ msleep(QCOM_SCM_EBUSY_WAIT_MS);
+ }
+ } while (res->a0 == QCOM_SCM_V2_EBUSY);
+}
+
+static int ___qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+ const struct qcom_scm_desc *desc,
+ struct arm_smccc_res *res, bool atomic)
{
int arglen = desc->arginfo & 0xf;
- int retry_count = 0, i;
+ int i;
u32 fn_id = QCOM_SCM_FNID(svc_id, cmd_id);
- u64 cmd, x5 = desc->args[FIRST_EXT_ARG_IDX];
+ u64 x5 = desc->args[FIRST_EXT_ARG_IDX];
dma_addr_t args_phys = 0;
void *args_virt = NULL;
size_t alloc_len;
- struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6};
+ gfp_t flag = atomic ? GFP_ATOMIC : GFP_KERNEL;
if (unlikely(arglen > N_REGISTER_ARGS)) {
alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64);
- args_virt = kzalloc(PAGE_ALIGN(alloc_len), GFP_KERNEL);
+ args_virt = kzalloc(PAGE_ALIGN(alloc_len), flag);
if (!args_virt)
return -ENOMEM;
@@ -117,46 +157,56 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
x5 = args_phys;
}
- do {
- mutex_lock(&qcom_scm_lock);
-
- cmd = ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL,
- qcom_smccc_convention,
- ARM_SMCCC_OWNER_SIP, fn_id);
-
- quirk.state.a6 = 0;
-
- do {
- arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0],
- desc->args[1], desc->args[2], x5,
- quirk.state.a6, 0, res, &quirk);
-
- if (res->a0 == QCOM_SCM_INTERRUPTED)
- cmd = res->a0;
-
- } while (res->a0 == QCOM_SCM_INTERRUPTED);
-
- mutex_unlock(&qcom_scm_lock);
-
- if (res->a0 == QCOM_SCM_V2_EBUSY) {
- if (retry_count++ > QCOM_SCM_EBUSY_MAX_RETRY)
- break;
- msleep(QCOM_SCM_EBUSY_WAIT_MS);
- }
- } while (res->a0 == QCOM_SCM_V2_EBUSY);
+ qcom_scm_call_do(desc, res, fn_id, x5, atomic);
if (args_virt) {
dma_unmap_single(dev, args_phys, alloc_len, DMA_TO_DEVICE);
kfree(args_virt);
}
- if (res->a0 < 0)
+ if ((long)res->a0 < 0)
return qcom_scm_remap_error(res->a0);
return 0;
}
/**
+ * qcom_scm_call() - Invoke a syscall in the secure world
+ * @dev: device
+ * @svc_id: service identifier
+ * @cmd_id: command identifier
+ * @desc: Descriptor structure containing arguments and return values
+ *
+ * Sends a command to the SCM and waits for the command to finish processing.
+ * This should *only* be called in pre-emptible context.
+ */
+static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id,
+ const struct qcom_scm_desc *desc,
+ struct arm_smccc_res *res)
+{
+ might_sleep();
+ return ___qcom_scm_call(dev, svc_id, cmd_id, desc, res, false);
+}
+
+/**
+ * qcom_scm_call_atomic() - atomic variation of qcom_scm_call()
+ * @dev: device
+ * @svc_id: service identifier
+ * @cmd_id: command identifier
+ * @desc: Descriptor structure containing arguments and return values
+ * @res: Structure containing results from SMC/HVC call
+ *
+ * Sends a command to the SCM and waits for the command to finish processing.
+ * This can be called in atomic context.
+ */
+static int qcom_scm_call_atomic(struct device *dev, u32 svc_id, u32 cmd_id,
+ const struct qcom_scm_desc *desc,
+ struct arm_smccc_res *res)
+{
+ return ___qcom_scm_call(dev, svc_id, cmd_id, desc, res, true);
+}
+
+/**
* qcom_scm_set_cold_boot_addr() - Set the cold boot address for cpus
* @entry: Entry point function for the cpus
* @cpus: The cpumask of cpus that will use the entry point
@@ -502,3 +552,16 @@ int __qcom_scm_io_writel(struct device *dev, phys_addr_t addr, unsigned int val)
return qcom_scm_call(dev, QCOM_SCM_SVC_IO, QCOM_SCM_IO_WRITE,
&desc, &res);
}
+
+int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev, bool en)
+{
+ struct qcom_scm_desc desc = {0};
+ struct arm_smccc_res res;
+
+ desc.args[0] = QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL;
+ desc.args[1] = en;
+ desc.arginfo = QCOM_SCM_ARGS(2);
+
+ return qcom_scm_call_atomic(dev, QCOM_SCM_SVC_SMMU_PROGRAM,
+ QCOM_SCM_CONFIG_ERRATA1, &desc, &res);
+}
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 4802ab170fe5..a729e05c21b8 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -345,6 +345,12 @@ int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
}
EXPORT_SYMBOL(qcom_scm_iommu_secure_ptbl_init);
+int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
+{
+ return __qcom_scm_qsmmu500_wait_safe_toggle(__scm->dev, en);
+}
+EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
+
int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
{
return __qcom_scm_io_readl(__scm->dev, addr, val);
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 99506bd873c0..baee744dbcfe 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -91,10 +91,15 @@ extern int __qcom_scm_restore_sec_cfg(struct device *dev, u32 device_id,
u32 spare);
#define QCOM_SCM_IOMMU_SECURE_PTBL_SIZE 3
#define QCOM_SCM_IOMMU_SECURE_PTBL_INIT 4
+#define QCOM_SCM_SVC_SMMU_PROGRAM 0x15
+#define QCOM_SCM_CONFIG_ERRATA1 0x3
+#define QCOM_SCM_CONFIG_ERRATA1_CLIENT_ALL 0x2
extern int __qcom_scm_iommu_secure_ptbl_size(struct device *dev, u32 spare,
size_t *size);
extern int __qcom_scm_iommu_secure_ptbl_init(struct device *dev, u64 addr,
u32 size, u32 spare);
+extern int __qcom_scm_qsmmu500_wait_safe_toggle(struct device *dev,
+ bool enable);
#define QCOM_MEM_PROT_ASSIGN_ID 0x16
extern int __qcom_scm_assign_mem(struct device *dev,
phys_addr_t mem_region, size_t mem_sz,
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 4f405f926e73..86dadd13b2e6 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
+obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-qcom.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 5c87a38620c4..b2fe72a8f019 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -109,7 +109,7 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm
#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
-static int arm_mmu500_reset(struct arm_smmu_device *smmu)
+int arm_mmu500_reset(struct arm_smmu_device *smmu)
{
u32 reg, major;
int i;
@@ -170,5 +170,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
"calxeda,smmu-secure-config-access"))
smmu->impl = &calxeda_impl;
+ if (of_device_is_compatible(smmu->dev->of_node, "qcom,sdm845-smmu-500"))
+ return qcom_smmu_impl_init(smmu);
+
return smmu;
}
diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
new file mode 100644
index 000000000000..24c071c1d8b0
--- /dev/null
+++ b/drivers/iommu/arm-smmu-qcom.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/qcom_scm.h>
+
+#include "arm-smmu.h"
+
+struct qcom_smmu {
+ struct arm_smmu_device smmu;
+};
+
+static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
+{
+ int ret;
+
+ arm_mmu500_reset(smmu);
+
+ /*
+ * To address performance degradation in non-real time clients,
+ * such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
+ * such as MTP and db845, whose firmwares implement secure monitor
+ * call handlers to turn on/off the wait-for-safe logic.
+ */
+ ret = qcom_scm_qsmmu500_wait_safe_toggle(0);
+ if (ret)
+ dev_warn(smmu->dev, "Failed to turn off SAFE logic\n");
+
+ return ret;
+}
+
+static const struct arm_smmu_impl qcom_smmu_impl = {
+ .reset = qcom_sdm845_smmu500_reset,
+};
+
+struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ struct qcom_smmu *qsmmu;
+
+ qsmmu = devm_kzalloc(smmu->dev, sizeof(*qsmmu), GFP_KERNEL);
+ if (!qsmmu)
+ return ERR_PTR(-ENOMEM);
+
+ qsmmu->smmu = *smmu;
+
+ qsmmu->smmu.impl = &qcom_smmu_impl;
+ devm_kfree(smmu->dev, smmu);
+
+ return &qsmmu->smmu;
+}
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8da93e730d6f..3f20e548f1ec 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2172,7 +2172,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
cfg->cd.asid = (u16)asid;
cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+ cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
return 0;
out_free_asid:
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b18aac4c105e..424ebf38cd09 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/amba/bus.h>
@@ -122,7 +123,7 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
if (pm_runtime_enabled(smmu->dev))
- pm_runtime_put(smmu->dev);
+ pm_runtime_put_autosuspend(smmu->dev);
}
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
@@ -244,6 +245,9 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
unsigned int spin_cnt, delay;
u32 reg;
+ if (smmu->impl && unlikely(smmu->impl->tlb_sync))
+ return smmu->impl->tlb_sync(smmu, page, sync, status);
+
arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
@@ -268,9 +272,8 @@ static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
-static void arm_smmu_tlb_sync_context(void *cookie)
+static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
- struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long flags;
@@ -280,13 +283,6 @@ static void arm_smmu_tlb_sync_context(void *cookie)
spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
-static void arm_smmu_tlb_sync_vmid(void *cookie)
-{
- struct arm_smmu_domain *smmu_domain = cookie;
-
- arm_smmu_tlb_sync_global(smmu_domain->smmu);
-}
-
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
@@ -297,7 +293,7 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
wmb();
arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
- arm_smmu_tlb_sync_context(cookie);
+ arm_smmu_tlb_sync_context(smmu_domain);
}
static void arm_smmu_tlb_inv_context_s2(void *cookie)
@@ -312,18 +308,16 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int reg, idx = cfg->cbndx;
+ int idx = cfg->cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
-
if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
iova = (iova >> 12) << 12;
iova |= cfg->asid;
@@ -342,16 +336,15 @@ static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
}
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+ size_t granule, void *cookie, int reg)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- int reg, idx = smmu_domain->cfg.cbndx;
+ int idx = smmu_domain->cfg.cbndx;
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
wmb();
- reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
iova >>= 12;
do {
if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
@@ -362,85 +355,98 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
} while (size -= granule);
}
-/*
- * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
- * almost negligible, but the benefit of getting the first one in as far ahead
- * of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
- */
-static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
-
- if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
- wmb();
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVA);
+ arm_smmu_tlb_sync_context(cookie);
+}
- arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
+static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+ arm_smmu_tlb_sync_context(cookie);
}
-static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S1_TLBIVAL);
+}
- ops->tlb_inv_range(iova, size, granule, false, cookie);
- ops->tlb_sync(cookie);
+static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2);
+ arm_smmu_tlb_sync_context(cookie);
}
-static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
- struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
+ arm_smmu_tlb_sync_context(cookie);
+}
- ops->tlb_inv_range(iova, size, granule, true, cookie);
- ops->tlb_sync(cookie);
+static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+ arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
+ ARM_SMMU_CB_S2_TLBIIPAS2L);
}
-static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
+static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+ arm_smmu_tlb_inv_context_s2(cookie);
+}
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
+ * think.
+ */
+static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
- ops->tlb_inv_range(iova, granule, granule, true, cookie);
+ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
-static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s1,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s1,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_range_s2,
- .tlb_sync = arm_smmu_tlb_sync_context,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2,
};
-static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
- .tlb = {
- .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
- .tlb_flush_walk = arm_smmu_tlb_inv_walk,
- .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
- .tlb_add_page = arm_smmu_tlb_add_page,
- },
- .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync,
- .tlb_sync = arm_smmu_tlb_sync_vmid,
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
+ .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -472,6 +478,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
struct arm_smmu_device *smmu = dev;
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
@@ -481,11 +489,19 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
if (!gfsr)
return IRQ_NONE;
- dev_err_ratelimited(smmu->dev,
- "Unexpected global fault, this could be serious\n");
- dev_err_ratelimited(smmu->dev,
- "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
- gfsr, gfsynr0, gfsynr1, gfsynr2);
+ if (__ratelimit(&rs)) {
+ if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
+ (gfsr & sGFSR_USF))
+ dev_err(smmu->dev,
+ "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
+ (u16)gfsynr1);
+ else
+ dev_err(smmu->dev,
+ "Unexpected global fault, this could be serious\n");
+ dev_err(smmu->dev,
+ "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+ gfsr, gfsynr0, gfsynr1, gfsynr2);
+ }
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
return IRQ_HANDLED;
@@ -536,8 +552,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
} else {
- cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
- cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+ cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
+ cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
}
}
}
@@ -770,7 +786,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.ias = ias,
.oas = oas,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
- .tlb = &smmu_domain->flush_ops->tlb,
+ .tlb = smmu_domain->flush_ops,
.iommu_dev = smmu->dev,
};
@@ -812,6 +828,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
return 0;
out_clear_smmu:
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
smmu_domain->smmu = NULL;
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
@@ -1038,8 +1055,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev)
}
group = iommu_group_get_for_dev(dev);
- if (!group)
- group = ERR_PTR(-ENOMEM);
if (IS_ERR(group)) {
ret = PTR_ERR(group);
goto out_err;
@@ -1153,6 +1168,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/* Looks ok, so add the device to the domain */
ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+ /*
+ * Setup an autosuspend delay to avoid bouncing runpm state.
+ * Otherwise, if a driver for a suspended consumer device
+ * unmaps buffers, it will runpm resume/suspend for each one.
+ *
+ * For example, when used by a GPU device, when an application
+ * or game exits, it can trigger unmapping 100s or 1000s of
+ * buffers. With a runpm cycle for each buffer, that adds up
+ * to 5-10sec worth of reprogramming the context bank, while
+ * the system appears to be locked up to the user.
+ */
+ pm_runtime_set_autosuspend_delay(smmu->dev, 20);
+ pm_runtime_use_autosuspend(smmu->dev);
+
rpm_put:
arm_smmu_rpm_put(smmu);
return ret;
@@ -1199,7 +1228,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
if (smmu_domain->flush_ops) {
arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
+ smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
arm_smmu_rpm_put(smmu);
}
}
@@ -1210,11 +1239,16 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->flush_ops) {
- arm_smmu_rpm_get(smmu);
- smmu_domain->flush_ops->tlb_sync(smmu_domain);
- arm_smmu_rpm_put(smmu);
- }
+ if (!smmu)
+ return;
+
+ arm_smmu_rpm_get(smmu);
+ if (smmu->version == ARM_SMMU_V2 ||
+ smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ arm_smmu_tlb_sync_context(smmu_domain);
+ else
+ arm_smmu_tlb_sync_global(smmu);
+ arm_smmu_rpm_put(smmu);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index b19b6cae9b5e..62b9f0cec49b 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -79,6 +79,8 @@
#define ID7_MINOR GENMASK(3, 0)
#define ARM_SMMU_GR0_sGFSR 0x48
+#define sGFSR_USF BIT(1)
+
#define ARM_SMMU_GR0_sGFSYNR0 0x50
#define ARM_SMMU_GR0_sGFSYNR1 0x54
#define ARM_SMMU_GR0_sGFSYNR2 0x58
@@ -304,17 +306,10 @@ enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_BYPASS,
};
-struct arm_smmu_flush_ops {
- struct iommu_flush_ops tlb;
- void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
- bool leaf, void *cookie);
- void (*tlb_sync)(void *cookie);
-};
-
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
struct io_pgtable_ops *pgtbl_ops;
- const struct arm_smmu_flush_ops *flush_ops;
+ const struct iommu_flush_ops *flush_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
bool non_strict;
@@ -335,6 +330,8 @@ struct arm_smmu_impl {
int (*cfg_probe)(struct arm_smmu_device *smmu);
int (*reset)(struct arm_smmu_device *smmu);
int (*init_context)(struct arm_smmu_domain *smmu_domain);
+ void (*tlb_sync)(struct arm_smmu_device *smmu, int page, int sync,
+ int status);
};
static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
@@ -398,5 +395,8 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu);
+
+int arm_mmu500_reset(struct arm_smmu_device *smmu);
#endif /* _ARM_SMMU_H */
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 4cb394937700..7c3bd2c3cdca 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -846,27 +846,28 @@ struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
-static struct io_pgtable_cfg *cfg_cookie;
+static struct io_pgtable_cfg *cfg_cookie __initdata;
-static void dummy_tlb_flush_all(void *cookie)
+static void __init dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
- void *cookie)
+static void __init dummy_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule, void *cookie)
+static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
-static const struct iommu_flush_ops dummy_tlb_ops = {
+static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_flush_walk = dummy_tlb_flush,
.tlb_flush_leaf = dummy_tlb_flush,
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 4c91359057c5..bdf47f745268 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -32,39 +32,31 @@
io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
/*
- * For consistency with the architecture, we always consider
- * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0
- */
-#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels)
-
-/*
* Calculate the right shift amount to get to the portion describing level l
* in a virtual address mapped by the pagetable in d.
*/
#define ARM_LPAE_LVL_SHIFT(l,d) \
- ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \
- * (d)->bits_per_level) + (d)->pg_shift)
-
-#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift)
+ (((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) + \
+ ilog2(sizeof(arm_lpae_iopte)))
-#define ARM_LPAE_PAGES_PER_PGD(d) \
- DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d))
+#define ARM_LPAE_GRANULE(d) \
+ (sizeof(arm_lpae_iopte) << (d)->bits_per_level)
+#define ARM_LPAE_PGD_SIZE(d) \
+ (sizeof(arm_lpae_iopte) << (d)->pgd_bits)
/*
* Calculate the index at level l used to map virtual address a using the
* pagetable in d.
*/
#define ARM_LPAE_PGD_IDX(l,d) \
- ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0)
+ ((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)
#define ARM_LPAE_LVL_IDX(a,l,d) \
(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \
((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
/* Calculate the block/page mapping size at level l for pagetable in d. */
-#define ARM_LPAE_BLOCK_SIZE(l,d) \
- (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \
- ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level)))
+#define ARM_LPAE_BLOCK_SIZE(l,d) (1ULL << ARM_LPAE_LVL_SHIFT(l,d))
/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT 0
@@ -166,6 +158,9 @@
#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+
/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -177,10 +172,9 @@
struct arm_lpae_io_pgtable {
struct io_pgtable iop;
- int levels;
- size_t pgd_size;
- unsigned long pg_shift;
- unsigned long bits_per_level;
+ int pgd_bits;
+ int start_level;
+ int bits_per_level;
void *pgd;
};
@@ -210,7 +204,7 @@ static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
{
u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
- if (data->pg_shift < 16)
+ if (ARM_LPAE_GRANULE(data) < SZ_64K)
return paddr;
/* Rotate the packed high-order bits back to the top */
@@ -389,7 +383,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
/* If we can install a leaf entry at this level, then do so */
- if (size == block_size && (size & cfg->pgsize_bitmap))
+ if (size == block_size)
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);
/* We can't allocate tables at the final level */
@@ -461,7 +455,7 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
else if (prot & IOMMU_CACHE)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
- else if (prot & IOMMU_QCOM_SYS_CACHE)
+ else if (prot & IOMMU_SYS_CACHE_ONLY)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
<< ARM_LPAE_PTE_ATTRINDX_SHIFT);
}
@@ -476,16 +470,19 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int iommu_prot)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- int ret, lvl = ARM_LPAE_START_LVL(data);
+ int ret, lvl = data->start_level;
arm_lpae_iopte prot;
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
- paddr >= (1ULL << data->iop.cfg.oas)))
+ if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ return -EINVAL;
+
+ if (WARN_ON(iova >> data->iop.cfg.ias || paddr >> data->iop.cfg.oas))
return -ERANGE;
prot = arm_lpae_prot_to_pte(data, iommu_prot);
@@ -505,8 +502,8 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *start, *end;
unsigned long table_size;
- if (lvl == ARM_LPAE_START_LVL(data))
- table_size = data->pgd_size;
+ if (lvl == data->start_level)
+ table_size = ARM_LPAE_PGD_SIZE(data);
else
table_size = ARM_LPAE_GRANULE(data);
@@ -534,7 +531,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
- __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd);
+ __arm_lpae_free_pgtable(data, data->start_level, data->pgd);
kfree(data);
}
@@ -649,13 +646,16 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
arm_lpae_iopte *ptep = data->pgd;
- int lvl = ARM_LPAE_START_LVL(data);
- if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
+ if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+ return 0;
+
+ if (WARN_ON(iova >> data->iop.cfg.ias))
return 0;
- return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
+ return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep);
}
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -663,7 +663,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte pte, *ptep = data->pgd;
- int lvl = ARM_LPAE_START_LVL(data);
+ int lvl = data->start_level;
do {
/* Valid IOPTE pointer? */
@@ -740,8 +740,8 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
static struct arm_lpae_io_pgtable *
arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
{
- unsigned long va_bits, pgd_bits;
struct arm_lpae_io_pgtable *data;
+ int levels, va_bits, pg_shift;
arm_lpae_restrict_pgsizes(cfg);
@@ -763,15 +763,15 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
if (!data)
return NULL;
- data->pg_shift = __ffs(cfg->pgsize_bitmap);
- data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte));
+ pg_shift = __ffs(cfg->pgsize_bitmap);
+ data->bits_per_level = pg_shift - ilog2(sizeof(arm_lpae_iopte));
- va_bits = cfg->ias - data->pg_shift;
- data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
+ va_bits = cfg->ias - pg_shift;
+ levels = DIV_ROUND_UP(va_bits, data->bits_per_level);
+ data->start_level = ARM_LPAE_MAX_LEVELS - levels;
/* Calculate the actual size of our pgd (without concatenation) */
- pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1));
- data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte)));
+ data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_lpae_map,
@@ -861,11 +861,11 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
(ARM_LPAE_MAIR_ATTR_INC_OWBRWA
<< ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE));
- cfg->arm_lpae_s1_cfg.mair[0] = reg;
- cfg->arm_lpae_s1_cfg.mair[1] = 0;
+ cfg->arm_lpae_s1_cfg.mair = reg;
/* Looking good; allocate a pgd */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
+ GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
@@ -900,13 +900,13 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
* Concatenate PGDs at level 1 if possible in order to reduce
* the depth of the stage-2 walk.
*/
- if (data->levels == ARM_LPAE_MAX_LEVELS) {
+ if (data->start_level == 0) {
unsigned long pgd_pages;
- pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte));
+ pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
- data->pgd_size = pgd_pages << data->pg_shift;
- data->levels--;
+ data->pgd_bits += data->bits_per_level;
+ data->start_level++;
}
}
@@ -916,7 +916,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
(ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);
- sl = ARM_LPAE_START_LVL(data);
+ sl = data->start_level;
switch (ARM_LPAE_GRANULE(data)) {
case SZ_4K:
@@ -962,7 +962,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s2_cfg.vtcr = reg;
/* Allocate pgd pages */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data),
+ GFP_KERNEL, cfg);
if (!data->pgd)
goto out_free_data;
@@ -1015,27 +1016,57 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct io_pgtable *iop;
+ struct arm_lpae_io_pgtable *data;
+
+ /* No quirks for Mali (hopefully) */
+ if (cfg->quirks)
+ return NULL;
- if (cfg->ias != 48 || cfg->oas > 40)
+ if (cfg->ias > 48 || cfg->oas > 40)
return NULL;
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
- iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
- if (iop) {
- u64 mair, ttbr;
- /* Copy values as union fields overlap */
- mair = cfg->arm_lpae_s1_cfg.mair[0];
- ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+ data = arm_lpae_alloc_pgtable(cfg);
+ if (!data)
+ return NULL;
- cfg->arm_mali_lpae_cfg.memattr = mair;
- cfg->arm_mali_lpae_cfg.transtab = ttbr |
- ARM_MALI_LPAE_TTBR_READ_INNER |
- ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ /* Mali seems to need a full 4-level table regardless of IAS */
+ if (data->start_level > 0) {
+ data->start_level = 0;
+ data->pgd_bits = 0;
}
+ /*
+ * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+ * best we can do is mimic the out-of-tree driver and hope that the
+ * "implementation-defined caching policy" is good enough. Similarly,
+ * we'll use it for the sake of a valid attribute for our 'device'
+ * index, although callers should never request that in practice.
+ */
+ cfg->arm_mali_lpae_cfg.memattr =
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+ (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
+
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+ cfg);
+ if (!data->pgd)
+ goto out_free_data;
- return iop;
+ /* Ensure the empty pgd is visible before TRANSTAB can be written */
+ wmb();
+
+ cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+ ARM_MALI_LPAE_TTBR_READ_INNER |
+ ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ return &data->iop;
+
+out_free_data:
+ kfree(data);
+ return NULL;
}
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
@@ -1065,22 +1096,23 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
-static struct io_pgtable_cfg *cfg_cookie;
+static struct io_pgtable_cfg *cfg_cookie __initdata;
-static void dummy_tlb_flush_all(void *cookie)
+static void __init dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
-static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
- void *cookie)
+static void __init dummy_tlb_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
-static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule, void *cookie)
+static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
dummy_tlb_flush(iova, granule, granule, cookie);
}
@@ -1099,9 +1131,9 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
cfg->pgsize_bitmap, cfg->ias);
- pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n",
- data->levels, data->pgd_size, data->pg_shift,
- data->bits_per_level, data->pgd);
+ pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
+ ARM_LPAE_MAX_LEVELS - data->start_level, ARM_LPAE_PGD_SIZE(data),
+ ilog2(ARM_LPAE_GRANULE(data)), data->bits_per_level, data->pgd);
}
#define __FAIL(ops, i) ({ \
@@ -1113,7 +1145,7 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
{
- static const enum io_pgtable_fmt fmts[] = {
+ static const enum io_pgtable_fmt fmts[] __initconst = {
ARM_64_LPAE_S1,
ARM_64_LPAE_S2,
};
@@ -1212,13 +1244,13 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
static int __init arm_lpae_do_selftests(void)
{
- static const unsigned long pgsize[] = {
+ static const unsigned long pgsize[] __initconst = {
SZ_4K | SZ_2M | SZ_1G,
SZ_16K | SZ_32M,
SZ_64K | SZ_512M,
};
- static const unsigned int ias[] = {
+ static const unsigned int ias[] __initconst = {
32, 36, 40, 42, 44, 48,
};
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 9da8309f7170..e4da6efbda49 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -438,7 +438,7 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain)
/* MAIR0 */
ipmmu_ctx_write_root(domain, IMMAIR0,
- domain->cfg.arm_lpae_s1_cfg.mair[0]);
+ domain->cfg.arm_lpae_s1_cfg.mair);
/* IMBUSCR */
if (domain->mmu->features->setup_imbuscr)
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index c31e7bc4ccbe..66e9b40e9275 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -284,9 +284,9 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
/* MAIRs (stage-1 only) */
iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0,
- pgtbl_cfg.arm_lpae_s1_cfg.mair[0]);
+ pgtbl_cfg.arm_lpae_s1_cfg.mair);
iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR1,
- pgtbl_cfg.arm_lpae_s1_cfg.mair[1]);
+ pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32);
/* SCTLR */
reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE |
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ec7a13405f10..ee21eedafe98 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -102,7 +102,7 @@ struct io_pgtable_cfg {
struct {
u64 ttbr[2];
u64 tcr;
- u64 mair[2];
+ u64 mair;
} arm_lpae_s1_cfg;
struct {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 29bac5345563..a86bd21d08a9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,11 +31,11 @@
*/
#define IOMMU_PRIV (1 << 5)
/*
- * Non-coherent masters on few Qualcomm SoCs can use this page protection flag
- * to set correct cacheability attributes to use an outer level of cache -
- * last level cache, aka system cache.
+ * Non-coherent masters can use this page protection flag to set cacheable
+ * memory attributes for only a transparent outer level of cache, also known as
+ * the last-level or system cache.
*/
-#define IOMMU_QCOM_SYS_CACHE (1 << 6)
+#define IOMMU_SYS_CACHE_ONLY (1 << 6)
struct iommu_ops;
struct iommu_group;
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 2d5eff506e13..ffd72b3b14ee 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -58,6 +58,7 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id);
extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare);
extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size);
extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare);
+extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val);
extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val);
#else
@@ -97,6 +98,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; }
static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; }
static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; }
static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; }
+static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return -ENODEV; }
static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return -ENODEV; }
static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { return -ENODEV; }
#endif