From b3ba41f28f7fe147c23bfadf6280b3492c9696b1 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar
Date: Sat, 23 Nov 2013 14:36:42 -0500
Subject: ARM: mm: Fix max_mapnr with recent max*pfn updates

With commit 26ba47b1 {ARM: 7805/1: mm: change max*pfn to include the
physical offset of memory}, max_pfn already contains PHYS_PFN_OFFSET,
so it shouldn't be added in again. While at it, use the
set_max_mapnr() helper.

Cc: Russell King
Cc: Catalin Marinas
Cc: Nicolas Pitre
Signed-off-by: Grygorii Strashko
Signed-off-by: Santosh Shilimkar
---
 arch/arm/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 3e8f106ee5fe..4c7bab44bf5c 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -587,7 +587,7 @@ void __init mem_init(void)
 	extern u32 itcm_end;
 #endif
 
-	max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
+	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 
 	/* this will put all unused low memory onto the freelists */
 	free_unused_memmap(&meminfo);
--
cgit v1.2.3

From 8e58caefd96c8ee249ab26a2fe00aab3785df9ea Mon Sep 17 00:00:00 2001
From: Grygorii Strashko
Date: Sat, 23 Nov 2013 14:42:18 -0500
Subject: ARM: mm: Don't allow resizing of memblock data until "low" memory is mapped

If allowed to by a call to memblock_allow_resize(), the memblock core
will try to allocate additional memory and rearrange its internal data
whenever more than INIT_MEMBLOCK_REGIONS (128) memory regions of any
type have been added. If this happens before low memory is mapped
(which is now done by map_lowmem()), the system will hang, because the
memblock core will try to operate on virtual addresses which aren't
mapped yet.

In the ARM code, memblock resizing is allowed (memblock_allow_resize())
from arm_memblock_init(), which is called before map_lowmem(), so this
can lead to the hang described above. Hence, allow memblock resizing
later during init, from bootmem_init(), when all the appropriate
mappings are ready.

Cc: Russell King
Cc: Catalin Marinas
Cc: Nicolas Pitre
Signed-off-by: Grygorii Strashko
Signed-off-by: Santosh Shilimkar
---
 arch/arm/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 4c7bab44bf5c..ea66341779d4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -384,7 +384,6 @@ void __init arm_memblock_init(struct meminfo *mi,
 	dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));
 
 	arm_memblock_steal_permitted = false;
-	memblock_allow_resize();
 	memblock_dump_all();
 }
 
@@ -392,6 +391,7 @@ void __init bootmem_init(void)
 {
 	unsigned long min, max_low, max_high;
 
+	memblock_allow_resize();
 	max_low = max_high = 0;
 
 	find_limits(&min, &max_low, &max_high);
--
cgit v1.2.3

From 84f452b1e8fc73ac0e31254c66e3e2260ce5263d Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar
Date: Sun, 30 Jun 2013 00:28:46 -0400
Subject: ARM: mm: Remove bootmem code and switch to NO_BOOTMEM

Now that the dma_mask series is merged and max*pfn has a consistent
meaning on ARM, as on the rest of the architectures (thanks to RMK's
mega series), let's switch the ARM code to NO_BOOTMEM. With the
NO_BOOTMEM change, we now use the memblock allocator to reserve space
for the crash kernel, dropping a dependency on the nobootmem allocator
wrapper.

Tested with both flat memory and (faked) sparse memory models, with
highmem enabled.
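For illustration, a reservation such as the crash kernel region now
goes straight to memblock, roughly like this (a sketch with
placeholder variables; the actual hunk lives outside arch/arm/mm and
is not part of this log):

	/* sketch: reserve the crash kernel window directly from memblock */
	if (memblock_reserve(crash_base, crash_size))
		pr_warn("crashkernel reservation failed - memory is in use\n");

Previously the same reservation had to go through the bootmem wrapper
after memblock handed the memory over.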
Cc: Russell King
Cc: Catalin Marinas
Cc: Nicolas Pitre
Signed-off-by: Santosh Shilimkar
---
 arch/arm/mm/init.c | 58 ++----------------------------------------------------
 1 file changed, 2 insertions(+), 56 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ea66341779d4..6dd66a999d9f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -145,58 +145,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low,
 	*max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]);
 }
 
-static void __init arm_bootmem_init(unsigned long start_pfn,
-	unsigned long end_pfn)
-{
-	struct memblock_region *reg;
-	unsigned int boot_pages;
-	phys_addr_t bitmap;
-	pg_data_t *pgdat;
-
-	/*
-	 * Allocate the bootmem bitmap page.  This must be in a region
-	 * of memory which has already been mapped.
-	 */
-	boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
-				__pfn_to_phys(end_pfn));
-
-	/*
-	 * Initialise the bootmem allocator, handing the
-	 * memory banks over to bootmem.
-	 */
-	node_set_online(0);
-	pgdat = NODE_DATA(0);
-	init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);
-
-	/* Free the lowmem regions from memblock into bootmem. */
-	for_each_memblock(memory, reg) {
-		unsigned long start = memblock_region_memory_base_pfn(reg);
-		unsigned long end = memblock_region_memory_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
-	}
-
-	/* Reserve the lowmem memblock reserved regions in bootmem. */
-	for_each_memblock(reserved, reg) {
-		unsigned long start = memblock_region_reserved_base_pfn(reg);
-		unsigned long end = memblock_region_reserved_end_pfn(reg);
-
-		if (end >= end_pfn)
-			end = end_pfn;
-		if (start >= end)
-			break;
-
-		reserve_bootmem(__pfn_to_phys(start),
-			       (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
-	}
-}
-
 #ifdef CONFIG_ZONE_DMA
 
 phys_addr_t arm_dma_zone_size __read_mostly;
@@ -236,7 +184,7 @@ void __init setup_dma_zone(const struct machine_desc *mdesc)
 #endif
 }
 
-static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
+static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
 	unsigned long max_high)
 {
 	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
@@ -396,8 +344,6 @@ void __init bootmem_init(void)
 
 	find_limits(&min, &max_low, &max_high);
 
-	arm_bootmem_init(min, max_low);
-
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(),
 	 * so must be done after the fixed reservations
@@ -414,7 +360,7 @@ void __init bootmem_init(void)
 	 * the sparse mem_map arrays initialized by sparse_init()
 	 * for memmap_init_zone(), otherwise all PFNs are invalid.
 	 */
-	arm_bootmem_free(min, max_low, max_high);
+	zone_sizes_init(min, max_low, max_high);
 
 	/*
 	 * This doesn't seem to be used by the Linux memory manager any
--
cgit v1.2.3

From 1fd15b879d0075c8916e52fb9e52522827433d1f Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 23 Oct 2013 16:13:02 +0100
Subject: ARM: add support to dump the kernel page tables

This patch allows the kernel page tables to be dumped via a debugfs
file, allowing kernel developers to check the layout of the kernel
page tables and verify the various permission and type settings.
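With CONFIG_ARM_PTDUMP enabled, the dump can be read from debugfs; a
typical session looks roughly like this (the output lines below are
illustrative, not captured from a real board):

	# mount -t debugfs none /sys/kernel/debug
	# cat /sys/kernel/debug/kernel_page_tables
	---[ Kernel Mapping ]---
	0xc0000000-0xc0600000      6M     RW x  SHD MEM/CACHED/WBRA
	0xc0600000-0xdc000000    442M     RW x  SHD MEM/CACHED/WBRA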
Signed-off-by: Russell King --- arch/arm/mm/Makefile | 1 + arch/arm/mm/dump.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 arch/arm/mm/dump.c (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index ecfe6e53f6e0..7f39ce2f841f 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -12,6 +12,7 @@ ifneq ($(CONFIG_MMU),y) obj-y += nommu.o endif +obj-$(CONFIG_ARM_PTDUMP) += dump.o obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c new file mode 100644 index 000000000000..2b3a56414271 --- /dev/null +++ b/arch/arm/mm/dump.c @@ -0,0 +1,345 @@ +/* + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include +#include +#include + +#include +#include + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { 0, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, + { FIXADDR_START, "Fixmap Area" }, + { CONFIG_VECTORS_BASE, "Vectors" }, + { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = L_PTE_USER, + .val = L_PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = L_PTE_RDONLY, + .val = L_PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = L_PTE_XN, + .val = L_PTE_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = L_PTE_SHARED, + .val = L_PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_UNCACHED, + .set = "SO/UNCACHED", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_BUFFERABLE, + .set = "MEM/BUFFERABLE/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITETHROUGH, + .set = "MEM/CACHED/WT", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEBACK, + .set = "MEM/CACHED/WBRA", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_MINICACHE, + .set = "MEM/MINICACHE", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEALLOC, + .set = "MEM/CACHED/WBWA", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_SHARED, + .set = "DEV/SHARED", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_NONSHARED, + .set = "DEV/NONSHARED", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_WC, + .set = "DEV/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_CACHED, + .set = "DEV/CACHED", + }, +}; + +static const struct prot_bits section_bits[] = { +#ifndef CONFIG_ARM_LPAE + /* These are approximate */ + { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = 0, + .set = " ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = 
PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#else + { + .mask = PMD_SECT_USER, + .val = PMD_SECT_USER, + .set = "USR", + }, { + .mask = PMD_SECT_RDONLY, + .val = PMD_SECT_RDONLY, + .set = "ro", + .clear = "RW", +#endif + }, { + .mask = PMD_SECT_XN, + .val = PMD_SECT_XN, + .set = "NX", + .clear = "x ", + }, { + .mask = PMD_SECT_S, + .val = PMD_SECT_S, + .set = "SHD", + .clear = " ", + }, +}; + +struct pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + }, { /* pud */ + }, { /* pmd */ + .bits = section_bits, + .num = ARRAY_SIZE(section_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < USER_PGTABLES_CEILING) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%08lx-0x%08lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); + seq_printf(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) { + walk_pmd(st, pud, addr); + } else { + note_page(st, addr, 2, pud_val(*pud)); + } + } +} + +static void walk_pgd(struct seq_file *m) +{ + pgd_t *pgd = swapper_pg_dir; + struct pg_state st; + unsigned long addr; + unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + + memset(&st, 0, sizeof(st)); + st.seq = m; + st.marker = 
address_markers; + + pgd += pgdoff; + + for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { + addr = i * PGDIR_SIZE; + if (!pgd_none(*pgd)) { + walk_pud(&st, pgd, addr); + } else { + note_page(&st, addr, 1, pgd_val(*pgd)); + } + } + + note_page(&st, 0, 0, 0); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + walk_pgd(m); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[2].start_address = VMALLOC_START; + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +__initcall(ptdump_init); -- cgit v1.2.3 From 2e2c9de207be043ee80161971c814d740759d3bc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 24 Oct 2013 10:26:40 +0100 Subject: ARM: add permission annotations to MT_MEMORY* mapping types Document the permissions which the various MT_MEMORY* mapping types will provide. Signed-off-by: Russell King --- arch/arm/mm/ioremap.c | 4 ++-- arch/arm/mm/mmu.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index f123d6eb074b..f9c32ba73544 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -392,9 +392,9 @@ __arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) unsigned int mtype; if (cached) - mtype = MT_MEMORY; + mtype = MT_MEMORY_RWX; else - mtype = MT_MEMORY_NONCACHED; + mtype = MT_MEMORY_RWX_NONCACHED; return __arm_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 580ef2de82d7..fce2e7388098 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -287,7 +287,7 @@ static struct mem_type mem_types[] = { .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_USER, }, - [MT_MEMORY] = { + [MT_MEMORY_RWX] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, @@ -297,26 +297,26 @@ static struct mem_type mem_types[] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_NONCACHED] = { + [MT_MEMORY_RWX_NONCACHED] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_MT_BUFFERABLE, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_DTCM] = { + [MT_MEMORY_RW_DTCM] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_ITCM] = { + [MT_MEMORY_RWX_ITCM] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_KERNEL, }, - [MT_MEMORY_SO] = { + [MT_MEMORY_RW_SO] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_MT_UNCACHED | L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, @@ -487,11 +487,11 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; - 
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
-		mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
+		mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
 		mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
-		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
-		mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED;
+		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
+		mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
 	}
 }
 
@@ -502,15 +502,15 @@ static void __init build_mem_type_table(void)
 	if (cpu_arch >= CPU_ARCH_ARMv6) {
 		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
 			/* Non-cacheable Normal is XCB = 001 */
-			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
 				PMD_SECT_BUFFERED;
 		} else {
 			/* For both ARMv6 and non-TEX-remapping ARMv7 */
-			mem_types[MT_MEMORY_NONCACHED].prot_sect |=
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
 				PMD_SECT_TEX(1);
 		}
 	} else {
-		mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
 	}
 
 #ifdef CONFIG_ARM_LPAE
@@ -543,10 +543,10 @@ static void __init build_mem_type_table(void)
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
 
-	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
-	mem_types[MT_MEMORY].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
 	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
-	mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask;
+	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
 	mem_types[MT_ROM].prot_sect |= cp->pmd;
 
 	switch (cp->pmd) {
--
cgit v1.2.3

From ebd4922ecc38e31e662cd641dd04099a9652c9b3 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Thu, 24 Oct 2013 08:12:39 +0100
Subject: ARM: implement basic NX support for kernel lowmem mappings

Add basic NX support for the kernel lowmem mappings. We mark any
section which does not overlap kernel text as non-executable,
preventing it from being used to write code and then execute it
directly from there.

This does not change the alignment of the sections, so the kernel
image doesn't grow significantly, and we can therefore do this
without needing a config option.
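For example, on a platform whose lowmem spans 0xc0000000-0xdf000000
with the kernel image some way above the bottom (all addresses below
are illustrative), map_lowmem() now creates up to three section-aligned
mappings instead of one:

	0xc0000000-0xc0100000  MT_MEMORY_RW   (lowmem below the kernel)
	0xc0100000-0xc0800000  MT_MEMORY_RWX  (kernel text through __init_end)
	0xc0800000-0xdf000000  MT_MEMORY_RW   (remaining lowmem)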
Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 5 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fce2e7388098..9ec715f12224 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -293,6 +294,13 @@ static struct mem_type mem_types[] = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RW] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -410,6 +418,9 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + + /* Also setup NX memory mapping */ + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -489,6 +500,8 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -545,6 +558,8 @@ static void __init build_mem_type_table(void) mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -1296,6 +1311,8 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. 
*/ for_each_memblock(memory, reg) { @@ -1308,12 +1325,40 @@ static void __init map_lowmem(void) if (start >= end) break; - map.pfn = __phys_to_pfn(start); - map.virtual = __phys_to_virt(start); - map.length = end - start; - map.type = MT_MEMORY; + if (end < kernel_x_start || start >= kernel_x_end) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RWX; - create_mapping(&map); + create_mapping(&map); + } else { + /* This better cover the entire kernel */ + if (start < kernel_x_start) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = kernel_x_start - start; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + + map.pfn = __phys_to_pfn(kernel_x_start); + map.virtual = __phys_to_virt(kernel_x_start); + map.length = kernel_x_end - kernel_x_start; + map.type = MT_MEMORY_RWX; + + create_mapping(&map); + + if (kernel_x_end < end) { + map.pfn = __phys_to_pfn(kernel_x_end); + map.virtual = __phys_to_virt(kernel_x_end); + map.length = end - kernel_x_end; + map.type = MT_MEMORY_RW; + + create_mapping(&map); + } + } } } -- cgit v1.2.3 From 75374ad47c6465de6c098d7b913fe02d6131195c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 17 Jun 2013 10:29:13 -0700 Subject: ARM: mm: Define set_memory_* functions for ARM Other architectures define various set_memory functions to allow attributes to be changed (e.g. set_memory_x, set_memory_rw, etc.) Currently, these functions are missing on ARM. Define these in an appropriate manner for ARM. Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9ec715f12224..09033890c8b0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -345,6 +345,44 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +#define PTE_SET_FN(_name, pteop) \ +static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \ + void *data) \ +{ \ + pte_t pte = pteop(*ptep); \ +\ + set_pte_ext(ptep, pte, 0); \ + return 0; \ +} \ + +#define SET_MEMORY_FN(_name, callback) \ +int set_memory_##_name(unsigned long addr, int numpages) \ +{ \ + unsigned long start = addr; \ + unsigned long size = PAGE_SIZE*numpages; \ + unsigned end = start + size; \ +\ + if (start < MODULES_VADDR || start >= MODULES_END) \ + return -EINVAL;\ +\ + if (end < MODULES_VADDR || end >= MODULES_END) \ + return -EINVAL; \ +\ + apply_to_page_range(&init_mm, start, size, callback, NULL); \ + flush_tlb_kernel_range(start, end); \ + return 0;\ +} + +PTE_SET_FN(ro, pte_wrprotect) +PTE_SET_FN(rw, pte_mkwrite) +PTE_SET_FN(x, pte_mkexec) +PTE_SET_FN(nx, pte_mknexec) + +SET_MEMORY_FN(ro, pte_set_ro) +SET_MEMORY_FN(rw, pte_set_rw) +SET_MEMORY_FN(x, pte_set_x) +SET_MEMORY_FN(nx, pte_set_nx) + /* * Adjust the PMD section entries according to the CPU in use. 
*/ -- cgit v1.2.3 From 71b55663c5d0a6f8473045db1c8b376803780191 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 25 Nov 2013 12:01:03 +0000 Subject: ARM: fix executability of CMA mappings The CMA region was being marked executable: 0xdc04e000-0xdc050000 8K RW x MEM/CACHED/WBRA 0xdc060000-0xdc100000 640K RW x MEM/CACHED/WBRA 0xdc4f5000-0xdc500000 44K RW x MEM/CACHED/WBRA 0xdcce9000-0xe0000000 52316K RW x MEM/CACHED/WBRA This is mainly due to the badly worded MT_MEMORY_DMA_READY symbol, but there are also a few other places in dma-mapping which should be corrected to use the right constant. Fix all these places: 0xdc04e000-0xdc050000 8K RW NX MEM/CACHED/WBRA 0xdc060000-0xdc100000 640K RW NX MEM/CACHED/WBRA 0xdc280000-0xdc300000 512K RW NX MEM/CACHED/WBRA 0xdc6fc000-0xe0000000 58384K RW NX MEM/CACHED/WBRA Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 6 +++--- arch/arm/mm/mmu.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 79f8b39801a8..f0ea0134e5a3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -361,7 +361,7 @@ void __init init_dma_coherent_pool_size(unsigned long size) static int __init atomic_pool_init(void) { struct dma_pool *pool = &atomic_pool; - pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); + pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); gfp_t gfp = GFP_KERNEL | GFP_DMA; unsigned long nr_pages = pool->size >> PAGE_SHIFT; unsigned long *bitmap; @@ -609,7 +609,7 @@ static void __free_from_contiguous(struct device *dev, struct page *page, if (PageHighMem(page)) __dma_free_remap(cpu_addr, size); else - __dma_remap(page, size, pgprot_kernel); + __dma_remap(page, size, PAGE_KERNEL); dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); } @@ -1357,7 +1357,7 @@ static void __iommu_free_atomic(struct device *dev, void *cpu_addr, static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); struct page **pages; void *addr = NULL; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 09033890c8b0..4f08c133cc25 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -333,7 +333,8 @@ static struct mem_type mem_types[] = { .domain = DOMAIN_KERNEL, }, [MT_MEMORY_DMA_READY] = { - .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, .prot_l1 = PMD_TYPE_TABLE, .domain = DOMAIN_KERNEL, }, -- cgit v1.2.3 From 70f665fe77c54740d0fa8aaad5de2181d75af15e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 9 Dec 2013 18:06:53 +0100 Subject: ARM: 7919/1: mm: refactor v7 cache cleaning ops to use way/index sequence Set-associative caches on all v7 implementations map the index bits to physical addresses LSBs and tag bits to MSBs. 
As the last level of cache on current and upcoming ARM systems grows in
size, this means that, under normal DRAM controller configurations, the
current v7 cache flush routine using set/way operations triggers a DRAM
memory controller precharge/activate cycle for every cache line
writeback, since the routine cleans lines by first fixing the index and
then looping through the ways. Index bits are mapped to lower physical
address bits on all v7 cache implementations, so with last-level cache
sizes on the order of MBytes, lines belonging to the same set but
different ways map to different DRAM pages.

Given the random content of cache tags, swapping the order of the index
and way loops does not prevent DRAM page precharge and activate cycles,
but it does, on average, improve the chances that either multiple lines
hit the same page or multiple lines belong to different DRAM banks,
improving throughput significantly.

This patch swaps the inner loops in the v7 cache flushing routine so
that the clean operations are carried out first on all sets belonging
to a given way (looping through sets) before the way is decremented.

Benchmarks showed that swapping the order in which sets and ways are
decremented significantly reduces the time required to flush the
caches, owing to improved writeback throughput to the DRAM controller.
Benchmark results vary and depend heavily on the content of the
last-level cache tag RAM when the cache is cleaned and invalidated,
ranging from 2x throughput, when all tag RAM entries contain dirty
lines mapping to sequential pages of RAM, down to 1x (i.e. no
improvement), when every tag RAM access triggers a DRAM
precharge/activate cycle, as the current code implies on most DRAM
controller configurations.
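In C-like pseudo-code, with clean_inv_by_set_way() standing in for the
"mcr p15, 0, r11, c7, c14, 2" issued by the assembly below, the change
amounts to:

	/* before: fix the index, loop over the ways -
	 * consecutive operations land in different DRAM pages */
	for (index = max_index; index >= 0; index--)
		for (way = max_way; way >= 0; way--)
			clean_inv_by_set_way(level, way, index);

	/* after: fix the way, loop over the indexes -
	 * consecutive operations walk sequential physical addresses */
	for (way = max_way; way >= 0; way--)
		for (index = max_index; index >= 0; index--)
			clean_inv_by_set_way(level, way, index);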
Acked-by: Catalin Marinas
Acked-by: Nicolas Pitre
Acked-by: Santosh Shilimkar
Reviewed-by: Dave Martin
Signed-off-by: Lorenzo Pieralisi
Signed-off-by: Russell King
---
 arch/arm/mm/cache-v7.S | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index b5c467a65c27..778bcf88ee79 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -146,18 +146,18 @@ flush_levels:
 	ldr	r7, =0x7fff
 	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
 loop1:
-	mov	r9, r4				@ create working copy of max way size
+	mov	r9, r7				@ create working copy of max index
 loop2:
- ARM(	orr	r11, r10, r9, lsl r5	)	@ factor way and cache number into r11
- THUMB(	lsl	r6, r9, r5		)
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
  THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
- ARM(	orr	r11, r11, r7, lsl r2	)	@ factor index number into r11
- THUMB(	lsl	r6, r7, r2		)
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
  THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
 	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
-	subs	r9, r9, #1			@ decrement the way
+	subs	r9, r9, #1			@ decrement the index
 	bge	loop2
-	subs	r7, r7, #1			@ decrement the index
+	subs	r4, r4, #1			@ decrement the way
 	bge	loop1
 skip:
 	add	r10, r10, #2			@ increment cache number
--
cgit v1.2.3

From e68f31f4520ea5d1ddbcaddb320ef0b4201eef3c Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth
Date: Fri, 13 Dec 2013 16:42:19 +0100
Subject: ARM: 7922/1: l2x0: add Marvell Tauros3 support

This adds support for the Marvell Tauros3 cache controller, which is
compatible with the PL310 cache controller but broadcasts L1 cache
operations to the L2 cache. While updating the binding documentation,
clean up the list of possible compatibles. Also reorder the driver
compatibles so that non-ARM derivatives can declare compatibility with
the generic ARM cache controller compatibles.
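A board using such a derivative would then describe it along these
lines (node name, base address and size are illustrative, not taken
from a real dts):

	l2-cache@d0008000 {
		compatible = "marvell,tauros3-cache", "arm,pl310-cache";
		reg = <0xd0008000 0x1000>;
		cache-unified;
		cache-level = <2>;
	};

Listing the vendor-specific string first lets a kernel that knows
about the Tauros3 use its dedicated save/resume handling, while the
PL310 string remains available as a fallback.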
Signed-off-by: Sebastian Hesselbarth Reviewed-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 48 +++++++++++++++++++++++++++++++++++++-------- arch/arm/mm/cache-tauros3.h | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 arch/arm/mm/cache-tauros3.h (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 447da6ffadd5..7abde2ce8973 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -25,6 +25,7 @@ #include #include +#include "cache-tauros3.h" #include "cache-aurora-l2.h" #define CACHE_LINE_SIZE 32 @@ -767,6 +768,14 @@ static void aurora_save(void) l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); } +static void __init tauros3_save(void) +{ + l2x0_saved_regs.aux2_ctrl = + readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL); + l2x0_saved_regs.prefetch_ctrl = + readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); +} + static void l2x0_resume(void) { if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { @@ -821,6 +830,18 @@ static void aurora_resume(void) } } +static void tauros3_resume(void) +{ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + l2x0_base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + l2x0_base + L2X0_PREFETCH_CTRL); + } + + l2x0_resume(); +} + static void __init aurora_broadcast_l2_commands(void) { __u32 u; @@ -906,6 +927,15 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data tauros3_data = { + .setup = NULL, + .save = tauros3_save, + /* Tauros3 broadcasts L1 cache operations to L2 */ + .outer_cache = { + .resume = tauros3_resume, + }, +}; + static const struct l2x0_of_data bcm_l2x0_data = { .setup = pl310_of_setup, .save = pl310_save, @@ -922,17 +952,19 @@ static const struct l2x0_of_data bcm_l2x0_data = { }; static const struct of_device_id l2x0_ids[] __initconst = { - { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, - { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, - { .compatible = "marvell,aurora-system-cache", - .data = (void *)&aurora_no_outer_data}, - { .compatible = "marvell,aurora-outer-cache", - .data = (void *)&aurora_with_outer_data}, - { .compatible = "brcm,bcm11351-a2-pl310-cache", - .data = (void *)&bcm_l2x0_data}, + { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, + { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */ .data = (void *)&bcm_l2x0_data}, + { .compatible = "brcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, + { .compatible = "marvell,aurora-outer-cache", + .data = (void *)&aurora_with_outer_data}, + { .compatible = "marvell,aurora-system-cache", + .data = (void *)&aurora_no_outer_data}, + { .compatible = "marvell,tauros3-cache", + .data = (void *)&tauros3_data }, {} }; diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 000000000000..02c0a97cbc02 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,41 @@ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth + * + * based on GPL'ed 2.6 kernel sources + * (c) Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. + * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG 0x224 +#define TAUROS3_EVENT_CNT2_VAL 0x228 +#define TAUROS3_INV_ALL 0x780 +#define TAUROS3_CLEAN_ALL 0x784 +#define TAUROS3_AUX2_CTRL 0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2) + +#endif -- cgit v1.2.3 From 8c65da6dc89ccb605d73773b1dd617e72982d971 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 30 Nov 2013 12:52:31 +0000 Subject: ARM: pgd allocation: retry on failure Make pgd allocation retry on failure; we really need this to succeed otherwise fork() can trigger OOMs. Signed-off-by: Russell King --- arch/arm/mm/pgd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 1046b373d1ae..249379535be2 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -23,7 +23,7 @@ #define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) #define __pgd_free(pgd) kfree(pgd) #else -#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) #define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) #endif -- cgit v1.2.3 From e1a5848e3398dca135f3ae77fe2e01145f9d8826 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Dec 2013 19:17:11 +0100 Subject: ARM: 7924/1: mm: don't bother with reserved ttbr0 when running with LPAE With the new ASID allocation algorithm, active ASIDs at the time of a rollover event will be marked as reserved, so active mm_structs can continue to operate with the same ASID as before. This in turn means that we don't need to worry about allocating a new ASID to an mm that is currently active (installed in TTBR0). Since updating the pgd and ASID is atomic on LPAE systems (by virtue of the two being fields in the same hardware register), we can dispose of the reserved TTBR0 and rely on whatever tables we currently have live. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/context.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 84e6f772e204..3ad0fdaa5cc1 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -78,20 +78,21 @@ void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, #endif #ifdef CONFIG_ARM_LPAE -static void cpu_set_reserved_ttbr0(void) -{ - /* - * Set TTBR0 to swapper_pg_dir which contains only global entries. The - * ASID is set to 0. 
- */ - cpu_set_ttbr(0, __pa(swapper_pg_dir)); - isb(); -} +/* + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). + */ +#define cpu_set_reserved_ttbr0() #else static void cpu_set_reserved_ttbr0(void) { u32 ttb; - /* Copy TTBR1 into TTBR0 */ + /* + * Copy TTBR1 into TTBR0. + * This points at swapper_pg_dir, which contains only global + * entries so any speculative walks are perfectly safe. + */ asm volatile( " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" -- cgit v1.2.3 From a7a04105068e9bb4cba43d97613c4f19b9e90b0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Dec 2013 19:17:31 +0100 Subject: ARM: 7925/1: mm: keep track of last ASID allocation to improve bitmap searching Since we only clear entries in the ASID bitmap on a rollover event, the bitmap tends to consist of a block of consecutive set bits followed by a block of consecutive clear bits. The exception to this rule is for ASIDs which have been carried over from a previous generation, but these are bound by the number of CPUs. This patch optimises our bitmap searching strategy, so that we search from the last successful allocation, rather than search from index 1 each time we allocate a new ASID. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/context.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 3ad0fdaa5cc1..52e6f13ac9c7 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -180,6 +180,7 @@ static int is_reserved_asid(u64 asid) static u64 new_context(struct mm_struct *mm, unsigned int cpu) { + static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); u64 generation = atomic64_read(&asid_generation); @@ -197,7 +198,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * as we reserve ASID #0 to switch via TTBR0 and indicate * rollover events. */ - asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); @@ -205,6 +206,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); + cur_idx = asid; asid |= generation; cpumask_clear(mm_cpumask(mm)); } -- cgit v1.2.3 From 5d49750933210457ec2d5e8507823e365b2604fb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Dec 2013 19:17:54 +0100 Subject: ARM: 7926/1: mm: flesh out and fix the comments in the ASID allocator The ASID allocator has to deal with some pretty horrible behaviours by the CPU, so expand on some of the comments in there so I remember why we can never allocate ASID zero to a userspace task. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/context.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/arm/mm') diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 52e6f13ac9c7..6eb97b3a7481 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -36,8 +36,8 @@ * The context ID is used by debuggers and trace logic, and * should be unique within all running processes. 
* - * In big endian operation, the two 32 bit words are swapped if accesed by - * non 64-bit operations. + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) #define NUM_USER_ASIDS ASID_FIRST_VERSION @@ -195,8 +195,11 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs * as requiring flushes. We always count from ASID #1, - * as we reserve ASID #0 to switch via TTBR0 and indicate - * rollover events. + * as we reserve ASID #0 to switch via TTBR0 and to + * avoid speculative page table walks from hitting in + * any partial walk caches, which could be populated + * from overlapping level-1 descriptors used to map both + * the module area and the userspace stack. */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid == NUM_USER_ASIDS) { @@ -224,8 +227,9 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) __check_vmalloc_seq(mm); /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. + * We cannot update the pgd and the ASID atomicly with classic + * MMU, so switch exclusively to global mappings to avoid + * speculative page table walking with the wrong TTBR. */ cpu_set_reserved_ttbr0(); -- cgit v1.2.3
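Taken together, the last three patches leave the allocator's fast path
as a bitmap search resumed from the previous successful allocation. A
simplified, single-CPU model of that search strategy (a standalone
sketch only; the real code additionally handles generations, reserved
ASIDs, TLB flushing and locking):

	#include <linux/bitmap.h>
	#include <linux/bitops.h>

	#define NUM_IDS	256	/* e.g. NUM_USER_ASIDS with 8-bit ASIDs */

	static DECLARE_BITMAP(id_map, NUM_IDS);
	static unsigned int cur_idx = 1;

	static unsigned int alloc_id(void)
	{
		unsigned int id = find_next_zero_bit(id_map, NUM_IDS, cur_idx);

		if (id == NUM_IDS) {
			/* rollover: invalidate all IDs, keep #0 reserved */
			bitmap_zero(id_map, NUM_IDS);
			__set_bit(0, id_map);
			id = find_next_zero_bit(id_map, NUM_IDS, 1);
		}
		__set_bit(id, id_map);
		cur_idx = id;
		return id;
	}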