9 files changed, 2494 insertions, 0 deletions
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
new file mode 100644
index 000000000000..168dc146a8f6
--- /dev/null
+++ b/arch/arc/mm/Makefile
@@ -0,0 +1,10 @@
+#
+# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y	:= extable.o ioremap.o dma.o fault.o init.o
+obj-y	+= tlb.o tlbex.o cache_arc700.o
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
new file mode 100644
index 000000000000..88d617d84234
--- /dev/null
+++ b/arch/arc/mm/cache_arc700.c
@@ -0,0 +1,768 @@
+/*
+ * ARC700 VIPT Cache Management
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
+ *   -flush_cache_dup_mm (fork)
+ *   -likewise for flush_cache_mm (exit/execve)
+ *   -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
+ *
+ * vineetg: Apr 2011
+ *  -Now that MMU can support larger pg sz (16K), the determiniation of
+ *   aliasing shd not be based on assumption of 8k pg
+ *
+ * vineetg: Mar 2011
+ *  -optimised version of flush_icache_range( ) for making I/D coherent
+ *   when vaddr is available (agnostic of num of aliases)
+ *
+ * vineetg: Mar 2011
+ *  -Added documentation about I-cache aliasing on ARC700 and the way it
+ *   was handled up until MMU V2.
+ *  -Spotted a three year old bug when killing the 4 aliases, which needs
+ *   bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
+ *                        instead of paddr | {0x00, 0x01, 0x10, 0x11}
+ *   (Rajesh you owe me one now)
+ *
+ * vineetg: Dec 2010
+ *  -Off-by-one error when computing num_of_lines to flush
+ *   This broke signal handling with bionic which uses synthetic sigret stub
+ *
+ * vineetg: Mar 2010
+ *  -GCC can't generate ZOL for core cache flush loops.
+ *   Conv them into iterations based as opposed to while (start < end) types
+ *
+ * Vineetg: July 2009
+ *  -In I-cache flush routine we used to chk for aliasing for every line INV.
+ *   Instead now we setup routines per cache geometry and invoke them
+ *   via function pointers.
+ *
+ * Vineetg: Jan 2009
+ *  -Cache Line flush routines used to flush an extra line beyond end addr
+ *   because check was while (end >= start) instead of (end > start)
+ *     =Some call sites had to work around by doing -1, -4 etc to end param
+ *     =Some callers didnt care. This was spec bad in case of INV routines
+ *      which would discard valid data (cause of the horrible ext2 bug
+ *      in ARC IDE driver)
+ *
+ * vineetg: June 11th 2008: Fixed flush_icache_range( )
+ *  -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
+ *   to be flushed, which it was not doing.
+ *  -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
+ *   however ARC cache maintenance OPs require PHY addr. Thus need to do
+ *   vmalloc_to_phy.
+ *  -Also added optimisation there, that for range > PAGE SIZE we flush the
+ *   entire cache in one shot rather than line by line. For e.g. a module
+ *   with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
+ *   while cache is only 16 or 32k.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/setup.h>
+
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+static void __ic_line_inv_no_alias(unsigned long, int);
+static void __ic_line_inv_2_alias(unsigned long, int);
+static void __ic_line_inv_4_alias(unsigned long, int);
+
+/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
+static void (*___flush_icache_rtn) (unsigned long, int);
+#endif
+
+char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
+{
+	int n = 0;
+	unsigned int c = smp_processor_id();
+
+#define PR_CACHE(p, enb, str)						\
+{									\
+	if (!(p)->ver)							\
+		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\
+	else								\
+		n += scnprintf(buf + n, len - n,			\
+			str"\t\t: (%uK) VIPT, %dway set-asc, %ub Line %s\n", \
+			TO_KB((p)->sz), (p)->assoc, (p)->line_len,	\
+			enb ?  "" : "DISABLED (kernel-build)");		\
+}
+
+	PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache");
+	PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache");
+
+	return buf;
+}
+
+/*
+ * Read the Cache Build Confuration Registers, Decode them and save into
+ * the cpuinfo structure for later use.
+ * No Validation done here, simply read/convert the BCRs
+ */
+void __init read_decode_cache_bcr(void)
+{
+	struct bcr_cache ibcr, dbcr;
+	struct cpuinfo_arc_cache *p_ic, *p_dc;
+	unsigned int cpu = smp_processor_id();
+
+	p_ic = &cpuinfo_arc700[cpu].icache;
+	READ_BCR(ARC_REG_IC_BCR, ibcr);
+
+	if (ibcr.config == 0x3)
+		p_ic->assoc = 2;
+	p_ic->line_len = 8 << ibcr.line_len;
+	p_ic->sz = 0x200 << ibcr.sz;
+	p_ic->ver = ibcr.ver;
+
+	p_dc = &cpuinfo_arc700[cpu].dcache;
+	READ_BCR(ARC_REG_DC_BCR, dbcr);
+
+	if (dbcr.config == 0x2)
+		p_dc->assoc = 4;
+	p_dc->line_len = 16 << dbcr.line_len;
+	p_dc->sz = 0x200 << dbcr.sz;
+	p_dc->ver = dbcr.ver;
+}
+
+/*
+ * 1. Validate the Cache Geomtery (compile time config matches hardware)
+ * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
+ *    (aliasing D-cache configurations are not supported YET)
+ * 3. Enable the Caches, setup default flush mode for D-Cache
+ * 3. Calculate the SHMLBA used by user space
+ */
+void __init arc_cache_init(void)
+{
+	unsigned int temp;
+	unsigned int cpu = smp_processor_id();
+	struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+	struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+	int way_pg_ratio = way_pg_ratio;
+	char str[256];
+
+	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+	if (!ic->ver)
+		goto chk_dc;
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+	/* 1. Confirm some of I-cache params which Linux assumes */
+	if ((ic->assoc != ARC_ICACHE_WAYS) ||
+	    (ic->line_len != ARC_ICACHE_LINE_LEN)) {
+		panic("Cache H/W doesn't match kernel Config");
+	}
+#if (CONFIG_ARC_MMU_VER > 2)
+	if (ic->ver != 3) {
+		if (running_on_hw)
+			panic("Cache ver doesn't match MMU ver\n");
+
+		/* For ISS - suggest the toggles to use */
+		pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n");
+
+	}
+#endif
+
+	/*
+	 * if Cache way size is <= page size then no aliasing exhibited
+	 * otherwise ratio determines num of aliases.
+	 * e.g. 32K I$, 2 way set assoc, 8k pg size
+	 *       way-sz = 32k/2 = 16k
+	 *       way-pg-ratio = 16k/8k = 2, so 2 aliases possible
+	 *       (meaning 1 line could be in 2 possible locations).
+	 */
+	way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
+	switch (way_pg_ratio) {
+	case 0:
+	case 1:
+		___flush_icache_rtn = __ic_line_inv_no_alias;
+		break;
+	case 2:
+		___flush_icache_rtn = __ic_line_inv_2_alias;
+		break;
+	case 4:
+		___flush_icache_rtn = __ic_line_inv_4_alias;
+		break;
+	default:
+		panic("Unsupported I-Cache Sz\n");
+	}
+#endif
+
+	/* Enable/disable I-Cache */
+	temp = read_aux_reg(ARC_REG_IC_CTRL);
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+	temp &= ~IC_CTRL_CACHE_DISABLE;
+#else
+	temp |= IC_CTRL_CACHE_DISABLE;
+#endif
+
+	write_aux_reg(ARC_REG_IC_CTRL, temp);
+
+chk_dc:
+	if (!dc->ver)
+		return;
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+	if ((dc->assoc != ARC_DCACHE_WAYS) ||
+	    (dc->line_len != ARC_DCACHE_LINE_LEN)) {
+		panic("Cache H/W doesn't match kernel Config");
+	}
+
+	/* check for D-Cache aliasing */
+	if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
+		panic("D$ aliasing not handled right now\n");
+#endif
+
+	/* Set the default Invalidate Mode to "simpy discard dirty lines"
+	 *  as this is more frequent then flush before invalidate
+	 * Ofcourse we toggle this default behviour when desired
+	 */
+	temp = read_aux_reg(ARC_REG_DC_CTRL);
+	temp &= ~DC_CTRL_INV_MODE_FLUSH;
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+	/* Enable D-Cache: Clear Bit 0 */
+	write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
+#else
+	/* Flush D cache */
+	write_aux_reg(ARC_REG_DC_FLSH, 0x1);
+	/* Disable D cache */
+	write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
+#endif
+
+	return;
+}
+
+#define OP_INV		0x1
+#define OP_FLUSH	0x2
+#define OP_FLUSH_N_INV	0x3
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+
+/***************************************************************
+ * Machine specific helpers for Entire D-Cache or Per Line ops
+ */
+
+static inline void wait_for_flush(void)
+{
+	while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
+		;
+}
+
+/*
+ * Operation on Entire D-Cache
+ * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * Note that constant propagation ensures all the checks are gone
+ * in generated code
+ */
+static inline void __dc_entire_op(const int cacheop)
+{
+	unsigned long flags, tmp = tmp;
+	int aux;
+
+	local_irq_save(flags);
+
+	if (cacheop == OP_FLUSH_N_INV) {
+		/* Dcache provides 2 cmd: FLUSH or INV
+		 * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+		 * flush-n-inv is achieved by INV cmd but with IM=1
+		 * Default INV sub-mode is DISCARD, which needs to be toggled
+		 */
+		tmp = read_aux_reg(ARC_REG_DC_CTRL);
+		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
+	}
+
+	if (cacheop & OP_INV)	/* Inv or flush-n-inv use same cmd reg */
+		aux = ARC_REG_DC_IVDC;
+	else
+		aux = ARC_REG_DC_FLSH;
+
+	write_aux_reg(aux, 0x1);
+
+	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
+		wait_for_flush();
+
+	/* Switch back the DISCARD ONLY Invalidate mode */
+	if (cacheop == OP_FLUSH_N_INV)
+		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Per Line Operation on D-Cache
+ * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
+ * It's sole purpose is to help gcc generate ZOL
+ */
+static inline void __dc_line_loop(unsigned long start, unsigned long sz,
+					  int aux_reg)
+{
+	int num_lines, slack;
+
+	/* Ensure we properly floor/ceil the non-line aligned/sized requests
+	 * and have @start - aligned to cache line and integral @num_lines.
+	 * This however can be avoided for page sized since:
+	 *  -@start will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+		slack = start & ~DCACHE_LINE_MASK;
+		sz += slack;
+		start -= slack;
+	}
+
+	num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
+
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		/*
+		 * Just as for I$, in MMU v3, D$ ops also require
+		 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
+		 * But we pass phy addr for both. This works since Linux
+		 * doesn't support aliasing configs for D$, yet.
+		 * Thus paddr is enough to provide both tag and index.
+		 */
+		write_aux_reg(ARC_REG_DC_PTAG, start);
+#endif
+		write_aux_reg(aux_reg, start);
+		start += ARC_DCACHE_LINE_LEN;
+	}
+}
+
+/*
+ * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
+ */
+static inline void __dc_line_op(unsigned long start, unsigned long sz,
+					const int cacheop)
+{
+	unsigned long flags, tmp = tmp;
+	int aux;
+
+	local_irq_save(flags);
+
+	if (cacheop == OP_FLUSH_N_INV) {
+		/*
+		 * Dcache provides 2 cmd: FLUSH or INV
+		 * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
+		 * flush-n-inv is achieved by INV cmd but with IM=1
+		 * Default INV sub-mode is DISCARD, which needs to be toggled
+		 */
+		tmp = read_aux_reg(ARC_REG_DC_CTRL);
+		write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
+	}
+
+	if (cacheop & OP_INV)	/* Inv / flush-n-inv use same cmd reg */
+		aux = ARC_REG_DC_IVDL;
+	else
+		aux = ARC_REG_DC_FLDL;
+
+	__dc_line_loop(start, sz, aux);
+
+	if (cacheop & OP_FLUSH)	/* flush / flush-n-inv both wait */
+		wait_for_flush();
+
+	/* Switch back the DISCARD ONLY Invalidate mode */
+	if (cacheop == OP_FLUSH_N_INV)
+		write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
+
+	local_irq_restore(flags);
+}
+
+#else
+
+#define __dc_entire_op(cacheop)
+#define __dc_line_op(start, sz, cacheop)
+
+#endif /* CONFIG_ARC_HAS_DCACHE */
+
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+
+/*
+ *		I-Cache Aliasing in ARC700 VIPT caches
+ *
+ * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
+ * to "index" into SET of cache-line and paddr from MMU to match the TAG
+ * in the WAYS of SET.
+ *
+ * However the CDU iterface (to flush/inv) lines from software, only takes
+ * paddr (to have simpler hardware interface). For simpler cases, using paddr
+ * alone suffices.
+ * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
+ *      way_sz = cache_sz / num_ways = 16k/2 = 8k
+ *      num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
+ *   Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
+ *   bits req for calc set-index = bits 12:5 (0 based). Since this range fits
+ *   inside the bottom 13 bits of paddr, which are same for vaddr and paddr
+ *   (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
+ *   locate a cache-line.
+ *
+ * However for a difft sized cache, say 32k I$, above math yields need
+ * for 14 bits of vaddr to locate a cache line, which can't be provided by
+ * paddr, since the bit 13 (0 based) might differ between the two.
+ *
+ * This lack of extra bits needed for correct line addressing, defines the
+ * classical problem of Cache aliasing with VIPT architectures
+ * num_aliases = 1 << extra_bits
+ * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
+ *      2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
+ *      2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additonal vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geom.
+ * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The adv of using this "clumsy"
+ * interface for additional info was no new reg was needed in CDU.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * Since vaddr was not available for all instances of I$ flush req by core
+ * kernel, the only safe way (non-optimal though) was to kill all possible
+ * lines which could represent an alias (even if they didnt represent one
+ * in execution).
+ * e.g. for 64K I$, 4 aliases possible, so we did
+ *      flush start
+ *      flush start | 0x01
+ *      flush start | 0x2
+ *      flush start | 0x3
+ *
+ * The penalty was invoking the operation itself, since tag match is anyways
+ * paddr based, a line which didn't represent an alias would not match the
+ * paddr, hence wont be killed
+ *
+ * Note that aliasing concerns are independent of line-sz for a given cache
+ * geometry (size + set_assoc) because the extra bits required by line-sz are
+ * reduced from the set calc.
+ * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
+ *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
+ *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports var page sizes (1k-16k) - Linux will support
+ * 8k (default), 16k and 4k.
+ * However from hardware perspective, smaller page sizes aggrevate aliasing
+ * meaning more vaddr bits needed to disambiguate the cache-line-op ;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers IC_PTAG and DC_PTAG inttoduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
+
+/***********************************************************
+ * Machine specific helpers for per line I-Cache invalidate.
+ * 3 routines to accpunt for 1, 2, 4 aliases possible
+ */
+
+static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
+{
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		write_aux_reg(ARC_REG_IC_PTAG, start);
+#endif
+		write_aux_reg(ARC_REG_IC_IVIL, start);
+		start += ARC_ICACHE_LINE_LEN;
+	}
+}
+
+static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
+{
+	while (num_lines-- > 0) {
+
+#if (CONFIG_ARC_MMU_VER > 2)
+		/*
+		 *  MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
+		 * reg to pass the "tag" bits and existing IVIL reg only looks
+		 * at bits relevant for "index" (details above)
+		 * Programming Notes:
+		 * -when writing tag to PTAG reg, bit chopping can be avoided,
+		 *  CDU ignores non-tag bits.
+		 * -Ideally "index" must be computed from vaddr, but it is not
+		 *  avail in these rtns. So to be safe, we kill the lines in all
+		 *  possible indexes corresp to num of aliases possible for
+		 *  given cache config.
+		 */
+		write_aux_reg(ARC_REG_IC_PTAG, start);
+		write_aux_reg(ARC_REG_IC_IVIL,
+				  start & ~(0x1 << PAGE_SHIFT));
+		write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
+#else
+		write_aux_reg(ARC_REG_IC_IVIL, start);
+		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
+#endif
+		start += ARC_ICACHE_LINE_LEN;
+	}
+}
+
+static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
+{
+	while (num_lines-- > 0) {
+
+#if (CONFIG_ARC_MMU_VER > 2)
+		write_aux_reg(ARC_REG_IC_PTAG, start);
+
+		write_aux_reg(ARC_REG_IC_IVIL,
+				  start & ~(0x3 << PAGE_SHIFT));
+		write_aux_reg(ARC_REG_IC_IVIL,
+				  start & ~(0x2 << PAGE_SHIFT));
+		write_aux_reg(ARC_REG_IC_IVIL,
+				  start & ~(0x1 << PAGE_SHIFT));
+		write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
+#else
+		write_aux_reg(ARC_REG_IC_IVIL, start);
+		write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
+		write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
+		write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
+#endif
+		start += ARC_ICACHE_LINE_LEN;
+	}
+}
+
+static void __ic_line_inv(unsigned long start, unsigned long sz)
+{
+	unsigned long flags;
+	int num_lines, slack;
+
+	/*
+	 * Ensure we properly floor/ceil the non-line aligned/sized requests
+	 * and have @start - aligned to cache line, and integral @num_lines
+	 * However page sized flushes can be compile time optimised.
+	 *  -@start will be cache-line aligned already (being page aligned)
+	 *  -@sz will be integral multiple of line size (being page sized).
+	 */
+	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+		slack = start & ~ICACHE_LINE_MASK;
+		sz += slack;
+		start -= slack;
+	}
+
+	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
+
+	local_irq_save(flags);
+	(*___flush_icache_rtn) (start, num_lines);
+	local_irq_restore(flags);
+}
+
+/* Unlike routines above, having vaddr for flush op (along with paddr),
+ * prevents the need to speculatively kill the lines in multiple sets
+ * based on ratio of way_sz : pg_sz
+ */
+static void __ic_line_inv_vaddr(unsigned long phy_start,
+					 unsigned long vaddr, unsigned long sz)
+{
+	unsigned long flags;
+	int num_lines, slack;
+	unsigned int addr;
+
+	slack = phy_start & ~ICACHE_LINE_MASK;
+	sz += slack;
+	phy_start -= slack;
+	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
+
+#if (CONFIG_ARC_MMU_VER > 2)
+	vaddr &= ~ICACHE_LINE_MASK;
+	addr = phy_start;
+#else
+	/* bits 17:13 of vaddr go as bits 4:0 of paddr */
+	addr = phy_start | ((vaddr >> 13) & 0x1F);
+#endif
+
+	local_irq_save(flags);
+	while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+		/* tag comes from phy addr */
+		write_aux_reg(ARC_REG_IC_PTAG, addr);
+
+		/* index bits come from vaddr */
+		write_aux_reg(ARC_REG_IC_IVIL, vaddr);
+		vaddr += ARC_ICACHE_LINE_LEN;
+#else
+		/* this paddr contains vaddrs bits as needed */
+		write_aux_reg(ARC_REG_IC_IVIL, addr);
+#endif
+		addr += ARC_ICACHE_LINE_LEN;
+	}
+	local_irq_restore(flags);
+}
+
+#else
+
+#define __ic_line_inv(start, sz)
+#define __ic_line_inv_vaddr(pstart, vstart, sz)
+
+#endif /* CONFIG_ARC_HAS_ICACHE */
+
+
+/***********************************************************
+ * Exported APIs
+ */
+
+/* TBD: use pg_arch_1 to optimize this */
+void flush_dcache_page(struct page *page)
+{
+	__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+	__dc_line_op(start, sz, OP_FLUSH_N_INV);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+	__dc_line_op(start, sz, OP_INV);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(unsigned long start, unsigned long sz)
+{
+	__dc_line_op(start, sz, OP_FLUSH);
+}
+EXPORT_SYMBOL(dma_cache_wback);
+
+/*
+ * This is API for making I/D Caches consistent when modifying code
+ * (loadable modules, kprobes,  etc)
+ * This is called on insmod, with kernel virtual address for CODE of
+ * the module. ARC cache maintenance ops require PHY address thus we
+ * need to convert vmalloc addr to PHY addr
+ */
+void flush_icache_range(unsigned long kstart, unsigned long kend)
+{
+	unsigned int tot_sz, off, sz;
+	unsigned long phy, pfn;
+	unsigned long flags;
+
+	/* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
+
+	/* This is not the right API for user virtual address */
+	if (kstart < TASK_SIZE) {
+		BUG_ON("Flush icache range for user virtual addr space");
+		return;
+	}
+
+	/* Shortcut for bigger flush ranges.
+	 * Here we don't care if this was kernel virtual or phy addr
+	 */
+	tot_sz = kend - kstart;
+	if (tot_sz > PAGE_SIZE) {
+		flush_cache_all();
+		return;
+	}
+
+	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
+	if (likely(kstart > PAGE_OFFSET)) {
+		__ic_line_inv(kstart, kend - kstart);
+		__dc_line_op(kstart, kend - kstart, OP_FLUSH);
+		return;
+	}
+
+	/*
+	 * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
+	 * (1) ARC Cache Maintenance ops only take Phy addr, hence special
+	 *     handling of kernel vaddr.
+	 *
+	 * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
+	 *     it still needs to handle  a 2 page scenario, where the range
+	 *     straddles across 2 virtual pages and hence need for loop
+	 */
+	while (tot_sz > 0) {
+		off = kstart % PAGE_SIZE;
+		pfn = vmalloc_to_pfn((void *)kstart);
+		phy = (pfn << PAGE_SHIFT) + off;
+		sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
+		local_irq_save(flags);
+		__dc_line_op(phy, sz, OP_FLUSH);
+		__ic_line_inv(phy, sz);
+		local_irq_restore(flags);
+		kstart += sz;
+		tot_sz -= sz;
+	}
+}
+
+/*
+ * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
+ * where vaddr is also available. This allows passing both vaddr and paddr
+ * bits to CDU for cache flush, short-circuting the current pessimistic algo
+ * which kills all possible aliases.
+ * An added adv of knowing that vaddr is user-vaddr avoids various checks
+ * and handling for k-vaddr, k-paddr as done in orig ver above
+ */
+void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
+			      int len)
+{
+	__ic_line_inv_vaddr(paddr, u_vaddr, len);
+	__dc_line_op(paddr, len, OP_FLUSH);
+}
+
+/*
+ * XXX: This also needs to be optim using pg_arch_1
+ * This is called when a page-cache page is about to be mapped into a
+ * user process' address space.  It offers an opportunity for a
+ * port to ensure d-cache/i-cache coherency if necessary.
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
+	__ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+}
+
+void flush_icache_all(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	write_aux_reg(ARC_REG_IC_IVIC, 1);
+
+	/* lr will not complete till the icache inv operation is not over */
+	read_aux_reg(ARC_REG_IC_CTRL);
+	local_irq_restore(flags);
+}
+
+noinline void flush_cache_all(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	flush_icache_all();
+	__dc_entire_op(OP_FLUSH_N_INV);
+
+	local_irq_restore(flags);
+
+}
+
+/**********************************************************************
+ * Explicit Cache flush request from user space via syscall
+ * Needed for JITs which generate code on the fly
+ */
+SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
+{
+	/* TBD: optimize this */
+	flush_cache_all();
+	return 0;
+}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
new file mode 100644
index 000000000000..12cc6485b218
--- /dev/null
+++ b/arch/arc/mm/dma.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * DMA Coherent API Notes
+ *
+ * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
+ * implemented by accessintg it using a kernel virtual address, with
+ * Cache bit off in the TLB entry.
+ *
+ * The default DMA address == Phy address which is 0x8000_0000 based.
+ * A platform/device can make it zero based, by over-riding
+ * plat_{dma,kernel}_addr_to_{kernel,dma}
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/export.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Helpers for Coherent DMA API.
+ */
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *paddr;
+
+	/* This is linear addr (0x8000_0000 based) */
+	paddr = alloc_pages_exact(size, gfp);
+	if (!paddr)
+		return NULL;
+
+	/* This is bus address, platform dependent */
+	*dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+
+	return paddr;
+}
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+			  dma_addr_t dma_handle)
+{
+	free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
+			 size);
+}
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *paddr, *kvaddr;
+
+	/* This is linear addr (0x8000_0000 based) */
+	paddr = alloc_pages_exact(size, gfp);
+	if (!paddr)
+		return NULL;
+
+	/* This is kernel Virtual address (0x7000_0000 based) */
+	kvaddr = ioremap_nocache((unsigned long)paddr, size);
+	if (kvaddr != NULL)
+		memset(kvaddr, 0, size);
+
+	/* This is bus address, platform dependent */
+	*dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+
+	return kvaddr;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
+		       dma_addr_t dma_handle)
+{
+	iounmap((void __force __iomem *)kvaddr);
+
+	free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
+			 size);
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+/*
+ * Helper for streaming DMA...
+ */
+void __arc_dma_cache_sync(unsigned long paddr, size_t size,
+			  enum dma_data_direction dir)
+{
+	__inline_dma_cache_sync(paddr, size, dir);
+}
+EXPORT_SYMBOL(__arc_dma_cache_sync);
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
new file mode 100644
index 000000000000..014172ba8432
--- /dev/null
+++ b/arch/arc/mm/extable.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Borrowed heavily from MIPS
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(instruction_pointer(regs));
+	if (fixup) {
+		regs->ret = fixup->fixup;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+
+long arc_copy_from_user_noinline(void *to, const void __user * from,
+		unsigned long n)
+{
+	return __arc_copy_from_user(to, from, n);
+}
+EXPORT_SYMBOL(arc_copy_from_user_noinline);
+
+long arc_copy_to_user_noinline(void __user *to, const void *from,
+		unsigned long n)
+{
+	return __arc_copy_to_user(to, from, n);
+}
+EXPORT_SYMBOL(arc_copy_to_user_noinline);
+
+unsigned long arc_clear_user_noinline(void __user *to,
+		unsigned long n)
+{
+	return __arc_clear_user(to, n);
+}
+EXPORT_SYMBOL(arc_clear_user_noinline);
+
+long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
+		long count)
+{
+	return __arc_strncpy_from_user(dst, src, count);
+}
+EXPORT_SYMBOL(arc_strncpy_from_user_noinline);
+
+long arc_strnlen_user_noinline(const char __user *src, long n)
+{
+	return __arc_strnlen_user(src, n);
+}
+EXPORT_SYMBOL(arc_strnlen_user_noinline);
+#endif
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
new file mode 100644
index 000000000000..af55aab803d2
--- /dev/null
+++ b/arch/arc/mm/fault.c
@@ -0,0 +1,228 @@
+/* Page Fault Handling for ARC (TLB Miss / ProtV)
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/signal.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+#include <asm/pgalloc.h>
+
+static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
+{
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 */
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	pgd = pgd_offset_fast(mm, address);
+	pgd_k = pgd_offset_k(address);
+
+	if (!pgd_present(*pgd_k))
+		goto bad_area;
+
+	pud = pud_offset(pgd, address);
+	pud_k = pud_offset(pgd_k, address);
+	if (!pud_present(*pud_k))
+		goto bad_area;
+
+	pmd = pmd_offset(pud, address);
+	pmd_k = pmd_offset(pud_k, address);
+	if (!pmd_present(*pmd_k))
+		goto bad_area;
+
+	set_pmd(pmd, *pmd_k);
+
+	/* XXX: create the TLB entry here */
+	return 0;
+
+bad_area:
+	return 1;
+}
+
+void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
+		   unsigned long cause_code)
+{
+	struct vm_area_struct *vma = NULL;
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	siginfo_t info;
+	int fault, ret;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+				(write ? FAULT_FLAG_WRITE : 0);
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+	if (address >= VMALLOC_START && address <= VMALLOC_END) {
+		ret = handle_vmalloc_fault(mm, address);
+		if (unlikely(ret))
+			goto bad_area_nosemaphore;
+		else
+			return;
+	}
+
+	info.si_code = SEGV_MAPERR;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_atomic() || !mm)
+		goto no_context;
+
+retry:
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+good_area:
+	info.si_code = SEGV_ACCERR;
+
+	/* Handle protection violation, execute on heap or stack */
+
+	if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH))
+		goto bad_area;
+
+	if (write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+survive:
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
+	if (unlikely(fatal_signal_pending(current))) {
+		if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
+			up_read(&mm->mmap_sem);
+		if (user_mode(regs))
+			return;
+	}
+
+	if (likely(!(fault & VM_FAULT_ERROR))) {
+		if (flags & FAULT_FLAG_ALLOW_RETRY) {
+			/* To avoid updating stats twice for retry case */
+			if (fault & VM_FAULT_MAJOR)
+				tsk->maj_flt++;
+			else
+				tsk->min_flt++;
+
+			if (fault & VM_FAULT_RETRY) {
+				flags &= ~FAULT_FLAG_ALLOW_RETRY;
+				flags |= FAULT_FLAG_TRIED;
+				goto retry;
+			}
+		}
+
+		/* Fault Handled Gracefully */
+		up_read(&mm->mmap_sem);
+		return;
+	}
+
+	/* TBD: switch to pagefault_out_of_memory() */
+	if (fault & VM_FAULT_OOM)
+		goto out_of_memory;
+	else if (fault & VM_FAULT_SIGBUS)
+		goto do_sigbus;
+
+	/* no man's land */
+	BUG();
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (user_mode(regs)) {
+		tsk->thread.fault_address = address;
+		tsk->thread.cause_code = cause_code;
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		/* info.si_code has been set above */
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGSEGV, &info, tsk);
+		return;
+	}
+
+no_context:
+	/* Are we prepared to handle this kernel fault?
+	 *
+	 * (The kernel has valid exception-points in the source
+	 *  when it acesses user-memory. When it fails in one
+	 *  of those points, we find it in a table and do a jump
+	 *  to some fixup code that loads an appropriate error
+	 *  code)
+	 */
+	if (fixup_exception(regs))
+		return;
+
+	die("Oops", regs, address, cause_code);
+
+out_of_memory:
+	if (is_global_init(tsk)) {
+		yield();
+		goto survive;
+	}
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs))
+		do_group_exit(SIGKILL);	/* This will never return */
+
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	if (!user_mode(regs))
+		goto no_context;
+
+	tsk->thread.fault_address = address;
+	tsk->thread.cause_code = cause_code;
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void __user *)address;
+	force_sig_info(SIGBUS, &info, tsk);
+}
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
new file mode 100644
index 000000000000..caf797de23fc
--- /dev/null
+++ b/arch/arc/mm/init.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#ifdef CONFIG_BLOCK_DEV_RAM
+#include <linux/blk.h>
+#endif
+#include <linux/swap.h>
+#include <linux/module.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <asm/arcregs.h>
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
+char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+EXPORT_SYMBOL(empty_zero_page);
+
+/* Default tot mem from .config */
+static unsigned long arc_mem_sz = 0x20000000;  /* some default */
+
+/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */
+static int __init setup_mem_sz(char *str)
+{
+	arc_mem_sz = memparse(str, NULL) & PAGE_MASK;
+
+	/* early console might not be setup yet - it will show up later */
+	pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz));
+
+	return 0;
+}
+early_param("mem", setup_mem_sz);
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	arc_mem_sz = size & PAGE_MASK;
+	pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz));
+}
+
+/*
+ * First memory setup routine called from setup_arch()
+ * 1. setup swapper's mm @init_mm
+ * 2. Count the pages we have and setup bootmem allocator
+ * 3. zone setup
+ */
+void __init setup_arch_memory(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 };
+	unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
+
+	init_mm.start_code = (unsigned long)_text;
+	init_mm.end_code = (unsigned long)_etext;
+	init_mm.end_data = (unsigned long)_edata;
+	init_mm.brk = (unsigned long)_end;
+
+	/*
+	 * We do it here, so that memory is correctly instantiated
+	 * even if "mem=xxx" cmline over-ride is given and/or
+	 * DT has memory node. Each causes an update to @arc_mem_sz
+	 * and we finally add memory one here
+	 */
+	memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz);
+
+	/*------------- externs in mm need setting up ---------------*/
+
+	/* first page of system - kernel .vector starts here */
+	min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE);
+
+	/* Last usable page of low mem (no HIGHMEM yet for ARC port) */
+	max_low_pfn = max_pfn = PFN_DOWN(end_mem);
+
+	max_mapnr = num_physpages = max_low_pfn - min_low_pfn;
+
+	/*------------- reserve kernel image -----------------------*/
+	memblock_reserve(CONFIG_LINUX_LINK_BASE,
+			 __pa(_end) - CONFIG_LINUX_LINK_BASE);
+
+	memblock_dump_all();
+
+	/*-------------- node setup --------------------------------*/
+	memset(zones_size, 0, sizeof(zones_size));
+	zones_size[ZONE_NORMAL] = num_physpages;
+
+	/*
+	 * We can't use the helper free_area_init(zones[]) because it uses
+	 * PAGE_OFFSET to compute the @min_low_pfn which would be wrong
+	 * when our kernel doesn't start at PAGE_OFFSET, i.e.
+	 * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE
+	 */
+	free_area_init_node(0,			/* node-id */
+			    zones_size,		/* num pages per zone */
+			    min_low_pfn,	/* first pfn of node */
+			    NULL);		/* NO holes */
+}
+
+/*
+ * mem_init - initializes memory
+ *
+ * Frees up bootmem
+ * Calculates and displays memory available/used
+ */
+void __init mem_init(void)
+{
+	int codesize, datasize, initsize, reserved_pages, free_pages;
+	int tmp;
+
+	high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
+
+	totalram_pages = free_all_bootmem();
+
+	/* count all reserved pages [kernel code/data/mem_map..] */
+	reserved_pages = 0;
+	for (tmp = 0; tmp < max_mapnr; tmp++)
+		if (PageReserved(mem_map + tmp))
+			reserved_pages++;
+
+	/* XXX: nr_free_pages() is equivalent */
+	free_pages = max_mapnr - reserved_pages;
+
+	/*
+	 * For the purpose of display below, split the "reserve mem"
+	 * kernel code/data is already shown explicitly,
+	 * Show any other reservations (mem_map[ ] et al)
+	 */
+	reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >>
+								PAGE_SHIFT);
+
+	codesize = _etext - _text;
+	datasize = _end - _etext;
+	initsize = __init_end - __init_begin;
+
+	pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n",
+		PAGES_TO_MB(free_pages),
+		TO_MB(arc_mem_sz),
+		TO_KB(codesize), TO_KB(datasize), TO_KB(initsize),
+		PAGES_TO_KB(reserved_pages));
+}
+
+static void __init free_init_pages(const char *what, unsigned long begin,
+				   unsigned long end)
+{
+	unsigned long addr;
+
+	pr_info("Freeing %s: %ldk [%lx] to [%lx]\n",
+		what, TO_KB(end - begin), begin, end);
+
+	/* need to check that the page we free is not a partial page */
+	for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+}
+
+/*
+ * free_initmem: Free all the __init memory.
+ */
+void __init_refok free_initmem(void)
+{
+	free_init_pages("unused kernel memory",
+			(unsigned long)__init_begin,
+			(unsigned long)__init_end);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_init_pages("initrd memory", start, end);
+}
+#endif
+
+#ifdef CONFIG_OF_FLATTREE
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+					    unsigned long end)
+{
+	pr_err("%s(%lx, %lx)\n", __func__, start, end);
+}
+#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
new file mode 100644
index 000000000000..3e5c92c79936
--- /dev/null
+++ b/arch/arc/mm/ioremap.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/cache.h>
+
+void __iomem *ioremap(unsigned long paddr, unsigned long size)
+{
+	unsigned long end;
+
+	/* Don't allow wraparound or zero size */
+	end = paddr + size - 1;
+	if (!size || (end < paddr))
+		return NULL;
+
+	/* If the region is h/w uncached, avoid MMU mappings */
+	if (paddr >= ARC_UNCACHED_ADDR_SPACE)
+		return (void __iomem *)paddr;
+
+	return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
+}
+EXPORT_SYMBOL(ioremap);
+
+/*
+ * ioremap with access flags
+ * Cache semantics wise it is same as ioremap - "forced" uncached.
+ * However unline vanilla ioremap which bypasses ARC MMU for addresses in
+ * ARC hardware uncached region, this one still goes thru the MMU as caller
+ * might need finer access control (R/W/X)
+ */
+void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+			   unsigned long flags)
+{
+	void __iomem *vaddr;
+	struct vm_struct *area;
+	unsigned long off, end;
+	pgprot_t prot = __pgprot(flags);
+
+	/* Don't allow wraparound, zero size */
+	end = paddr + size - 1;
+	if ((!size) || (end < paddr))
+		return NULL;
+
+	/* An early platform driver might end up here */
+	if (!slab_is_available())
+		return NULL;
+
+	/* force uncached */
+	prot = pgprot_noncached(prot);
+
+	/* Mappings have to be page-aligned */
+	off = paddr & ~PAGE_MASK;
+	paddr &= PAGE_MASK;
+	size = PAGE_ALIGN(end + 1) - paddr;
+
+	/*
+	 * Ok, go for it..
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return NULL;
+	area->phys_addr = paddr;
+	vaddr = (void __iomem *)area->addr;
+	if (ioremap_page_range((unsigned long)vaddr,
+			       (unsigned long)vaddr + size, paddr, prot)) {
+		vunmap((void __force *)vaddr);
+		return NULL;
+	}
+	return (void __iomem *)(off + (char __iomem *)vaddr);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+
+void iounmap(const void __iomem *addr)
+{
+	if (addr >= (void __force __iomem *)ARC_UNCACHED_ADDR_SPACE)
+		return;
+
+	vfree((void *)(PAGE_MASK & (unsigned long __force)addr));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
new file mode 100644
index 000000000000..9b9ce23f4ec3
--- /dev/null
+++ b/arch/arc/mm/tlb.c
@@ -0,0 +1,645 @@
+/*
+ * TLB Management (flush/create/diagnostics) for ARC700
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: Aug 2011
+ *  -Reintroduce duplicate PD fixup - some customer chips still have the issue
+ *
+ * vineetg: May 2011
+ *  -No need to flush_cache_page( ) for each call to update_mmu_cache()
+ *   some of the LMBench tests improved amazingly
+ *      = page-fault thrice as fast (75 usec to 28 usec)
+ *      = mmap twice as fast (9.6 msec to 4.6 msec),
+ *      = fork (5.3 msec to 3.7 msec)
+ *
+ * vineetg: April 2011 :
+ *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
+ *      helps avoid a shift when preparing PD0 from PTE
+ *
+ * vineetg: April 2011 : Preparing for MMU V3
+ *  -MMU v2/v3 BCRs decoded differently
+ *  -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
+ *  -tlb_entry_erase( ) can be void
+ *  -local_flush_tlb_range( ):
+ *      = need not "ceil" @end
+ *      = walks MMU only if range spans < 32 entries, as opposed to 256
+ *
+ * Vineetg: Sept 10th 2008
+ *  -Changes related to MMU v2 (Rel 4.8)
+ *
+ * Vineetg: Aug 29th 2008
+ *  -In TLB Flush operations (Metal Fix MMU) there is a explict command to
+ *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
+ *    it fails. Thus need to load it with ANY valid value before invoking
+ *    TLBIVUTLB cmd
+ *
+ * Vineetg: Aug 21th 2008:
+ *  -Reduced the duration of IRQ lockouts in TLB Flush routines
+ *  -Multiple copies of TLB erase code seperated into a "single" function
+ *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
+ *       in interrupt-safe region.
+ *
+ * Vineetg: April 23rd Bug #93131
+ *    Problem: tlb_flush_kernel_range() doesnt do anything if the range to
+ *              flush is more than the size of TLB itself.
+ *
+ * Rahul Trivedi : Codito Technologies 2004
+ */
+
+#include <linux/module.h>
+#include <asm/arcregs.h>
+#include <asm/setup.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/*			Need for ARC MMU v2
+ *
+ * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
+ * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
+ * map into same set, there would be contention for the 2 ways causing severe
+ * Thrashing.
+ *
+ * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
+ * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
+ * Given this, the thrasing problem should never happen because once the 3
+ * J-TLB entries are created (even though 3rd will knock out one of the prev
+ * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
+ *
+ * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
+ * This is a simple design for keeping them in sync. So what do we do?
+ * The solution which James came up was pretty neat. It utilised the assoc
+ * of uTLBs by not invalidating always but only when absolutely necessary.
+ *
+ * - Existing TLB commands work as before
+ * - New command (TLBWriteNI) for TLB write without clearing uTLBs
+ * - New command (TLBIVUTLB) to invalidate uTLBs.
+ *
+ * The uTLBs need only be invalidated when pages are being removed from the
+ * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
+ * as a result of a miss, the removed entry is still allowed to exist in the
+ * uTLBs as it is still valid and present in the OS page table. This allows the
+ * full associativity of the uTLBs to hide the limited associativity of the main
+ * TLB.
+ *
+ * During a miss handler, the new "TLBWriteNI" command is used to load
+ * entries without clearing the uTLBs.
+ *
+ * When the OS page table is updated, TLB entries that may be associated with a
+ * removed page are removed (flushed) from the TLB using TLBWrite. In this
+ * circumstance, the uTLBs must also be cleared. This is done by using the
+ * existing TLBWrite command. An explicit IVUTLB is also required for those
+ * corner cases when TLBWrite was not executed at all because the corresp
+ * J-TLB entry got evicted/replaced.
+ */
+
+/* A copy of the ASID from the PID reg is kept in asid_cache */
+int asid_cache = FIRST_ASID;
+
+/* ASID to mm struct mapping. We have one extra entry corresponding to
+ * NO_ASID to save us a compare when clearing the mm entry for old asid
+ * see get_new_mmu_context (asm-arc/mmu_context.h)
+ */
+struct mm_struct *asid_mm_map[NUM_ASID + 1];
+
+/*
+ * Utility Routine to erase a J-TLB entry
+ * The procedure is to look it up in the MMU. If found, ERASE it by
+ *  issuing a TlbWrite CMD with PD0 = PD1 = 0
+ */
+
+static void __tlb_entry_erase(void)
+{
+	write_aux_reg(ARC_REG_TLBPD1, 0);
+	write_aux_reg(ARC_REG_TLBPD0, 0);
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+	unsigned int idx;
+
+	/* Locate the TLB entry for this vaddr + ASID */
+	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/* No error means entry found, zero it out */
+	if (likely(!(idx & TLB_LKUP_ERR))) {
+		__tlb_entry_erase();
+	} else {		/* Some sort of Error */
+
+		/* Duplicate entry error */
+		if (idx & 0x1) {
+			/* TODO we need to handle this case too */
+			pr_emerg("unhandled Duplicate flush for %x\n",
+			       vaddr_n_asid);
+		}
+		/* else entry not found so nothing to do */
+	}
+}
+
+/****************************************************************************
+ * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs)
+ *
+ * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB
+ *
+ * utlb_invalidate ( )
+ *  -For v2 MMU calls Flush uTLB Cmd
+ *  -For v1 MMU does nothing (except for Metal Fix v1 MMU)
+ *      This is because in v1 TLBWrite itself invalidate uTLBs
+ ***************************************************************************/
+
+static void utlb_invalidate(void)
+{
+#if (CONFIG_ARC_MMU_VER >= 2)
+
+#if (CONFIG_ARC_MMU_VER < 3)
+	/* MMU v2 introduced the uTLB Flush command.
+	 * There was however an obscure hardware bug, where uTLB flush would
+	 * fail when a prior probe for J-TLB (both totally unrelated) would
+	 * return lkup err - because the entry didnt exist in MMU.
+	 * The Workround was to set Index reg with some valid value, prior to
+	 * flush. This was fixed in MMU v3 hence not needed any more
+	 */
+	unsigned int idx;
+
+	/* make sure INDEX Reg is valid */
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/* If not write some dummy val */
+	if (unlikely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBINDEX, 0xa);
+#endif
+
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
+#endif
+
+}
+
+/*
+ * Un-conditionally (without lookup) erase the entire MMU contents
+ */
+
+noinline void local_flush_tlb_all(void)
+{
+	unsigned long flags;
+	unsigned int entry;
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+	local_irq_save(flags);
+
+	/* Load PD0 and PD1 with template for a Blank Entry */
+	write_aux_reg(ARC_REG_TLBPD1, 0);
+	write_aux_reg(ARC_REG_TLBPD0, 0);
+
+	for (entry = 0; entry < mmu->num_tlb; entry++) {
+		/* write this entry to the TLB */
+		write_aux_reg(ARC_REG_TLBINDEX, entry);
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+	}
+
+	utlb_invalidate();
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Flush the entrie MM for userland. The fastest way is to move to Next ASID
+ */
+noinline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+	/*
+	 * Small optimisation courtesy IA64
+	 * flush_mm called during fork,exit,munmap etc, multiple times as well.
+	 * Only for fork( ) do we need to move parent to a new MMU ctxt,
+	 * all other cases are NOPs, hence this check.
+	 */
+	if (atomic_read(&mm->mm_users) == 0)
+		return;
+
+	/*
+	 * Workaround for Android weirdism:
+	 * A binder VMA could end up in a task such that vma->mm != tsk->mm
+	 * old code would cause h/w - s/w ASID to get out of sync
+	 */
+	if (current->mm != mm)
+		destroy_context(mm);
+	else
+		get_new_mmu_context(mm);
+}
+
+/*
+ * Flush a Range of TLB entries for userland.
+ * @start is inclusive, while @end is exclusive
+ * Difference between this and Kernel Range Flush is
+ *  -Here the fastest way (if range is too large) is to move to next ASID
+ *      without doing any explicit Shootdown
+ *  -In case of kernel Flush, entry has to be shot down explictly
+ */
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end)
+{
+	unsigned long flags;
+	unsigned int asid;
+
+	/* If range @start to @end is more than 32 TLB entries deep,
+	 * its better to move to a new ASID rather than searching for
+	 * individual entries and then shooting them down
+	 *
+	 * The calc above is rough, doesn't account for unaligned parts,
+	 * since this is heuristics based anyways
+	 */
+	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
+		local_flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	/*
+	 * @start moved to page start: this alone suffices for checking
+	 * loop end condition below, w/o need for aligning @end to end
+	 * e.g. 2000 to 4001 will anyhow loop twice
+	 */
+	start &= PAGE_MASK;
+
+	local_irq_save(flags);
+	asid = vma->vm_mm->context.asid;
+
+	if (asid != NO_ASID) {
+		while (start < end) {
+			tlb_entry_erase(start | (asid & 0xff));
+			start += PAGE_SIZE;
+		}
+	}
+
+	utlb_invalidate();
+
+	local_irq_restore(flags);
+}
+
+/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective)
+ *  @start, @end interpreted as kvaddr
+ * Interestingly, shared TLB entries can also be flushed using just
+ * @start,@end alone (interpreted as user vaddr), although technically SASID
+ * is also needed. However our smart TLbProbe lookup takes care of that.
+ */
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+
+	/* exactly same as above, except for TLB entry not taking ASID */
+
+	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	start &= PAGE_MASK;
+
+	local_irq_save(flags);
+	while (start < end) {
+		tlb_entry_erase(start);
+		start += PAGE_SIZE;
+	}
+
+	utlb_invalidate();
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Delete TLB entry in MMU for a given page (??? address)
+ * NOTE One TLB entry contains translation for single PAGE
+ */
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	unsigned long flags;
+
+	/* Note that it is critical that interrupts are DISABLED between
+	 * checking the ASID and using it flush the TLB entry
+	 */
+	local_irq_save(flags);
+
+	if (vma->vm_mm->context.asid != NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) |
+				(vma->vm_mm->context.asid & 0xff));
+		utlb_invalidate();
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Routine to create a TLB entry
+ */
+void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+	unsigned long flags;
+	unsigned int idx, asid_or_sasid;
+	unsigned long pd0_flags;
+
+	/*
+	 * create_tlb() assumes that current->mm == vma->mm, since
+	 * -it ASID for TLB entry is fetched from MMU ASID reg (valid for curr)
+	 * -completes the lazy write to SASID reg (again valid for curr tsk)
+	 *
+	 * Removing the assumption involves
+	 * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
+	 * -Fix the TLB paranoid debug code to not trigger false negatives.
+	 * -More importantly it makes this handler inconsistent with fast-path
+	 *  TLB Refill handler which always deals with "current"
+	 *
+	 * Lets see the use cases when current->mm != vma->mm and we land here
+	 *  1. execve->copy_strings()->__get_user_pages->handle_mm_fault
+	 *     Here VM wants to pre-install a TLB entry for user stack while
+	 *     current->mm still points to pre-execve mm (hence the condition).
+	 *     However the stack vaddr is soon relocated (randomization) and
+	 *     move_page_tables() tries to undo that TLB entry.
+	 *     Thus not creating TLB entry is not any worse.
+	 *
+	 *  2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a
+	 *     breakpoint in debugged task. Not creating a TLB now is not
+	 *     performance critical.
+	 *
+	 * Both the cases above are not good enough for code churn.
+	 */
+	if (current->active_mm != vma->vm_mm)
+		return;
+
+	local_irq_save(flags);
+
+	tlb_paranoid_check(vma->vm_mm->context.asid, address);
+
+	address &= PAGE_MASK;
+
+	/* update this PTE credentials */
+	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
+
+	/* Create HW TLB entry Flags (in PD0) from PTE Flags */
+#if (CONFIG_ARC_MMU_VER <= 2)
+	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1);
+#else
+	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0));
+#endif
+
+	/* ASID for this task */
+	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
+
+	write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
+
+	/* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
+	write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1));
+
+	/* First verify if entry for this vaddr+ASID already exists */
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/*
+	 * If Not already present get a free slot from MMU.
+	 * Otherwise, Probe would have located the entry and set INDEX Reg
+	 * with existing location. This will cause Write CMD to over-write
+	 * existing entry with new PD0 and PD1
+	 */
+	if (likely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+
+	/*
+	 * Commit the Entry to MMU
+	 * It doesnt sound safe to use the TLBWriteNI cmd here
+	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
+	 */
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+
+	local_irq_restore(flags);
+}
+
+/* arch hook called by core VM at the end of handle_mm_fault( ),
+ * when a new PTE is entered in Page Tables or an existing one
+ * is modified. We aggresively pre-install a TLB entry
+ */
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
+		      pte_t *ptep)
+{
+
+	create_tlb(vma, vaddress, ptep);
+}
+
+/* Read the Cache Build Confuration Registers, Decode them and save into
+ * the cpuinfo structure for later use.
+ * No Validation is done here, simply read/convert the BCRs
+ */
+void __init read_decode_mmu_bcr(void)
+{
+	unsigned int tmp;
+	struct bcr_mmu_1_2 *mmu2;	/* encoded MMU2 attr */
+	struct bcr_mmu_3 *mmu3;		/* encoded MMU3 attr */
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+	tmp = read_aux_reg(ARC_REG_MMU_BCR);
+	mmu->ver = (tmp >> 24);
+
+	if (mmu->ver <= 2) {
+		mmu2 = (struct bcr_mmu_1_2 *)&tmp;
+		mmu->pg_sz = PAGE_SIZE;
+		mmu->sets = 1 << mmu2->sets;
+		mmu->ways = 1 << mmu2->ways;
+		mmu->u_dtlb = mmu2->u_dtlb;
+		mmu->u_itlb = mmu2->u_itlb;
+	} else {
+		mmu3 = (struct bcr_mmu_3 *)&tmp;
+		mmu->pg_sz = 512 << mmu3->pg_sz;
+		mmu->sets = 1 << mmu3->sets;
+		mmu->ways = 1 << mmu3->ways;
+		mmu->u_dtlb = mmu3->u_dtlb;
+		mmu->u_itlb = mmu3->u_itlb;
+	}
+
+	mmu->num_tlb = mmu->sets * mmu->ways;
+}
+
+char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
+{
+	int n = 0;
+	struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+	n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
+		       p_mmu->ver, TO_KB(p_mmu->pg_sz));
+
+	n += scnprintf(buf + n, len - n,
+		       "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n",
+		       p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
+		       p_mmu->u_dtlb, p_mmu->u_itlb,
+		       __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : "");
+
+	return buf;
+}
+
+void __init arc_mmu_init(void)
+{
+	char str[256];
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+	printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
+
+	/* For efficiency sake, kernel is compile time built for a MMU ver
+	 * This must match the hardware it is running on.
+	 * Linux built for MMU V2, if run on MMU V1 will break down because V1
+	 *  hardware doesn't understand cmds such as WriteNI, or IVUTLB
+	 * On the other hand, Linux built for V1 if run on MMU V2 will do
+	 *   un-needed workarounds to prevent memcpy thrashing.
+	 * Similarly MMU V3 has new features which won't work on older MMU
+	 */
+	if (mmu->ver != CONFIG_ARC_MMU_VER) {
+		panic("MMU ver %d doesn't match kernel built for %d...\n",
+		      mmu->ver, CONFIG_ARC_MMU_VER);
+	}
+
+	if (mmu->pg_sz != PAGE_SIZE)
+		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
+
+	/*
+	 * ASID mgmt data structures are compile time init
+	 *  asid_cache = FIRST_ASID and asid_mm_map[] all zeroes
+	 */
+
+	local_flush_tlb_all();
+
+	/* Enable the MMU */
+	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
+
+	/* In smp we use this reg for interrupt 1 scratch */
+#ifndef CONFIG_SMP
+	/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
+	write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
+#endif
+}
+
+/*
+ * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4}
+ * The mapping is Column-first.
+ *		---------------------	-----------
+ *		|way0|way1|way2|way3|	|way0|way1|
+ *		---------------------	-----------
+ * [set0]	|  0 |  1 |  2 |  3 |	|  0 |  1 |
+ * [set1]	|  4 |  5 |  6 |  7 |	|  2 |  3 |
+ *		~		    ~	~	  ~
+ * [set127]	| 508| 509| 510| 511|	| 254| 255|
+ *		---------------------	-----------
+ * For normal operations we don't(must not) care how above works since
+ * MMU cmd getIndex(vaddr) abstracts that out.
+ * However for walking WAYS of a SET, we need to know this
+ */
+#define SET_WAY_TO_IDX(mmu, set, way)  ((set) * mmu->ways + (way))
+
+/* Handling of Duplicate PD (TLB entry) in MMU.
+ * -Could be due to buggy customer tapeouts or obscure kernel bugs
+ * -MMU complaints not at the time of duplicate PD installation, but at the
+ *      time of lookup matching multiple ways.
+ * -Ideally these should never happen - but if they do - workaround by deleting
+ *      the duplicate one.
+ * -Knob to be verbose abt it.(TODO: hook them up to debugfs)
+ */
+volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */
+
+void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
+			  struct pt_regs *regs)
+{
+	int set, way, n;
+	unsigned int pd0[4], pd1[4];	/* assume max 4 ways */
+	unsigned long flags, is_valid;
+	struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+	local_irq_save(flags);
+
+	/* re-enable the MMU */
+	write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
+
+	/* loop thru all sets of TLB */
+	for (set = 0; set < mmu->sets; set++) {
+
+		/* read out all the ways of current set */
+		for (way = 0, is_valid = 0; way < mmu->ways; way++) {
+			write_aux_reg(ARC_REG_TLBINDEX,
+					  SET_WAY_TO_IDX(mmu, set, way));
+			write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
+			pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
+			pd1[way] = read_aux_reg(ARC_REG_TLBPD1);
+			is_valid |= pd0[way] & _PAGE_PRESENT;
+		}
+
+		/* If all the WAYS in SET are empty, skip to next SET */
+		if (!is_valid)
+			continue;
+
+		/* Scan the set for duplicate ways: needs a nested loop */
+		for (way = 0; way < mmu->ways; way++) {
+			if (!pd0[way])
+				continue;
+
+			for (n = way + 1; n < mmu->ways; n++) {
+				if ((pd0[way] & PAGE_MASK) ==
+				    (pd0[n] & PAGE_MASK)) {
+
+					if (dup_pd_verbose) {
+						pr_info("Duplicate PD's @"
+							"[%d:%d]/[%d:%d]\n",
+						     set, way, set, n);
+						pr_info("TLBPD0[%u]: %08x\n",
+						     way, pd0[way]);
+					}
+
+					/*
+					 * clear entry @way and not @n. This is
+					 * critical to our optimised loop
+					 */
+					pd0[way] = pd1[way] = 0;
+					write_aux_reg(ARC_REG_TLBINDEX,
+						SET_WAY_TO_IDX(mmu, set, way));
+					__tlb_entry_erase();
+				}
+			}
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+/***********************************************************************
+ * Diagnostic Routines
+ *  -Called from Low Level TLB Hanlders if things don;t look good
+ **********************************************************************/
+
+#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+
+/*
+ * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
+ * don't match
+ */
+void print_asid_mismatch(int is_fast_path)
+{
+	int pid_sw, pid_hw;
+	pid_sw = current->active_mm->context.asid;
+	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+
+	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
+	       is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw);
+
+	__asm__ __volatile__("flag 1");
+}
+
+void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr)
+{
+	unsigned int pid_hw;
+
+	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+
+	if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID)))
+		print_asid_mismatch(0);
+}
+#endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
new file mode 100644
index 000000000000..9df765dc7c3a
--- /dev/null
+++ b/arch/arc/mm/tlbex.S
@@ -0,0 +1,408 @@
+/*
+ * TLB Exception Handling for ARC
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: April 2011 :
+ *  -MMU v1: moved out legacy code into a seperate file
+ *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
+ *      helps avoid a shift when preparing PD0 from PTE
+ *
+ * Vineetg: July 2009
+ *  -For MMU V2, we need not do heuristics at the time of commiting a D-TLB
+ *   entry, so that it doesn't knock out it's I-TLB entry
+ *  -Some more fine tuning:
+ *   bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
+ *
+ * Vineetg: July 2009
+ *  -Practically rewrote the I/D TLB Miss handlers
+ *   Now 40 and 135 instructions a peice as compared to 131 and 449 resp.
+ *   Hence Leaner by 1.5 K
+ *   Used Conditional arithmetic to replace excessive branching
+ *   Also used short instructions wherever possible
+ *
+ * Vineetg: Aug 13th 2008
+ *  -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing
+ *   more information in case of a Fatality
+ *
+ * Vineetg: March 25th Bug #92690
+ *  -Added Debug Code to check if sw-ASID == hw-ASID
+
+ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
+ */
+
+	.cpu A7
+
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/tlb.h>
+#include <asm/pgtable.h>
+#include <asm/arcregs.h>
+#include <asm/cache.h>
+#include <asm/processor.h>
+#if (CONFIG_ARC_MMU_VER == 1)
+#include <asm/tlb-mmu1.h>
+#endif
+
+;--------------------------------------------------------------------------
+; scratch memory to save the registers (r0-r3) used to code TLB refill Handler
+; For details refer to comments before TLBMISS_FREEUP_REGS below
+;--------------------------------------------------------------------------
+
+ARCFP_DATA ex_saved_reg1
+	.align 1 << L1_CACHE_SHIFT	; IMP: Must be Cache Line aligned
+	.type   ex_saved_reg1, @object
+#ifdef CONFIG_SMP
+	.size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
+ex_saved_reg1:
+	.zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
+#else
+	.size   ex_saved_reg1, 16
+ex_saved_reg1:
+	.zero 16
+#endif
+
+;============================================================================
+;  Troubleshooting Stuff
+;============================================================================
+
+; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
+; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
+; we use the MMU PID Reg to get current ASID.
+; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
+; So we try to detect this in TLB Mis shandler
+
+
+.macro DBG_ASID_MISMATCH
+
+#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+
+	; make sure h/w ASID is same as s/w ASID
+
+	GET_CURR_TASK_ON_CPU  r3
+	ld r0, [r3, TASK_ACT_MM]
+	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
+
+	lr r1, [ARC_REG_PID]
+	and r1, r1, 0xFF
+	breq r1, r0, 5f
+
+	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
+	lr  r0, [erstatus]
+	bbit0 r0, STATUS_U_BIT, 5f
+
+	; We sure are in troubled waters, Flag the error, but to do so
+	; need to switch to kernel mode stack to call error routine
+	GET_TSK_STACK_BASE   r3, sp
+
+	; Call printk to shoutout aloud
+	mov r0, 1
+	j print_asid_mismatch
+
+5:   ; ASIDs match so proceed normally
+	nop
+
+#endif
+
+.endm
+
+;============================================================================
+;TLB Miss handling Code
+;============================================================================
+
+;-----------------------------------------------------------------------------
+; This macro does the page-table lookup for the faulting address.
+; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
+.macro LOAD_FAULT_PTE
+
+	lr  r2, [efa]
+
+#ifndef CONFIG_SMP
+	lr  r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
+#else
+	GET_CURR_TASK_ON_CPU  r1
+	ld  r1, [r1, TASK_ACT_MM]
+	ld  r1, [r1, MM_PGD]
+#endif
+
+	lsr     r0, r2, PGDIR_SHIFT     ; Bits for indexing into PGD
+	ld.as   r1, [r1, r0]            ; PGD entry corresp to faulting addr
+	and.f   r1, r1, PAGE_MASK       ; Ignoring protection and other flags
+	;   contains Ptr to Page Table
+	bz.d    do_slow_path_pf         ; if no Page Table, do page fault
+
+	; Get the PTE entry: The idea is
+	; (1) x = addr >> PAGE_SHIFT 	-> masks page-off bits from @fault-addr
+	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
+	; (3) z = pgtbl[y]
+	; To avoid the multiply by in end, we do the -2, <<2 below
+
+	lsr     r0, r2, (PAGE_SHIFT - 2)
+	and     r0, r0, ( (PTRS_PER_PTE - 1) << 2)
+	ld.aw   r0, [r1, r0]            ; get PTE and PTE ptr for fault addr
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+	and.f 0, r0, _PAGE_PRESENT
+	bz   1f
+	ld   r2, [num_pte_not_present]
+	add  r2, r2, 1
+	st   r2, [num_pte_not_present]
+1:
+#endif
+
+.endm
+
+;-----------------------------------------------------------------
+; Convert Linux PTE entry into TLB entry
+; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+; IN: r0 = PTE, r1 = ptr to PTE
+
+.macro CONV_PTE_TO_TLB
+	and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
+	sr  r3, [ARC_REG_TLBPD1]    ; these go in PD1
+
+	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
+#if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
+	lsr r2, r2                  ; shift PTE flags to match layout in PD0
+#endif
+
+	lr  r3,[ARC_REG_TLBPD0]     ; MMU prepares PD0 with vaddr and asid
+
+	or  r3, r3, r2              ; S | vaddr | {sasid|asid}
+	sr  r3,[ARC_REG_TLBPD0]     ; rewrite PD0
+.endm
+
+;-----------------------------------------------------------------
+; Commit the TLB entry into MMU
+
+.macro COMMIT_ENTRY_TO_MMU
+
+	/* Get free TLB slot: Set = computed from vaddr, way = random */
+	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
+
+	/* Commit the Write */
+#if (CONFIG_ARC_MMU_VER >= 2)   /* introduced in v2 */
+	sr TLBWriteNI, [ARC_REG_TLBCOMMAND]
+#else
+	sr TLBWrite, [ARC_REG_TLBCOMMAND]
+#endif
+.endm
+
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
+; "global" is used to free-up FIRST core reg to be able to code the rest of
+; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
+; need to be saved as well by extending the "global" to be 4 words. Hence
+;	".size   ex_saved_reg1, 16"
+; [All of this dance is to avoid stack switching for each TLB Miss, since we
+; only need to save only a handful of regs, as opposed to complete reg file]
+;
+; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
+; core reg as it will not be SMP safe.
+; Thus scratch AUX reg is used (and no longer used to cache task PGD).
+; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
+; Epilogue thus has to locate the "per-cpu" storage for regs.
+; To avoid cache line bouncing the per-cpu global is aligned/sized per
+; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
+;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
+
+; As simple as that....
+
+.macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_SMP
+	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
+	GET_CPU_ID  r0			; get to per cpu scratch mem,
+	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
+	add r0, @ex_saved_reg1, r0
+#else
+	st    r0, [@ex_saved_reg1]
+	mov_s r0, @ex_saved_reg1
+#endif
+	st_s  r1, [r0, 4]
+	st_s  r2, [r0, 8]
+	st_s  r3, [r0, 12]
+
+	; VERIFY if the ASID in MMU-PID Reg is same as
+	; one in Linux data structures
+
+	DBG_ASID_MISMATCH
+.endm
+
+;-----------------------------------------------------------------
+.macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_SMP
+	GET_CPU_ID  r0			; get to per cpu scratch mem
+	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
+	add r0, @ex_saved_reg1, r0
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	lr    r0, [ARC_REG_SCRATCH_DATA0]
+#else
+	mov_s r0, @ex_saved_reg1
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	ld_s  r0, [r0]
+#endif
+.endm
+
+ARCFP_CODE	;Fast Path Code, candidate for ICCM
+
+;-----------------------------------------------------------------------------
+; I-TLB Miss Exception Handler
+;-----------------------------------------------------------------------------
+
+ARC_ENTRY EV_TLBMissI
+
+	TLBMISS_FREEUP_REGS
+
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+	ld  r0, [@numitlb]
+	add r0, r0, 1
+	st  r0, [@numitlb]
+#endif
+
+	;----------------------------------------------------------------
+	; Get the PTE corresponding to V-addr accessed
+	LOAD_FAULT_PTE
+
+	;----------------------------------------------------------------
+	; VERIFY_PTE: Check if PTE permissions approp for executing code
+	cmp_s   r2, VMALLOC_START
+	mov.lo  r2, (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE)
+	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE)
+
+	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
+	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
+	bnz     do_slow_path_pf
+
+	; Let Linux VM know that the page was accessed
+	or      r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED)  ; set Accessed Bit
+	st_s    r0, [r1]                                  ; Write back PTE
+
+	CONV_PTE_TO_TLB
+	COMMIT_ENTRY_TO_MMU
+	TLBMISS_RESTORE_REGS
+	rtie
+
+ARC_EXIT EV_TLBMissI
+
+;-----------------------------------------------------------------------------
+; D-TLB Miss Exception Handler
+;-----------------------------------------------------------------------------
+
+ARC_ENTRY EV_TLBMissD
+
+	TLBMISS_FREEUP_REGS
+
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+	ld  r0, [@numdtlb]
+	add r0, r0, 1
+	st  r0, [@numdtlb]
+#endif
+
+	;----------------------------------------------------------------
+	; Get the PTE corresponding to V-addr accessed
+	; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE
+	LOAD_FAULT_PTE
+
+	;----------------------------------------------------------------
+	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
+
+	mov_s   r2, 0
+	lr      r3, [ecr]
+	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
+	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
+	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
+	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
+	; Above laddering takes care of XCHG access
+	;   which is both Read and Write
+
+	; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx
+	; For copy_(to|from)_user, despite exception taken in kernel mode,
+	; this code is not hit, because EFA would still be the user mode
+	; address (EFA < 0x6000_0000).
+	; This code is for legit kernel mode faults, vmalloc specifically
+	; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
+
+	lr      r3, [efa]
+	cmp     r3, VMALLOC_START - 1   ; If kernel mode access
+	asl.hi  r2, r2, 3               ; make _PAGE_xx flags as _PAGE_K_xx
+	or      r2, r2, _PAGE_PRESENT   ; Common flag for K/U mode
+
+	; By now, r2 setup with all the Flags we need to check in PTE
+	and     r3, r0, r2              ; Mask out NON Flag bits from PTE
+	brne.d  r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test)
+
+	;----------------------------------------------------------------
+	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
+	lr      r3, [ecr]
+	or      r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always
+	btst_s  r3,  ECR_C_BIT_DTLB_ST_MISS   ; See if it was a Write Access ?
+	or.nz   r0, r0, _PAGE_MODIFIED        ; if Write, set Dirty bit as well
+	st_s    r0, [r1]                      ; Write back PTE
+
+	CONV_PTE_TO_TLB
+
+#if (CONFIG_ARC_MMU_VER == 1)
+	; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
+	; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
+	; But only for old MMU or one with Metal Fix
+	TLB_WRITE_HEURISTICS
+#endif
+
+	COMMIT_ENTRY_TO_MMU
+	TLBMISS_RESTORE_REGS
+	rtie
+
+;-------- Common routine to call Linux Page Fault Handler -----------
+do_slow_path_pf:
+
+	; Restore the 4-scratch regs saved by fast path miss handler
+	TLBMISS_RESTORE_REGS
+
+	; Slow path TLB Miss handled as a regular ARC Exception
+	; (stack switching / save the complete reg-file).
+	; That requires freeing up r9
+	EXCPN_PROLOG_FREEUP_REG r9
+
+	lr  r9, [erstatus]
+
+	SWITCH_TO_KERNEL_STK
+	SAVE_ALL_SYS
+
+	; ------- setup args for Linux Page fault Hanlder ---------
+	mov_s r0, sp
+	lr  r2, [efa]
+	lr  r3, [ecr]
+
+	; Both st and ex imply WRITE access of some sort, hence do_page_fault( )
+	; invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB miss or
+	; DTLB-ld Miss
+	; DTLB Miss Cause code is ld = 0x01 , st = 0x02, ex = 0x03
+	; Following code uses that fact that st/ex have one bit in common
+
+	btst_s r3,  ECR_C_BIT_DTLB_ST_MISS
+	mov.z  r1, 0
+	mov.nz r1, 1
+
+	; We don't want exceptions to be disabled while the fault is handled.
+	; Now that we have saved the context we return from exception hence
+	; exceptions get re-enable
+
+	FAKE_RET_FROM_EXCPN  r9
+
+	bl  do_page_fault
+	b   ret_from_exception
+
+ARC_EXIT EV_TLBMissD
+
+ARC_ENTRY EV_TLBMissB   ; Bogus entry to measure sz of DTLBMiss hdlr