220 files changed, 5387 insertions, 3500 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 651d5237c155..4471a416c274 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -60,7 +60,6 @@ ata *);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
-	long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
@@ -88,7 +87,6 @@ getxattr:	no
 listxattr:	no
 removexattr:	yes
 truncate_range:	yes
-fallocate:	no
 fiemap:		no
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
@@ -437,6 +435,7 @@ prototypes:
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
 			size_t, unsigned int);
 	int (*setlease)(struct file *, long, struct file_lock **);
+	long (*fallocate)(struct file *, int, loff_t, loff_t);
 };
 
 locking rules:
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 943fe6930f77..fc95ee1bcf6f 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -68,6 +68,9 @@ config GENERIC_IOMAP
 	bool
 	default n
 
+config GENERIC_HARDIRQS_NO__DO_IRQ
+	def_bool y
+
 config GENERIC_HARDIRQS
 	bool
 	default y
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index eda9b909aa05..56ff96501350 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -37,8 +37,9 @@
  */
 extern inline void __set_hae(unsigned long new_hae)
 {
-	unsigned long flags;
-	local_irq_save(flags);
+	unsigned long flags = swpipl(IPL_MAX);
+
+	barrier();
 
 	alpha_mv.hae_cache = new_hae;
 	*alpha_mv.hae_register = new_hae;
@@ -46,7 +47,8 @@ extern inline void __set_hae(unsigned long new_hae)
 	/* Re-read to make sure it was written.  */
 	new_hae = *alpha_mv.hae_register;
 
-	local_irq_restore(flags);
+	setipl(flags);
+	barrier();
 }
 
 extern inline void set_hae(unsigned long new_hae)
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index 1ee9b5b629b8..9bb7b858ed23 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -3,8 +3,8 @@
 #
 
 extra-y		:= head.o vmlinux.lds
-EXTRA_AFLAGS	:= $(KBUILD_CFLAGS)
-EXTRA_CFLAGS	:= -Werror -Wno-sign-compare
+asflags-y	:= $(KBUILD_CFLAGS)
+ccflags-y	:= -Werror -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index fe912984d9b1..9ab234f48dd8 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -44,10 +44,11 @@ static char irq_user_affinity[NR_IRQS];
 
 int irq_select_affinity(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc[irq];
 	static int last_cpu;
 	int cpu = last_cpu + 1;
 
-	if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
+	if (!desc || !get_irq_desc_chip(desc)->set_affinity || irq_user_affinity[irq])
 		return 1;
 
 	while (!cpu_possible(cpu) ||
@@ -55,8 +56,8 @@ int irq_select_affinity(unsigned int irq)
 		cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
 	last_cpu = cpu;
 
-	cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
+	cpumask_copy(desc->affinity, cpumask_of(cpu));
+	get_irq_desc_chip(desc)->set_affinity(irq, cpumask_of(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
@@ -67,6 +68,7 @@ show_interrupts(struct seq_file *p, void *v)
 	int j;
 	int irq = *(loff_t *) v;
 	struct irqaction * action;
+	struct irq_desc *desc;
 	unsigned long flags;
 
 #ifdef CONFIG_SMP
@@ -79,8 +81,13 @@ show_interrupts(struct seq_file *p, void *v)
 #endif
 
 	if (irq < ACTUAL_NR_IRQS) {
-		raw_spin_lock_irqsave(&irq_desc[irq].lock, flags);
-		action = irq_desc[irq].action;
+		desc = irq_to_desc(irq);
+
+		if (!desc)
+			return 0;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+		action = desc->action;
 		if (!action) 
 			goto unlock;
 		seq_printf(p, "%3d: ", irq);
@@ -90,7 +97,7 @@ show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j));
 #endif
-		seq_printf(p, " %14s", irq_desc[irq].chip->name);
+		seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
 		seq_printf(p, "  %c%s",
 			(action->flags & IRQF_DISABLED)?'+':' ',
 			action->name);
@@ -103,7 +110,7 @@ show_interrupts(struct seq_file *p, void *v)
 
 		seq_putc(p, '\n');
 unlock:
-		raw_spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	} else if (irq == ACTUAL_NR_IRQS) {
 #ifdef CONFIG_SMP
 		seq_puts(p, "IPI: ");
@@ -142,8 +149,10 @@ handle_irq(int irq)
 	 * handled by some other CPU. (or is disabled)
 	 */
 	static unsigned int illegal_count=0;
+	struct irq_desc *desc = irq_to_desc(irq);
 	
-	if ((unsigned) irq > ACTUAL_NR_IRQS && illegal_count < MAX_ILLEGAL_IRQS ) {
+	if (!desc || ((unsigned) irq > ACTUAL_NR_IRQS &&
+	    illegal_count < MAX_ILLEGAL_IRQS)) {
 		irq_err_count++;
 		illegal_count++;
 		printk(KERN_CRIT "device_interrupt: invalid interrupt %d\n",
@@ -151,14 +160,14 @@ handle_irq(int irq)
 		return;
 	}
 
-	irq_enter();
 	/*
-	 * __do_IRQ() must be called with IPL_MAX. Note that we do not
+	 * From here we must proceed with IPL_MAX. Note that we do not
 	 * explicitly enable interrupts afterwards - some MILO PALcode
 	 * (namely LX164 one) seems to have severe problems with RTI
 	 * at IPL 0.
 	 */
 	local_irq_disable();
-	__do_IRQ(irq);
+	irq_enter();
+	generic_handle_irq_desc(irq, desc);
 	irq_exit();
 }
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 4c8bb374eb0a..2d0679b60939 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -219,31 +219,23 @@ process_mcheck_info(unsigned long vector, unsigned long la_ptr,
  * processed by PALcode, and comes in via entInt vector 1.
  */
 
-static void rtc_enable_disable(unsigned int irq) { }
-static unsigned int rtc_startup(unsigned int irq) { return 0; }
-
 struct irqaction timer_irqaction = {
 	.handler	= timer_interrupt,
 	.flags		= IRQF_DISABLED,
 	.name		= "timer",
 };
 
-static struct irq_chip rtc_irq_type = {
-	.name		= "RTC",
-	.startup	= rtc_startup,
-	.shutdown	= rtc_enable_disable,
-	.enable		= rtc_enable_disable,
-	.disable	= rtc_enable_disable,
-	.ack		= rtc_enable_disable,
-	.end		= rtc_enable_disable,
-};
-
 void __init
 init_rtc_irq(void)
 {
-	irq_desc[RTC_IRQ].status = IRQ_DISABLED;
-	irq_desc[RTC_IRQ].chip = &rtc_irq_type;
-	setup_irq(RTC_IRQ, &timer_irqaction);
+	struct irq_desc *desc = irq_to_desc(RTC_IRQ);
+
+	if (desc) {
+		desc->status |= IRQ_DISABLED;
+		set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
+			handle_simple_irq, "RTC");
+		setup_irq(RTC_IRQ, &timer_irqaction);
+	}
 }
 
 /* Dummy irqactions.  */
diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c
index 83a9ac280890..956ea0ed1694 100644
--- a/arch/alpha/kernel/irq_i8259.c
+++ b/arch/alpha/kernel/irq_i8259.c
@@ -69,28 +69,11 @@ i8259a_mask_and_ack_irq(unsigned int irq)
 	spin_unlock(&i8259_irq_lock);
 }
 
-unsigned int
-i8259a_startup_irq(unsigned int irq)
-{
-	i8259a_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-void
-i8259a_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		i8259a_enable_irq(irq);
-}
-
 struct irq_chip i8259a_irq_type = {
 	.name		= "XT-PIC",
-	.startup	= i8259a_startup_irq,
-	.shutdown	= i8259a_disable_irq,
-	.enable		= i8259a_enable_irq,
-	.disable	= i8259a_disable_irq,
-	.ack		= i8259a_mask_and_ack_irq,
-	.end		= i8259a_end_irq,
+	.unmask		= i8259a_enable_irq,
+	.mask		= i8259a_disable_irq,
+	.mask_ack	= i8259a_mask_and_ack_irq,
 };
 
 void __init
@@ -107,8 +90,7 @@ init_i8259a_irqs(void)
 	outb(0xff, 0xA1);	/* mask all of 8259A-2 */
 
 	for (i = 0; i < 16; i++) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].chip = &i8259a_irq_type;
+		set_irq_chip_and_handler(i, &i8259a_irq_type, handle_level_irq);
 	}
 
 	setup_irq(2, &cascade);
diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c
index 989ce46a0cf3..2863458c853e 100644
--- a/arch/alpha/kernel/irq_pyxis.c
+++ b/arch/alpha/kernel/irq_pyxis.c
@@ -40,20 +40,6 @@ pyxis_disable_irq(unsigned int irq)
 	pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16)));
 }
 
-static unsigned int
-pyxis_startup_irq(unsigned int irq)
-{
-	pyxis_enable_irq(irq);
-	return 0;
-}
-
-static void
-pyxis_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		pyxis_enable_irq(irq);
-}
-
 static void
 pyxis_mask_and_ack_irq(unsigned int irq)
 {
@@ -72,12 +58,9 @@ pyxis_mask_and_ack_irq(unsigned int irq)
 
 static struct irq_chip pyxis_irq_type = {
 	.name		= "PYXIS",
-	.startup	= pyxis_startup_irq,
-	.shutdown	= pyxis_disable_irq,
-	.enable		= pyxis_enable_irq,
-	.disable	= pyxis_disable_irq,
-	.ack		= pyxis_mask_and_ack_irq,
-	.end		= pyxis_end_irq,
+	.mask_ack	= pyxis_mask_and_ack_irq,
+	.mask		= pyxis_disable_irq,
+	.unmask		= pyxis_enable_irq,
 };
 
 void 
@@ -119,8 +102,8 @@ init_pyxis_irqs(unsigned long ignore_mask)
 	for (i = 16; i < 48; ++i) {
 		if ((ignore_mask >> i) & 1)
 			continue;
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &pyxis_irq_type;
+		set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq);
+		irq_to_desc(i)->status |= IRQ_LEVEL;
 	}
 
 	setup_irq(16+7, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c
index d63e93e1e8bf..0e57e828b413 100644
--- a/arch/alpha/kernel/irq_srm.c
+++ b/arch/alpha/kernel/irq_srm.c
@@ -33,29 +33,12 @@ srm_disable_irq(unsigned int irq)
 	spin_unlock(&srm_irq_lock);
 }
 
-static unsigned int
-srm_startup_irq(unsigned int irq)
-{
-	srm_enable_irq(irq);
-	return 0;
-}
-
-static void
-srm_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		srm_enable_irq(irq);
-}
-
 /* Handle interrupts from the SRM, assuming no additional weirdness.  */
 static struct irq_chip srm_irq_type = {
 	.name		= "SRM",
-	.startup	= srm_startup_irq,
-	.shutdown	= srm_disable_irq,
-	.enable		= srm_enable_irq,
-	.disable	= srm_disable_irq,
-	.ack		= srm_disable_irq,
-	.end		= srm_end_irq,
+	.unmask		= srm_enable_irq,
+	.mask		= srm_disable_irq,
+	.mask_ack	= srm_disable_irq,
 };
 
 void __init
@@ -68,8 +51,8 @@ init_srm_irqs(long max, unsigned long ignore_mask)
 	for (i = 16; i < max; ++i) {
 		if (i < 64 && ((ignore_mask >> i) & 1))
 			continue;
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &srm_irq_type;
+		set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq);
+		irq_to_desc(i)->status |= IRQ_LEVEL;
 	}
 }
 
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 547e8b84b2f7..fe698b5045e9 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -951,9 +951,6 @@ SYSCALL_DEFINE2(osf_utimes, const char __user *, filename,
 	return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0);
 }
 
-#define MAX_SELECT_SECONDS \
-	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
-
 SYSCALL_DEFINE5(osf_select, int, n, fd_set __user *, inp, fd_set __user *, outp,
 		fd_set __user *, exp, struct timeval32 __user *, tvp)
 {
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index 20a30b8b9655..7bef61768236 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -65,13 +65,6 @@ alcor_mask_and_ack_irq(unsigned int irq)
 	*(vuip)GRU_INT_CLEAR = 0; mb();
 }
 
-static unsigned int
-alcor_startup_irq(unsigned int irq)
-{
-	alcor_enable_irq(irq);
-	return 0;
-}
-
 static void
 alcor_isa_mask_and_ack_irq(unsigned int irq)
 {
@@ -82,21 +75,11 @@ alcor_isa_mask_and_ack_irq(unsigned int irq)
 	*(vuip)GRU_INT_CLEAR = 0; mb();
 }
 
-static void
-alcor_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		alcor_enable_irq(irq);
-}
-
 static struct irq_chip alcor_irq_type = {
 	.name		= "ALCOR",
-	.startup	= alcor_startup_irq,
-	.shutdown	= alcor_disable_irq,
-	.enable		= alcor_enable_irq,
-	.disable	= alcor_disable_irq,
-	.ack		= alcor_mask_and_ack_irq,
-	.end		= alcor_end_irq,
+	.unmask		= alcor_enable_irq,
+	.mask		= alcor_disable_irq,
+	.mask_ack	= alcor_mask_and_ack_irq,
 };
 
 static void
@@ -142,8 +125,8 @@ alcor_init_irq(void)
 		   on while IRQ probing.  */
 		if (i >= 16+20 && i <= 16+30)
 			continue;
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &alcor_irq_type;
+		set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq);
+		irq_to_desc(i)->status |= IRQ_LEVEL;
 	}
 	i8259a_irq_type.ack = alcor_isa_mask_and_ack_irq;
 
diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c
index 14c8898d19ec..b0c916493aea 100644
--- a/arch/alpha/kernel/sys_cabriolet.c
+++ b/arch/alpha/kernel/sys_cabriolet.c
@@ -57,28 +57,11 @@ cabriolet_disable_irq(unsigned int irq)
 	cabriolet_update_irq_hw(irq, cached_irq_mask |= 1UL << irq);
 }
 
-static unsigned int
-cabriolet_startup_irq(unsigned int irq)
-{ 
-	cabriolet_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-cabriolet_end_irq(unsigned int irq)
-{ 
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		cabriolet_enable_irq(irq);
-}
-
 static struct irq_chip cabriolet_irq_type = {
 	.name		= "CABRIOLET",
-	.startup	= cabriolet_startup_irq,
-	.shutdown	= cabriolet_disable_irq,
-	.enable		= cabriolet_enable_irq,
-	.disable	= cabriolet_disable_irq,
-	.ack		= cabriolet_disable_irq,
-	.end		= cabriolet_end_irq,
+	.unmask		= cabriolet_enable_irq,
+	.mask		= cabriolet_disable_irq,
+	.mask_ack	= cabriolet_disable_irq,
 };
 
 static void 
@@ -122,8 +105,9 @@ common_init_irq(void (*srm_dev_int)(unsigned long v))
 		outb(0xff, 0x806);
 
 		for (i = 16; i < 35; ++i) {
-			irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-			irq_desc[i].chip = &cabriolet_irq_type;
+			set_irq_chip_and_handler(i, &cabriolet_irq_type,
+				handle_level_irq);
+			irq_to_desc(i)->status |= IRQ_LEVEL;
 		}
 	}
 
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 4026502ab707..edad5f759ccd 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -115,20 +115,6 @@ dp264_disable_irq(unsigned int irq)
 	spin_unlock(&dp264_irq_lock);
 }
 
-static unsigned int
-dp264_startup_irq(unsigned int irq)
-{ 
-	dp264_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-dp264_end_irq(unsigned int irq)
-{ 
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		dp264_enable_irq(irq);
-}
-
 static void
 clipper_enable_irq(unsigned int irq)
 {
@@ -147,20 +133,6 @@ clipper_disable_irq(unsigned int irq)
 	spin_unlock(&dp264_irq_lock);
 }
 
-static unsigned int
-clipper_startup_irq(unsigned int irq)
-{ 
-	clipper_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-clipper_end_irq(unsigned int irq)
-{ 
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		clipper_enable_irq(irq);
-}
-
 static void
 cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 {
@@ -200,23 +172,17 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 
 static struct irq_chip dp264_irq_type = {
 	.name		= "DP264",
-	.startup	= dp264_startup_irq,
-	.shutdown	= dp264_disable_irq,
-	.enable		= dp264_enable_irq,
-	.disable	= dp264_disable_irq,
-	.ack		= dp264_disable_irq,
-	.end		= dp264_end_irq,
+	.unmask		= dp264_enable_irq,
+	.mask		= dp264_disable_irq,
+	.mask_ack	= dp264_disable_irq,
 	.set_affinity	= dp264_set_affinity,
 };
 
 static struct irq_chip clipper_irq_type = {
 	.name		= "CLIPPER",
-	.startup	= clipper_startup_irq,
-	.shutdown	= clipper_disable_irq,
-	.enable		= clipper_enable_irq,
-	.disable	= clipper_disable_irq,
-	.ack		= clipper_disable_irq,
-	.end		= clipper_end_irq,
+	.unmask		= clipper_enable_irq,
+	.mask		= clipper_disable_irq,
+	.mask_ack	= clipper_disable_irq,
 	.set_affinity	= clipper_set_affinity,
 };
 
@@ -302,8 +268,8 @@ init_tsunami_irqs(struct irq_chip * ops, int imin, int imax)
 {
 	long i;
 	for (i = imin; i <= imax; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = ops;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, ops, handle_level_irq);
 	}
 }
 
diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c
index df2090ce5e7f..ae5f29d127b0 100644
--- a/arch/alpha/kernel/sys_eb64p.c
+++ b/arch/alpha/kernel/sys_eb64p.c
@@ -55,28 +55,11 @@ eb64p_disable_irq(unsigned int irq)
 	eb64p_update_irq_hw(irq, cached_irq_mask |= 1 << irq);
 }
 
-static unsigned int
-eb64p_startup_irq(unsigned int irq)
-{
-	eb64p_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-eb64p_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		eb64p_enable_irq(irq);
-}
-
 static struct irq_chip eb64p_irq_type = {
 	.name		= "EB64P",
-	.startup	= eb64p_startup_irq,
-	.shutdown	= eb64p_disable_irq,
-	.enable		= eb64p_enable_irq,
-	.disable	= eb64p_disable_irq,
-	.ack		= eb64p_disable_irq,
-	.end		= eb64p_end_irq,
+	.unmask		= eb64p_enable_irq,
+	.mask		= eb64p_disable_irq,
+	.mask_ack	= eb64p_disable_irq,
 };
 
 static void 
@@ -135,8 +118,8 @@ eb64p_init_irq(void)
 	init_i8259a_irqs();
 
 	for (i = 16; i < 32; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &eb64p_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq);
 	}		
 
 	common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c
index 3ca1dbcf4044..1121bc5c6c6c 100644
--- a/arch/alpha/kernel/sys_eiger.c
+++ b/arch/alpha/kernel/sys_eiger.c
@@ -66,28 +66,11 @@ eiger_disable_irq(unsigned int irq)
 	eiger_update_irq_hw(irq, mask);
 }
 
-static unsigned int
-eiger_startup_irq(unsigned int irq)
-{
-	eiger_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-eiger_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		eiger_enable_irq(irq);
-}
-
 static struct irq_chip eiger_irq_type = {
 	.name		= "EIGER",
-	.startup	= eiger_startup_irq,
-	.shutdown	= eiger_disable_irq,
-	.enable		= eiger_enable_irq,
-	.disable	= eiger_disable_irq,
-	.ack		= eiger_disable_irq,
-	.end		= eiger_end_irq,
+	.unmask		= eiger_enable_irq,
+	.mask		= eiger_disable_irq,
+	.mask_ack	= eiger_disable_irq,
 };
 
 static void
@@ -153,8 +136,8 @@ eiger_init_irq(void)
 	init_i8259a_irqs();
 
 	for (i = 16; i < 128; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &eiger_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq);
 	}
 }
 
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 7a7ae36fff91..34f55e03d331 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -62,30 +62,6 @@
  * world.
  */
 
-static unsigned int
-jensen_local_startup(unsigned int irq)
-{
-	/* the parport is really hw IRQ 1, silly Jensen.  */
-	if (irq == 7)
-		i8259a_startup_irq(1);
-	else
-		/*
-		 * For all true local interrupts, set the flag that prevents
-		 * the IPL from being dropped during handler processing.
-		 */
-		if (irq_desc[irq].action)
-			irq_desc[irq].action->flags |= IRQF_DISABLED;
-	return 0;
-}
-
-static void
-jensen_local_shutdown(unsigned int irq)
-{
-	/* the parport is really hw IRQ 1, silly Jensen.  */
-	if (irq == 7)
-		i8259a_disable_irq(1);
-}
-
 static void
 jensen_local_enable(unsigned int irq)
 {
@@ -103,29 +79,18 @@ jensen_local_disable(unsigned int irq)
 }
 
 static void
-jensen_local_ack(unsigned int irq)
+jensen_local_mask_ack(unsigned int irq)
 {
 	/* the parport is really hw IRQ 1, silly Jensen.  */
 	if (irq == 7)
 		i8259a_mask_and_ack_irq(1);
 }
 
-static void
-jensen_local_end(unsigned int irq)
-{
-	/* the parport is really hw IRQ 1, silly Jensen.  */
-	if (irq == 7)
-		i8259a_end_irq(1);
-}
-
 static struct irq_chip jensen_local_irq_type = {
 	.name		= "LOCAL",
-	.startup	= jensen_local_startup,
-	.shutdown	= jensen_local_shutdown,
-	.enable		= jensen_local_enable,
-	.disable	= jensen_local_disable,
-	.ack		= jensen_local_ack,
-	.end		= jensen_local_end,
+	.unmask		= jensen_local_enable,
+	.mask		= jensen_local_disable,
+	.mask_ack	= jensen_local_mask_ack,
 };
 
 static void 
@@ -158,7 +123,7 @@ jensen_device_interrupt(unsigned long vector)
 	}
 
 	/* If there is no handler yet... */
-	if (irq_desc[irq].action == NULL) {
+	if (!irq_has_action(irq)) {
 	    /* If it is a local interrupt that cannot be masked... */
 	    if (vector >= 0x900)
 	    {
@@ -206,11 +171,11 @@ jensen_init_irq(void)
 {
 	init_i8259a_irqs();
 
-	irq_desc[1].chip = &jensen_local_irq_type;
-	irq_desc[4].chip = &jensen_local_irq_type;
-	irq_desc[3].chip = &jensen_local_irq_type;
-	irq_desc[7].chip = &jensen_local_irq_type;
-	irq_desc[9].chip = &jensen_local_irq_type;
+	set_irq_chip_and_handler(1, &jensen_local_irq_type, handle_level_irq);
+	set_irq_chip_and_handler(4, &jensen_local_irq_type, handle_level_irq);
+	set_irq_chip_and_handler(3, &jensen_local_irq_type, handle_level_irq);
+	set_irq_chip_and_handler(7, &jensen_local_irq_type, handle_level_irq);
+	set_irq_chip_and_handler(9, &jensen_local_irq_type, handle_level_irq);
 
 	common_init_isa_dma();
 }
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 0bb3b5c4f693..2bfc9f1b1ddc 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -143,20 +143,6 @@ io7_disable_irq(unsigned int irq)
 	spin_unlock(&io7->irq_lock);
 }
 
-static unsigned int
-io7_startup_irq(unsigned int irq)
-{
-	io7_enable_irq(irq);
-	return 0;	/* never anything pending */
-}
-
-static void
-io7_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		io7_enable_irq(irq);
-}
-
 static void
 marvel_irq_noop(unsigned int irq) 
 { 
@@ -171,32 +157,22 @@ marvel_irq_noop_return(unsigned int irq)
 
 static struct irq_chip marvel_legacy_irq_type = {
 	.name		= "LEGACY",
-	.startup	= marvel_irq_noop_return,
-	.shutdown	= marvel_irq_noop,
-	.enable		= marvel_irq_noop,
-	.disable	= marvel_irq_noop,
-	.ack		= marvel_irq_noop,
-	.end		= marvel_irq_noop,
+	.mask		= marvel_irq_noop,
+	.unmask		= marvel_irq_noop,
 };
 
 static struct irq_chip io7_lsi_irq_type = {
 	.name		= "LSI",
-	.startup	= io7_startup_irq,
-	.shutdown	= io7_disable_irq,
-	.enable		= io7_enable_irq,
-	.disable	= io7_disable_irq,
-	.ack		= io7_disable_irq,
-	.end		= io7_end_irq,
+	.unmask		= io7_enable_irq,
+	.mask		= io7_disable_irq,
+	.mask_ack	= io7_disable_irq,
 };
 
 static struct irq_chip io7_msi_irq_type = {
 	.name		= "MSI",
-	.startup	= io7_startup_irq,
-	.shutdown	= io7_disable_irq,
-	.enable		= io7_enable_irq,
-	.disable	= io7_disable_irq,
+	.unmask		= io7_enable_irq,
+	.mask		= io7_disable_irq,
 	.ack		= marvel_irq_noop,
-	.end		= io7_end_irq,
 };
 
 static void
@@ -304,8 +280,8 @@ init_io7_irqs(struct io7 *io7,
 
 	/* Set up the lsi irqs.  */
 	for (i = 0; i < 128; ++i) {
-		irq_desc[base + i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[base + i].chip = lsi_ops;
+		irq_to_desc(base + i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq);
 	}
 
 	/* Disable the implemented irqs in hardware.  */
@@ -318,8 +294,8 @@ init_io7_irqs(struct io7 *io7,
 
 	/* Set up the msi irqs.  */
 	for (i = 128; i < (128 + 512); ++i) {
-		irq_desc[base + i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[base + i].chip = msi_ops;
+		irq_to_desc(base + i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq);
 	}
 
 	for (i = 0; i < 16; ++i)
@@ -336,8 +312,8 @@ marvel_init_irq(void)
 
 	/* Reserve the legacy irqs.  */
 	for (i = 0; i < 16; ++i) {
-		irq_desc[i].status = IRQ_DISABLED;
-		irq_desc[i].chip = &marvel_legacy_irq_type;
+		set_irq_chip_and_handler(i, &marvel_legacy_irq_type,
+			handle_level_irq);
 	}
 
 	/* Init the io7 irqs.  */
diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c
index ee8865169811..bcc1639e8efb 100644
--- a/arch/alpha/kernel/sys_mikasa.c
+++ b/arch/alpha/kernel/sys_mikasa.c
@@ -54,28 +54,11 @@ mikasa_disable_irq(unsigned int irq)
 	mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (irq - 16)));
 }
 
-static unsigned int
-mikasa_startup_irq(unsigned int irq)
-{
-	mikasa_enable_irq(irq);
-	return 0;
-}
-
-static void
-mikasa_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		mikasa_enable_irq(irq);
-}
-
 static struct irq_chip mikasa_irq_type = {
 	.name		= "MIKASA",
-	.startup	= mikasa_startup_irq,
-	.shutdown	= mikasa_disable_irq,
-	.enable		= mikasa_enable_irq,
-	.disable	= mikasa_disable_irq,
-	.ack		= mikasa_disable_irq,
-	.end		= mikasa_end_irq,
+	.unmask		= mikasa_enable_irq,
+	.mask		= mikasa_disable_irq,
+	.mask_ack	= mikasa_disable_irq,
 };
 
 static void 
@@ -115,8 +98,8 @@ mikasa_init_irq(void)
 	mikasa_update_irq_hw(0);
 
 	for (i = 16; i < 32; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &mikasa_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq);
 	}
 
 	init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c
index 86503fe73a88..e88f4ae1260e 100644
--- a/arch/alpha/kernel/sys_noritake.c
+++ b/arch/alpha/kernel/sys_noritake.c
@@ -59,28 +59,11 @@ noritake_disable_irq(unsigned int irq)
 	noritake_update_irq_hw(irq, cached_irq_mask &= ~(1 << (irq - 16)));
 }
 
-static unsigned int
-noritake_startup_irq(unsigned int irq)
-{
-	noritake_enable_irq(irq);
-	return 0;
-}
-
-static void
-noritake_end_irq(unsigned int irq)
-{
-        if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-                noritake_enable_irq(irq);
-}
-
 static struct irq_chip noritake_irq_type = {
 	.name		= "NORITAKE",
-	.startup	= noritake_startup_irq,
-	.shutdown	= noritake_disable_irq,
-	.enable		= noritake_enable_irq,
-	.disable	= noritake_disable_irq,
-	.ack		= noritake_disable_irq,
-	.end		= noritake_end_irq,
+	.unmask		= noritake_enable_irq,
+	.mask		= noritake_disable_irq,
+	.mask_ack	= noritake_disable_irq,
 };
 
 static void 
@@ -144,8 +127,8 @@ noritake_init_irq(void)
 	outw(0, 0x54c);
 
 	for (i = 16; i < 48; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &noritake_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq);
 	}
 
 	init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c
index 26c322bf89ee..6a51364dd1cc 100644
--- a/arch/alpha/kernel/sys_rawhide.c
+++ b/arch/alpha/kernel/sys_rawhide.c
@@ -121,28 +121,11 @@ rawhide_mask_and_ack_irq(unsigned int irq)
 	spin_unlock(&rawhide_irq_lock);
 }
 
-static unsigned int
-rawhide_startup_irq(unsigned int irq)
-{
-	rawhide_enable_irq(irq);
-	return 0;
-}
-
-static void
-rawhide_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		rawhide_enable_irq(irq);
-}
-
 static struct irq_chip rawhide_irq_type = {
 	.name		= "RAWHIDE",
-	.startup	= rawhide_startup_irq,
-	.shutdown	= rawhide_disable_irq,
-	.enable		= rawhide_enable_irq,
-	.disable	= rawhide_disable_irq,
-	.ack		= rawhide_mask_and_ack_irq,
-	.end		= rawhide_end_irq,
+	.unmask		= rawhide_enable_irq,
+	.mask		= rawhide_disable_irq,
+	.mask_ack	= rawhide_mask_and_ack_irq,
 };
 
 static void 
@@ -194,8 +177,8 @@ rawhide_init_irq(void)
 	}
 
 	for (i = 16; i < 128; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &rawhide_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq);
 	}
 
 	init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c
index be161129eab9..89e7e37ec84c 100644
--- a/arch/alpha/kernel/sys_rx164.c
+++ b/arch/alpha/kernel/sys_rx164.c
@@ -58,28 +58,11 @@ rx164_disable_irq(unsigned int irq)
 	rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16)));
 }
 
-static unsigned int
-rx164_startup_irq(unsigned int irq)
-{
-	rx164_enable_irq(irq);
-	return 0;
-}
-
-static void
-rx164_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		rx164_enable_irq(irq);
-}
-
 static struct irq_chip rx164_irq_type = {
 	.name		= "RX164",
-	.startup	= rx164_startup_irq,
-	.shutdown	= rx164_disable_irq,
-	.enable		= rx164_enable_irq,
-	.disable	= rx164_disable_irq,
-	.ack		= rx164_disable_irq,
-	.end		= rx164_end_irq,
+	.unmask		= rx164_enable_irq,
+	.mask		= rx164_disable_irq,
+	.mask_ack	= rx164_disable_irq,
 };
 
 static void 
@@ -116,8 +99,8 @@ rx164_init_irq(void)
 
 	rx164_update_irq_hw(0);
 	for (i = 16; i < 40; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &rx164_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq);
 	}
 
 	init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index b2abe27a23cf..5c4423d1b06c 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -474,20 +474,6 @@ sable_lynx_disable_irq(unsigned int irq)
 #endif
 }
 
-static unsigned int
-sable_lynx_startup_irq(unsigned int irq)
-{
-	sable_lynx_enable_irq(irq);
-	return 0;
-}
-
-static void
-sable_lynx_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		sable_lynx_enable_irq(irq);
-}
-
 static void
 sable_lynx_mask_and_ack_irq(unsigned int irq)
 {
@@ -503,12 +489,9 @@ sable_lynx_mask_and_ack_irq(unsigned int irq)
 
 static struct irq_chip sable_lynx_irq_type = {
 	.name		= "SABLE/LYNX",
-	.startup	= sable_lynx_startup_irq,
-	.shutdown	= sable_lynx_disable_irq,
-	.enable		= sable_lynx_enable_irq,
-	.disable	= sable_lynx_disable_irq,
-	.ack		= sable_lynx_mask_and_ack_irq,
-	.end		= sable_lynx_end_irq,
+	.unmask		= sable_lynx_enable_irq,
+	.mask		= sable_lynx_disable_irq,
+	.mask_ack	= sable_lynx_mask_and_ack_irq,
 };
 
 static void 
@@ -535,8 +518,9 @@ sable_lynx_init_irq(int nr_of_irqs)
 	long i;
 
 	for (i = 0; i < nr_of_irqs; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &sable_lynx_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &sable_lynx_irq_type,
+			handle_level_irq);
 	}
 
 	common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c
index 4da596b6adbb..f8a1e8a862fb 100644
--- a/arch/alpha/kernel/sys_takara.c
+++ b/arch/alpha/kernel/sys_takara.c
@@ -60,28 +60,11 @@ takara_disable_irq(unsigned int irq)
 	takara_update_irq_hw(irq, mask);
 }
 
-static unsigned int
-takara_startup_irq(unsigned int irq)
-{
-	takara_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-takara_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		takara_enable_irq(irq);
-}
-
 static struct irq_chip takara_irq_type = {
 	.name		= "TAKARA",
-	.startup	= takara_startup_irq,
-	.shutdown	= takara_disable_irq,
-	.enable		= takara_enable_irq,
-	.disable	= takara_disable_irq,
-	.ack		= takara_disable_irq,
-	.end		= takara_end_irq,
+	.unmask		= takara_enable_irq,
+	.mask		= takara_disable_irq,
+	.mask_ack	= takara_disable_irq,
 };
 
 static void
@@ -153,8 +136,8 @@ takara_init_irq(void)
 		takara_update_irq_hw(i, -1);
 
 	for (i = 16; i < 128; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = &takara_irq_type;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq);
 	}
 
 	common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 9008d0f20c53..e02494bf5ef3 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -129,20 +129,6 @@ titan_disable_irq(unsigned int irq)
 	spin_unlock(&titan_irq_lock);
 }
 
-static unsigned int
-titan_startup_irq(unsigned int irq)
-{
-	titan_enable_irq(irq);
-	return 0;	/* never anything pending */
-}
-
-static void
-titan_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		titan_enable_irq(irq);
-}
-
 static void
 titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 {
@@ -189,20 +175,17 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax)
 {
 	long i;
 	for (i = imin; i <= imax; ++i) {
-		irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i].chip = ops;
+		irq_to_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, ops, handle_level_irq);
 	}
 }
 
 static struct irq_chip titan_irq_type = {
-       .name	       = "TITAN",
-       .startup        = titan_startup_irq,
-       .shutdown       = titan_disable_irq,
-       .enable         = titan_enable_irq,
-       .disable        = titan_disable_irq,
-       .ack            = titan_disable_irq,
-       .end            = titan_end_irq,
-       .set_affinity   = titan_set_irq_affinity,
+       .name		= "TITAN",
+       .unmask		= titan_enable_irq,
+       .mask		= titan_disable_irq,
+       .mask_ack	= titan_disable_irq,
+       .set_affinity	= titan_set_irq_affinity,
 };
 
 static irqreturn_t
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index 62fd972e18ef..eec52594d410 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -139,32 +139,11 @@ wildfire_mask_and_ack_irq(unsigned int irq)
 	spin_unlock(&wildfire_irq_lock);
 }
 
-static unsigned int
-wildfire_startup_irq(unsigned int irq)
-{ 
-	wildfire_enable_irq(irq);
-	return 0; /* never anything pending */
-}
-
-static void
-wildfire_end_irq(unsigned int irq)
-{ 
-#if 0
-	if (!irq_desc[irq].action)
-		printk("got irq %d\n", irq);
-#endif
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
-		wildfire_enable_irq(irq);
-}
-
 static struct irq_chip wildfire_irq_type = {
 	.name		= "WILDFIRE",
-	.startup	= wildfire_startup_irq,
-	.shutdown	= wildfire_disable_irq,
-	.enable		= wildfire_enable_irq,
-	.disable	= wildfire_disable_irq,
-	.ack		= wildfire_mask_and_ack_irq,
-	.end		= wildfire_end_irq,
+	.unmask		= wildfire_enable_irq,
+	.mask		= wildfire_disable_irq,
+	.mask_ack	= wildfire_mask_and_ack_irq,
 };
 
 static void __init
@@ -198,15 +177,18 @@ wildfire_init_irq_per_pca(int qbbno, int pcano)
 	for (i = 0; i < 16; ++i) {
 		if (i == 2)
 			continue;
-		irq_desc[i+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i+irq_bias].chip = &wildfire_irq_type;
+		irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
+			handle_level_irq);
 	}
 
-	irq_desc[36+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL;
-	irq_desc[36+irq_bias].chip = &wildfire_irq_type;
+	irq_to_desc(36+irq_bias)->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type,
+		handle_level_irq);
 	for (i = 40; i < 64; ++i) {
-		irq_desc[i+irq_bias].status = IRQ_DISABLED | IRQ_LEVEL;
-		irq_desc[i+irq_bias].chip = &wildfire_irq_type;
+		irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
+			handle_level_irq);
 	}
 
 	setup_irq(32+irq_bias, &isa_enable);	
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 9b72c59c95be..c0a83ab62b78 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -2,8 +2,8 @@
 # Makefile for alpha-specific library files..
 #
 
-EXTRA_AFLAGS := $(KBUILD_CFLAGS)
-EXTRA_CFLAGS := -Werror
+asflags-y := $(KBUILD_CFLAGS)
+ccflags-y := -Werror
 
 # Many of these routines have implementations tuned for ev6.
 # Choose them iff we're targeting ev6 specifically.
diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile
index 359ef087e69e..7f4671995245 100644
--- a/arch/alpha/math-emu/Makefile
+++ b/arch/alpha/math-emu/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the FPU instruction emulation.
 #
 
-EXTRA_CFLAGS := -w
+ccflags-y := -w
 
 obj-$(CONFIG_MATHEMU) += math-emu.o
 
diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
index 09399c5386cb..c993d3f93cf6 100644
--- a/arch/alpha/mm/Makefile
+++ b/arch/alpha/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux alpha-specific parts of the memory manager.
 #
 
-EXTRA_CFLAGS := -Werror
+ccflags-y := -Werror
 
 obj-y	:= init.o fault.o extable.o
 
diff --git a/arch/alpha/oprofile/Makefile b/arch/alpha/oprofile/Makefile
index 4aa56247bdc6..3473de751b03 100644
--- a/arch/alpha/oprofile/Makefile
+++ b/arch/alpha/oprofile/Makefile
@@ -1,4 +1,4 @@
-EXTRA_CFLAGS := -Werror -Wno-sign-compare
+ccflags-y := -Werror -Wno-sign-compare
 
 obj-$(CONFIG_OPROFILE) += oprofile.o
 
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index bd0495a9ac3b..22cc8c8df6cb 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -179,6 +179,22 @@ static struct omap_board_config_kernel ams_delta_config[] = {
 	{ OMAP_TAG_LCD,		&ams_delta_lcd_config },
 };
 
+static struct resource ams_delta_nand_resources[] = {
+	[0] = {
+		.start	= OMAP1_MPUIO_BASE,
+		.end	= OMAP1_MPUIO_BASE +
+				OMAP_MPUIO_IO_CNTL + sizeof(u32) - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+};
+
+static struct platform_device ams_delta_nand_device = {
+	.name	= "ams-delta-nand",
+	.id	= -1,
+	.num_resources	= ARRAY_SIZE(ams_delta_nand_resources),
+	.resource	= ams_delta_nand_resources,
+};
+
 static struct resource ams_delta_kp_resources[] = {
 	[0] = {
 		.start	= INT_KEYBOARD,
@@ -265,6 +281,7 @@ static struct omap1_cam_platform_data ams_delta_camera_platform_data = {
 };
 
 static struct platform_device *ams_delta_devices[] __initdata = {
+	&ams_delta_nand_device,
 	&ams_delta_kp_device,
 	&ams_delta_lcd_device,
 	&ams_delta_led_device,
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 74b62f10d07f..4d6dd4c39b75 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -13,6 +13,14 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 
+/*
+ * Maxium size for a single dma descriptor
+ * Size is limited to 16 bits.
+ * Size is in the units of addr-widths (1,2,4,8 bytes)
+ * Larger transfers will be split up to multiple linked desc
+ */
+#define STEDMA40_MAX_SEG_SIZE 0xFFFF
+
 /* dev types for memcpy */
 #define STEDMA40_DEV_DST_MEMORY (-1)
 #define	STEDMA40_DEV_SRC_MEMORY (-1)
diff --git a/arch/arm/plat-omap/include/plat/onenand.h b/arch/arm/plat-omap/include/plat/onenand.h
index 72f433d7d827..affe87e9ece7 100644
--- a/arch/arm/plat-omap/include/plat/onenand.h
+++ b/arch/arm/plat-omap/include/plat/onenand.h
@@ -23,6 +23,7 @@ struct omap_onenand_platform_data {
 	int                     (*onenand_setup)(void __iomem *, int freq);
 	int			dma_channel;
 	u8			flags;
+	u8			regulator_can_sleep;
 };
 
 #define ONENAND_MAX_PARTITIONS 8
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index c881393f07fd..bceaa5543e39 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -47,9 +47,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	memblock_add(base, size);
 }
 
-u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return memblock_alloc(size, align);
+	return __va(memblock_alloc(size, align));
 }
 
 #ifdef CONFIG_EARLY_PRINTK
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 9dbe58368953..a19811e98a41 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -45,11 +45,9 @@ void __init free_mem_mach(unsigned long addr, unsigned long size)
 	return free_bootmem(addr, size);
 }
 
-u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return virt_to_phys(
-		__alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS))
-		);
+	return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9e3132db718b..7185f0da7dc3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -519,9 +519,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	memblock_add(base, size);
 }
 
-u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return memblock_alloc(size, align);
+	return __va(memblock_alloc(size, align));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index ef138731c0ea..1c28816152fa 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -200,11 +200,16 @@ config PL330_DMA
 	  platform_data for a dma-pl330 device.
 
 config PCH_DMA
-	tristate "Topcliff (Intel EG20T) PCH DMA support"
+	tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH DMA support"
 	depends on PCI && X86
 	select DMA_ENGINE
 	help
-	  Enable support for the Topcliff (Intel EG20T) PCH DMA engine.
+	  Enable support for Intel EG20T PCH DMA engine.
+
+	  This driver also can be used for OKI SEMICONDUCTOR ML7213 IOH(Input/
+	  Output Hub) which is for IVI(In-Vehicle Infotainment) use.
+	  ML7213 is companion chip for Intel Atom E6xx series.
+	  ML7213 is completely compatible for Intel EG20T PCH.
 
 config IMX_SDMA
 	tristate "i.MX SDMA support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index b605cc9ac3a2..297f48b0cba9 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -19,14 +19,14 @@
  * this program; if not, write to the Free Software Foundation, Inc., 59
  * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  *
- * The full GNU General Public License is iin this distribution in the
- * file called COPYING.
+ * The full GNU General Public License is in this distribution in the file
+ * called COPYING.
  *
  * Documentation: ARM DDI 0196G == PL080
- * Documentation: ARM DDI 0218E	== PL081
+ * Documentation: ARM DDI 0218E == PL081
  *
- * PL080 & PL081 both have 16 sets of DMA signals that can be routed to
- * any channel.
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
+ * channel.
  *
  * The PL080 has 8 channels available for simultaneous use, and the PL081
  * has only two channels. So on these DMA controllers the number of channels
@@ -53,7 +53,23 @@
  *
  * ASSUMES default (little) endianness for DMA transfers
  *
- * Only DMAC flow control is implemented
+ * The PL08x has two flow control settings:
+ *  - DMAC flow control: the transfer size defines the number of transfers
+ *    which occur for the current LLI entry, and the DMAC raises TC at the
+ *    end of every LLI entry.  Observed behaviour shows the DMAC listening
+ *    to both the BREQ and SREQ signals (contrary to documented),
+ *    transferring data if either is active.  The LBREQ and LSREQ signals
+ *    are ignored.
+ *
+ *  - Peripheral flow control: the transfer size is ignored (and should be
+ *    zero).  The data is transferred from the current LLI entry, until
+ *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
+ *    will then move to the next LLI entry.
+ *
+ * Only the former works sanely with scatter lists, so we only implement
+ * the DMAC flow control method.  However, peripherals which use the LBREQ
+ * and LSREQ signals (eg, MMCI) are unable to use this mode, which through
+ * these hardware restrictions prevents them from using scatter DMA.
  *
  * Global TODO:
  * - Break out common code from arch/arm/mach-s3c64xx and share
@@ -61,50 +77,39 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/dmapool.h>
-#include <linux/amba/bus.h>
 #include <linux/dmaengine.h>
+#include <linux/amba/bus.h>
 #include <linux/amba/pl08x.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
 #include <asm/hardware/pl080.h>
-#include <asm/dma.h>
-#include <asm/mach/dma.h>
-#include <asm/atomic.h>
-#include <asm/processor.h>
-#include <asm/cacheflush.h>
 
 #define DRIVER_NAME	"pl08xdmac"
 
 /**
- * struct vendor_data - vendor-specific config parameters
- * for PL08x derivates
- * @name: the name of this specific variant
+ * struct vendor_data - vendor-specific config parameters for PL08x derivatives
  * @channels: the number of channels available in this variant
- * @dualmaster: whether this version supports dual AHB masters
- * or not.
+ * @dualmaster: whether this version supports dual AHB masters or not.
  */
 struct vendor_data {
-	char *name;
 	u8 channels;
 	bool dualmaster;
 };
 
 /*
  * PL08X private data structures
- * An LLI struct - see pl08x TRM
- * Note that next uses bit[0] as a bus bit,
- * start & end do not - their bus bit info
- * is in cctl
+ * An LLI struct - see PL08x TRM.  Note that next uses bit[0] as a bus bit,
+ * start & end do not - their bus bit info is in cctl.  Also note that these
+ * are fixed 32-bit quantities.
  */
-struct lli {
-	dma_addr_t src;
-	dma_addr_t dst;
-	dma_addr_t next;
+struct pl08x_lli {
+	u32 src;
+	u32 dst;
+	u32 lli;
 	u32 cctl;
 };
 
@@ -119,6 +124,8 @@ struct lli {
  * @phy_chans: array of data for the physical channels
  * @pool: a pool for the LLI descriptors
  * @pool_ctr: counter of LLIs in the pool
+ * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI fetches
+ * @mem_buses: set to indicate memory transfers on AHB2.
  * @lock: a spinlock for this struct
  */
 struct pl08x_driver_data {
@@ -126,11 +133,13 @@ struct pl08x_driver_data {
 	struct dma_device memcpy;
 	void __iomem *base;
 	struct amba_device *adev;
-	struct vendor_data *vd;
+	const struct vendor_data *vd;
 	struct pl08x_platform_data *pd;
 	struct pl08x_phy_chan *phy_chans;
 	struct dma_pool *pool;
 	int pool_ctr;
+	u8 lli_buses;
+	u8 mem_buses;
 	spinlock_t lock;
 };
 
@@ -152,9 +161,9 @@ struct pl08x_driver_data {
 /* Size (bytes) of each LLI buffer allocated for one transfer */
 # define PL08X_LLI_TSFR_SIZE	0x2000
 
-/* Maximimum times we call dma_pool_alloc on this pool without freeing */
+/* Maximum times we call dma_pool_alloc on this pool without freeing */
 #define PL08X_MAX_ALLOCS	0x40
-#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct lli))
+#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
 #define PL08X_ALIGN		8
 
 static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
@@ -162,6 +171,11 @@ static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
 	return container_of(chan, struct pl08x_dma_chan, chan);
 }
 
+static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct pl08x_txd, tx);
+}
+
 /*
  * Physical channel handling
  */
@@ -177,88 +191,47 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
 
 /*
  * Set the initial DMA register values i.e. those for the first LLI
- * The next lli pointer and the configuration interrupt bit have
- * been set when the LLIs were constructed
+ * The next LLI pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed.  Poke them into the hardware
+ * and start the transfer.
  */
-static void pl08x_set_cregs(struct pl08x_driver_data *pl08x,
-			    struct pl08x_phy_chan *ch)
-{
-	/* Wait for channel inactive */
-	while (pl08x_phy_channel_busy(ch))
-		;
-
-	dev_vdbg(&pl08x->adev->dev,
-		"WRITE channel %d: csrc=%08x, cdst=%08x, "
-		 "cctl=%08x, clli=%08x, ccfg=%08x\n",
-		ch->id,
-		ch->csrc,
-		ch->cdst,
-		ch->cctl,
-		ch->clli,
-		ch->ccfg);
-
-	writel(ch->csrc, ch->base + PL080_CH_SRC_ADDR);
-	writel(ch->cdst, ch->base + PL080_CH_DST_ADDR);
-	writel(ch->clli, ch->base + PL080_CH_LLI);
-	writel(ch->cctl, ch->base + PL080_CH_CONTROL);
-	writel(ch->ccfg, ch->base + PL080_CH_CONFIG);
-}
-
-static inline void pl08x_config_phychan_for_txd(struct pl08x_dma_chan *plchan)
+static void pl08x_start_txd(struct pl08x_dma_chan *plchan,
+	struct pl08x_txd *txd)
 {
-	struct pl08x_channel_data *cd = plchan->cd;
+	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_phy_chan *phychan = plchan->phychan;
-	struct pl08x_txd *txd = plchan->at;
-
-	/* Copy the basic control register calculated at transfer config */
-	phychan->csrc = txd->csrc;
-	phychan->cdst = txd->cdst;
-	phychan->clli = txd->clli;
-	phychan->cctl = txd->cctl;
-
-	/* Assign the signal to the proper control registers */
-	phychan->ccfg = cd->ccfg;
-	phychan->ccfg &= ~PL080_CONFIG_SRC_SEL_MASK;
-	phychan->ccfg &= ~PL080_CONFIG_DST_SEL_MASK;
-	/* If it wasn't set from AMBA, ignore it */
-	if (txd->direction == DMA_TO_DEVICE)
-		/* Select signal as destination */
-		phychan->ccfg |=
-			(phychan->signal << PL080_CONFIG_DST_SEL_SHIFT);
-	else if (txd->direction == DMA_FROM_DEVICE)
-		/* Select signal as source */
-		phychan->ccfg |=
-			(phychan->signal << PL080_CONFIG_SRC_SEL_SHIFT);
-	/* Always enable error interrupts */
-	phychan->ccfg |= PL080_CONFIG_ERR_IRQ_MASK;
-	/* Always enable terminal interrupts */
-	phychan->ccfg |= PL080_CONFIG_TC_IRQ_MASK;
-}
-
-/*
- * Enable the DMA channel
- * Assumes all other configuration bits have been set
- * as desired before this code is called
- */
-static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x,
-				  struct pl08x_phy_chan *ch)
-{
+	struct pl08x_lli *lli = &txd->llis_va[0];
 	u32 val;
 
-	/*
-	 * Do not access config register until channel shows as disabled
-	 */
-	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << ch->id))
-		;
+	plchan->at = txd;
 
-	/*
-	 * Do not access config register until channel shows as inactive
-	 */
-	val = readl(ch->base + PL080_CH_CONFIG);
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(phychan))
+		cpu_relax();
+
+	dev_vdbg(&pl08x->adev->dev,
+		"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+		"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+		phychan->id, lli->src, lli->dst, lli->lli, lli->cctl,
+		txd->ccfg);
+
+	writel(lli->src, phychan->base + PL080_CH_SRC_ADDR);
+	writel(lli->dst, phychan->base + PL080_CH_DST_ADDR);
+	writel(lli->lli, phychan->base + PL080_CH_LLI);
+	writel(lli->cctl, phychan->base + PL080_CH_CONTROL);
+	writel(txd->ccfg, phychan->base + PL080_CH_CONFIG);
+
+	/* Enable the DMA channel */
+	/* Do not access config register until channel shows as disabled */
+	while (readl(pl08x->base + PL080_EN_CHAN) & (1 << phychan->id))
+		cpu_relax();
+
+	/* Do not access config register until channel shows as inactive */
+	val = readl(phychan->base + PL080_CH_CONFIG);
 	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
-		val = readl(ch->base + PL080_CH_CONFIG);
+		val = readl(phychan->base + PL080_CH_CONFIG);
 
-	writel(val | PL080_CONFIG_ENABLE, ch->base + PL080_CH_CONFIG);
+	writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG);
 }
 
 /*
@@ -266,10 +239,8 @@ static void pl08x_enable_phy_chan(struct pl08x_driver_data *pl08x,
  *
  * Disabling individual channels could lose data.
  *
- * Disable the peripheral DMA after disabling the DMAC
- * in order to allow the DMAC FIFO to drain, and
- * hence allow the channel to show inactive
- *
+ * Disable the peripheral DMA after disabling the DMAC in order to allow
+ * the DMAC FIFO to drain, and hence allow the channel to show inactive
  */
 static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
 {
@@ -282,7 +253,7 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
 
 	/* Wait for channel inactive */
 	while (pl08x_phy_channel_busy(ch))
-		;
+		cpu_relax();
 }
 
 static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
@@ -333,54 +304,56 @@ static inline u32 get_bytes_in_cctl(u32 cctl)
 static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 {
 	struct pl08x_phy_chan *ch;
-	struct pl08x_txd *txdi = NULL;
 	struct pl08x_txd *txd;
 	unsigned long flags;
-	u32 bytes = 0;
+	size_t bytes = 0;
 
 	spin_lock_irqsave(&plchan->lock, flags);
-
 	ch = plchan->phychan;
 	txd = plchan->at;
 
 	/*
-	 * Next follow the LLIs to get the number of pending bytes in the
-	 * currently active transaction.
+	 * Follow the LLIs to get the number of remaining
+	 * bytes in the currently active transaction.
 	 */
 	if (ch && txd) {
-		struct lli *llis_va = txd->llis_va;
-		struct lli *llis_bus = (struct lli *) txd->llis_bus;
-		u32 clli = readl(ch->base + PL080_CH_LLI);
+		u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
 
-		/* First get the bytes in the current active LLI */
+		/* First get the remaining bytes in the active transfer */
 		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
 
 		if (clli) {
-			int i = 0;
+			struct pl08x_lli *llis_va = txd->llis_va;
+			dma_addr_t llis_bus = txd->llis_bus;
+			int index;
+
+			BUG_ON(clli < llis_bus || clli >= llis_bus +
+				sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS);
+
+			/*
+			 * Locate the next LLI - as this is an array,
+			 * it's simple maths to find.
+			 */
+			index = (clli - llis_bus) / sizeof(struct pl08x_lli);
 
-			/* Forward to the LLI pointed to by clli */
-			while ((clli != (u32) &(llis_bus[i])) &&
-			       (i < MAX_NUM_TSFR_LLIS))
-				i++;
+			for (; index < MAX_NUM_TSFR_LLIS; index++) {
+				bytes += get_bytes_in_cctl(llis_va[index].cctl);
 
-			while (clli) {
-				bytes += get_bytes_in_cctl(llis_va[i].cctl);
 				/*
-				 * A clli of 0x00000000 will terminate the
-				 * LLI list
+				 * A LLI pointer of 0 terminates the LLI list
 				 */
-				clli = llis_va[i].next;
-				i++;
+				if (!llis_va[index].lli)
+					break;
 			}
 		}
 	}
 
 	/* Sum up all queued transactions */
-	if (!list_empty(&plchan->desc_list)) {
-		list_for_each_entry(txdi, &plchan->desc_list, node) {
+	if (!list_empty(&plchan->pend_list)) {
+		struct pl08x_txd *txdi;
+		list_for_each_entry(txdi, &plchan->pend_list, node) {
 			bytes += txdi->len;
 		}
-
 	}
 
 	spin_unlock_irqrestore(&plchan->lock, flags);
@@ -390,6 +363,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 
 /*
  * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
  */
 static struct pl08x_phy_chan *
 pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
@@ -399,12 +376,6 @@ pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
 	unsigned long flags;
 	int i;
 
-	/*
-	 * Try to locate a physical channel to be used for
-	 * this transfer. If all are taken return NULL and
-	 * the requester will have to cope by using some fallback
-	 * PIO mode or retrying later.
-	 */
 	for (i = 0; i < pl08x->vd->channels; i++) {
 		ch = &pl08x->phy_chans[i];
 
@@ -465,11 +436,11 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
 }
 
 static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
-				  u32 tsize)
+				  size_t tsize)
 {
 	u32 retbits = cctl;
 
-	/* Remove all src, dst and transfersize bits */
+	/* Remove all src, dst and transfer size bits */
 	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
 	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
 	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
@@ -509,95 +480,87 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
 	return retbits;
 }
 
+struct pl08x_lli_build_data {
+	struct pl08x_txd *txd;
+	struct pl08x_driver_data *pl08x;
+	struct pl08x_bus_data srcbus;
+	struct pl08x_bus_data dstbus;
+	size_t remainder;
+};
+
 /*
- * Autoselect a master bus to use for the transfer
- * this prefers the destination bus if both available
- * if fixed address on one bus the other will be chosen
+ * Autoselect a master bus to use for the transfer this prefers the
+ * destination bus if both available if fixed address on one bus the
+ * other will be chosen
  */
-void pl08x_choose_master_bus(struct pl08x_bus_data *src_bus,
-	struct pl08x_bus_data *dst_bus, struct pl08x_bus_data **mbus,
-	struct pl08x_bus_data **sbus, u32 cctl)
+static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
+	struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
 {
 	if (!(cctl & PL080_CONTROL_DST_INCR)) {
-		*mbus = src_bus;
-		*sbus = dst_bus;
+		*mbus = &bd->srcbus;
+		*sbus = &bd->dstbus;
 	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
-		*mbus = dst_bus;
-		*sbus = src_bus;
+		*mbus = &bd->dstbus;
+		*sbus = &bd->srcbus;
 	} else {
-		if (dst_bus->buswidth == 4) {
-			*mbus = dst_bus;
-			*sbus = src_bus;
-		} else if (src_bus->buswidth == 4) {
-			*mbus = src_bus;
-			*sbus = dst_bus;
-		} else if (dst_bus->buswidth == 2) {
-			*mbus = dst_bus;
-			*sbus = src_bus;
-		} else if (src_bus->buswidth == 2) {
-			*mbus = src_bus;
-			*sbus = dst_bus;
+		if (bd->dstbus.buswidth == 4) {
+			*mbus = &bd->dstbus;
+			*sbus = &bd->srcbus;
+		} else if (bd->srcbus.buswidth == 4) {
+			*mbus = &bd->srcbus;
+			*sbus = &bd->dstbus;
+		} else if (bd->dstbus.buswidth == 2) {
+			*mbus = &bd->dstbus;
+			*sbus = &bd->srcbus;
+		} else if (bd->srcbus.buswidth == 2) {
+			*mbus = &bd->srcbus;
+			*sbus = &bd->dstbus;
 		} else {
-			/* src_bus->buswidth == 1 */
-			*mbus = dst_bus;
-			*sbus = src_bus;
+			/* bd->srcbus.buswidth == 1 */
+			*mbus = &bd->dstbus;
+			*sbus = &bd->srcbus;
 		}
 	}
 }
 
 /*
- * Fills in one LLI for a certain transfer descriptor
- * and advance the counter
+ * Fills in one LLI for a certain transfer descriptor and advance the counter
  */
-int pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
-			    struct pl08x_txd *txd, int num_llis, int len,
-			    u32 cctl, u32 *remainder)
+static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
+	int num_llis, int len, u32 cctl)
 {
-	struct lli *llis_va = txd->llis_va;
-	struct lli *llis_bus = (struct lli *) txd->llis_bus;
+	struct pl08x_lli *llis_va = bd->txd->llis_va;
+	dma_addr_t llis_bus = bd->txd->llis_bus;
 
 	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
 
-	llis_va[num_llis].cctl		= cctl;
-	llis_va[num_llis].src		= txd->srcbus.addr;
-	llis_va[num_llis].dst		= txd->dstbus.addr;
-
-	/*
-	 * On versions with dual masters, you can optionally AND on
-	 * PL080_LLI_LM_AHB2 to the LLI to tell the hardware to read
-	 * in new LLIs with that controller, but we always try to
-	 * choose AHB1 to point into memory. The idea is to have AHB2
-	 * fixed on the peripheral and AHB1 messing around in the
-	 * memory. So we don't manipulate this bit currently.
-	 */
-
-	llis_va[num_llis].next =
-		(dma_addr_t)((u32) &(llis_bus[num_llis + 1]));
+	llis_va[num_llis].cctl = cctl;
+	llis_va[num_llis].src = bd->srcbus.addr;
+	llis_va[num_llis].dst = bd->dstbus.addr;
+	llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli);
+	if (bd->pl08x->lli_buses & PL08X_AHB2)
+		llis_va[num_llis].lli |= PL080_LLI_LM_AHB2;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
-		txd->srcbus.addr += len;
+		bd->srcbus.addr += len;
 	if (cctl & PL080_CONTROL_DST_INCR)
-		txd->dstbus.addr += len;
+		bd->dstbus.addr += len;
 
-	*remainder -= len;
+	BUG_ON(bd->remainder < len);
 
-	return num_llis + 1;
+	bd->remainder -= len;
 }
 
 /*
- * Return number of bytes to fill to boundary, or len
+ * Return number of bytes to fill to boundary, or len.
+ * This calculation works for any value of addr.
  */
-static inline u32 pl08x_pre_boundary(u32 addr, u32 len)
+static inline size_t pl08x_pre_boundary(u32 addr, size_t len)
 {
-	u32 boundary;
-
-	boundary = ((addr >> PL08X_BOUNDARY_SHIFT) + 1)
-		<< PL08X_BOUNDARY_SHIFT;
+	size_t boundary_len = PL08X_BOUNDARY_SIZE -
+			(addr & (PL08X_BOUNDARY_SIZE - 1));
 
-	if (boundary < addr + len)
-		return boundary - addr;
-	else
-		return len;
+	return min(boundary_len, len);
 }
 
 /*
@@ -608,20 +571,13 @@ static inline u32 pl08x_pre_boundary(u32 addr, u32 len)
 static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			      struct pl08x_txd *txd)
 {
-	struct pl08x_channel_data *cd = txd->cd;
 	struct pl08x_bus_data *mbus, *sbus;
-	u32 remainder;
+	struct pl08x_lli_build_data bd;
 	int num_llis = 0;
 	u32 cctl;
-	int max_bytes_per_lli;
-	int total_bytes = 0;
-	struct lli *llis_va;
-	struct lli *llis_bus;
-
-	if (!txd) {
-		dev_err(&pl08x->adev->dev, "%s no descriptor\n", __func__);
-		return 0;
-	}
+	size_t max_bytes_per_lli;
+	size_t total_bytes = 0;
+	struct pl08x_lli *llis_va;
 
 	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT,
 				      &txd->llis_bus);
@@ -632,121 +588,79 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 
 	pl08x->pool_ctr++;
 
-	/*
-	 * Initialize bus values for this transfer
-	 * from the passed optimal values
-	 */
-	if (!cd) {
-		dev_err(&pl08x->adev->dev, "%s no channel data\n", __func__);
-		return 0;
-	}
+	/* Get the default CCTL */
+	cctl = txd->cctl;
 
-	/* Get the default CCTL from the platform data */
-	cctl = cd->cctl;
-
-	/*
-	 * On the PL080 we have two bus masters and we
-	 * should select one for source and one for
-	 * destination. We try to use AHB2 for the
-	 * bus which does not increment (typically the
-	 * peripheral) else we just choose something.
-	 */
-	cctl &= ~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
-	if (pl08x->vd->dualmaster) {
-		if (cctl & PL080_CONTROL_SRC_INCR)
-			/* Source increments, use AHB2 for destination */
-			cctl |= PL080_CONTROL_DST_AHB2;
-		else if (cctl & PL080_CONTROL_DST_INCR)
-			/* Destination increments, use AHB2 for source */
-			cctl |= PL080_CONTROL_SRC_AHB2;
-		else
-			/* Just pick something, source AHB1 dest AHB2 */
-			cctl |= PL080_CONTROL_DST_AHB2;
-	}
+	bd.txd = txd;
+	bd.pl08x = pl08x;
+	bd.srcbus.addr = txd->src_addr;
+	bd.dstbus.addr = txd->dst_addr;
 
 	/* Find maximum width of the source bus */
-	txd->srcbus.maxwidth =
+	bd.srcbus.maxwidth =
 		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
 				       PL080_CONTROL_SWIDTH_SHIFT);
 
 	/* Find maximum width of the destination bus */
-	txd->dstbus.maxwidth =
+	bd.dstbus.maxwidth =
 		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
 				       PL080_CONTROL_DWIDTH_SHIFT);
 
 	/* Set up the bus widths to the maximum */
-	txd->srcbus.buswidth = txd->srcbus.maxwidth;
-	txd->dstbus.buswidth = txd->dstbus.maxwidth;
+	bd.srcbus.buswidth = bd.srcbus.maxwidth;
+	bd.dstbus.buswidth = bd.dstbus.maxwidth;
 	dev_vdbg(&pl08x->adev->dev,
 		 "%s source bus is %d bytes wide, dest bus is %d bytes wide\n",
-		 __func__, txd->srcbus.buswidth, txd->dstbus.buswidth);
+		 __func__, bd.srcbus.buswidth, bd.dstbus.buswidth);
 
 
 	/*
 	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
 	 */
-	max_bytes_per_lli = min(txd->srcbus.buswidth, txd->dstbus.buswidth) *
+	max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) *
 		PL080_CONTROL_TRANSFER_SIZE_MASK;
 	dev_vdbg(&pl08x->adev->dev,
-		 "%s max bytes per lli = %d\n",
+		 "%s max bytes per lli = %zu\n",
 		 __func__, max_bytes_per_lli);
 
 	/* We need to count this down to zero */
-	remainder = txd->len;
+	bd.remainder = txd->len;
 	dev_vdbg(&pl08x->adev->dev,
-		 "%s remainder = %d\n",
-		 __func__, remainder);
+		 "%s remainder = %zu\n",
+		 __func__, bd.remainder);
 
 	/*
 	 * Choose bus to align to
 	 * - prefers destination bus if both available
 	 * - if fixed address on one bus chooses other
-	 * - modifies cctl to choose an apropriate master
-	 */
-	pl08x_choose_master_bus(&txd->srcbus, &txd->dstbus,
-				&mbus, &sbus, cctl);
-
-
-	/*
-	 * The lowest bit of the LLI register
-	 * is also used to indicate which master to
-	 * use for reading the LLIs.
 	 */
+	pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
 
 	if (txd->len < mbus->buswidth) {
-		/*
-		 * Less than a bus width available
-		 * - send as single bytes
-		 */
-		while (remainder) {
+		/* Less than a bus width available - send as single bytes */
+		while (bd.remainder) {
 			dev_vdbg(&pl08x->adev->dev,
 				 "%s single byte LLIs for a transfer of "
-				 "less than a bus width (remain %08x)\n",
-				 __func__, remainder);
+				 "less than a bus width (remain 0x%08x)\n",
+				 __func__, bd.remainder);
 			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
-			num_llis =
-				pl08x_fill_lli_for_desc(pl08x, txd, num_llis, 1,
-					cctl, &remainder);
+			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
 			total_bytes++;
 		}
 	} else {
-		/*
-		 *  Make one byte LLIs until master bus is aligned
-		 *  - slave will then be aligned also
-		 */
+		/* Make one byte LLIs until master bus is aligned */
 		while ((mbus->addr) % (mbus->buswidth)) {
 			dev_vdbg(&pl08x->adev->dev,
 				"%s adjustment lli for less than bus width "
-				 "(remain %08x)\n",
-				 __func__, remainder);
+				 "(remain 0x%08x)\n",
+				 __func__, bd.remainder);
 			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
-			num_llis = pl08x_fill_lli_for_desc
-				(pl08x, txd, num_llis, 1, cctl, &remainder);
+			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
 			total_bytes++;
 		}
 
 		/*
-		 *  Master now aligned
+		 * Master now aligned
 		 * - if slave is not then we must set its width down
 		 */
 		if (sbus->addr % sbus->buswidth) {
@@ -761,63 +675,51 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		 * Make largest possible LLIs until less than one bus
 		 * width left
 		 */
-		while (remainder > (mbus->buswidth - 1)) {
-			int lli_len, target_len;
-			int tsize;
-			int odd_bytes;
+		while (bd.remainder > (mbus->buswidth - 1)) {
+			size_t lli_len, target_len, tsize, odd_bytes;
 
 			/*
 			 * If enough left try to send max possible,
 			 * otherwise try to send the remainder
 			 */
-			target_len = remainder;
-			if (remainder > max_bytes_per_lli)
-				target_len = max_bytes_per_lli;
+			target_len = min(bd.remainder, max_bytes_per_lli);
 
 			/*
-			 * Set bus lengths for incrementing busses
-			 * to number of bytes which fill to next memory
-			 * boundary
+			 * Set bus lengths for incrementing buses to the
+			 * number of bytes which fill to next memory boundary,
+			 * limiting on the target length calculated above.
 			 */
 			if (cctl & PL080_CONTROL_SRC_INCR)
-				txd->srcbus.fill_bytes =
-					pl08x_pre_boundary(
-						txd->srcbus.addr,
-						remainder);
+				bd.srcbus.fill_bytes =
+					pl08x_pre_boundary(bd.srcbus.addr,
+						target_len);
 			else
-				txd->srcbus.fill_bytes =
-					max_bytes_per_lli;
+				bd.srcbus.fill_bytes = target_len;
 
 			if (cctl & PL080_CONTROL_DST_INCR)
-				txd->dstbus.fill_bytes =
-					pl08x_pre_boundary(
-						txd->dstbus.addr,
-						remainder);
+				bd.dstbus.fill_bytes =
+					pl08x_pre_boundary(bd.dstbus.addr,
+						target_len);
 			else
-				txd->dstbus.fill_bytes =
-						max_bytes_per_lli;
+				bd.dstbus.fill_bytes = target_len;
 
-			/*
-			 *  Find the nearest
-			 */
-			lli_len	= min(txd->srcbus.fill_bytes,
-				txd->dstbus.fill_bytes);
+			/* Find the nearest */
+			lli_len	= min(bd.srcbus.fill_bytes,
+				      bd.dstbus.fill_bytes);
 
-			BUG_ON(lli_len > remainder);
+			BUG_ON(lli_len > bd.remainder);
 
 			if (lli_len <= 0) {
 				dev_err(&pl08x->adev->dev,
-					"%s lli_len is %d, <= 0\n",
+					"%s lli_len is %zu, <= 0\n",
 						__func__, lli_len);
 				return 0;
 			}
 
 			if (lli_len == target_len) {
 				/*
-				 * Can send what we wanted
-				 */
-				/*
-				 *  Maintain alignment
+				 * Can send what we wanted.
+				 * Maintain alignment
 				 */
 				lli_len	= (lli_len/mbus->buswidth) *
 							mbus->buswidth;
@@ -825,17 +727,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			} else {
 				/*
 				 * So now we know how many bytes to transfer
-				 * to get to the nearest boundary
-				 * The next lli will past the boundary
-				 * - however we may be working to a boundary
-				 *   on the slave bus
-				 *   We need to ensure the master stays aligned
+				 * to get to the nearest boundary.  The next
+				 * LLI will past the boundary.  However, we
+				 * may be working to a boundary on the slave
+				 * bus.  We need to ensure the master stays
+				 * aligned, and that we are working in
+				 * multiples of the bus widths.
 				 */
 				odd_bytes = lli_len % mbus->buswidth;
-				/*
-				 * - and that we are working in multiples
-				 *   of the bus widths
-				 */
 				lli_len -= odd_bytes;
 
 			}
@@ -855,41 +754,38 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 
 				if (target_len != lli_len) {
 					dev_vdbg(&pl08x->adev->dev,
-					"%s can't send what we want. Desired %08x, lli of %08x bytes in txd of %08x\n",
+					"%s can't send what we want. Desired 0x%08zx, lli of 0x%08zx bytes in txd of 0x%08zx\n",
 					__func__, target_len, lli_len, txd->len);
 				}
 
 				cctl = pl08x_cctl_bits(cctl,
-						       txd->srcbus.buswidth,
-						       txd->dstbus.buswidth,
+						       bd.srcbus.buswidth,
+						       bd.dstbus.buswidth,
 						       tsize);
 
 				dev_vdbg(&pl08x->adev->dev,
-					"%s fill lli with single lli chunk of size %08x (remainder %08x)\n",
-					__func__, lli_len, remainder);
-				num_llis = pl08x_fill_lli_for_desc(pl08x, txd,
-						num_llis, lli_len, cctl,
-						&remainder);
+					"%s fill lli with single lli chunk of size 0x%08zx (remainder 0x%08zx)\n",
+					__func__, lli_len, bd.remainder);
+				pl08x_fill_lli_for_desc(&bd, num_llis++,
+					lli_len, cctl);
 				total_bytes += lli_len;
 			}
 
 
 			if (odd_bytes) {
 				/*
-				 * Creep past the boundary,
-				 * maintaining master alignment
+				 * Creep past the boundary, maintaining
+				 * master alignment
 				 */
 				int j;
 				for (j = 0; (j < mbus->buswidth)
-						&& (remainder); j++) {
+						&& (bd.remainder); j++) {
 					cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
 					dev_vdbg(&pl08x->adev->dev,
-						"%s align with boundardy, single byte (remain %08x)\n",
-						__func__, remainder);
-					num_llis =
-						pl08x_fill_lli_for_desc(pl08x,
-							txd, num_llis, 1,
-							cctl, &remainder);
+						"%s align with boundary, single byte (remain 0x%08zx)\n",
+						__func__, bd.remainder);
+					pl08x_fill_lli_for_desc(&bd,
+						num_llis++, 1, cctl);
 					total_bytes++;
 				}
 			}
@@ -898,25 +794,18 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		/*
 		 * Send any odd bytes
 		 */
-		if (remainder < 0) {
-			dev_err(&pl08x->adev->dev, "%s remainder not fitted 0x%08x bytes\n",
-					__func__, remainder);
-			return 0;
-		}
-
-		while (remainder) {
+		while (bd.remainder) {
 			cctl = pl08x_cctl_bits(cctl, 1, 1, 1);
 			dev_vdbg(&pl08x->adev->dev,
-				"%s align with boundardy, single odd byte (remain %d)\n",
-				__func__, remainder);
-			num_llis = pl08x_fill_lli_for_desc(pl08x, txd, num_llis,
-					1, cctl, &remainder);
+				"%s align with boundary, single odd byte (remain %zu)\n",
+				__func__, bd.remainder);
+			pl08x_fill_lli_for_desc(&bd, num_llis++, 1, cctl);
 			total_bytes++;
 		}
 	}
 	if (total_bytes != txd->len) {
 		dev_err(&pl08x->adev->dev,
-			"%s size of encoded lli:s don't match total txd, transferred 0x%08x from size 0x%08x\n",
+			"%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n",
 			__func__, total_bytes, txd->len);
 		return 0;
 	}
@@ -927,41 +816,12 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			__func__, (u32) MAX_NUM_TSFR_LLIS);
 		return 0;
 	}
-	/*
-	 * Decide whether this is a loop or a terminated transfer
-	 */
-	llis_va = txd->llis_va;
-	llis_bus = (struct lli *) txd->llis_bus;
 
-	if (cd->circular_buffer) {
-		/*
-		 * Loop the circular buffer so that the next element
-		 * points back to the beginning of the LLI.
-		 */
-		llis_va[num_llis - 1].next =
-			(dma_addr_t)((unsigned int)&(llis_bus[0]));
-	} else {
-		/*
-		 * On non-circular buffers, the final LLI terminates
-		 * the LLI.
-		 */
-		llis_va[num_llis - 1].next = 0;
-		/*
-		 * The final LLI element shall also fire an interrupt
-		 */
-		llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
-	}
-
-	/* Now store the channel register values */
-	txd->csrc = llis_va[0].src;
-	txd->cdst = llis_va[0].dst;
-	if (num_llis > 1)
-		txd->clli = llis_va[0].next;
-	else
-		txd->clli = 0;
-
-	txd->cctl = llis_va[0].cctl;
-	/* ccfg will be set at physical channel allocation time */
+	llis_va = txd->llis_va;
+	/* The final LLI terminates the LLI. */
+	llis_va[num_llis - 1].lli = 0;
+	/* The final LLI element shall also fire an interrupt. */
+	llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
 
 #ifdef VERBOSE_DEBUG
 	{
@@ -969,13 +829,13 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 
 		for (i = 0; i < num_llis; i++) {
 			dev_vdbg(&pl08x->adev->dev,
-				 "lli %d @%p: csrc=%08x, cdst=%08x, cctl=%08x, clli=%08x\n",
+				 "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n",
 				 i,
 				 &llis_va[i],
 				 llis_va[i].src,
 				 llis_va[i].dst,
 				 llis_va[i].cctl,
-				 llis_va[i].next
+				 llis_va[i].lli
 				);
 		}
 	}
@@ -988,14 +848,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
 			   struct pl08x_txd *txd)
 {
-	if (!txd)
-		dev_err(&pl08x->adev->dev,
-			"%s no descriptor to free\n",
-			__func__);
-
 	/* Free the LLI */
-	dma_pool_free(pl08x->pool, txd->llis_va,
-		      txd->llis_bus);
+	dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus);
 
 	pl08x->pool_ctr--;
 
@@ -1008,13 +862,12 @@ static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
 	struct pl08x_txd *txdi = NULL;
 	struct pl08x_txd *next;
 
-	if (!list_empty(&plchan->desc_list)) {
+	if (!list_empty(&plchan->pend_list)) {
 		list_for_each_entry_safe(txdi,
-					 next, &plchan->desc_list, node) {
+					 next, &plchan->pend_list, node) {
 			list_del(&txdi->node);
 			pl08x_free_txd(pl08x, txdi);
 		}
-
 	}
 }
 
@@ -1069,6 +922,12 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
 			return -EBUSY;
 		}
 		ch->signal = ret;
+
+		/* Assign the flow control signal to this channel */
+		if (txd->direction == DMA_TO_DEVICE)
+			txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
+		else if (txd->direction == DMA_FROM_DEVICE)
+			txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 	}
 
 	dev_dbg(&pl08x->adev->dev, "allocated physical channel %d and signal %d for xfer on %s\n",
@@ -1076,19 +935,54 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
 		 ch->signal,
 		 plchan->name);
 
+	plchan->phychan_hold++;
 	plchan->phychan = ch;
 
 	return 0;
 }
 
+static void release_phy_channel(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+
+	if ((plchan->phychan->signal >= 0) && pl08x->pd->put_signal) {
+		pl08x->pd->put_signal(plchan);
+		plchan->phychan->signal = -1;
+	}
+	pl08x_put_phy_channel(pl08x, plchan->phychan);
+	plchan->phychan = NULL;
+}
+
 static dma_cookie_t pl08x_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(tx->chan);
+	struct pl08x_txd *txd = to_pl08x_txd(tx);
+	unsigned long flags;
 
-	atomic_inc(&plchan->last_issued);
-	tx->cookie = atomic_read(&plchan->last_issued);
-	/* This unlock follows the lock in the prep() function */
-	spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+	spin_lock_irqsave(&plchan->lock, flags);
+
+	plchan->chan.cookie += 1;
+	if (plchan->chan.cookie < 0)
+		plchan->chan.cookie = 1;
+	tx->cookie = plchan->chan.cookie;
+
+	/* Put this onto the pending list */
+	list_add_tail(&txd->node, &plchan->pend_list);
+
+	/*
+	 * If there was no physical channel available for this memcpy,
+	 * stack the request up and indicate that the channel is waiting
+	 * for a free physical channel.
+	 */
+	if (!plchan->slave && !plchan->phychan) {
+		/* Do this memcpy whenever there is a channel ready */
+		plchan->state = PL08X_CHAN_WAITING;
+		plchan->waiting = txd;
+	} else {
+		plchan->phychan_hold--;
+	}
+
+	spin_unlock_irqrestore(&plchan->lock, flags);
 
 	return tx->cookie;
 }
@@ -1102,10 +996,9 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
 }
 
 /*
- * Code accessing dma_async_is_complete() in a tight loop
- * may give problems - could schedule where indicated.
- * If slaves are relying on interrupts to signal completion this
- * function must not be called with interrupts disabled
+ * Code accessing dma_async_is_complete() in a tight loop may give problems.
+ * If slaves are relying on interrupts to signal completion this function
+ * must not be called with interrupts disabled.
  */
 static enum dma_status
 pl08x_dma_tx_status(struct dma_chan *chan,
@@ -1118,7 +1011,7 @@ pl08x_dma_tx_status(struct dma_chan *chan,
 	enum dma_status ret;
 	u32 bytesleft = 0;
 
-	last_used = atomic_read(&plchan->last_issued);
+	last_used = plchan->chan.cookie;
 	last_complete = plchan->lc;
 
 	ret = dma_async_is_complete(cookie, last_complete, last_used);
@@ -1128,13 +1021,9 @@ pl08x_dma_tx_status(struct dma_chan *chan,
 	}
 
 	/*
-	 * schedule(); could be inserted here
-	 */
-
-	/*
 	 * This cookie not complete yet
 	 */
-	last_used = atomic_read(&plchan->last_issued);
+	last_used = plchan->chan.cookie;
 	last_complete = plchan->lc;
 
 	/* Get number of bytes left in the active transactions and queue */
@@ -1199,37 +1088,35 @@ static const struct burst_table burst_sizes[] = {
 	},
 };
 
-static void dma_set_runtime_config(struct dma_chan *chan,
-			       struct dma_slave_config *config)
+static int dma_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_channel_data *cd = plchan->cd;
 	enum dma_slave_buswidth addr_width;
+	dma_addr_t addr;
 	u32 maxburst;
 	u32 cctl = 0;
-	/* Mask out all except src and dst channel */
-	u32 ccfg = cd->ccfg & 0x000003DEU;
-	int i = 0;
+	int i;
+
+	if (!plchan->slave)
+		return -EINVAL;
 
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
 	if (config->direction == DMA_TO_DEVICE) {
-		plchan->runtime_addr = config->dst_addr;
-		cctl |= PL080_CONTROL_SRC_INCR;
-		ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
 	} else if (config->direction == DMA_FROM_DEVICE) {
-		plchan->runtime_addr = config->src_addr;
-		cctl |= PL080_CONTROL_DST_INCR;
-		ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
 		dev_err(&pl08x->adev->dev,
 			"bad runtime_config: alien transfer direction\n");
-		return;
+		return -EINVAL;
 	}
 
 	switch (addr_width) {
@@ -1248,42 +1135,40 @@ static void dma_set_runtime_config(struct dma_chan *chan,
 	default:
 		dev_err(&pl08x->adev->dev,
 			"bad runtime_config: alien address width\n");
-		return;
+		return -EINVAL;
 	}
 
 	/*
 	 * Now decide on a maxburst:
-	 * If this channel will only request single transfers, set
-	 * this down to ONE element.
+	 * If this channel will only request single transfers, set this
+	 * down to ONE element.  Also select one element if no maxburst
+	 * is specified.
 	 */
-	if (plchan->cd->single) {
+	if (plchan->cd->single || maxburst == 0) {
 		cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
 			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT);
 	} else {
-		while (i < ARRAY_SIZE(burst_sizes)) {
+		for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
 			if (burst_sizes[i].burstwords <= maxburst)
 				break;
-			i++;
-		}
 		cctl |= burst_sizes[i].reg;
 	}
 
-	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
-	cctl &= ~PL080_CONTROL_PROT_MASK;
-	cctl |= PL080_CONTROL_PROT_SYS;
+	plchan->runtime_addr = addr;
 
 	/* Modify the default channel data to fit PrimeCell request */
 	cd->cctl = cctl;
-	cd->ccfg = ccfg;
 
 	dev_dbg(&pl08x->adev->dev,
 		"configured channel %s (%s) for %s, data width %d, "
-		"maxburst %d words, LE, CCTL=%08x, CCFG=%08x\n",
+		"maxburst %d words, LE, CCTL=0x%08x\n",
 		dma_chan_name(chan), plchan->name,
 		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
 		addr_width,
 		maxburst,
-		cctl, ccfg);
+		cctl);
+
+	return 0;
 }
 
 /*
@@ -1293,35 +1178,26 @@ static void dma_set_runtime_config(struct dma_chan *chan,
 static void pl08x_issue_pending(struct dma_chan *chan)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
-	struct pl08x_driver_data *pl08x = plchan->host;
 	unsigned long flags;
 
 	spin_lock_irqsave(&plchan->lock, flags);
-	/* Something is already active */
-	if (plchan->at) {
-			spin_unlock_irqrestore(&plchan->lock, flags);
-			return;
-	}
-
-	/* Didn't get a physical channel so waiting for it ... */
-	if (plchan->state == PL08X_CHAN_WAITING)
+	/* Something is already active, or we're waiting for a channel... */
+	if (plchan->at || plchan->state == PL08X_CHAN_WAITING) {
+		spin_unlock_irqrestore(&plchan->lock, flags);
 		return;
+	}
 
 	/* Take the first element in the queue and execute it */
-	if (!list_empty(&plchan->desc_list)) {
+	if (!list_empty(&plchan->pend_list)) {
 		struct pl08x_txd *next;
 
-		next = list_first_entry(&plchan->desc_list,
+		next = list_first_entry(&plchan->pend_list,
 					struct pl08x_txd,
 					node);
 		list_del(&next->node);
-		plchan->at = next;
 		plchan->state = PL08X_CHAN_RUNNING;
 
-		/* Configure the physical channel for the active txd */
-		pl08x_config_phychan_for_txd(plchan);
-		pl08x_set_cregs(pl08x, plchan->phychan);
-		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+		pl08x_start_txd(plchan, next);
 	}
 
 	spin_unlock_irqrestore(&plchan->lock, flags);
@@ -1330,30 +1206,17 @@ static void pl08x_issue_pending(struct dma_chan *chan)
 static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
 					struct pl08x_txd *txd)
 {
-	int num_llis;
 	struct pl08x_driver_data *pl08x = plchan->host;
-	int ret;
+	unsigned long flags;
+	int num_llis, ret;
 
 	num_llis = pl08x_fill_llis_for_desc(pl08x, txd);
-
-	if (!num_llis)
+	if (!num_llis) {
+		kfree(txd);
 		return -EINVAL;
+	}
 
-	spin_lock_irqsave(&plchan->lock, plchan->lockflags);
-
-	/*
-	 * If this device is not using a circular buffer then
-	 * queue this new descriptor for transfer.
-	 * The descriptor for a circular buffer continues
-	 * to be used until the channel is freed.
-	 */
-	if (txd->cd->circular_buffer)
-		dev_err(&pl08x->adev->dev,
-			"%s attempting to queue a circular buffer\n",
-			__func__);
-	else
-		list_add_tail(&txd->node,
-			      &plchan->desc_list);
+	spin_lock_irqsave(&plchan->lock, flags);
 
 	/*
 	 * See if we already have a physical channel allocated,
@@ -1362,45 +1225,74 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
 	ret = prep_phy_channel(plchan, txd);
 	if (ret) {
 		/*
-		 * No physical channel available, we will
-		 * stack up the memcpy channels until there is a channel
-		 * available to handle it whereas slave transfers may
-		 * have been denied due to platform channel muxing restrictions
-		 * and since there is no guarantee that this will ever be
-		 * resolved, and since the signal must be aquired AFTER
-		 * aquiring the physical channel, we will let them be NACK:ed
-		 * with -EBUSY here. The drivers can alway retry the prep()
-		 * call if they are eager on doing this using DMA.
+		 * No physical channel was available.
+		 *
+		 * memcpy transfers can be sorted out at submission time.
+		 *
+		 * Slave transfers may have been denied due to platform
+		 * channel muxing restrictions.  Since there is no guarantee
+		 * that this will ever be resolved, and the signal must be
+		 * acquired AFTER acquiring the physical channel, we will let
+		 * them be NACK:ed with -EBUSY here. The drivers can retry
+		 * the prep() call if they are eager on doing this using DMA.
 		 */
 		if (plchan->slave) {
 			pl08x_free_txd_list(pl08x, plchan);
-			spin_unlock_irqrestore(&plchan->lock, plchan->lockflags);
+			pl08x_free_txd(pl08x, txd);
+			spin_unlock_irqrestore(&plchan->lock, flags);
 			return -EBUSY;
 		}
-		/* Do this memcpy whenever there is a channel ready */
-		plchan->state = PL08X_CHAN_WAITING;
-		plchan->waiting = txd;
 	} else
 		/*
-		 * Else we're all set, paused and ready to roll,
-		 * status will switch to PL08X_CHAN_RUNNING when
-		 * we call issue_pending(). If there is something
-		 * running on the channel already we don't change
-		 * its state.
+		 * Else we're all set, paused and ready to roll, status
+		 * will switch to PL08X_CHAN_RUNNING when we call
+		 * issue_pending(). If there is something running on the
+		 * channel already we don't change its state.
 		 */
 		if (plchan->state == PL08X_CHAN_IDLE)
 			plchan->state = PL08X_CHAN_PAUSED;
 
-	/*
-	 * Notice that we leave plchan->lock locked on purpose:
-	 * it will be unlocked in the subsequent tx_submit()
-	 * call. This is a consequence of the current API.
-	 */
+	spin_unlock_irqrestore(&plchan->lock, flags);
 
 	return 0;
 }
 
 /*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port.  We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
+ */
+static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst)
+{
+	u32 cctl = 0;
+
+	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+		cctl |= PL080_CONTROL_DST_AHB2;
+	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+		cctl |= PL080_CONTROL_SRC_AHB2;
+
+	return cctl;
+}
+
+static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan,
+	unsigned long flags)
+{
+	struct pl08x_txd *txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+
+	if (txd) {
+		dma_async_tx_descriptor_init(&txd->tx, &plchan->chan);
+		txd->tx.flags = flags;
+		txd->tx.tx_submit = pl08x_tx_submit;
+		INIT_LIST_HEAD(&txd->node);
+
+		/* Always enable error and terminal interrupts */
+		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+			    PL080_CONFIG_TC_IRQ_MASK;
+	}
+	return txd;
+}
+
+/*
  * Initialize a descriptor to be used by memcpy submit
  */
 static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
@@ -1412,40 +1304,38 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	struct pl08x_txd *txd;
 	int ret;
 
-	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	txd = pl08x_get_txd(plchan, flags);
 	if (!txd) {
 		dev_err(&pl08x->adev->dev,
 			"%s no memory for descriptor\n", __func__);
 		return NULL;
 	}
 
-	dma_async_tx_descriptor_init(&txd->tx, chan);
 	txd->direction = DMA_NONE;
-	txd->srcbus.addr = src;
-	txd->dstbus.addr = dest;
+	txd->src_addr = src;
+	txd->dst_addr = dest;
+	txd->len = len;
 
 	/* Set platform data for m2m */
-	txd->cd = &pl08x->pd->memcpy_channel;
+	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+	txd->cctl = pl08x->pd->memcpy_channel.cctl &
+			~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+
 	/* Both to be incremented or the code will break */
-	txd->cd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
-	txd->tx.tx_submit = pl08x_tx_submit;
-	txd->tx.callback = NULL;
-	txd->tx.callback_param = NULL;
-	txd->len = len;
+	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+
+	if (pl08x->vd->dualmaster)
+		txd->cctl |= pl08x_select_bus(pl08x,
+					pl08x->mem_buses, pl08x->mem_buses);
 
-	INIT_LIST_HEAD(&txd->node);
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
 		return NULL;
-	/*
-	 * NB: the channel lock is held at this point so tx_submit()
-	 * must be called in direct succession.
-	 */
 
 	return &txd->tx;
 }
 
-struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_data_direction direction,
 		unsigned long flags)
@@ -1453,6 +1343,7 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
+	u8 src_buses, dst_buses;
 	int ret;
 
 	/*
@@ -1467,14 +1358,12 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
 		__func__, sgl->length, plchan->name);
 
-	txd = kzalloc(sizeof(struct pl08x_txd), GFP_NOWAIT);
+	txd = pl08x_get_txd(plchan, flags);
 	if (!txd) {
 		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
 		return NULL;
 	}
 
-	dma_async_tx_descriptor_init(&txd->tx, chan);
-
 	if (direction != plchan->runtime_direction)
 		dev_err(&pl08x->adev->dev, "%s DMA setup does not match "
 			"the direction configured for the PrimeCell\n",
@@ -1486,37 +1375,47 @@ struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	 * channel target address dynamically at runtime.
 	 */
 	txd->direction = direction;
+	txd->len = sgl->length;
+
+	txd->cctl = plchan->cd->cctl &
+			~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+			  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+			  PL080_CONTROL_PROT_MASK);
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	txd->cctl |= PL080_CONTROL_PROT_SYS;
+
 	if (direction == DMA_TO_DEVICE) {
-		txd->srcbus.addr = sgl->dma_address;
+		txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		txd->cctl |= PL080_CONTROL_SRC_INCR;
+		txd->src_addr = sgl->dma_address;
 		if (plchan->runtime_addr)
-			txd->dstbus.addr = plchan->runtime_addr;
+			txd->dst_addr = plchan->runtime_addr;
 		else
-			txd->dstbus.addr = plchan->cd->addr;
+			txd->dst_addr = plchan->cd->addr;
+		src_buses = pl08x->mem_buses;
+		dst_buses = plchan->cd->periph_buses;
 	} else if (direction == DMA_FROM_DEVICE) {
+		txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		txd->cctl |= PL080_CONTROL_DST_INCR;
 		if (plchan->runtime_addr)
-			txd->srcbus.addr = plchan->runtime_addr;
+			txd->src_addr = plchan->runtime_addr;
 		else
-			txd->srcbus.addr = plchan->cd->addr;
-		txd->dstbus.addr = sgl->dma_address;
+			txd->src_addr = plchan->cd->addr;
+		txd->dst_addr = sgl->dma_address;
+		src_buses = plchan->cd->periph_buses;
+		dst_buses = pl08x->mem_buses;
 	} else {
 		dev_err(&pl08x->adev->dev,
 			"%s direction unsupported\n", __func__);
 		return NULL;
 	}
-	txd->cd = plchan->cd;
-	txd->tx.tx_submit = pl08x_tx_submit;
-	txd->tx.callback = NULL;
-	txd->tx.callback_param = NULL;
-	txd->len = sgl->length;
-	INIT_LIST_HEAD(&txd->node);
+
+	txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses);
 
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
 		return NULL;
-	/*
-	 * NB: the channel lock is held at this point so tx_submit()
-	 * must be called in direct succession.
-	 */
 
 	return &txd->tx;
 }
@@ -1531,10 +1430,8 @@ static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 
 	/* Controls applicable to inactive channels */
 	if (cmd == DMA_SLAVE_CONFIG) {
-		dma_set_runtime_config(chan,
-				       (struct dma_slave_config *)
-				       arg);
-		return 0;
+		return dma_set_runtime_config(chan,
+					      (struct dma_slave_config *)arg);
 	}
 
 	/*
@@ -1558,16 +1455,8 @@ static int pl08x_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 			 * Mark physical channel as free and free any slave
 			 * signal
 			 */
-			if ((plchan->phychan->signal >= 0) &&
-			    pl08x->pd->put_signal) {
-				pl08x->pd->put_signal(plchan);
-				plchan->phychan->signal = -1;
-			}
-			pl08x_put_phy_channel(pl08x, plchan->phychan);
-			plchan->phychan = NULL;
+			release_phy_channel(plchan);
 		}
-		/* Stop any pending tasklet */
-		tasklet_disable(&plchan->tasklet);
 		/* Dequeue jobs and free LLIs */
 		if (plchan->at) {
 			pl08x_free_txd(pl08x, plchan->at);
@@ -1609,10 +1498,9 @@ bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
 
 /*
  * Just check that the device is there and active
- * TODO: turn this bit on/off depending on the number of
- * physical channels actually used, if it is zero... well
- * shut it off. That will save some power. Cut the clock
- * at the same time.
+ * TODO: turn this bit on/off depending on the number of physical channels
+ * actually used, if it is zero... well shut it off. That will save some
+ * power. Cut the clock at the same time.
  */
 static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 {
@@ -1620,78 +1508,66 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 
 	val = readl(pl08x->base + PL080_CONFIG);
 	val &= ~(PL080_CONFIG_M2_BE | PL080_CONFIG_M1_BE | PL080_CONFIG_ENABLE);
-	/* We implictly clear bit 1 and that means little-endian mode */
+	/* We implicitly clear bit 1 and that means little-endian mode */
 	val |= PL080_CONFIG_ENABLE;
 	writel(val, pl08x->base + PL080_CONFIG);
 }
 
+static void pl08x_unmap_buffers(struct pl08x_txd *txd)
+{
+	struct device *dev = txd->tx.chan->device->dev;
+
+	if (!(txd->tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+		if (txd->tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+			dma_unmap_single(dev, txd->src_addr, txd->len,
+				DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dev, txd->src_addr, txd->len,
+				DMA_TO_DEVICE);
+	}
+	if (!(txd->tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+		if (txd->tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+			dma_unmap_single(dev, txd->dst_addr, txd->len,
+				DMA_FROM_DEVICE);
+		else
+			dma_unmap_page(dev, txd->dst_addr, txd->len,
+				DMA_FROM_DEVICE);
+	}
+}
+
 static void pl08x_tasklet(unsigned long data)
 {
 	struct pl08x_dma_chan *plchan = (struct pl08x_dma_chan *) data;
-	struct pl08x_phy_chan *phychan = plchan->phychan;
 	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	unsigned long flags;
 
-	if (!plchan)
-		BUG();
-
-	spin_lock(&plchan->lock);
-
-	if (plchan->at) {
-		dma_async_tx_callback callback =
-			plchan->at->tx.callback;
-		void *callback_param =
-			plchan->at->tx.callback_param;
-
-		/*
-		 * Update last completed
-		 */
-		plchan->lc =
-			(plchan->at->tx.cookie);
-
-		/*
-		 * Callback to signal completion
-		 */
-		if (callback)
-			callback(callback_param);
+	spin_lock_irqsave(&plchan->lock, flags);
 
-		/*
-		 * Device callbacks should NOT clear
-		 * the current transaction on the channel
-		 * Linus: sometimes they should?
-		 */
-		if (!plchan->at)
-			BUG();
+	txd = plchan->at;
+	plchan->at = NULL;
 
-		/*
-		 * Free the descriptor if it's not for a device
-		 * using a circular buffer
-		 */
-		if (!plchan->at->cd->circular_buffer) {
-			pl08x_free_txd(pl08x, plchan->at);
-			plchan->at = NULL;
-		}
-		/*
-		 * else descriptor for circular
-		 * buffers only freed when
-		 * client has disabled dma
-		 */
+	if (txd) {
+		/* Update last completed */
+		plchan->lc = txd->tx.cookie;
 	}
-	/*
-	 * If a new descriptor is queued, set it up
-	 * plchan->at is NULL here
-	 */
-	if (!list_empty(&plchan->desc_list)) {
+
+	/* If a new descriptor is queued, set it up plchan->at is NULL here */
+	if (!list_empty(&plchan->pend_list)) {
 		struct pl08x_txd *next;
 
-		next = list_first_entry(&plchan->desc_list,
+		next = list_first_entry(&plchan->pend_list,
 					struct pl08x_txd,
 					node);
 		list_del(&next->node);
-		plchan->at = next;
-		/* Configure the physical channel for the next txd */
-		pl08x_config_phychan_for_txd(plchan);
-		pl08x_set_cregs(pl08x, plchan->phychan);
-		pl08x_enable_phy_chan(pl08x, plchan->phychan);
+
+		pl08x_start_txd(plchan, next);
+	} else if (plchan->phychan_hold) {
+		/*
+		 * This channel is still in use - we have a new txd being
+		 * prepared and will soon be queued.  Don't give up the
+		 * physical channel.
+		 */
 	} else {
 		struct pl08x_dma_chan *waiting = NULL;
 
@@ -1699,20 +1575,14 @@ static void pl08x_tasklet(unsigned long data)
 		 * No more jobs, so free up the physical channel
 		 * Free any allocated signal on slave transfers too
 		 */
-		if ((phychan->signal >= 0) && pl08x->pd->put_signal) {
-			pl08x->pd->put_signal(plchan);
-			phychan->signal = -1;
-		}
-		pl08x_put_phy_channel(pl08x, phychan);
-		plchan->phychan = NULL;
+		release_phy_channel(plchan);
 		plchan->state = PL08X_CHAN_IDLE;
 
 		/*
-		 * And NOW before anyone else can grab that free:d
-		 * up physical channel, see if there is some memcpy
-		 * pending that seriously needs to start because of
-		 * being stacked up while we were choking the
-		 * physical channels with data.
+		 * And NOW before anyone else can grab that free:d up
+		 * physical channel, see if there is some memcpy pending
+		 * that seriously needs to start because of being stacked
+		 * up while we were choking the physical channels with data.
 		 */
 		list_for_each_entry(waiting, &pl08x->memcpy.channels,
 				    chan.device_node) {
@@ -1724,6 +1594,7 @@ static void pl08x_tasklet(unsigned long data)
 				ret = prep_phy_channel(waiting,
 						       waiting->waiting);
 				BUG_ON(ret);
+				waiting->phychan_hold--;
 				waiting->state = PL08X_CHAN_RUNNING;
 				waiting->waiting = NULL;
 				pl08x_issue_pending(&waiting->chan);
@@ -1732,7 +1603,25 @@ static void pl08x_tasklet(unsigned long data)
 		}
 	}
 
-	spin_unlock(&plchan->lock);
+	spin_unlock_irqrestore(&plchan->lock, flags);
+
+	if (txd) {
+		dma_async_tx_callback callback = txd->tx.callback;
+		void *callback_param = txd->tx.callback_param;
+
+		/* Don't try to unmap buffers on slave channels */
+		if (!plchan->slave)
+			pl08x_unmap_buffers(txd);
+
+		/* Free the descriptor */
+		spin_lock_irqsave(&plchan->lock, flags);
+		pl08x_free_txd(pl08x, txd);
+		spin_unlock_irqrestore(&plchan->lock, flags);
+
+		/* Callback to signal completion */
+		if (callback)
+			callback(callback_param);
+	}
 }
 
 static irqreturn_t pl08x_irq(int irq, void *dev)
@@ -1744,9 +1633,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 
 	val = readl(pl08x->base + PL080_ERR_STATUS);
 	if (val) {
-		/*
-		 * An error interrupt (on one or more channels)
-		 */
+		/* An error interrupt (on one or more channels) */
 		dev_err(&pl08x->adev->dev,
 			"%s error interrupt, register value 0x%08x\n",
 				__func__, val);
@@ -1770,9 +1657,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 			mask |= (1 << i);
 		}
 	}
-	/*
-	 * Clear only the terminal interrupts on channels we processed
-	 */
+	/* Clear only the terminal interrupts on channels we processed */
 	writel(mask, pl08x->base + PL080_TC_CLEAR);
 
 	return mask ? IRQ_HANDLED : IRQ_NONE;
@@ -1791,6 +1676,7 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 	int i;
 
 	INIT_LIST_HEAD(&dmadev->channels);
+
 	/*
 	 * Register as many many memcpy as we have physical channels,
 	 * we won't always be able to use all but the code will have
@@ -1819,16 +1705,23 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 				return -ENOMEM;
 			}
 		}
+		if (chan->cd->circular_buffer) {
+			dev_err(&pl08x->adev->dev,
+				"channel %s: circular buffers not supported\n",
+				chan->name);
+			kfree(chan);
+			continue;
+		}
 		dev_info(&pl08x->adev->dev,
 			 "initialize virtual channel \"%s\"\n",
 			 chan->name);
 
 		chan->chan.device = dmadev;
-		atomic_set(&chan->last_issued, 0);
-		chan->lc = atomic_read(&chan->last_issued);
+		chan->chan.cookie = 0;
+		chan->lc = 0;
 
 		spin_lock_init(&chan->lock);
-		INIT_LIST_HEAD(&chan->desc_list);
+		INIT_LIST_HEAD(&chan->pend_list);
 		tasklet_init(&chan->tasklet, pl08x_tasklet,
 			     (unsigned long) chan);
 
@@ -1898,7 +1791,7 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data)
 	seq_printf(s, "CHANNEL:\tSTATE:\n");
 	seq_printf(s, "--------\t------\n");
 	list_for_each_entry(chan, &pl08x->memcpy.channels, chan.device_node) {
-		seq_printf(s, "%s\t\t\%s\n", chan->name,
+		seq_printf(s, "%s\t\t%s\n", chan->name,
 			   pl08x_state_str(chan->state));
 	}
 
@@ -1906,7 +1799,7 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data)
 	seq_printf(s, "CHANNEL:\tSTATE:\n");
 	seq_printf(s, "--------\t------\n");
 	list_for_each_entry(chan, &pl08x->slave.channels, chan.device_node) {
-		seq_printf(s, "%s\t\t\%s\n", chan->name,
+		seq_printf(s, "%s\t\t%s\n", chan->name,
 			   pl08x_state_str(chan->state));
 	}
 
@@ -1942,7 +1835,7 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
 static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
 {
 	struct pl08x_driver_data *pl08x;
-	struct vendor_data *vd = id->data;
+	const struct vendor_data *vd = id->data;
 	int ret = 0;
 	int i;
 
@@ -1990,6 +1883,14 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
 	pl08x->adev = adev;
 	pl08x->vd = vd;
 
+	/* By default, AHB1 only.  If dualmaster, from platform */
+	pl08x->lli_buses = PL08X_AHB1;
+	pl08x->mem_buses = PL08X_AHB1;
+	if (pl08x->vd->dualmaster) {
+		pl08x->lli_buses = pl08x->pd->lli_buses;
+		pl08x->mem_buses = pl08x->pd->mem_buses;
+	}
+
 	/* A DMA memory pool for LLIs, align on 1-byte boundary */
 	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
 			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
@@ -2009,14 +1910,12 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
 	/* Turn on the PL08x */
 	pl08x_ensure_on(pl08x);
 
-	/*
-	 * Attach the interrupt handler
-	 */
+	/* Attach the interrupt handler */
 	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
 	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
 
 	ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
-			  vd->name, pl08x);
+			  DRIVER_NAME, pl08x);
 	if (ret) {
 		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
 			__func__, adev->irq[0]);
@@ -2087,8 +1986,9 @@ static int pl08x_probe(struct amba_device *adev, struct amba_id *id)
 
 	amba_set_drvdata(adev, pl08x);
 	init_pl08x_debugfs(pl08x);
-	dev_info(&pl08x->adev->dev, "ARM(R) %s DMA block initialized @%08x\n",
-		vd->name, adev->res.start);
+	dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n",
+		 amba_part(adev), amba_rev(adev),
+		 (unsigned long long)adev->res.start, adev->irq[0]);
 	return 0;
 
 out_no_slave_reg:
@@ -2115,13 +2015,11 @@ out_no_pl08x:
 
 /* PL080 has 8 channels and the PL080 have just 2 */
 static struct vendor_data vendor_pl080 = {
-	.name = "PL080",
 	.channels = 8,
 	.dualmaster = true,
 };
 
 static struct vendor_data vendor_pl081 = {
-	.name = "PL081",
 	.channels = 2,
 	.dualmaster = false,
 };
@@ -2160,7 +2058,7 @@ static int __init pl08x_init(void)
 	retval = amba_driver_register(&pl08x_amba_driver);
 	if (retval)
 		printk(KERN_WARNING DRIVER_NAME
-		       "failed to register as an amba device (%d)\n",
+		       "failed to register as an AMBA device (%d)\n",
 		       retval);
 	return retval;
 }
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index ea0ee81cff53..3d7d705f026f 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -253,7 +253,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 	/* move myself to free_list */
 	list_move(&desc->desc_node, &atchan->free_list);
 
-	/* unmap dma addresses */
+	/* unmap dma addresses (not on slave channels) */
 	if (!atchan->chan_common.private) {
 		struct device *parent = chan2parent(&atchan->chan_common);
 		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
@@ -583,7 +583,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		desc->lli.ctrlb = ctrlb;
 
 		desc->txd.cookie = 0;
-		async_tx_ack(&desc->txd);
 
 		if (!first) {
 			first = desc;
@@ -604,7 +603,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	/* set end-of-link to the last link descriptor of list*/
 	set_desc_eol(desc);
 
-	desc->txd.flags = flags; /* client is in control of this ack */
+	first->txd.flags = flags; /* client is in control of this ack */
 
 	return &first->txd;
 
@@ -670,7 +669,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (!desc)
 				goto err_desc_get;
 
-			mem = sg_phys(sg);
+			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 			mem_width = 2;
 			if (unlikely(mem & 3 || len & 3))
@@ -712,7 +711,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (!desc)
 				goto err_desc_get;
 
-			mem = sg_phys(sg);
+			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 			mem_width = 2;
 			if (unlikely(mem & 3 || len & 3))
@@ -749,8 +748,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	first->txd.cookie = -EBUSY;
 	first->len = total_len;
 
-	/* last link descriptor of list is responsible of flags */
-	prev->txd.flags = flags; /* client is in control of this ack */
+	/* first link descriptor of list is responsible of flags */
+	first->txd.flags = flags; /* client is in control of this ack */
 
 	return &first->txd;
 
@@ -854,11 +853,11 @@ static void atc_issue_pending(struct dma_chan *chan)
 
 	dev_vdbg(chan2dev(chan), "issue_pending\n");
 
+	spin_lock_bh(&atchan->lock);
 	if (!atc_chan_is_enabled(atchan)) {
-		spin_lock_bh(&atchan->lock);
 		atc_advance_work(atchan);
-		spin_unlock_bh(&atchan->lock);
 	}
+	spin_unlock_bh(&atchan->lock);
 }
 
 /**
@@ -1210,7 +1209,7 @@ static int __init at_dma_init(void)
 {
 	return platform_driver_probe(&at_dma_driver, at_dma_probe);
 }
-module_init(at_dma_init);
+subsys_initcall(at_dma_init);
 
 static void __exit at_dma_exit(void)
 {
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e5e172d21692..4de947a450fc 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1,7 +1,7 @@
 /*
  * Freescale MPC85xx, MPC83xx DMA Engine support
  *
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author:
  *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
@@ -1324,6 +1324,8 @@ static int __devinit fsldma_of_probe(struct platform_device *op,
 	fdev->common.device_control = fsl_dma_device_control;
 	fdev->common.dev = &op->dev;
 
+	dma_set_mask(&(op->dev), DMA_BIT_MASK(36));
+
 	dev_set_drvdata(&op->dev, fdev);
 
 	/*
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 78266382797e..798f46a4590d 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -664,11 +664,20 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 	/*calculate CTL_LO*/
 	ctl_lo.ctl_lo = 0;
 	ctl_lo.ctlx.int_en = 1;
-	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width;
-	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width;
 	ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
 	ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
 
+	/*
+	 * Here we need some translation from "enum dma_slave_buswidth"
+	 * to the format for our dma controller
+	 *		standard	intel_mid_dmac's format
+	 *		 1 Byte			0b000
+	 *		 2 Bytes		0b001
+	 *		 4 Bytes		0b010
+	 */
+	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2;
+	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2;
+
 	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
 		ctl_lo.ctlx.tt_fc = 0;
 		ctl_lo.ctlx.sinc = 0;
@@ -746,8 +755,18 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 	BUG_ON(!mids);
 
 	if (!midc->dma->pimr_mask) {
-		pr_debug("MDMA: SG list is not supported by this controller\n");
-		return  NULL;
+		/* We can still handle sg list with only one item */
+		if (sg_len == 1) {
+			txd = intel_mid_dma_prep_memcpy(chan,
+						mids->dma_slave.dst_addr,
+						mids->dma_slave.src_addr,
+						sgl->length,
+						flags);
+			return txd;
+		} else {
+			pr_warn("MDMA: SG list is not supported by this controller\n");
+			return  NULL;
+		}
 	}
 
 	pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
@@ -758,6 +777,7 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 		pr_err("MDMA: Prep memcpy failed\n");
 		return NULL;
 	}
+
 	desc = to_intel_mid_dma_desc(txd);
 	desc->dirn = direction;
 	ctl_lo.ctl_lo = desc->ctl_lo;
@@ -1021,11 +1041,6 @@ static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
 
 	/*DMA Interrupt*/
 	pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
-	if (!mid) {
-		pr_err("ERR_MDMA:null pointer mid\n");
-		return -EINVAL;
-	}
-
 	pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
 	tfr_status &= mid->intr_mask;
 	if (tfr_status) {
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 161c452923b8..c6b01f535b29 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -1261,7 +1261,7 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_MD_RAID6_PQ
+#ifdef CONFIG_RAID6_PQ
 static int __devinit
 iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
 {
@@ -1584,7 +1584,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 
 	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
 	    dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
-		#ifdef CONFIG_MD_RAID6_PQ
+		#ifdef CONFIG_RAID6_PQ
 		ret = iop_adma_pq_zero_sum_self_test(adev);
 		dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
 		#else
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index c064c89420d0..1c38418ae61f 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -1,6 +1,7 @@
 /*
  * Topcliff PCH DMA controller driver
  * Copyright (c) 2010 Intel Corporation
+ * Copyright (C) 2011 OKI SEMICONDUCTOR CO., LTD.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -921,12 +922,19 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev)
 }
 
 /* PCI Device ID of DMA device */
-#define PCI_DEVICE_ID_PCH_DMA_8CH        0x8810
-#define PCI_DEVICE_ID_PCH_DMA_4CH        0x8815
+#define PCI_VENDOR_ID_ROHM             0x10DB
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH        0x8810
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH        0x8815
+#define PCI_DEVICE_ID_ML7213_DMA1_8CH	0x8026
+#define PCI_DEVICE_ID_ML7213_DMA2_8CH	0x802B
+#define PCI_DEVICE_ID_ML7213_DMA3_4CH	0x8034
 
 static const struct pci_device_id pch_dma_id_table[] = {
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_8CH), 8 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_DMA_4CH), 4 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */
 	{ 0, },
 };
 
@@ -954,6 +962,7 @@ static void __exit pch_dma_exit(void)
 module_init(pch_dma_init);
 module_exit(pch_dma_exit);
 
-MODULE_DESCRIPTION("Topcliff PCH DMA controller driver");
+MODULE_DESCRIPTION("Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH "
+		   "DMA controller driver");
 MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index fab68a553205..6e1d46a65d0e 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (C) ST-Ericsson SA 2007-2010
+ * Copyright (C) Ericsson AB 2007-2008
+ * Copyright (C) ST-Ericsson SA 2008-2010
  * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
  * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
  * License terms: GNU General Public License (GPL) version 2
@@ -554,8 +555,66 @@ static struct d40_desc *d40_last_queued(struct d40_chan *d40c)
 	return d;
 }
 
-/* Support functions for logical channels */
+static int d40_psize_2_burst_size(bool is_log, int psize)
+{
+	if (is_log) {
+		if (psize == STEDMA40_PSIZE_LOG_1)
+			return 1;
+	} else {
+		if (psize == STEDMA40_PSIZE_PHY_1)
+			return 1;
+	}
+
+	return 2 << psize;
+}
+
+/*
+ * The dma only supports transmitting packages up to
+ * STEDMA40_MAX_SEG_SIZE << data_width. Calculate the total number of
+ * dma elements required to send the entire sg list
+ */
+static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2)
+{
+	int dmalen;
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= (1 << max_w);
+
+	if (!IS_ALIGNED(size, 1 << max_w))
+		return -EINVAL;
+
+	if (size <= seg_max)
+		dmalen = 1;
+	else {
+		dmalen = size / seg_max;
+		if (dmalen * seg_max < size)
+			dmalen++;
+	}
+	return dmalen;
+}
+
+static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
+			   u32 data_width1, u32 data_width2)
+{
+	struct scatterlist *sg;
+	int i;
+	int len = 0;
+	int ret;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		ret = d40_size_2_dmalen(sg_dma_len(sg),
+					data_width1, data_width2);
+		if (ret < 0)
+			return ret;
+		len += ret;
+	}
+	return len;
+}
 
+/* Support functions for logical channels */
 
 static int d40_channel_execute_command(struct d40_chan *d40c,
 				       enum d40_command command)
@@ -1241,6 +1300,21 @@ static int d40_validate_conf(struct d40_chan *d40c,
 		res = -EINVAL;
 	}
 
+	if (d40_psize_2_burst_size(is_log, conf->src_info.psize) *
+	    (1 << conf->src_info.data_width) !=
+	    d40_psize_2_burst_size(is_log, conf->dst_info.psize) *
+	    (1 << conf->dst_info.data_width)) {
+		/*
+		 * The DMAC hardware only supports
+		 * src (burst x width) == dst (burst x width)
+		 */
+
+		dev_err(&d40c->chan.dev->device,
+			"[%s] src (burst x width) != dst (burst x width)\n",
+			__func__);
+		res = -EINVAL;
+	}
+
 	return res;
 }
 
@@ -1638,13 +1712,21 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
 	if (d40d == NULL)
 		goto err;
 
-	d40d->lli_len = sgl_len;
+	d40d->lli_len = d40_sg_2_dmalen(sgl_dst, sgl_len,
+					d40c->dma_cfg.src_info.data_width,
+					d40c->dma_cfg.dst_info.data_width);
+	if (d40d->lli_len < 0) {
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Unaligned size\n", __func__);
+		goto err;
+	}
+
 	d40d->lli_current = 0;
 	d40d->txd.flags = dma_flags;
 
 	if (d40c->log_num != D40_PHY_CHAN) {
 
-		if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) {
+		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
 			dev_err(&d40c->chan.dev->device,
 				"[%s] Out of memory\n", __func__);
 			goto err;
@@ -1654,15 +1736,17 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
 					 sgl_len,
 					 d40d->lli_log.src,
 					 d40c->log_def.lcsp1,
-					 d40c->dma_cfg.src_info.data_width);
+					 d40c->dma_cfg.src_info.data_width,
+					 d40c->dma_cfg.dst_info.data_width);
 
 		(void) d40_log_sg_to_lli(sgl_dst,
 					 sgl_len,
 					 d40d->lli_log.dst,
 					 d40c->log_def.lcsp3,
-					 d40c->dma_cfg.dst_info.data_width);
+					 d40c->dma_cfg.dst_info.data_width,
+					 d40c->dma_cfg.src_info.data_width);
 	} else {
-		if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) {
+		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
 			dev_err(&d40c->chan.dev->device,
 				"[%s] Out of memory\n", __func__);
 			goto err;
@@ -1675,6 +1759,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
 					virt_to_phys(d40d->lli_phy.src),
 					d40c->src_def_cfg,
 					d40c->dma_cfg.src_info.data_width,
+					d40c->dma_cfg.dst_info.data_width,
 					d40c->dma_cfg.src_info.psize);
 
 		if (res < 0)
@@ -1687,6 +1772,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
 					virt_to_phys(d40d->lli_phy.dst),
 					d40c->dst_def_cfg,
 					d40c->dma_cfg.dst_info.data_width,
+					d40c->dma_cfg.src_info.data_width,
 					d40c->dma_cfg.dst_info.psize);
 
 		if (res < 0)
@@ -1826,7 +1912,6 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
 					     chan);
 	unsigned long flags;
-	int err = 0;
 
 	if (d40c->phy_chan == NULL) {
 		dev_err(&d40c->chan.dev->device,
@@ -1844,6 +1929,15 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 	}
 
 	d40d->txd.flags = dma_flags;
+	d40d->lli_len = d40_size_2_dmalen(size,
+					  d40c->dma_cfg.src_info.data_width,
+					  d40c->dma_cfg.dst_info.data_width);
+	if (d40d->lli_len < 0) {
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Unaligned size\n", __func__);
+		goto err;
+	}
+
 
 	dma_async_tx_descriptor_init(&d40d->txd, chan);
 
@@ -1851,37 +1945,40 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 
 	if (d40c->log_num != D40_PHY_CHAN) {
 
-		if (d40_pool_lli_alloc(d40d, 1, true) < 0) {
+		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
 			dev_err(&d40c->chan.dev->device,
 				"[%s] Out of memory\n", __func__);
 			goto err;
 		}
-		d40d->lli_len = 1;
 		d40d->lli_current = 0;
 
-		d40_log_fill_lli(d40d->lli_log.src,
-				 src,
-				 size,
-				 d40c->log_def.lcsp1,
-				 d40c->dma_cfg.src_info.data_width,
-				 true);
+		if (d40_log_buf_to_lli(d40d->lli_log.src,
+				       src,
+				       size,
+				       d40c->log_def.lcsp1,
+				       d40c->dma_cfg.src_info.data_width,
+				       d40c->dma_cfg.dst_info.data_width,
+				       true) == NULL)
+			goto err;
 
-		d40_log_fill_lli(d40d->lli_log.dst,
-				 dst,
-				 size,
-				 d40c->log_def.lcsp3,
-				 d40c->dma_cfg.dst_info.data_width,
-				 true);
+		if (d40_log_buf_to_lli(d40d->lli_log.dst,
+				       dst,
+				       size,
+				       d40c->log_def.lcsp3,
+				       d40c->dma_cfg.dst_info.data_width,
+				       d40c->dma_cfg.src_info.data_width,
+				       true) == NULL)
+			goto err;
 
 	} else {
 
-		if (d40_pool_lli_alloc(d40d, 1, false) < 0) {
+		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
 			dev_err(&d40c->chan.dev->device,
 				"[%s] Out of memory\n", __func__);
 			goto err;
 		}
 
-		err = d40_phy_fill_lli(d40d->lli_phy.src,
+		if (d40_phy_buf_to_lli(d40d->lli_phy.src,
 				       src,
 				       size,
 				       d40c->dma_cfg.src_info.psize,
@@ -1889,11 +1986,11 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 				       d40c->src_def_cfg,
 				       true,
 				       d40c->dma_cfg.src_info.data_width,
-				       false);
-		if (err)
-			goto err_fill_lli;
+				       d40c->dma_cfg.dst_info.data_width,
+				       false) == NULL)
+			goto err;
 
-		err = d40_phy_fill_lli(d40d->lli_phy.dst,
+		if (d40_phy_buf_to_lli(d40d->lli_phy.dst,
 				       dst,
 				       size,
 				       d40c->dma_cfg.dst_info.psize,
@@ -1901,10 +1998,9 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 				       d40c->dst_def_cfg,
 				       true,
 				       d40c->dma_cfg.dst_info.data_width,
-				       false);
-
-		if (err)
-			goto err_fill_lli;
+				       d40c->dma_cfg.src_info.data_width,
+				       false) == NULL)
+			goto err;
 
 		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
 				      d40d->lli_pool.size, DMA_TO_DEVICE);
@@ -1913,9 +2009,6 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
 	spin_unlock_irqrestore(&d40c->lock, flags);
 	return &d40d->txd;
 
-err_fill_lli:
-	dev_err(&d40c->chan.dev->device,
-		"[%s] Failed filling in PHY LLI\n", __func__);
 err:
 	if (d40d)
 		d40_desc_free(d40c, d40d);
@@ -1945,13 +2038,21 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d,
 	dma_addr_t dev_addr = 0;
 	int total_size;
 
-	if (d40_pool_lli_alloc(d40d, sg_len, true) < 0) {
+	d40d->lli_len = d40_sg_2_dmalen(sgl, sg_len,
+					d40c->dma_cfg.src_info.data_width,
+					d40c->dma_cfg.dst_info.data_width);
+	if (d40d->lli_len < 0) {
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Unaligned size\n", __func__);
+		return -EINVAL;
+	}
+
+	if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
 		dev_err(&d40c->chan.dev->device,
 			"[%s] Out of memory\n", __func__);
 		return -ENOMEM;
 	}
 
-	d40d->lli_len = sg_len;
 	d40d->lli_current = 0;
 
 	if (direction == DMA_FROM_DEVICE)
@@ -1993,13 +2094,21 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
 	dma_addr_t dst_dev_addr;
 	int res;
 
-	if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) {
+	d40d->lli_len = d40_sg_2_dmalen(sgl, sgl_len,
+					d40c->dma_cfg.src_info.data_width,
+					d40c->dma_cfg.dst_info.data_width);
+	if (d40d->lli_len < 0) {
+		dev_err(&d40c->chan.dev->device,
+			"[%s] Unaligned size\n", __func__);
+		return -EINVAL;
+	}
+
+	if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
 		dev_err(&d40c->chan.dev->device,
 			"[%s] Out of memory\n", __func__);
 		return -ENOMEM;
 	}
 
-	d40d->lli_len = sgl_len;
 	d40d->lli_current = 0;
 
 	if (direction == DMA_FROM_DEVICE) {
@@ -2024,6 +2133,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
 				virt_to_phys(d40d->lli_phy.src),
 				d40c->src_def_cfg,
 				d40c->dma_cfg.src_info.data_width,
+				d40c->dma_cfg.dst_info.data_width,
 				d40c->dma_cfg.src_info.psize);
 	if (res < 0)
 		return res;
@@ -2035,6 +2145,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
 				virt_to_phys(d40d->lli_phy.dst),
 				d40c->dst_def_cfg,
 				d40c->dma_cfg.dst_info.data_width,
+				d40c->dma_cfg.src_info.data_width,
 				d40c->dma_cfg.dst_info.psize);
 	if (res < 0)
 		return res;
@@ -2244,6 +2355,8 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 			psize = STEDMA40_PSIZE_PHY_8;
 		else if (config_maxburst >= 4)
 			psize = STEDMA40_PSIZE_PHY_4;
+		else if (config_maxburst >= 2)
+			psize = STEDMA40_PSIZE_PHY_2;
 		else
 			psize = STEDMA40_PSIZE_PHY_1;
 	}
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
index 8557cb88b255..0b096a38322d 100644
--- a/drivers/dma/ste_dma40_ll.c
+++ b/drivers/dma/ste_dma40_ll.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson SA 2007-2010
- * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
  * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
  * License terms: GNU General Public License (GPL) version 2
  */
@@ -122,15 +122,15 @@ void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
 	*dst_cfg = dst;
 }
 
-int d40_phy_fill_lli(struct d40_phy_lli *lli,
-		     dma_addr_t data,
-		     u32 data_size,
-		     int psize,
-		     dma_addr_t next_lli,
-		     u32 reg_cfg,
-		     bool term_int,
-		     u32 data_width,
-		     bool is_device)
+static int d40_phy_fill_lli(struct d40_phy_lli *lli,
+			    dma_addr_t data,
+			    u32 data_size,
+			    int psize,
+			    dma_addr_t next_lli,
+			    u32 reg_cfg,
+			    bool term_int,
+			    u32 data_width,
+			    bool is_device)
 {
 	int num_elems;
 
@@ -139,13 +139,6 @@ int d40_phy_fill_lli(struct d40_phy_lli *lli,
 	else
 		num_elems = 2 << psize;
 
-	/*
-	 * Size is 16bit. data_width is 8, 16, 32 or 64 bit
-	 * Block large than 64 KiB must be split.
-	 */
-	if (data_size > (0xffff << data_width))
-		return -EINVAL;
-
 	/* Must be aligned */
 	if (!IS_ALIGNED(data, 0x1 << data_width))
 		return -EINVAL;
@@ -187,55 +180,118 @@ int d40_phy_fill_lli(struct d40_phy_lli *lli,
 	return 0;
 }
 
+static int d40_seg_size(int size, int data_width1, int data_width2)
+{
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= (1 << max_w);
+
+	if (size <= seg_max)
+		return size;
+
+	if (size <= 2 * seg_max)
+		return ALIGN(size / 2, 1 << max_w);
+
+	return seg_max;
+}
+
+struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli,
+				       dma_addr_t addr,
+				       u32 size,
+				       int psize,
+				       dma_addr_t lli_phys,
+				       u32 reg_cfg,
+				       bool term_int,
+				       u32 data_width1,
+				       u32 data_width2,
+				       bool is_device)
+{
+	int err;
+	dma_addr_t next = lli_phys;
+	int size_rest = size;
+	int size_seg = 0;
+
+	do {
+		size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+		size_rest -= size_seg;
+
+		if (term_int && size_rest == 0)
+			next = 0;
+		else
+			next = ALIGN(next + sizeof(struct d40_phy_lli),
+				     D40_LLI_ALIGN);
+
+		err = d40_phy_fill_lli(lli,
+				       addr,
+				       size_seg,
+				       psize,
+				       next,
+				       reg_cfg,
+				       !next,
+				       data_width1,
+				       is_device);
+
+		if (err)
+			goto err;
+
+		lli++;
+		if (!is_device)
+			addr += size_seg;
+	} while (size_rest);
+
+	return lli;
+
+ err:
+	return NULL;
+}
+
 int d40_phy_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
 		      dma_addr_t target,
-		      struct d40_phy_lli *lli,
+		      struct d40_phy_lli *lli_sg,
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
-		      u32 data_width,
+		      u32 data_width1,
+		      u32 data_width2,
 		      int psize)
 {
 	int total_size = 0;
 	int i;
 	struct scatterlist *current_sg = sg;
-	dma_addr_t next_lli_phys;
 	dma_addr_t dst;
-	int err = 0;
+	struct d40_phy_lli *lli = lli_sg;
+	dma_addr_t l_phys = lli_phys;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
 
 		total_size += sg_dma_len(current_sg);
 
-		/* If this scatter list entry is the last one, no next link */
-		if (sg_len - 1 == i)
-			next_lli_phys = 0;
-		else
-			next_lli_phys = ALIGN(lli_phys + (i + 1) *
-					      sizeof(struct d40_phy_lli),
-					      D40_LLI_ALIGN);
-
 		if (target)
 			dst = target;
 		else
 			dst = sg_phys(current_sg);
 
-		err = d40_phy_fill_lli(&lli[i],
-				       dst,
-				       sg_dma_len(current_sg),
-				       psize,
-				       next_lli_phys,
-				       reg_cfg,
-				       !next_lli_phys,
-				       data_width,
-				       target == dst);
-		if (err)
-			goto err;
+		l_phys = ALIGN(lli_phys + (lli - lli_sg) *
+			       sizeof(struct d40_phy_lli), D40_LLI_ALIGN);
+
+		lli = d40_phy_buf_to_lli(lli,
+					 dst,
+					 sg_dma_len(current_sg),
+					 psize,
+					 l_phys,
+					 reg_cfg,
+					 sg_len - 1 == i,
+					 data_width1,
+					 data_width2,
+					 target == dst);
+		if (lli == NULL)
+			return -EINVAL;
 	}
 
 	return total_size;
-err:
-	return err;
 }
 
 
@@ -315,17 +371,20 @@ void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
 	writel(lli_dst->lcsp13, &lcla[1].lcsp13);
 }
 
-void d40_log_fill_lli(struct d40_log_lli *lli,
-		      dma_addr_t data, u32 data_size,
-		      u32 reg_cfg,
-		      u32 data_width,
-		      bool addr_inc)
+static void d40_log_fill_lli(struct d40_log_lli *lli,
+			     dma_addr_t data, u32 data_size,
+			     u32 reg_cfg,
+			     u32 data_width,
+			     bool addr_inc)
 {
 	lli->lcsp13 = reg_cfg;
 
 	/* The number of elements to transfer */
 	lli->lcsp02 = ((data_size >> data_width) <<
 		       D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK;
+
+	BUG_ON((data_size >> data_width) > STEDMA40_MAX_SEG_SIZE);
+
 	/* 16 LSBs address of the current element */
 	lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK;
 	/* 16 MSBs address of the current element */
@@ -348,55 +407,94 @@ int d40_log_sg_to_dev(struct scatterlist *sg,
 	int total_size = 0;
 	struct scatterlist *current_sg = sg;
 	int i;
+	struct d40_log_lli *lli_src = lli->src;
+	struct d40_log_lli *lli_dst = lli->dst;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
 		total_size += sg_dma_len(current_sg);
 
 		if (direction == DMA_TO_DEVICE) {
-			d40_log_fill_lli(&lli->src[i],
-					 sg_phys(current_sg),
-					 sg_dma_len(current_sg),
-					 lcsp->lcsp1, src_data_width,
-					 true);
-			d40_log_fill_lli(&lli->dst[i],
-					 dev_addr,
-					 sg_dma_len(current_sg),
-					 lcsp->lcsp3, dst_data_width,
-					 false);
+			lli_src =
+				d40_log_buf_to_lli(lli_src,
+						   sg_phys(current_sg),
+						   sg_dma_len(current_sg),
+						   lcsp->lcsp1, src_data_width,
+						   dst_data_width,
+						   true);
+			lli_dst =
+				d40_log_buf_to_lli(lli_dst,
+						   dev_addr,
+						   sg_dma_len(current_sg),
+						   lcsp->lcsp3, dst_data_width,
+						   src_data_width,
+						   false);
 		} else {
-			d40_log_fill_lli(&lli->dst[i],
-					 sg_phys(current_sg),
-					 sg_dma_len(current_sg),
-					 lcsp->lcsp3, dst_data_width,
-					 true);
-			d40_log_fill_lli(&lli->src[i],
-					 dev_addr,
-					 sg_dma_len(current_sg),
-					 lcsp->lcsp1, src_data_width,
-					 false);
+			lli_dst =
+				d40_log_buf_to_lli(lli_dst,
+						   sg_phys(current_sg),
+						   sg_dma_len(current_sg),
+						   lcsp->lcsp3, dst_data_width,
+						   src_data_width,
+						   true);
+			lli_src =
+				d40_log_buf_to_lli(lli_src,
+						   dev_addr,
+						   sg_dma_len(current_sg),
+						   lcsp->lcsp1, src_data_width,
+						   dst_data_width,
+						   false);
 		}
 	}
 	return total_size;
 }
 
+struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+				       dma_addr_t addr,
+				       int size,
+				       u32 lcsp13, /* src or dst*/
+				       u32 data_width1,
+				       u32 data_width2,
+				       bool addr_inc)
+{
+	struct d40_log_lli *lli = lli_sg;
+	int size_rest = size;
+	int size_seg = 0;
+
+	do {
+		size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+		size_rest -= size_seg;
+
+		d40_log_fill_lli(lli,
+				 addr,
+				 size_seg,
+				 lcsp13, data_width1,
+				 addr_inc);
+		if (addr_inc)
+			addr += size_seg;
+		lli++;
+	} while (size_rest);
+
+	return lli;
+}
+
 int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
-		      u32 data_width)
+		      u32 data_width1, u32 data_width2)
 {
 	int total_size = 0;
 	struct scatterlist *current_sg = sg;
 	int i;
+	struct d40_log_lli *lli = lli_sg;
 
 	for_each_sg(sg, current_sg, sg_len, i) {
 		total_size += sg_dma_len(current_sg);
-
-		d40_log_fill_lli(&lli_sg[i],
-				 sg_phys(current_sg),
-				 sg_dma_len(current_sg),
-				 lcsp13, data_width,
-				 true);
+		lli = d40_log_buf_to_lli(lli,
+					 sg_phys(current_sg),
+					 sg_dma_len(current_sg),
+					 lcsp13,
+					 data_width1, data_width2, true);
 	}
 	return total_size;
 }
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
index 9e419b907544..9cc43495bea2 100644
--- a/drivers/dma/ste_dma40_ll.h
+++ b/drivers/dma/ste_dma40_ll.h
@@ -292,18 +292,20 @@ int d40_phy_sg_to_lli(struct scatterlist *sg,
 		      struct d40_phy_lli *lli,
 		      dma_addr_t lli_phys,
 		      u32 reg_cfg,
-		      u32 data_width,
+		      u32 data_width1,
+		      u32 data_width2,
 		      int psize);
 
-int d40_phy_fill_lli(struct d40_phy_lli *lli,
-		     dma_addr_t data,
-		     u32 data_size,
-		     int psize,
-		     dma_addr_t next_lli,
-		     u32 reg_cfg,
-		     bool term_int,
-		     u32 data_width,
-		     bool is_device);
+struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli,
+				       dma_addr_t data,
+				       u32 data_size,
+				       int psize,
+				       dma_addr_t next_lli,
+				       u32 reg_cfg,
+				       bool term_int,
+				       u32 data_width1,
+				       u32 data_width2,
+				       bool is_device);
 
 void d40_phy_lli_write(void __iomem *virtbase,
 		       u32 phy_chan_num,
@@ -312,12 +314,12 @@ void d40_phy_lli_write(void __iomem *virtbase,
 
 /* Logical channels */
 
-void d40_log_fill_lli(struct d40_log_lli *lli,
-		      dma_addr_t data,
-		      u32 data_size,
-		      u32 reg_cfg,
-		      u32 data_width,
-		      bool addr_inc);
+struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+				       dma_addr_t addr,
+				       int size,
+				       u32 lcsp13, /* src or dst*/
+				       u32 data_width1, u32 data_width2,
+				       bool addr_inc);
 
 int d40_log_sg_to_dev(struct scatterlist *sg,
 		      int sg_len,
@@ -332,7 +334,7 @@ int d40_log_sg_to_lli(struct scatterlist *sg,
 		      int sg_len,
 		      struct d40_log_lli *lli_sg,
 		      u32 lcsp13, /* src or dst*/
-		      u32 data_width);
+		      u32 data_width1, u32 data_width2);
 
 void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
 			    struct d40_log_lli *lli_dst,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 46e32573b3a3..01bffc4412d2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -160,6 +160,7 @@ enum nouveau_flags {
 #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
 #define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
 #define NVOBJ_FLAG_VM			(1 << 3)
+#define NVOBJ_FLAG_VM_USER		(1 << 4)
 
 #define NVOBJ_CINST_GLOBAL	0xdeadbeef
 
@@ -1576,6 +1577,20 @@ nv_match_device(struct drm_device *dev, unsigned device,
 		dev->pdev->subsystem_device == sub_device;
 }
 
+/* returns 1 if device is one of the nv4x using the 0x4497 object class,
+ * helpful to determine a number of other hardware features
+ */
+static inline int
+nv44_graph_class(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	if ((dev_priv->chipset & 0xf0) == 0x60)
+		return 1;
+
+	return !(0x0baf & (1 << (dev_priv->chipset & 0x0f)));
+}
+
 /* memory type/access flags, do not match hardware values */
 #define NV_MEM_ACCESS_RO  1
 #define NV_MEM_ACCESS_WO  2
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 6d56a54b6e2e..60769d2f9a66 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -352,8 +352,8 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 			      FBINFO_HWACCEL_IMAGEBLIT;
 	info->flags |= FBINFO_CAN_FORCE_OUTPUT;
 	info->fbops = &nouveau_fbcon_sw_ops;
-	info->fix.smem_start = dev->mode_config.fb_base +
-			       (nvbo->bo.mem.start << PAGE_SHIFT);
+	info->fix.smem_start = nvbo->bo.mem.bus.base +
+			       nvbo->bo.mem.bus.offset;
 	info->fix.smem_len = size;
 
 	info->screen_base = nvbo_kmap_obj_iovirtual(nouveau_fb->nvbo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 69044eb104bb..26347b7cd872 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -742,30 +742,24 @@ nouveau_vram_manager_debug(struct ttm_mem_type_manager *man, const char *prefix)
 {
 	struct nouveau_mm *mm = man->priv;
 	struct nouveau_mm_node *r;
-	u64 total = 0, ttotal[3] = {}, tused[3] = {}, tfree[3] = {};
-	int i;
+	u32 total = 0, free = 0;
 
 	mutex_lock(&mm->mutex);
 	list_for_each_entry(r, &mm->nodes, nl_entry) {
-		printk(KERN_DEBUG "%s %s-%d: 0x%010llx 0x%010llx\n",
-		       prefix, r->free ? "free" : "used", r->type,
-		       ((u64)r->offset << 12),
+		printk(KERN_DEBUG "%s %d: 0x%010llx 0x%010llx\n",
+		       prefix, r->type, ((u64)r->offset << 12),
 		       (((u64)r->offset + r->length) << 12));
+
 		total += r->length;
-		ttotal[r->type] += r->length;
-		if (r->free)
-			tfree[r->type] += r->length;
-		else
-			tused[r->type] += r->length;
+		if (!r->type)
+			free += r->length;
 	}
 	mutex_unlock(&mm->mutex);
 
-	printk(KERN_DEBUG "%s  total: 0x%010llx\n", prefix, total << 12);
-	for (i = 0; i < 3; i++) {
-		printk(KERN_DEBUG "%s type %d: 0x%010llx, "
-				  "used 0x%010llx, free 0x%010llx\n", prefix,
-		       i, ttotal[i] << 12, tused[i] << 12, tfree[i] << 12);
-	}
+	printk(KERN_DEBUG "%s  total: 0x%010llx free: 0x%010llx\n",
+	       prefix, (u64)total << 12, (u64)free << 12);
+	printk(KERN_DEBUG "%s  block: 0x%08x\n",
+	       prefix, mm->block_size << 12);
 }
 
 const struct ttm_mem_type_manager_func nouveau_vram_manager = {
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index cdbb11eb701b..8844b50c3e54 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -48,175 +48,76 @@ region_split(struct nouveau_mm *rmm, struct nouveau_mm_node *a, u32 size)
 
 	b->offset = a->offset;
 	b->length = size;
-	b->free   = a->free;
 	b->type   = a->type;
 	a->offset += size;
 	a->length -= size;
 	list_add_tail(&b->nl_entry, &a->nl_entry);
-	if (b->free)
+	if (b->type == 0)
 		list_add_tail(&b->fl_entry, &a->fl_entry);
 	return b;
 }
 
-static struct nouveau_mm_node *
-nouveau_mm_merge(struct nouveau_mm *rmm, struct nouveau_mm_node *this)
-{
-	struct nouveau_mm_node *prev, *next;
-
-	/* try to merge with free adjacent entries of same type */
-	prev = list_entry(this->nl_entry.prev, struct nouveau_mm_node, nl_entry);
-	if (this->nl_entry.prev != &rmm->nodes) {
-		if (prev->free && prev->type == this->type) {
-			prev->length += this->length;
-			region_put(rmm, this);
-			this = prev;
-		}
-	}
-
-	next = list_entry(this->nl_entry.next, struct nouveau_mm_node, nl_entry);
-	if (this->nl_entry.next != &rmm->nodes) {
-		if (next->free && next->type == this->type) {
-			next->offset  = this->offset;
-			next->length += this->length;
-			region_put(rmm, this);
-			this = next;
-		}
-	}
-
-	return this;
-}
+#define node(root, dir) ((root)->nl_entry.dir == &rmm->nodes) ? NULL : \
+	list_entry((root)->nl_entry.dir, struct nouveau_mm_node, nl_entry)
 
 void
 nouveau_mm_put(struct nouveau_mm *rmm, struct nouveau_mm_node *this)
 {
-	u32 block_s, block_l;
+	struct nouveau_mm_node *prev = node(this, prev);
+	struct nouveau_mm_node *next = node(this, next);
 
-	this->free = true;
 	list_add(&this->fl_entry, &rmm->free);
-	this = nouveau_mm_merge(rmm, this);
-
-	/* any entirely free blocks now?  we'll want to remove typing
-	 * on them now so they can be use for any memory allocation
-	 */
-	block_s = roundup(this->offset, rmm->block_size);
-	if (block_s + rmm->block_size > this->offset + this->length)
-		return;
+	this->type = 0;
 
-	/* split off any still-typed region at the start */
-	if (block_s != this->offset) {
-		if (!region_split(rmm, this, block_s - this->offset))
-			return;
+	if (prev && prev->type == 0) {
+		prev->length += this->length;
+		region_put(rmm, this);
+		this = prev;
 	}
 
-	/* split off the soon-to-be-untyped block(s) */
-	block_l = rounddown(this->length, rmm->block_size);
-	if (block_l != this->length) {
-		this = region_split(rmm, this, block_l);
-		if (!this)
-			return;
+	if (next && next->type == 0) {
+		next->offset  = this->offset;
+		next->length += this->length;
+		region_put(rmm, this);
 	}
-
-	/* mark as having no type, and retry merge with any adjacent
-	 * untyped blocks
-	 */
-	this->type = 0;
-	nouveau_mm_merge(rmm, this);
 }
 
 int
 nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
 	       u32 align, struct nouveau_mm_node **pnode)
 {
-	struct nouveau_mm_node *this, *tmp, *next;
-	u32 splitoff, avail, alloc;
-
-	list_for_each_entry_safe(this, tmp, &rmm->free, fl_entry) {
-		next = list_entry(this->nl_entry.next, struct nouveau_mm_node, nl_entry);
-		if (this->nl_entry.next == &rmm->nodes)
-			next = NULL;
-
-		/* skip wrongly typed blocks */
-		if (this->type && this->type != type)
+	struct nouveau_mm_node *prev, *this, *next;
+	u32 min = size_nc ? size_nc : size;
+	u32 align_mask = align - 1;
+	u32 splitoff;
+	u32 s, e;
+
+	list_for_each_entry(this, &rmm->free, fl_entry) {
+		e = this->offset + this->length;
+		s = this->offset;
+
+		prev = node(this, prev);
+		if (prev && prev->type != type)
+			s = roundup(s, rmm->block_size);
+
+		next = node(this, next);
+		if (next && next->type != type)
+			e = rounddown(e, rmm->block_size);
+
+		s  = (s + align_mask) & ~align_mask;
+		e &= ~align_mask;
+		if (s > e || e - s < min)
 			continue;
 
-		/* account for alignment */
-		splitoff = this->offset & (align - 1);
-		if (splitoff)
-			splitoff = align - splitoff;
-
-		if (this->length <= splitoff)
-			continue;
-
-		/* determine total memory available from this, and
-		 * the next block (if appropriate)
-		 */
-		avail = this->length;
-		if (next && next->free && (!next->type || next->type == type))
-			avail += next->length;
-
-		avail -= splitoff;
-
-		/* determine allocation size */
-		if (size_nc) {
-			alloc = min(avail, size);
-			alloc = rounddown(alloc, size_nc);
-			if (alloc == 0)
-				continue;
-		} else {
-			alloc = size;
-			if (avail < alloc)
-				continue;
-		}
-
-		/* untyped block, split off a chunk that's a multiple
-		 * of block_size and type it
-		 */
-		if (!this->type) {
-			u32 block = roundup(alloc + splitoff, rmm->block_size);
-			if (this->length < block)
-				continue;
-
-			this = region_split(rmm, this, block);
-			if (!this)
-				return -ENOMEM;
-
-			this->type = type;
-		}
-
-		/* stealing memory from adjacent block */
-		if (alloc > this->length) {
-			u32 amount = alloc - (this->length - splitoff);
-
-			if (!next->type) {
-				amount = roundup(amount, rmm->block_size);
-
-				next = region_split(rmm, next, amount);
-				if (!next)
-					return -ENOMEM;
-
-				next->type = type;
-			}
-
-			this->length += amount;
-			next->offset += amount;
-			next->length -= amount;
-			if (!next->length) {
-				list_del(&next->nl_entry);
-				list_del(&next->fl_entry);
-				kfree(next);
-			}
-		}
-
-		if (splitoff) {
-			if (!region_split(rmm, this, splitoff))
-				return -ENOMEM;
-		}
+		splitoff = s - this->offset;
+		if (splitoff && !region_split(rmm, this, splitoff))
+			return -ENOMEM;
 
-		this = region_split(rmm, this, alloc);
-		if (this == NULL)
+		this = region_split(rmm, this, min(size, e - s));
+		if (!this)
 			return -ENOMEM;
 
-		this->free = false;
+		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
 		return 0;
@@ -234,7 +135,6 @@ nouveau_mm_init(struct nouveau_mm **prmm, u32 offset, u32 length, u32 block)
 	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
 	if (!heap)
 		return -ENOMEM;
-	heap->free = true;
 	heap->offset = roundup(offset, block);
 	heap->length = rounddown(offset + length, block) - heap->offset;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h
index af3844933036..798eaf39691c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.h
@@ -30,9 +30,7 @@ struct nouveau_mm_node {
 	struct list_head fl_entry;
 	struct list_head rl_entry;
 
-	bool free;
-	int  type;
-
+	u8  type;
 	u32 offset;
 	u32 length;
 };
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 19ef92a0375a..8870d72388c8 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -451,8 +451,7 @@ nv40_graph_register(struct drm_device *dev)
 	NVOBJ_CLASS(dev, 0x309e, GR); /* swzsurf */
 
 	/* curie */
-	if (dev_priv->chipset >= 0x60 ||
-	    0x00005450 & (1 << (dev_priv->chipset & 0x0f)))
+	if (nv44_graph_class(dev))
 		NVOBJ_CLASS(dev, 0x4497, GR);
 	else
 		NVOBJ_CLASS(dev, 0x4097, GR);
diff --git a/drivers/gpu/drm/nouveau/nv40_grctx.c b/drivers/gpu/drm/nouveau/nv40_grctx.c
index ce585093264e..f70447d131d7 100644
--- a/drivers/gpu/drm/nouveau/nv40_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv40_grctx.c
@@ -118,17 +118,6 @@
  */
 
 static int
-nv40_graph_4097(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if ((dev_priv->chipset & 0xf0) == 0x60)
-		return 0;
-
-	return !!(0x0baf & (1 << dev_priv->chipset));
-}
-
-static int
 nv40_graph_vs_count(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -219,7 +208,7 @@ nv40_graph_construct_general(struct nouveau_grctx *ctx)
 		gr_def(ctx, 0x4009dc, 0x80000000);
 	} else {
 		cp_ctx(ctx, 0x400840, 20);
-		if (!nv40_graph_4097(ctx->dev)) {
+		if (nv44_graph_class(ctx->dev)) {
 			for (i = 0; i < 8; i++)
 				gr_def(ctx, 0x400860 + (i * 4), 0x00000001);
 		}
@@ -228,7 +217,7 @@ nv40_graph_construct_general(struct nouveau_grctx *ctx)
 		gr_def(ctx, 0x400888, 0x00000040);
 		cp_ctx(ctx, 0x400894, 11);
 		gr_def(ctx, 0x400894, 0x00000040);
-		if (nv40_graph_4097(ctx->dev)) {
+		if (!nv44_graph_class(ctx->dev)) {
 			for (i = 0; i < 8; i++)
 				gr_def(ctx, 0x4008a0 + (i * 4), 0x80000000);
 		}
@@ -546,7 +535,7 @@ nv40_graph_construct_state3d_2(struct nouveau_grctx *ctx)
 static void
 nv40_graph_construct_state3d_3(struct nouveau_grctx *ctx)
 {
-	int len = nv40_graph_4097(ctx->dev) ? 0x0684 : 0x0084;
+	int len = nv44_graph_class(ctx->dev) ? 0x0084 : 0x0684;
 
 	cp_out (ctx, 0x300000);
 	cp_lsr (ctx, len - 4);
@@ -582,11 +571,11 @@ nv40_graph_construct_shader(struct nouveau_grctx *ctx)
 	} else {
 		b0_offset = 0x1d40/4; /* 2200 */
 		b1_offset = 0x3f40/4; /* 0b00 : 0a40 */
-		vs_len = nv40_graph_4097(dev) ? 0x4a40/4 : 0x4980/4;
+		vs_len = nv44_graph_class(dev) ? 0x4980/4 : 0x4a40/4;
 	}
 
 	cp_lsr(ctx, vs_len * vs_nr + 0x300/4);
-	cp_out(ctx, nv40_graph_4097(dev) ? 0x800041 : 0x800029);
+	cp_out(ctx, nv44_graph_class(dev) ? 0x800029 : 0x800041);
 
 	offset = ctx->ctxvals_pos;
 	ctx->ctxvals_pos += (0x0300/4 + (vs_nr * vs_len));
diff --git a/drivers/gpu/drm/nouveau/nv40_mc.c b/drivers/gpu/drm/nouveau/nv40_mc.c
index e4e72c12ab6a..03c0d4c3f355 100644
--- a/drivers/gpu/drm/nouveau/nv40_mc.c
+++ b/drivers/gpu/drm/nouveau/nv40_mc.c
@@ -6,27 +6,17 @@
 int
 nv40_mc_init(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
-
 	/* Power up everything, resetting each individual unit will
 	 * be done later if needed.
 	 */
 	nv_wr32(dev, NV03_PMC_ENABLE, 0xFFFFFFFF);
 
-	switch (dev_priv->chipset) {
-	case 0x44:
-	case 0x46: /* G72 */
-	case 0x4e:
-	case 0x4c: /* C51_G7X */
-		tmp = nv_rd32(dev, NV04_PFB_FIFO_DATA);
+	if (nv44_graph_class(dev)) {
+		u32 tmp = nv_rd32(dev, NV04_PFB_FIFO_DATA);
 		nv_wr32(dev, NV40_PMC_1700, tmp);
 		nv_wr32(dev, NV40_PMC_1704, 0);
 		nv_wr32(dev, NV40_PMC_1708, 0);
 		nv_wr32(dev, NV40_PMC_170C, tmp);
-		break;
-	default:
-		break;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index 2e1b1cd19a4b..ea0041810ae3 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -332,8 +332,11 @@ nv50_instmem_get(struct nouveau_gpuobj *gpuobj, u32 size, u32 align)
 	gpuobj->vinst = node->vram->offset;
 
 	if (gpuobj->flags & NVOBJ_FLAG_VM) {
-		ret = nouveau_vm_get(dev_priv->chan_vm, size, 12,
-				     NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS,
+		u32 flags = NV_MEM_ACCESS_RW;
+		if (!(gpuobj->flags & NVOBJ_FLAG_VM_USER))
+			flags |= NV_MEM_ACCESS_SYS;
+
+		ret = nouveau_vm_get(dev_priv->chan_vm, size, 12, flags,
 				     &node->chan_vma);
 		if (ret) {
 			vram->put(dev, &node->vram);
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index 5feacd5d5fa4..e6ea7d83187f 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -105,7 +105,8 @@ nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
-	ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, NVOBJ_FLAG_VM,
+	ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096,
+				 NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER,
 				 &grch->unk418810);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nvc0_vm.c b/drivers/gpu/drm/nouveau/nvc0_vm.c
index 4b9251bb0ff4..e4e83c2caf5b 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vm.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vm.c
@@ -48,8 +48,8 @@ nvc0_vm_addr(struct nouveau_vma *vma, u64 phys, u32 memtype, u32 target)
 	phys >>= 8;
 
 	phys |= 0x00000001; /* present */
-//	if (vma->access & NV_MEM_ACCESS_SYS)
-//		phys |= 0x00000002;
+	if (vma->access & NV_MEM_ACCESS_SYS)
+		phys |= 0x00000002;
 
 	phys |= ((u64)target  << 32);
 	phys |= ((u64)memtype << 36);
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 7fe8ebdcdc0e..a8973acb3987 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3002,31 +3002,6 @@ int evergreen_copy_blit(struct radeon_device *rdev,
 	return 0;
 }
 
-static bool evergreen_card_posted(struct radeon_device *rdev)
-{
-	u32 reg;
-
-	/* first check CRTCs */
-	if (rdev->flags & RADEON_IS_IGP)
-		reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET);
-	else
-		reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) |
-			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET);
-	if (reg & EVERGREEN_CRTC_MASTER_EN)
-		return true;
-
-	/* then check MEM_SIZE, in case the crtcs are off */
-	if (RREG32(CONFIG_MEMSIZE))
-		return true;
-
-	return false;
-}
-
 /* Plan is to move initialization in that function and use
  * helper function so that radeon_device_init pretty much
  * do nothing more than calling asic specific function. This
@@ -3063,7 +3038,7 @@ int evergreen_init(struct radeon_device *rdev)
 	if (radeon_asic_reset(rdev))
 		dev_warn(rdev->dev, "GPU reset failed !\n");
 	/* Post card if necessary */
-	if (!evergreen_card_posted(rdev)) {
+	if (!radeon_card_posted(rdev)) {
 		if (!rdev->bios) {
 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
@@ -3158,6 +3133,9 @@ static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, speed_cntl;
 
+	if (radeon_pcie_gen2 == 0)
+		return;
+
 	if (rdev->flags & RADEON_IS_IGP)
 		return;
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index f637595b14e1..46da5142b131 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2086,12 +2086,13 @@ int r100_asic_reset(struct radeon_device *rdev)
 {
 	struct r100_mc_save save;
 	u32 status, tmp;
+	int ret = 0;
 
-	r100_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(status)) {
 		return 0;
 	}
+	r100_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
 	/* stop CP */
@@ -2131,11 +2132,11 @@ int r100_asic_reset(struct radeon_device *rdev)
 		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
 		rdev->gpu_lockup = true;
-		return -1;
-	}
+		ret = -1;
+	} else
+		dev_info(rdev->dev, "GPU reset succeed\n");
 	r100_mc_resume(rdev, &save);
-	dev_info(rdev->dev, "GPU reset succeed\n");
-	return 0;
+	return ret;
 }
 
 void r100_set_common_regs(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index fae5e709f270..cf862ca580bf 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -405,12 +405,13 @@ int r300_asic_reset(struct radeon_device *rdev)
 {
 	struct r100_mc_save save;
 	u32 status, tmp;
+	int ret = 0;
 
-	r100_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(status)) {
 		return 0;
 	}
+	r100_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
 	/* stop CP */
@@ -451,11 +452,11 @@ int r300_asic_reset(struct radeon_device *rdev)
 	if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
 		rdev->gpu_lockup = true;
-		return -1;
-	}
+		ret = -1;
+	} else
+		dev_info(rdev->dev, "GPU reset succeed\n");
 	r100_mc_resume(rdev, &save);
-	dev_info(rdev->dev, "GPU reset succeed\n");
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 6b50716267c0..aca2236268fa 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2358,24 +2358,6 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
 	/* FIXME: implement */
 }
 
-
-bool r600_card_posted(struct radeon_device *rdev)
-{
-	uint32_t reg;
-
-	/* first check CRTCs */
-	reg = RREG32(D1CRTC_CONTROL) |
-		RREG32(D2CRTC_CONTROL);
-	if (reg & CRTC_EN)
-		return true;
-
-	/* then check MEM_SIZE, in case the crtcs are off */
-	if (RREG32(CONFIG_MEMSIZE))
-		return true;
-
-	return false;
-}
-
 int r600_startup(struct radeon_device *rdev)
 {
 	int r;
@@ -2536,7 +2518,7 @@ int r600_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	/* Post card if necessary */
-	if (!r600_card_posted(rdev)) {
+	if (!radeon_card_posted(rdev)) {
 		if (!rdev->bios) {
 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
@@ -3658,6 +3640,9 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev)
 	u32 link_width_cntl, lanes, speed_cntl, training_cntl, tmp;
 	u16 link_cntl2;
 
+	if (radeon_pcie_gen2 == 0)
+		return;
+
 	if (rdev->flags & RADEON_IS_IGP)
 		return;
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index e9486630a467..71d2a554bbe6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -92,6 +92,7 @@ extern int radeon_tv;
 extern int radeon_audio;
 extern int radeon_disp_priority;
 extern int radeon_hw_i2c;
+extern int radeon_pcie_gen2;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index be5cb4f28c29..d5680a0c87af 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -104,6 +104,7 @@ int radeon_tv = 1;
 int radeon_audio = 1;
 int radeon_disp_priority = 0;
 int radeon_hw_i2c = 0;
+int radeon_pcie_gen2 = 0;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -147,6 +148,9 @@ module_param_named(disp_priority, radeon_disp_priority, int, 0444);
 MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
 module_param_named(hw_i2c, radeon_hw_i2c, int, 0444);
 
+MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (1 = enable)");
+module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444);
+
 static int radeon_suspend(struct drm_device *dev, pm_message_t state)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen
index ac40fd39d787..9177f9191837 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/evergreen
+++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen
@@ -439,7 +439,7 @@ evergreen 0x9400
 0x000286EC SPI_COMPUTE_NUM_THREAD_X
 0x000286F0 SPI_COMPUTE_NUM_THREAD_Y
 0x000286F4 SPI_COMPUTE_NUM_THREAD_Z
-0x000286F8 GDS_ADDR_SIZE
+0x00028724 GDS_ADDR_SIZE
 0x00028780 CB_BLEND0_CONTROL
 0x00028784 CB_BLEND1_CONTROL
 0x00028788 CB_BLEND2_CONTROL
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index b4192acaab5f..5afe294ed51f 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -339,16 +339,16 @@ void rs600_bm_disable(struct radeon_device *rdev)
 
 int rs600_asic_reset(struct radeon_device *rdev)
 {
-	u32 status, tmp;
-
 	struct rv515_mc_save save;
+	u32 status, tmp;
+	int ret = 0;
 
-	/* Stops all mc clients */
-	rv515_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(status)) {
 		return 0;
 	}
+	/* Stops all mc clients */
+	rv515_mc_stop(rdev, &save);
 	status = RREG32(R_000E40_RBBM_STATUS);
 	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
 	/* stop CP */
@@ -392,11 +392,11 @@ int rs600_asic_reset(struct radeon_device *rdev)
 	if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
 		rdev->gpu_lockup = true;
-		return -1;
-	}
+		ret = -1;
+	} else
+		dev_info(rdev->dev, "GPU reset succeed\n");
 	rv515_mc_resume(rdev, &save);
-	dev_info(rdev->dev, "GPU reset succeed\n");
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 3a264aa3a79a..491dc9000655 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1268,7 +1268,7 @@ int rv770_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 	/* Post card if necessary */
-	if (!r600_card_posted(rdev)) {
+	if (!radeon_card_posted(rdev)) {
 		if (!rdev->bios) {
 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
@@ -1372,6 +1372,9 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 	u32 link_width_cntl, lanes, speed_cntl, tmp;
 	u16 link_cntl2;
 
+	if (radeon_pcie_gen2 == 0)
+		return;
+
 	if (rdev->flags & RADEON_IS_IGP)
 		return;
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 68883565b725..f9ba7d74dfc0 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -308,7 +308,7 @@ static void ib_cache_event(struct ib_event_handler *handler,
 			INIT_WORK(&work->work, ib_cache_task);
 			work->device   = event->device;
 			work->port_num = event->element.port_num;
-			schedule_work(&work->work);
+			queue_work(ib_wq, &work->work);
 		}
 	}
 }
@@ -368,7 +368,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)
 	int p;
 
 	ib_unregister_event_handler(&device->cache.event_handler);
-	flush_scheduled_work();
+	flush_workqueue(ib_wq);
 
 	for (p = 0; p <= end_port(device) - start_port(device); ++p) {
 		kfree(device->cache.pkey_cache[p]);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index a19effad0811..f793bf2f5da7 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -38,7 +38,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 
 #include "core_priv.h"
 
@@ -52,6 +51,9 @@ struct ib_client_data {
 	void *            data;
 };
 
+struct workqueue_struct *ib_wq;
+EXPORT_SYMBOL_GPL(ib_wq);
+
 static LIST_HEAD(device_list);
 static LIST_HEAD(client_list);
 
@@ -718,6 +720,10 @@ static int __init ib_core_init(void)
 {
 	int ret;
 
+	ib_wq = alloc_workqueue("infiniband", 0, 0);
+	if (!ib_wq)
+		return -ENOMEM;
+
 	ret = ib_sysfs_setup();
 	if (ret)
 		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
@@ -726,6 +732,7 @@ static int __init ib_core_init(void)
 	if (ret) {
 		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
 		ib_sysfs_cleanup();
+		destroy_workqueue(ib_wq);
 	}
 
 	return ret;
@@ -736,7 +743,7 @@ static void __exit ib_core_cleanup(void)
 	ib_cache_cleanup();
 	ib_sysfs_cleanup();
 	/* Make sure that any pending umem accounting work is done. */
-	flush_scheduled_work();
+	destroy_workqueue(ib_wq);
 }
 
 module_init(ib_core_init);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 91a660310b7c..e38be1bcc01c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -425,7 +425,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
 		port->sm_ah = NULL;
 		spin_unlock_irqrestore(&port->ah_lock, flags);
 
-		schedule_work(&sa_dev->port[event->element.port_num -
+		queue_work(ib_wq, &sa_dev->port[event->element.port_num -
 					    sa_dev->start_port].update_task);
 	}
 }
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 415e186eee32..b645e558876f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -262,7 +262,7 @@ void ib_umem_release(struct ib_umem *umem)
 			umem->mm   = mm;
 			umem->diff = diff;
 
-			schedule_work(&umem->work);
+			queue_work(ib_wq, &umem->work);
 			return;
 		}
 	} else
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index 85cfae4cad71..8c81992fa6db 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -459,13 +459,12 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 	     IB_DEVICE_MEM_WINDOW);
 
 	/* Allocate the qptr_array */
-	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
 	if (!c2dev->qptr_array) {
 		return -ENOMEM;
 	}
 
-	/* Inialize the qptr_array */
-	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	/* Initialize the qptr_array */
 	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
 	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
 	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 1596e3085344..1898d6e7cce5 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -222,15 +222,14 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
 	queue->small_page = NULL;
 
 	/* allocate queue page pointers */
-	queue->queue_pages = kmalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
+	queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), GFP_KERNEL);
 	if (!queue->queue_pages) {
-		queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+		queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
 		if (!queue->queue_pages) {
 			ehca_gen_err("Couldn't allocate queue page list");
 			return 0;
 		}
 	}
-	memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
 
 	/* allocate actual queue pages */
 	if (is_small) {
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index b33f0457a1ff..47db4bf34628 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -199,12 +199,11 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 		goto bail;
 	}
 
-	dd = vmalloc(sizeof(*dd));
+	dd = vzalloc(sizeof(*dd));
 	if (!dd) {
 		dd = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
-	memset(dd, 0, sizeof(*dd));
 	dd->ipath_unit = -1;
 
 	spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -756,7 +755,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 	 */
 	ipath_shutdown_device(dd);
 
-	flush_scheduled_work();
+	flush_workqueue(ib_wq);
 
 	if (dd->verbs_dev)
 		ipath_unregister_ib_device(dd->verbs_dev);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 9292a15ad7c4..6d4b29c4cd89 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1530,7 +1530,7 @@ static int init_subports(struct ipath_devdata *dd,
 	}
 
 	num_subports = uinfo->spu_subport_cnt;
-	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
+	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
 	if (!pd->subport_uregbase) {
 		ret = -ENOMEM;
 		goto bail;
@@ -1538,13 +1538,13 @@ static int init_subports(struct ipath_devdata *dd,
 	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
 	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
 		     sizeof(u32), PAGE_SIZE) * num_subports;
-	pd->subport_rcvhdr_base = vmalloc(size);
+	pd->subport_rcvhdr_base = vzalloc(size);
 	if (!pd->subport_rcvhdr_base) {
 		ret = -ENOMEM;
 		goto bail_ureg;
 	}
 
-	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+	pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
 					pd->port_rcvegrbuf_size *
 					num_subports);
 	if (!pd->subport_rcvegrbuf) {
@@ -1556,11 +1556,6 @@ static int init_subports(struct ipath_devdata *dd,
 	pd->port_subport_id = uinfo->spu_subport_id;
 	pd->active_slaves = 1;
 	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
-	memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
-	memset(pd->subport_rcvhdr_base, 0, size);
-	memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
-				         pd->port_rcvegrbuf_size *
-				         num_subports);
 	goto bail;
 
 bail_rhdr:
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 776938299e4c..fef0f4201257 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -442,7 +442,7 @@ static void init_shadow_tids(struct ipath_devdata *dd)
 	struct page **pages;
 	dma_addr_t *addrs;
 
-	pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+	pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
 			sizeof(struct page *));
 	if (!pages) {
 		ipath_dev_err(dd, "failed to allocate shadow page * "
@@ -461,9 +461,6 @@ static void init_shadow_tids(struct ipath_devdata *dd)
 		return;
 	}
 
-	memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-	       sizeof(struct page *));
-
 	dd->ipath_pageshadow = pages;
 	dd->ipath_physshadow = addrs;
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 5e86d73eba2a..bab9f74c0665 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -220,7 +220,7 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
 	work->mm = mm;
 	work->num_pages = num_pages;
 
-	schedule_work(&work->work);
+	queue_work(ib_wq, &work->work);
 	return;
 
 bail_mm:
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4c85224aeaa7..c7a6213c6996 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -623,8 +623,9 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 
-	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags &
-				    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
+	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
+				    !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+				    MLX4_PROTOCOL_IB);
 	if (err)
 		return err;
 
@@ -635,7 +636,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	return 0;
 
 err_add:
-	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw);
+	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB);
 	return err;
 }
 
@@ -665,7 +666,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	struct mlx4_ib_gid_entry *ge;
 
 	err = mlx4_multicast_detach(mdev->dev,
-				    &mqp->mqp, gid->raw);
+				    &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB);
 	if (err)
 		return err;
 
@@ -1005,7 +1006,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
 		goto err_pd;
 
-	ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
+				 PAGE_SIZE);
 	if (!ibdev->uar_map)
 		goto err_uar;
 	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 0aa0110e4b6c..e4a08c2819e4 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -146,7 +146,7 @@ static void poll_catas(unsigned long dev_ptr)
 
 void mthca_start_catas_poll(struct mthca_dev *dev)
 {
-	unsigned long addr;
+	phys_addr_t addr;
 
 	init_timer(&dev->catas_err.timer);
 	dev->catas_err.map  = NULL;
@@ -158,7 +158,8 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
 	dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
 	if (!dev->catas_err.map) {
 		mthca_warn(dev, "couldn't map catastrophic error region "
-			   "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+			   "at 0x%llx/0x%x\n", (unsigned long long) addr,
+			   dev->catas_err.size * 4);
 		return;
 	}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index f4ceecd9684b..7bfa2a164955 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -713,7 +713,7 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
 
 static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
 {
-	unsigned long addr;
+	phys_addr_t addr;
 	u16 max_off = 0;
 	int i;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8e8c728aff88..76785c653c13 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -653,7 +653,7 @@ static int mthca_map_reg(struct mthca_dev *dev,
 			 unsigned long offset, unsigned long size,
 			 void __iomem **map)
 {
-	unsigned long base = pci_resource_start(dev->pdev, 0);
+	phys_addr_t base = pci_resource_start(dev->pdev, 0);
 
 	*map = ioremap(base + offset, size);
 	if (!*map)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 5eee6665919a..8a40cd539ab1 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -790,7 +790,7 @@ static int mthca_setup_hca(struct mthca_dev *dev)
 		goto err_uar_table_free;
 	}
 
-	dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
 	if (!dev->kar) {
 		mthca_err(dev, "Couldn't map kernel access region, "
 			  "aborting.\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 065b20899876..44045c8846db 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -853,7 +853,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 
 int mthca_init_mr_table(struct mthca_dev *dev)
 {
-	unsigned long addr;
+	phys_addr_t addr;
 	int mpts, mtts, err, i;
 
 	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 0c9f0aa5d4ea..3b4ec3238ceb 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -144,6 +144,7 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
 	struct nes_device *nesdev;
 	struct net_device *netdev;
 	struct nes_vnic *nesvnic;
+	unsigned int is_bonded;
 
 	nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n",
 		  &ifa->ifa_address, &ifa->ifa_mask);
@@ -152,7 +153,8 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
 				nesdev, nesdev->netdev[0]->name);
 		netdev = nesdev->netdev[0];
 		nesvnic = netdev_priv(netdev);
-		if (netdev == event_netdev) {
+		is_bonded = (netdev->master == event_netdev);
+		if ((netdev == event_netdev) || is_bonded) {
 			if (nesvnic->rdma_enabled == 0) {
 				nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
 						" RDMA is not enabled.\n",
@@ -169,7 +171,10 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
 					nes_manage_arp_cache(netdev, netdev->dev_addr,
 							ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
 					nesvnic->local_ipaddr = 0;
-					return NOTIFY_OK;
+					if (is_bonded)
+						continue;
+					else
+						return NOTIFY_OK;
 					break;
 				case NETDEV_UP:
 					nes_debug(NES_DBG_NETDEV, "event:UP\n");
@@ -178,15 +183,24 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
 						nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
 						return NOTIFY_OK;
 					}
+					/* fall through */
+				case NETDEV_CHANGEADDR:
 					/* Add the address to the IP table */
-					nesvnic->local_ipaddr = ifa->ifa_address;
+					if (netdev->master)
+						nesvnic->local_ipaddr =
+							((struct in_device *)netdev->master->ip_ptr)->ifa_list->ifa_address;
+					else
+						nesvnic->local_ipaddr = ifa->ifa_address;
 
 					nes_write_indexed(nesdev,
 							NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
-							ntohl(ifa->ifa_address));
+							ntohl(nesvnic->local_ipaddr));
 					nes_manage_arp_cache(netdev, netdev->dev_addr,
 							ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
-					return NOTIFY_OK;
+					if (is_bonded)
+						continue;
+					else
+						return NOTIFY_OK;
 					break;
 				default:
 					break;
@@ -660,6 +674,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
 	}
 	nes_notifiers_registered++;
 
+	INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
+
 	/* Initialize network devices */
 	if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL)
 		goto bail7;
@@ -742,6 +758,7 @@ static void __devexit nes_remove(struct pci_dev *pcidev)
 	struct nes_device *nesdev = pci_get_drvdata(pcidev);
 	struct net_device *netdev;
 	int netdev_index = 0;
+	unsigned long flags;
 
 		if (nesdev->netdev_count) {
 			netdev = nesdev->netdev[netdev_index];
@@ -768,6 +785,14 @@ static void __devexit nes_remove(struct pci_dev *pcidev)
 	free_irq(pcidev->irq, nesdev);
 	tasklet_kill(&nesdev->dpc_tasklet);
 
+	spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+	if (nesdev->link_recheck) {
+		spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+		cancel_delayed_work_sync(&nesdev->work);
+	} else {
+		spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+	}
+
 	/* Deallocate the Adapter Structure */
 	nes_destroy_adapter(nesdev->nesadapter);
 
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index b3d145e82b4c..6fe79876009e 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -268,6 +268,9 @@ struct nes_device {
 	u8                     napi_isr_ran;
 	u8                     disable_rx_flow_control;
 	u8                     disable_tx_flow_control;
+
+	struct delayed_work    work;
+	u8                     link_recheck;
 };
 
 
@@ -507,6 +510,7 @@ void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
 void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
 int nes_destroy_cqp(struct nes_device *);
 int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+void nes_recheck_link_status(struct work_struct *work);
 
 /* nes_nic.c */
 struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 25ad0f9944c0..009ec814d517 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1107,6 +1107,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 	struct flowi fl;
 	struct neighbour *neigh;
 	int rc = arpindex;
+	struct net_device *netdev;
 	struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
 
 	memset(&fl, 0, sizeof fl);
@@ -1117,7 +1118,12 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 		return rc;
 	}
 
-	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev);
+	if (nesvnic->netdev->master)
+		netdev = nesvnic->netdev->master;
+	else
+		netdev = nesvnic->netdev;
+
+	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev);
 	if (neigh) {
 		if (neigh->nud_state & NUD_VALID) {
 			nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 1980a461c499..8b606fd64022 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2608,6 +2608,13 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 						netif_start_queue(nesvnic->netdev);
 					nesvnic->linkup = 1;
 					netif_carrier_on(nesvnic->netdev);
+
+					spin_lock(&nesvnic->port_ibevent_lock);
+					if (nesdev->iw_status == 0) {
+						nesdev->iw_status = 1;
+						nes_port_ibevent(nesvnic);
+					}
+					spin_unlock(&nesvnic->port_ibevent_lock);
 				}
 			}
 		} else {
@@ -2633,9 +2640,23 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 						netif_stop_queue(nesvnic->netdev);
 					nesvnic->linkup = 0;
 					netif_carrier_off(nesvnic->netdev);
+
+					spin_lock(&nesvnic->port_ibevent_lock);
+					if (nesdev->iw_status == 1) {
+						nesdev->iw_status = 0;
+						nes_port_ibevent(nesvnic);
+					}
+					spin_unlock(&nesvnic->port_ibevent_lock);
 				}
 			}
 		}
+		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
+			if (nesdev->link_recheck)
+				cancel_delayed_work(&nesdev->work);
+			nesdev->link_recheck = 1;
+			schedule_delayed_work(&nesdev->work,
+					      NES_LINK_RECHECK_DELAY);
+		}
 	}
 
 	spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
@@ -2643,6 +2664,80 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 	nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
 }
 
+void nes_recheck_link_status(struct work_struct *work)
+{
+	unsigned long flags;
+	struct nes_device *nesdev = container_of(work, struct nes_device, work.work);
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_vnic *nesvnic;
+	u32 mac_index = nesdev->mac_index;
+	u16 phy_data;
+	u16 temp_phy_data;
+
+	spin_lock_irqsave(&nesadapter->phy_lock, flags);
+
+	/* check link status */
+	nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+	temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+	nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+	nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+	nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+	phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+	phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
+
+	nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+		__func__, phy_data,
+		nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+
+	if (phy_data & 0x0004) {
+		nesadapter->mac_link_down[mac_index] = 0;
+		list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+			if (nesvnic->linkup == 0) {
+				printk(PFX "The Link is now up for port %s, netdev %p.\n",
+						nesvnic->netdev->name, nesvnic->netdev);
+				if (netif_queue_stopped(nesvnic->netdev))
+					netif_start_queue(nesvnic->netdev);
+				nesvnic->linkup = 1;
+				netif_carrier_on(nesvnic->netdev);
+
+				spin_lock(&nesvnic->port_ibevent_lock);
+				if (nesdev->iw_status == 0) {
+					nesdev->iw_status = 1;
+					nes_port_ibevent(nesvnic);
+				}
+				spin_unlock(&nesvnic->port_ibevent_lock);
+			}
+		}
+
+	} else {
+		nesadapter->mac_link_down[mac_index] = 1;
+		list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+			if (nesvnic->linkup == 1) {
+				printk(PFX "The Link is now down for port %s, netdev %p.\n",
+						nesvnic->netdev->name, nesvnic->netdev);
+				if (!(netif_queue_stopped(nesvnic->netdev)))
+					netif_stop_queue(nesvnic->netdev);
+				nesvnic->linkup = 0;
+				netif_carrier_off(nesvnic->netdev);
+
+				spin_lock(&nesvnic->port_ibevent_lock);
+				if (nesdev->iw_status == 1) {
+					nesdev->iw_status = 0;
+					nes_port_ibevent(nesvnic);
+				}
+				spin_unlock(&nesvnic->port_ibevent_lock);
+			}
+		}
+	}
+	if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX)
+		schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
+	else
+		nesdev->link_recheck = 0;
+
+	spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+}
 
 
 static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 1204c3432b63..d2abe07133a5 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1193,6 +1193,8 @@ struct nes_listener {
 
 struct nes_ib_device;
 
+#define NES_EVENT_DELAY msecs_to_jiffies(100)
+
 struct nes_vnic {
 	struct nes_ib_device *nesibdev;
 	u64 sq_full;
@@ -1247,6 +1249,10 @@ struct nes_vnic {
 	u32 lro_max_aggr;
 	struct net_lro_mgr lro_mgr;
 	struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
+	struct timer_list event_timer;
+	enum ib_event_type delayed_event;
+	enum ib_event_type last_dispatched_event;
+	spinlock_t port_ibevent_lock;
 };
 
 struct nes_ib_device {
@@ -1348,6 +1354,10 @@ struct nes_terminate_hdr {
 #define BAD_FRAME_OFFSET	64
 #define CQE_MAJOR_DRV		0x8000
 
+/* Used for link status recheck after interrupt processing */
+#define NES_LINK_RECHECK_DELAY	msecs_to_jiffies(50)
+#define NES_LINK_RECHECK_MAX	60
+
 #define nes_vlan_rx vlan_hwaccel_receive_skb
 #define nes_netif_rx netif_receive_skb
 
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 5a4c36484722..2c9c1933bbe3 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -144,6 +144,7 @@ static int nes_netdev_open(struct net_device *netdev)
 	u32 nic_active_bit;
 	u32 nic_active;
 	struct list_head *list_pos, *list_temp;
+	unsigned long flags;
 
 	assert(nesdev != NULL);
 
@@ -233,18 +234,36 @@ static int nes_netdev_open(struct net_device *netdev)
 		first_nesvnic = nesvnic;
 	}
 
-	if (nesvnic->of_device_registered) {
-		nesdev->iw_status = 1;
-		nesdev->nesadapter->send_term_ok = 1;
-		nes_port_ibevent(nesvnic);
-	}
-
 	if (first_nesvnic->linkup) {
 		/* Enable network packets */
 		nesvnic->linkup = 1;
 		netif_start_queue(netdev);
 		netif_carrier_on(netdev);
 	}
+
+	spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+	if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
+		if (nesdev->link_recheck)
+			cancel_delayed_work(&nesdev->work);
+		nesdev->link_recheck = 1;
+		schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
+	}
+	spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+
+	spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
+	if (nesvnic->of_device_registered) {
+		nesdev->nesadapter->send_term_ok = 1;
+		if (nesvnic->linkup == 1) {
+			if (nesdev->iw_status == 0) {
+				nesdev->iw_status = 1;
+				nes_port_ibevent(nesvnic);
+			}
+		} else {
+			nesdev->iw_status = 0;
+		}
+	}
+	spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
+
 	napi_enable(&nesvnic->napi);
 	nesvnic->netdev_open = 1;
 
@@ -263,6 +282,7 @@ static int nes_netdev_stop(struct net_device *netdev)
 	u32 nic_active;
 	struct nes_vnic *first_nesvnic = NULL;
 	struct list_head *list_pos, *list_temp;
+	unsigned long flags;
 
 	nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
 			nesvnic, nesdev, netdev, netdev->name);
@@ -315,12 +335,17 @@ static int nes_netdev_stop(struct net_device *netdev)
 	nic_active &= nic_active_mask;
 	nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
 
-
+	spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
 	if (nesvnic->of_device_registered) {
 		nesdev->nesadapter->send_term_ok = 0;
 		nesdev->iw_status = 0;
-		nes_port_ibevent(nesvnic);
+		if (nesvnic->linkup == 1)
+			nes_port_ibevent(nesvnic);
 	}
+	del_timer_sync(&nesvnic->event_timer);
+	nesvnic->event_timer.function = NULL;
+	spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
+
 	nes_destroy_nic_qp(nesvnic);
 
 	nesvnic->netdev_open = 0;
@@ -1750,7 +1775,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 		nesvnic->rdma_enabled = 0;
 	}
 	nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
+	init_timer(&nesvnic->event_timer);
+	nesvnic->event_timer.function = NULL;
 	spin_lock_init(&nesvnic->tx_lock);
+	spin_lock_init(&nesvnic->port_ibevent_lock);
 	nesdev->netdev[nesdev->netdev_count] = netdev;
 
 	nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
@@ -1763,8 +1791,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 	      (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
 	       ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
 		u32 u32temp;
-		u32 link_mask;
-		u32 link_val;
+		u32 link_mask = 0;
+		u32 link_val = 0;
+		u16 temp_phy_data;
+		u16 phy_data = 0;
+		unsigned long flags;
 
 		u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
 				(0x200 * (nesdev->mac_index & 1)));
@@ -1786,6 +1817,23 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 				link_val = 0x02020000;
 			}
 			break;
+		case NES_PHY_TYPE_SFP_D:
+			spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+			nes_read_10G_phy_reg(nesdev,
+					     nesdev->nesadapter->phy_index[nesdev->mac_index],
+					     1, 0x9003);
+			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			nes_read_10G_phy_reg(nesdev,
+					     nesdev->nesadapter->phy_index[nesdev->mac_index],
+					     3, 0x0021);
+			nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			nes_read_10G_phy_reg(nesdev,
+					     nesdev->nesadapter->phy_index[nesdev->mac_index],
+					     3, 0x0021);
+			phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+			phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
+			break;
 		default:
 			link_mask = 0x0f1f0000;
 			link_val = 0x0f0f0000;
@@ -1795,8 +1843,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 		u32temp = nes_read_indexed(nesdev,
 					   NES_IDX_PHY_PCS_CONTROL_STATUS0 +
 					   (0x200 * (nesdev->mac_index & 1)));
-		if ((u32temp & link_mask) == link_val)
-			nesvnic->linkup = 1;
+
+		if (phy_type == NES_PHY_TYPE_SFP_D) {
+			if (phy_data & 0x0004)
+				nesvnic->linkup = 1;
+		} else {
+			if ((u32temp & link_mask) == link_val)
+				nesvnic->linkup = 1;
+		}
 
 		/* clear the MAC interrupt status, assumes direct logical to physical mapping */
 		u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 99933e4e48ff..26d8018c0a7c 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3936,6 +3936,30 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	return nesibdev;
 }
 
+
+/**
+ * nes_handle_delayed_event
+ */
+static void nes_handle_delayed_event(unsigned long data)
+{
+	struct nes_vnic *nesvnic = (void *) data;
+
+	if (nesvnic->delayed_event != nesvnic->last_dispatched_event) {
+		struct ib_event event;
+
+		event.device = &nesvnic->nesibdev->ibdev;
+		if (!event.device)
+			goto stop_timer;
+		event.event = nesvnic->delayed_event;
+		event.element.port_num = nesvnic->logical_port + 1;
+		ib_dispatch_event(&event);
+	}
+
+stop_timer:
+	nesvnic->event_timer.function = NULL;
+}
+
+
 void  nes_port_ibevent(struct nes_vnic *nesvnic)
 {
 	struct nes_ib_device *nesibdev = nesvnic->nesibdev;
@@ -3944,7 +3968,18 @@ void  nes_port_ibevent(struct nes_vnic *nesvnic)
 	event.device = &nesibdev->ibdev;
 	event.element.port_num = nesvnic->logical_port + 1;
 	event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
-	ib_dispatch_event(&event);
+
+	if (!nesvnic->event_timer.function) {
+		ib_dispatch_event(&event);
+		nesvnic->last_dispatched_event = event.event;
+		nesvnic->event_timer.function = nes_handle_delayed_event;
+		nesvnic->event_timer.data = (unsigned long) nesvnic;
+		nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY;
+		add_timer(&nesvnic->event_timer);
+	} else {
+		mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY);
+	}
+	nesvnic->delayed_event = event.event;
 }
 
 
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 127a0d5069f0..de799f17cb9e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1692,8 +1692,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
 	ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
 	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
 	wake_up(&ppd->cpspec->autoneg_wait);
-	cancel_delayed_work(&ppd->cpspec->autoneg_work);
-	flush_scheduled_work();
+	cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
 
 	shutdown_7220_relock_poll(ppd->dd);
 	val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg);
@@ -3515,8 +3514,8 @@ static void try_7220_autoneg(struct qib_pportdata *ppd)
 
 	toggle_7220_rclkrls(ppd->dd);
 	/* 2 msec is minimum length of a poll cycle */
-	schedule_delayed_work(&ppd->cpspec->autoneg_work,
-			      msecs_to_jiffies(2));
+	queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+			   msecs_to_jiffies(2));
 }
 
 /*
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index abd409d592ef..50cceb3ab885 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2406,10 +2406,9 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
 	ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
 	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
 	wake_up(&ppd->cpspec->autoneg_wait);
-	cancel_delayed_work(&ppd->cpspec->autoneg_work);
+	cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
 	if (ppd->dd->cspec->r1)
-		cancel_delayed_work(&ppd->cpspec->ipg_work);
-	flush_scheduled_work();
+		cancel_delayed_work_sync(&ppd->cpspec->ipg_work);
 
 	ppd->cpspec->chase_end = 0;
 	if (ppd->cpspec->chase_timer.data) /* if initted */
@@ -2706,7 +2705,7 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd)
 			if (!(pins & mask)) {
 				++handled;
 				qd->t_insert = get_jiffies_64();
-				schedule_work(&qd->work);
+				queue_work(ib_wq, &qd->work);
 			}
 		}
 	}
@@ -4990,8 +4989,8 @@ static void try_7322_autoneg(struct qib_pportdata *ppd)
 	set_7322_ibspeed_fast(ppd, QIB_IB_DDR);
 	qib_7322_mini_pcs_reset(ppd);
 	/* 2 msec is minimum length of a poll cycle */
-	schedule_delayed_work(&ppd->cpspec->autoneg_work,
-			      msecs_to_jiffies(2));
+	queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+			   msecs_to_jiffies(2));
 }
 
 /*
@@ -5121,7 +5120,8 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
 		ib_free_send_mad(send_buf);
 retry:
 	delay = 2 << ppd->cpspec->ipg_tries;
-	schedule_delayed_work(&ppd->cpspec->ipg_work, msecs_to_jiffies(delay));
+	queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work,
+			   msecs_to_jiffies(delay));
 }
 
 /*
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 7896afbb9ce8..ffefb78b8949 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -80,7 +80,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
 module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
 MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
 
-struct workqueue_struct *qib_wq;
 struct workqueue_struct *qib_cq_wq;
 
 static void verify_interrupt(unsigned long);
@@ -270,23 +269,20 @@ static void init_shadow_tids(struct qib_devdata *dd)
 	struct page **pages;
 	dma_addr_t *addrs;
 
-	pages = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
+	pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
 	if (!pages) {
 		qib_dev_err(dd, "failed to allocate shadow page * "
 			    "array, no expected sends!\n");
 		goto bail;
 	}
 
-	addrs = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
+	addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
 	if (!addrs) {
 		qib_dev_err(dd, "failed to allocate shadow dma handle "
 			    "array, no expected sends!\n");
 		goto bail_free;
 	}
 
-	memset(pages, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
-	memset(addrs, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
-
 	dd->pageshadow = pages;
 	dd->physshadow = addrs;
 	return;
@@ -1047,24 +1043,10 @@ static int __init qlogic_ib_init(void)
 	if (ret)
 		goto bail;
 
-	/*
-	 * We create our own workqueue mainly because we want to be
-	 * able to flush it when devices are being removed.  We can't
-	 * use schedule_work()/flush_scheduled_work() because both
-	 * unregister_netdev() and linkwatch_event take the rtnl lock,
-	 * so flush_scheduled_work() can deadlock during device
-	 * removal.
-	 */
-	qib_wq = create_workqueue("qib");
-	if (!qib_wq) {
-		ret = -ENOMEM;
-		goto bail_dev;
-	}
-
 	qib_cq_wq = create_singlethread_workqueue("qib_cq");
 	if (!qib_cq_wq) {
 		ret = -ENOMEM;
-		goto bail_wq;
+		goto bail_dev;
 	}
 
 	/*
@@ -1094,8 +1076,6 @@ bail_unit:
 	idr_destroy(&qib_unit_table);
 bail_cq_wq:
 	destroy_workqueue(qib_cq_wq);
-bail_wq:
-	destroy_workqueue(qib_wq);
 bail_dev:
 	qib_dev_cleanup();
 bail:
@@ -1119,7 +1099,6 @@ static void __exit qlogic_ib_cleanup(void)
 
 	pci_unregister_driver(&qib_driver);
 
-	destroy_workqueue(qib_wq);
 	destroy_workqueue(qib_cq_wq);
 
 	qib_cpulist_count = 0;
@@ -1292,7 +1271,7 @@ static int __devinit qib_init_one(struct pci_dev *pdev,
 
 	if (qib_mini_init || initfail || ret) {
 		qib_stop_timers(dd);
-		flush_scheduled_work();
+		flush_workqueue(ib_wq);
 		for (pidx = 0; pidx < dd->num_pports; ++pidx)
 			dd->f_quiet_serdes(dd->pport + pidx);
 		if (qib_mini_init)
@@ -1341,8 +1320,8 @@ static void __devexit qib_remove_one(struct pci_dev *pdev)
 
 	qib_stop_timers(dd);
 
-	/* wait until all of our (qsfp) schedule_work() calls complete */
-	flush_scheduled_work();
+	/* wait until all of our (qsfp) queue_work() calls complete */
+	flush_workqueue(ib_wq);
 
 	ret = qibfs_remove(dd);
 	if (ret)
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 35b3604b691d..3374a52232c1 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -485,7 +485,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
 		goto bail;
 	/* We see a module, but it may be unwise to look yet. Just schedule */
 	qd->t_insert = get_jiffies_64();
-	schedule_work(&qd->work);
+	queue_work(ib_wq, &qd->work);
 bail:
 	return;
 }
@@ -493,10 +493,9 @@ bail:
 void qib_qsfp_deinit(struct qib_qsfp_data *qd)
 {
 	/*
-	 * There is nothing to do here for now.  our
-	 * work is scheduled with schedule_work(), and
-	 * flush_scheduled_work() from remove_one will
-	 * block until all work ssetup with schedule_work()
+	 * There is nothing to do here for now.  our work is scheduled
+	 * with queue_work(), and flush_workqueue() from remove_one
+	 * will block until all work setup with queue_work()
 	 * completes.
 	 */
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 63b22a9a7feb..95e5b47223b3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -805,7 +805,6 @@ static inline int qib_send_ok(struct qib_qp *qp)
 		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
 }
 
-extern struct workqueue_struct *qib_wq;
 extern struct workqueue_struct *qib_cq_wq;
 
 /*
@@ -814,7 +813,7 @@ extern struct workqueue_struct *qib_cq_wq;
 static inline void qib_schedule_send(struct qib_qp *qp)
 {
 	if (qib_send_ok(qp))
-		queue_work(qib_wq, &qp->s_work);
+		queue_work(ib_wq, &qp->s_work);
 }
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c1c49f2d35b5..93d55806b967 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -352,15 +352,13 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 	int ret;
 	int i;
 
-	rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
+	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
 	if (!rx->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
 		       priv->ca->name, ipoib_recvq_size);
 		return -ENOMEM;
 	}
 
-	memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring);
-
 	t = kmalloc(sizeof *t, GFP_KERNEL);
 	if (!t) {
 		ret = -ENOMEM;
@@ -1097,13 +1095,12 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 	int ret;
 
-	p->tx_ring = vmalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
 	if (!p->tx_ring) {
 		ipoib_warn(priv, "failed to allocate tx ring\n");
 		ret = -ENOMEM;
 		goto err_tx;
 	}
-	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
 
 	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
 	if (IS_ERR(p->qp)) {
@@ -1521,7 +1518,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 		return;
 	}
 
-	priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
+	priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
 	if (!priv->cm.srq_ring) {
 		printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
 		       priv->ca->name, ipoib_recvq_size);
@@ -1530,7 +1527,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 		return;
 	}
 
-	memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring);
 }
 
 int ipoib_cm_dev_init(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7a07a728fe0d..aca3b44f7aed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -916,13 +916,12 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		goto out;
 	}
 
-	priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
+	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
 		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
-	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
 
 	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 4b62105ed1e8..83664ed2804f 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -638,7 +638,7 @@ err:
 	if (target->state == SRP_TARGET_CONNECTING) {
 		target->state = SRP_TARGET_DEAD;
 		INIT_WORK(&target->work, srp_remove_work);
-		schedule_work(&target->work);
+		queue_work(ib_wq, &target->work);
 	}
 	spin_unlock_irq(&target->lock);
 
@@ -1132,15 +1132,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 
 	spin_lock_irqsave(&target->lock, flags);
 	iu = __srp_get_tx_iu(target, SRP_IU_CMD);
-	if (iu) {
-		req = list_first_entry(&target->free_reqs, struct srp_request,
-				      list);
-		list_del(&req->list);
-	}
-	spin_unlock_irqrestore(&target->lock, flags);
-
 	if (!iu)
-		goto err;
+		goto err_unlock;
+
+	req = list_first_entry(&target->free_reqs, struct srp_request, list);
+	list_del(&req->list);
+	spin_unlock_irqrestore(&target->lock, flags);
 
 	dev = target->srp_host->srp_dev->dev;
 	ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len,
@@ -1185,6 +1182,8 @@ err_iu:
 
 	spin_lock_irqsave(&target->lock, flags);
 	list_add(&req->list, &target->free_reqs);
+
+err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
 err:
@@ -2199,7 +2198,7 @@ static void srp_remove_one(struct ib_device *device)
 		 * started before we marked our target ports as
 		 * removed, and any target port removal tasks.
 		 */
-		flush_scheduled_work();
+		flush_workqueue(ib_wq);
 
 		list_for_each_entry_safe(target, tmp_target,
 					 &host->target_list, list) {
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index b1f768917395..77414702cb00 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -53,9 +53,10 @@ config MTD_PARTITIONS
 	  devices. Partitioning on NFTL 'devices' is a different - that's the
 	  'normal' form of partitioning used on a block device.
 
+if MTD_PARTITIONS
+
 config MTD_REDBOOT_PARTS
 	tristate "RedBoot partition table parsing"
-	depends on MTD_PARTITIONS
 	---help---
 	  RedBoot is a ROM monitor and bootloader which deals with multiple
 	  'images' in flash devices by putting a table one of the erase
@@ -72,9 +73,10 @@ config MTD_REDBOOT_PARTS
 	  SA1100 map driver (CONFIG_MTD_SA1100) has an option for this, for
 	  example.
 
+if MTD_REDBOOT_PARTS
+
 config MTD_REDBOOT_DIRECTORY_BLOCK
 	int "Location of RedBoot partition table"
-	depends on MTD_REDBOOT_PARTS
 	default "-1"
 	---help---
 	  This option is the Linux counterpart to the
@@ -91,18 +93,18 @@ config MTD_REDBOOT_DIRECTORY_BLOCK
 
 config MTD_REDBOOT_PARTS_UNALLOCATED
 	bool "Include unallocated flash regions"
-	depends on MTD_REDBOOT_PARTS
 	help
 	  If you need to register each unallocated flash region as a MTD
 	  'partition', enable this option.
 
 config MTD_REDBOOT_PARTS_READONLY
 	bool "Force read-only for RedBoot system images"
-	depends on MTD_REDBOOT_PARTS
 	help
 	  If you need to force read-only for 'RedBoot', 'RedBoot Config' and
 	  'FIS directory' images, enable this option.
 
+endif # MTD_REDBOOT_PARTS
+
 config MTD_CMDLINE_PARTS
 	bool "Command line partition table parsing"
 	depends on MTD_PARTITIONS = "y" && MTD = "y"
@@ -142,7 +144,7 @@ config MTD_CMDLINE_PARTS
 
 config MTD_AFS_PARTS
 	tristate "ARM Firmware Suite partition parsing"
-	depends on ARM && MTD_PARTITIONS
+	depends on ARM
 	---help---
 	  The ARM Firmware Suite allows the user to divide flash devices into
 	  multiple 'images'. Each such image has a header containing its name
@@ -158,8 +160,8 @@ config MTD_AFS_PARTS
 	  example.
 
 config MTD_OF_PARTS
-	tristate "Flash partition map based on OF description"
-	depends on OF && MTD_PARTITIONS
+	def_bool y
+	depends on OF
 	help
 	  This provides a partition parsing function which derives
 	  the partition map from the children of the flash node,
@@ -167,10 +169,11 @@ config MTD_OF_PARTS
 
 config MTD_AR7_PARTS
 	tristate "TI AR7 partitioning support"
-	depends on MTD_PARTITIONS
 	---help---
 	  TI AR7 partitioning support
 
+endif # MTD_PARTITIONS
+
 comment "User Modules And Translation Layers"
 
 config MTD_CHAR
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 760abc533395..d4e7f25b1ebb 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -6,13 +6,13 @@
 obj-$(CONFIG_MTD)		+= mtd.o
 mtd-y				:= mtdcore.o mtdsuper.o
 mtd-$(CONFIG_MTD_PARTITIONS)	+= mtdpart.o
+mtd-$(CONFIG_MTD_OF_PARTS)	+= ofpart.o
 
 obj-$(CONFIG_MTD_CONCAT)	+= mtdconcat.o
 obj-$(CONFIG_MTD_REDBOOT_PARTS) += redboot.o
 obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdlinepart.o
 obj-$(CONFIG_MTD_AFS_PARTS)	+= afs.o
 obj-$(CONFIG_MTD_AR7_PARTS)	+= ar7part.o
-obj-$(CONFIG_MTD_OF_PARTS)      += ofpart.o
 
 # 'Users' - code which presents functionality to userspace.
 obj-$(CONFIG_MTD_CHAR)		+= mtdchar.o
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index ad9268b44416..a8c3e1c9b02a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -162,7 +162,7 @@ static void cfi_tell_features(struct cfi_pri_intelext *extp)
 #endif
 
 /* Atmel chips don't use the same PRI format as Intel chips */
-static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
+static void fixup_convert_atmel_pri(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -202,7 +202,7 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
 	cfi->cfiq->BufWriteTimeoutMax = 0;
 }
 
-static void fixup_at49bv640dx_lock(struct mtd_info *mtd, void *param)
+static void fixup_at49bv640dx_lock(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -214,7 +214,7 @@ static void fixup_at49bv640dx_lock(struct mtd_info *mtd, void *param)
 
 #ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE
 /* Some Intel Strata Flash prior to FPO revision C has bugs in this area */
-static void fixup_intel_strataflash(struct mtd_info *mtd, void* param)
+static void fixup_intel_strataflash(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -227,7 +227,7 @@ static void fixup_intel_strataflash(struct mtd_info *mtd, void* param)
 #endif
 
 #ifdef CMDSET0001_DISABLE_WRITE_SUSPEND
-static void fixup_no_write_suspend(struct mtd_info *mtd, void* param)
+static void fixup_no_write_suspend(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -240,7 +240,7 @@ static void fixup_no_write_suspend(struct mtd_info *mtd, void* param)
 }
 #endif
 
-static void fixup_st_m28w320ct(struct mtd_info *mtd, void* param)
+static void fixup_st_m28w320ct(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -249,7 +249,7 @@ static void fixup_st_m28w320ct(struct mtd_info *mtd, void* param)
 	cfi->cfiq->BufWriteTimeoutMax = 0;	/* Not supported */
 }
 
-static void fixup_st_m28w320cb(struct mtd_info *mtd, void* param)
+static void fixup_st_m28w320cb(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -259,7 +259,7 @@ static void fixup_st_m28w320cb(struct mtd_info *mtd, void* param)
 		(cfi->cfiq->EraseRegionInfo[1] & 0xffff0000) | 0x3e;
 };
 
-static void fixup_use_point(struct mtd_info *mtd, void *param)
+static void fixup_use_point(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	if (!mtd->point && map_is_linear(map)) {
@@ -268,7 +268,7 @@ static void fixup_use_point(struct mtd_info *mtd, void *param)
 	}
 }
 
-static void fixup_use_write_buffers(struct mtd_info *mtd, void *param)
+static void fixup_use_write_buffers(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -282,7 +282,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param)
 /*
  * Some chips power-up with all sectors locked by default.
  */
-static void fixup_unlock_powerup_lock(struct mtd_info *mtd, void *param)
+static void fixup_unlock_powerup_lock(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -295,31 +295,31 @@ static void fixup_unlock_powerup_lock(struct mtd_info *mtd, void *param)
 }
 
 static struct cfi_fixup cfi_fixup_table[] = {
-	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
-	{ CFI_MFR_ATMEL, AT49BV640D, fixup_at49bv640dx_lock, NULL },
-	{ CFI_MFR_ATMEL, AT49BV640DT, fixup_at49bv640dx_lock, NULL },
+	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri },
+	{ CFI_MFR_ATMEL, AT49BV640D, fixup_at49bv640dx_lock },
+	{ CFI_MFR_ATMEL, AT49BV640DT, fixup_at49bv640dx_lock },
 #ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash, NULL },
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash },
 #endif
 #ifdef CMDSET0001_DISABLE_WRITE_SUSPEND
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_no_write_suspend, NULL },
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_no_write_suspend },
 #endif
 #if !FORCE_WORD_WRITE
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL },
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers },
 #endif
-	{ CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL },
-	{ CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL },
-	{ CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock, NULL, },
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct },
+	{ CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb },
+	{ CFI_MFR_INTEL, CFI_ID_ANY, fixup_unlock_powerup_lock },
+	{ 0, 0, NULL }
 };
 
 static struct cfi_fixup jedec_fixup_table[] = {
-	{ CFI_MFR_INTEL, I82802AB,   fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_INTEL, I82802AC,   fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_ST,    M50LPW080,  fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_ST,    M50FLW080A, fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_ST,    M50FLW080B, fixup_use_fwh_lock, NULL, },
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_INTEL, I82802AB,   fixup_use_fwh_lock },
+	{ CFI_MFR_INTEL, I82802AC,   fixup_use_fwh_lock },
+	{ CFI_MFR_ST,    M50LPW080,  fixup_use_fwh_lock },
+	{ CFI_MFR_ST,    M50FLW080A, fixup_use_fwh_lock },
+	{ CFI_MFR_ST,    M50FLW080B, fixup_use_fwh_lock },
+	{ 0, 0, NULL }
 };
 static struct cfi_fixup fixup_table[] = {
 	/* The CFI vendor ids and the JEDEC vendor IDs appear
@@ -327,8 +327,8 @@ static struct cfi_fixup fixup_table[] = {
 	 * well.  This table is to pick all cases where
 	 * we know that is the case.
 	 */
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_point, NULL },
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_point },
+	{ 0, 0, NULL }
 };
 
 static void cfi_fixup_major_minor(struct cfi_private *cfi,
@@ -455,6 +455,7 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary)
 	mtd->flags   = MTD_CAP_NORFLASH;
 	mtd->name    = map->name;
 	mtd->writesize = 1;
+	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
 
 	mtd->reboot_notifier.notifier_call = cfi_intelext_reboot;
 
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 3b8e32d87977..f072fcfde04e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -134,7 +134,7 @@ static void cfi_tell_features(struct cfi_pri_amdstd *extp)
 
 #ifdef AMD_BOOTLOC_BUG
 /* Wheee. Bring me the head of someone at AMD. */
-static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
+static void fixup_amd_bootblock(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -186,7 +186,7 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
 }
 #endif
 
-static void fixup_use_write_buffers(struct mtd_info *mtd, void *param)
+static void fixup_use_write_buffers(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -197,7 +197,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param)
 }
 
 /* Atmel chips don't use the same PRI format as AMD chips */
-static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
+static void fixup_convert_atmel_pri(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -228,14 +228,14 @@ static void fixup_convert_atmel_pri(struct mtd_info *mtd, void *param)
 	cfi->cfiq->BufWriteTimeoutMax = 0;
 }
 
-static void fixup_use_secsi(struct mtd_info *mtd, void *param)
+static void fixup_use_secsi(struct mtd_info *mtd)
 {
 	/* Setup for chips with a secsi area */
 	mtd->read_user_prot_reg = cfi_amdstd_secsi_read;
 	mtd->read_fact_prot_reg = cfi_amdstd_secsi_read;
 }
 
-static void fixup_use_erase_chip(struct mtd_info *mtd, void *param)
+static void fixup_use_erase_chip(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -250,7 +250,7 @@ static void fixup_use_erase_chip(struct mtd_info *mtd, void *param)
  * Some Atmel chips (e.g. the AT49BV6416) power-up with all sectors
  * locked by default.
  */
-static void fixup_use_atmel_lock(struct mtd_info *mtd, void *param)
+static void fixup_use_atmel_lock(struct mtd_info *mtd)
 {
 	mtd->lock = cfi_atmel_lock;
 	mtd->unlock = cfi_atmel_unlock;
@@ -271,7 +271,7 @@ static void fixup_old_sst_eraseregion(struct mtd_info *mtd)
 	cfi->cfiq->NumEraseRegions = 1;
 }
 
-static void fixup_sst39vf(struct mtd_info *mtd, void *param)
+static void fixup_sst39vf(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -282,7 +282,7 @@ static void fixup_sst39vf(struct mtd_info *mtd, void *param)
 	cfi->addr_unlock2 = 0x2AAA;
 }
 
-static void fixup_sst39vf_rev_b(struct mtd_info *mtd, void *param)
+static void fixup_sst39vf_rev_b(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -295,12 +295,12 @@ static void fixup_sst39vf_rev_b(struct mtd_info *mtd, void *param)
 	cfi->sector_erase_cmd = CMD(0x50);
 }
 
-static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd, void *param)
+static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
 
-	fixup_sst39vf_rev_b(mtd, param);
+	fixup_sst39vf_rev_b(mtd);
 
 	/*
 	 * CFI reports 1024 sectors (0x03ff+1) of 64KBytes (0x0100*256) where
@@ -310,7 +310,7 @@ static void fixup_sst38vf640x_sectorsize(struct mtd_info *mtd, void *param)
 	pr_warning("%s: Bad 38VF640x CFI data; adjusting sector size from 64 to 8KiB\n", mtd->name);
 }
 
-static void fixup_s29gl064n_sectors(struct mtd_info *mtd, void *param)
+static void fixup_s29gl064n_sectors(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -321,7 +321,7 @@ static void fixup_s29gl064n_sectors(struct mtd_info *mtd, void *param)
 	}
 }
 
-static void fixup_s29gl032n_sectors(struct mtd_info *mtd, void *param)
+static void fixup_s29gl032n_sectors(struct mtd_info *mtd)
 {
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
@@ -334,47 +334,47 @@ static void fixup_s29gl032n_sectors(struct mtd_info *mtd, void *param)
 
 /* Used to fix CFI-Tables of chips without Extended Query Tables */
 static struct cfi_fixup cfi_nopri_fixup_table[] = {
-	{ CFI_MFR_SST, 0x234A, fixup_sst39vf, NULL, }, /* SST39VF1602 */
-	{ CFI_MFR_SST, 0x234B, fixup_sst39vf, NULL, }, /* SST39VF1601 */
-	{ CFI_MFR_SST, 0x235A, fixup_sst39vf, NULL, }, /* SST39VF3202 */
-	{ CFI_MFR_SST, 0x235B, fixup_sst39vf, NULL, }, /* SST39VF3201 */
-	{ CFI_MFR_SST, 0x235C, fixup_sst39vf_rev_b, NULL, }, /* SST39VF3202B */
-	{ CFI_MFR_SST, 0x235D, fixup_sst39vf_rev_b, NULL, }, /* SST39VF3201B */
-	{ CFI_MFR_SST, 0x236C, fixup_sst39vf_rev_b, NULL, }, /* SST39VF6402B */
-	{ CFI_MFR_SST, 0x236D, fixup_sst39vf_rev_b, NULL, }, /* SST39VF6401B */
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_SST, 0x234a, fixup_sst39vf }, /* SST39VF1602 */
+	{ CFI_MFR_SST, 0x234b, fixup_sst39vf }, /* SST39VF1601 */
+	{ CFI_MFR_SST, 0x235a, fixup_sst39vf }, /* SST39VF3202 */
+	{ CFI_MFR_SST, 0x235b, fixup_sst39vf }, /* SST39VF3201 */
+	{ CFI_MFR_SST, 0x235c, fixup_sst39vf_rev_b }, /* SST39VF3202B */
+	{ CFI_MFR_SST, 0x235d, fixup_sst39vf_rev_b }, /* SST39VF3201B */
+	{ CFI_MFR_SST, 0x236c, fixup_sst39vf_rev_b }, /* SST39VF6402B */
+	{ CFI_MFR_SST, 0x236d, fixup_sst39vf_rev_b }, /* SST39VF6401B */
+	{ 0, 0, NULL }
 };
 
 static struct cfi_fixup cfi_fixup_table[] = {
-	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
+	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri },
 #ifdef AMD_BOOTLOC_BUG
-	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
-	{ CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock },
+	{ CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock },
 #endif
-	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x0055, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x0056, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x005C, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x005F, fixup_use_secsi, NULL, },
-	{ CFI_MFR_AMD, 0x0c01, fixup_s29gl064n_sectors, NULL, },
-	{ CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors, NULL, },
-	{ CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors, NULL, },
-	{ CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors, NULL, },
-	{ CFI_MFR_SST, 0x536A, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6402 */
-	{ CFI_MFR_SST, 0x536B, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6401 */
-	{ CFI_MFR_SST, 0x536C, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6404 */
-	{ CFI_MFR_SST, 0x536D, fixup_sst38vf640x_sectorsize, NULL, }, /* SST38VF6403 */
+	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x0053, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x0055, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x0056, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x005C, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x005F, fixup_use_secsi },
+	{ CFI_MFR_AMD, 0x0c01, fixup_s29gl064n_sectors },
+	{ CFI_MFR_AMD, 0x1301, fixup_s29gl064n_sectors },
+	{ CFI_MFR_AMD, 0x1a00, fixup_s29gl032n_sectors },
+	{ CFI_MFR_AMD, 0x1a01, fixup_s29gl032n_sectors },
+	{ CFI_MFR_SST, 0x536a, fixup_sst38vf640x_sectorsize }, /* SST38VF6402 */
+	{ CFI_MFR_SST, 0x536b, fixup_sst38vf640x_sectorsize }, /* SST38VF6401 */
+	{ CFI_MFR_SST, 0x536c, fixup_sst38vf640x_sectorsize }, /* SST38VF6404 */
+	{ CFI_MFR_SST, 0x536d, fixup_sst38vf640x_sectorsize }, /* SST38VF6403 */
 #if !FORCE_WORD_WRITE
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers, NULL, },
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_write_buffers },
 #endif
-	{ 0, 0, NULL, NULL }
+	{ 0, 0, NULL }
 };
 static struct cfi_fixup jedec_fixup_table[] = {
-	{ CFI_MFR_SST, SST49LF004B, fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_SST, SST49LF040B, fixup_use_fwh_lock, NULL, },
-	{ CFI_MFR_SST, SST49LF008A, fixup_use_fwh_lock, NULL, },
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_SST, SST49LF004B, fixup_use_fwh_lock },
+	{ CFI_MFR_SST, SST49LF040B, fixup_use_fwh_lock },
+	{ CFI_MFR_SST, SST49LF008A, fixup_use_fwh_lock },
+	{ 0, 0, NULL }
 };
 
 static struct cfi_fixup fixup_table[] = {
@@ -383,18 +383,30 @@ static struct cfi_fixup fixup_table[] = {
 	 * well.  This table is to pick all cases where
 	 * we know that is the case.
 	 */
-	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip, NULL },
-	{ CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock, NULL },
-	{ 0, 0, NULL, NULL }
+	{ CFI_MFR_ANY, CFI_ID_ANY, fixup_use_erase_chip },
+	{ CFI_MFR_ATMEL, AT49BV6416, fixup_use_atmel_lock },
+	{ 0, 0, NULL }
 };
 
 
 static void cfi_fixup_major_minor(struct cfi_private *cfi,
 				  struct cfi_pri_amdstd *extp)
 {
-	if (cfi->mfr == CFI_MFR_SAMSUNG && cfi->id == 0x257e &&
-	    extp->MajorVersion == '0')
-		extp->MajorVersion = '1';
+	if (cfi->mfr == CFI_MFR_SAMSUNG) {
+		if ((extp->MajorVersion == '0' && extp->MinorVersion == '0') ||
+		    (extp->MajorVersion == '3' && extp->MinorVersion == '3')) {
+			/*
+			 * Samsung K8P2815UQB and K8D6x16UxM chips
+			 * report major=0 / minor=0.
+			 * K8D3x16UxC chips report major=3 / minor=3.
+			 */
+			printk(KERN_NOTICE "  Fixing Samsung's Amd/Fujitsu"
+			       " Extended Query version to 1.%c\n",
+			       extp->MinorVersion);
+			extp->MajorVersion = '1';
+		}
+	}
+
 	/*
 	 * SST 38VF640x chips report major=0xFF / minor=0xFF.
 	 */
@@ -428,6 +440,10 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 	mtd->flags   = MTD_CAP_NORFLASH;
 	mtd->name    = map->name;
 	mtd->writesize = 1;
+	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n",
+		__func__, mtd->writebufsize);
 
 	mtd->reboot_notifier.notifier_call = cfi_amdstd_reboot;
 
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 314af1f5a370..c04b7658abe9 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -238,6 +238,7 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
 	mtd->resume = cfi_staa_resume;
 	mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE;
 	mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */
+	mtd->writebufsize = 1 << cfi->cfiq->MaxBufWriteSize;
 	map->fldrv = &cfi_staa_chipdrv;
 	__module_get(THIS_MODULE);
 	mtd->name = map->name;
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index 360525c637d2..6ae3d111e1e7 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -156,7 +156,7 @@ void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup *fixups)
 	for (f=fixups; f->fixup; f++) {
 		if (((f->mfr == CFI_MFR_ANY) || (f->mfr == cfi->mfr)) &&
 		    ((f->id  == CFI_ID_ANY)  || (f->id  == cfi->id))) {
-			f->fixup(mtd, f->param);
+			f->fixup(mtd);
 		}
 	}
 }
diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h
index d18064977192..5e3cc80128aa 100644
--- a/drivers/mtd/chips/fwh_lock.h
+++ b/drivers/mtd/chips/fwh_lock.h
@@ -98,7 +98,7 @@ static int fwh_unlock_varsize(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return ret;
 }
 
-static void fixup_use_fwh_lock(struct mtd_info *mtd, void *param)
+static void fixup_use_fwh_lock(struct mtd_info *mtd)
 {
 	printk(KERN_NOTICE "using fwh lock/unlock method\n");
 	/* Setup for the chips with the fwh lock method */
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index bf5a002209bd..e4eba6cc1b2e 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -51,6 +51,10 @@
 #define	OPCODE_WRDI		0x04	/* Write disable */
 #define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
 
+/* Used for Macronix flashes only. */
+#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
+#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+
 /* Status Register bits. */
 #define	SR_WIP			1	/* Write in progress */
 #define	SR_WEL			2	/* Write enable latch */
@@ -62,7 +66,7 @@
 
 /* Define max times to check status register before we give up. */
 #define	MAX_READY_WAIT_JIFFIES	(40 * HZ)	/* M25P16 specs 40s max chip erase */
-#define	MAX_CMD_SIZE		4
+#define	MAX_CMD_SIZE		5
 
 #ifdef CONFIG_M25PXX_USE_FAST_READ
 #define OPCODE_READ 	OPCODE_FAST_READ
@@ -152,6 +156,16 @@ static inline int write_disable(struct m25p *flash)
 }
 
 /*
+ * Enable/disable 4-byte addressing mode.
+ */
+static inline int set_4byte(struct m25p *flash, int enable)
+{
+	u8	code = enable ? OPCODE_EN4B : OPCODE_EX4B;
+
+	return spi_write_then_read(flash->spi, &code, 1, NULL, 0);
+}
+
+/*
  * Service routine to read status register until ready, or timeout occurs.
  * Returns non-zero if error.
  */
@@ -207,6 +221,7 @@ static void m25p_addr2cmd(struct m25p *flash, unsigned int addr, u8 *cmd)
 	cmd[1] = addr >> (flash->addr_width * 8 -  8);
 	cmd[2] = addr >> (flash->addr_width * 8 - 16);
 	cmd[3] = addr >> (flash->addr_width * 8 - 24);
+	cmd[4] = addr >> (flash->addr_width * 8 - 32);
 }
 
 static int m25p_cmdsz(struct m25p *flash)
@@ -482,6 +497,10 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	size_t actual;
 	int cmd_sz, ret;
 
+	DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %zd\n",
+			dev_name(&flash->spi->dev), __func__, "to",
+			(u32)to, len);
+
 	*retlen = 0;
 
 	/* sanity checks */
@@ -607,7 +626,6 @@ struct flash_info {
 		.sector_size = (_sector_size),				\
 		.n_sectors = (_n_sectors),				\
 		.page_size = 256,					\
-		.addr_width = 3,					\
 		.flags = (_flags),					\
 	})
 
@@ -635,7 +653,7 @@ static const struct spi_device_id m25p_ids[] = {
 	{ "at26f004",   INFO(0x1f0400, 0, 64 * 1024,  8, SECT_4K) },
 	{ "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) },
 	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
-	{ "at26df321",  INFO(0x1f4701, 0, 64 * 1024, 64, SECT_4K) },
+	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
 
 	/* EON -- en25pxx */
 	{ "en25p32", INFO(0x1c2016, 0, 64 * 1024,  64, 0) },
@@ -653,6 +671,8 @@ static const struct spi_device_id m25p_ids[] = {
 	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
 	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
 	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
 
 	/* Spansion -- single (large) sector size only, at least
 	 * for the chips listed here (without boot sectors).
@@ -764,6 +784,7 @@ static const struct spi_device_id *__devinit jedec_probe(struct spi_device *spi)
 			return &m25p_ids[tmp];
 		}
 	}
+	dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
 	return ERR_PTR(-ENODEV);
 }
 
@@ -883,7 +904,17 @@ static int __devinit m25p_probe(struct spi_device *spi)
 
 	flash->mtd.dev.parent = &spi->dev;
 	flash->page_size = info->page_size;
-	flash->addr_width = info->addr_width;
+
+	if (info->addr_width)
+		flash->addr_width = info->addr_width;
+	else {
+		/* enable 4-byte addressing if the device exceeds 16MiB */
+		if (flash->mtd.size > 0x1000000) {
+			flash->addr_width = 4;
+			set_4byte(flash, 1);
+		} else
+			flash->addr_width = 3;
+	}
 
 	dev_info(&spi->dev, "%s (%lld Kbytes)\n", id->name,
 			(long long)flash->mtd.size >> 10);
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index 684247a8a5ed..c163e619abc9 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -335,7 +335,7 @@ out:
 	return ret;
 }
 
-static struct flash_info *__init sst25l_match_device(struct spi_device *spi)
+static struct flash_info *__devinit sst25l_match_device(struct spi_device *spi)
 {
 	struct flash_info *flash_info = NULL;
 	struct spi_message m;
@@ -375,7 +375,7 @@ static struct flash_info *__init sst25l_match_device(struct spi_device *spi)
 	return flash_info;
 }
 
-static int __init sst25l_probe(struct spi_device *spi)
+static int __devinit sst25l_probe(struct spi_device *spi)
 {
 	struct flash_info *flash_info;
 	struct sst25l_flash *flash;
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 19fe92db0c46..77d64ce19e9f 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -149,11 +149,8 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
 	if (request_resource(&iomem_resource, &window->rsrc)) {
 		window->rsrc.parent = NULL;
 		printk(KERN_ERR MOD_NAME
-			" %s(): Unable to register resource"
-			" 0x%.16llx-0x%.16llx - kernel bug?\n",
-			__func__,
-			(unsigned long long)window->rsrc.start,
-			(unsigned long long)window->rsrc.end);
+		       " %s(): Unable to register resource %pR - kernel bug?\n",
+		       __func__, &window->rsrc);
 	}
 
 
diff --git a/drivers/mtd/maps/bcm963xx-flash.c b/drivers/mtd/maps/bcm963xx-flash.c
index d175c120ee84..1f3049590d9e 100644
--- a/drivers/mtd/maps/bcm963xx-flash.c
+++ b/drivers/mtd/maps/bcm963xx-flash.c
@@ -196,10 +196,15 @@ static int bcm963xx_probe(struct platform_device *pdev)
 	bcm963xx_mtd_info = do_map_probe("cfi_probe", &bcm963xx_map);
 	if (!bcm963xx_mtd_info) {
 		dev_err(&pdev->dev, "failed to probe using CFI\n");
+		bcm963xx_mtd_info = do_map_probe("jedec_probe", &bcm963xx_map);
+		if (bcm963xx_mtd_info)
+			goto probe_ok;
+		dev_err(&pdev->dev, "failed to probe using JEDEC\n");
 		err = -EIO;
 		goto err_probe;
 	}
 
+probe_ok:
 	bcm963xx_mtd_info->owner = THIS_MODULE;
 
 	/* This is mutually exclusive */
diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c
index ddb462bea9b5..5fdb7b26cea3 100644
--- a/drivers/mtd/maps/ck804xrom.c
+++ b/drivers/mtd/maps/ck804xrom.c
@@ -178,11 +178,8 @@ static int __devinit ck804xrom_init_one (struct pci_dev *pdev,
 	if (request_resource(&iomem_resource, &window->rsrc)) {
 		window->rsrc.parent = NULL;
 		printk(KERN_ERR MOD_NAME
-			" %s(): Unable to register resource"
-			" 0x%.016llx-0x%.016llx - kernel bug?\n",
-			__func__,
-			(unsigned long long)window->rsrc.start,
-			(unsigned long long)window->rsrc.end);
+		       " %s(): Unable to register resource %pR - kernel bug?\n",
+			__func__, &window->rsrc);
 	}
 
 
diff --git a/drivers/mtd/maps/esb2rom.c b/drivers/mtd/maps/esb2rom.c
index d12c93dc1aad..4feb7507ab7c 100644
--- a/drivers/mtd/maps/esb2rom.c
+++ b/drivers/mtd/maps/esb2rom.c
@@ -242,12 +242,9 @@ static int __devinit esb2rom_init_one(struct pci_dev *pdev,
 	window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	if (request_resource(&iomem_resource, &window->rsrc)) {
 		window->rsrc.parent = NULL;
-		printk(KERN_DEBUG MOD_NAME
-			": %s(): Unable to register resource"
-			" 0x%.08llx-0x%.08llx - kernel bug?\n",
-			__func__,
-			(unsigned long long)window->rsrc.start,
-			(unsigned long long)window->rsrc.end);
+		printk(KERN_DEBUG MOD_NAME ": "
+		       "%s(): Unable to register resource %pR - kernel bug?\n",
+			__func__, &window->rsrc);
 	}
 
 	/* Map the firmware hub into my address space. */
diff --git a/drivers/mtd/maps/ichxrom.c b/drivers/mtd/maps/ichxrom.c
index f102bf243a74..1337a4191a0c 100644
--- a/drivers/mtd/maps/ichxrom.c
+++ b/drivers/mtd/maps/ichxrom.c
@@ -175,12 +175,9 @@ static int __devinit ichxrom_init_one (struct pci_dev *pdev,
 	window->rsrc.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 	if (request_resource(&iomem_resource, &window->rsrc)) {
 		window->rsrc.parent = NULL;
-		printk(KERN_DEBUG MOD_NAME
-			": %s(): Unable to register resource"
-			" 0x%.16llx-0x%.16llx - kernel bug?\n",
-			__func__,
-			(unsigned long long)window->rsrc.start,
-			(unsigned long long)window->rsrc.end);
+		printk(KERN_DEBUG MOD_NAME ": "
+		       "%s(): Unable to register resource %pR - kernel bug?\n",
+		       __func__, &window->rsrc);
 	}
 
 	/* Map the firmware hub into my address space. */
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 9861814aa027..8506578e6a35 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -274,9 +274,7 @@ static int __devinit of_flash_probe(struct platform_device *dev,
 			continue;
 		}
 
-		dev_dbg(&dev->dev, "of_flash device: %.8llx-%.8llx\n",
-			(unsigned long long)res.start,
-			(unsigned long long)res.end);
+		dev_dbg(&dev->dev, "of_flash device: %pR\n", &res);
 
 		err = -EBUSY;
 		res_size = resource_size(&res);
diff --git a/drivers/mtd/maps/scx200_docflash.c b/drivers/mtd/maps/scx200_docflash.c
index b5391ebb736e..027e628a4f1d 100644
--- a/drivers/mtd/maps/scx200_docflash.c
+++ b/drivers/mtd/maps/scx200_docflash.c
@@ -166,9 +166,8 @@ static int __init init_scx200_docflash(void)
 		outl(pmr, scx200_cb_base + SCx200_PMR);
 	}
 
-       	printk(KERN_INFO NAME ": DOCCS mapped at 0x%llx-0x%llx, width %d\n",
-			(unsigned long long)docmem.start,
-			(unsigned long long)docmem.end, width);
+	printk(KERN_INFO NAME ": DOCCS mapped at %pR, width %d\n",
+	       &docmem, width);
 
 	scx200_docflash_map.size = size;
 	if (width == 8)
diff --git a/drivers/mtd/maps/tqm8xxl.c b/drivers/mtd/maps/tqm8xxl.c
index 60146984f4be..c08e140d40ed 100644
--- a/drivers/mtd/maps/tqm8xxl.c
+++ b/drivers/mtd/maps/tqm8xxl.c
@@ -139,7 +139,7 @@ static int __init init_tqm_mtd(void)
 			goto error_mem;
 		}
 
-		map_banks[idx]->name = (char *)kmalloc(16, GFP_KERNEL);
+		map_banks[idx]->name = kmalloc(16, GFP_KERNEL);
 
 		if (!map_banks[idx]->name) {
 			ret = -ENOMEM;
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 98240575a18d..145b3d0dc0db 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -522,10 +522,6 @@ static int mtd_blkpg_ioctl(struct mtd_info *mtd,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	/* Only master mtd device must be used to control partitions */
-	if (!mtd_is_master(mtd))
-		return -EINVAL;
-
 	if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
 		return -EFAULT;
 
@@ -535,6 +531,10 @@ static int mtd_blkpg_ioctl(struct mtd_info *mtd,
 	switch (a.op) {
 	case BLKPG_ADD_PARTITION:
 
+		/* Only master mtd device must be used to add partitions */
+		if (mtd_is_partition(mtd))
+			return -EINVAL;
+
 		return mtd_add_partition(mtd, p.devname, p.start, p.length);
 
 	case BLKPG_DEL_PARTITION:
@@ -601,6 +601,7 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg)
 	}
 
 	case MEMGETINFO:
+		memset(&info, 0, sizeof(info));
 		info.type	= mtd->type;
 		info.flags	= mtd->flags;
 		info.size	= mtd->size;
@@ -609,7 +610,6 @@ static int mtd_ioctl(struct file *file, u_int cmd, u_long arg)
 		info.oobsize	= mtd->oobsize;
 		/* The below fields are obsolete */
 		info.ecctype	= -1;
-		info.eccsize	= 0;
 		if (copy_to_user(argp, &info, sizeof(struct mtd_info_user)))
 			return -EFAULT;
 		break;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index bf8de0943103..5f5777bd3f75 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -776,6 +776,7 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to c
 	concat->mtd.size = subdev[0]->size;
 	concat->mtd.erasesize = subdev[0]->erasesize;
 	concat->mtd.writesize = subdev[0]->writesize;
+	concat->mtd.writebufsize = subdev[0]->writebufsize;
 	concat->mtd.subpage_sft = subdev[0]->subpage_sft;
 	concat->mtd.oobsize = subdev[0]->oobsize;
 	concat->mtd.oobavail = subdev[0]->oobavail;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index c948150079be..e3e40f440323 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -401,7 +401,8 @@ static void mtdoops_notify_remove(struct mtd_info *mtd)
 		printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n");
 
 	cxt->mtd = NULL;
-	flush_scheduled_work();
+	flush_work_sync(&cxt->work_erase);
+	flush_work_sync(&cxt->work_write);
 }
 
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 79e3689f1e16..0a4760174782 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -120,8 +120,25 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 		return -EINVAL;
 	if (ops->datbuf && from + ops->len > mtd->size)
 		return -EINVAL;
-	res = part->master->read_oob(part->master, from + part->offset, ops);
 
+	/*
+	 * If OOB is also requested, make sure that we do not read past the end
+	 * of this partition.
+	 */
+	if (ops->oobbuf) {
+		size_t len, pages;
+
+		if (ops->mode == MTD_OOB_AUTO)
+			len = mtd->oobavail;
+		else
+			len = mtd->oobsize;
+		pages = mtd_div_by_ws(mtd->size, mtd);
+		pages -= mtd_div_by_ws(from, mtd);
+		if (ops->ooboffs + ops->ooblen > pages * len)
+			return -EINVAL;
+	}
+
+	res = part->master->read_oob(part->master, from + part->offset, ops);
 	if (unlikely(res)) {
 		if (res == -EUCLEAN)
 			mtd->ecc_stats.corrected++;
@@ -384,6 +401,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 	slave->mtd.flags = master->flags & ~part->mask_flags;
 	slave->mtd.size = part->size;
 	slave->mtd.writesize = master->writesize;
+	slave->mtd.writebufsize = master->writebufsize;
 	slave->mtd.oobsize = master->oobsize;
 	slave->mtd.oobavail = master->oobavail;
 	slave->mtd.subpage_sft = master->subpage_sft;
@@ -720,19 +738,19 @@ int parse_mtd_partitions(struct mtd_info *master, const char **types,
 }
 EXPORT_SYMBOL_GPL(parse_mtd_partitions);
 
-int mtd_is_master(struct mtd_info *mtd)
+int mtd_is_partition(struct mtd_info *mtd)
 {
 	struct mtd_part *part;
-	int nopart = 0;
+	int ispart = 0;
 
 	mutex_lock(&mtd_partitions_mutex);
 	list_for_each_entry(part, &mtd_partitions, list)
 		if (&part->mtd == mtd) {
-			nopart = 1;
+			ispart = 1;
 			break;
 		}
 	mutex_unlock(&mtd_partitions_mutex);
 
-	return nopart;
+	return ispart;
 }
-EXPORT_SYMBOL_GPL(mtd_is_master);
+EXPORT_SYMBOL_GPL(mtd_is_partition);
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 8229802b4346..c89592239bc7 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -96,6 +96,7 @@ config MTD_NAND_SPIA
 config MTD_NAND_AMS_DELTA
 	tristate "NAND Flash device on Amstrad E3"
 	depends on MACH_AMS_DELTA
+	default y
 	help
 	  Support for NAND flash on Amstrad E3 (Delta).
 
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index 2548e1065bf8..a067d090cb31 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -4,6 +4,8 @@
  *  Copyright (C) 2006 Jonathan McDowell <noodles@earth.li>
  *
  *  Derived from drivers/mtd/toto.c
+ *  Converted to platform driver by Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
+ *  Partially stolen from drivers/mtd/nand/plat_nand.c
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -62,9 +64,10 @@ static struct mtd_partition partition_info[] = {
 static void ams_delta_write_byte(struct mtd_info *mtd, u_char byte)
 {
 	struct nand_chip *this = mtd->priv;
+	void __iomem *io_base = this->priv;
 
-	omap_writew(0, (OMAP1_MPUIO_BASE + OMAP_MPUIO_IO_CNTL));
-	omap_writew(byte, this->IO_ADDR_W);
+	writew(0, io_base + OMAP_MPUIO_IO_CNTL);
+	writew(byte, this->IO_ADDR_W);
 	ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NWE, 0);
 	ndelay(40);
 	ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NWE,
@@ -75,11 +78,12 @@ static u_char ams_delta_read_byte(struct mtd_info *mtd)
 {
 	u_char res;
 	struct nand_chip *this = mtd->priv;
+	void __iomem *io_base = this->priv;
 
 	ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NRE, 0);
 	ndelay(40);
-	omap_writew(~0, (OMAP1_MPUIO_BASE + OMAP_MPUIO_IO_CNTL));
-	res = omap_readw(this->IO_ADDR_R);
+	writew(~0, io_base + OMAP_MPUIO_IO_CNTL);
+	res = readw(this->IO_ADDR_R);
 	ams_delta_latch2_write(AMS_DELTA_LATCH2_NAND_NRE,
 			       AMS_DELTA_LATCH2_NAND_NRE);
 
@@ -151,11 +155,16 @@ static int ams_delta_nand_ready(struct mtd_info *mtd)
 /*
  * Main initialization routine
  */
-static int __init ams_delta_init(void)
+static int __devinit ams_delta_init(struct platform_device *pdev)
 {
 	struct nand_chip *this;
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	void __iomem *io_base;
 	int err = 0;
 
+	if (!res)
+		return -ENXIO;
+
 	/* Allocate memory for MTD device structure and private data */
 	ams_delta_mtd = kmalloc(sizeof(struct mtd_info) +
 				sizeof(struct nand_chip), GFP_KERNEL);
@@ -177,9 +186,25 @@ static int __init ams_delta_init(void)
 	/* Link the private data with the MTD structure */
 	ams_delta_mtd->priv = this;
 
+	if (!request_mem_region(res->start, resource_size(res),
+			dev_name(&pdev->dev))) {
+		dev_err(&pdev->dev, "request_mem_region failed\n");
+		err = -EBUSY;
+		goto out_free;
+	}
+
+	io_base = ioremap(res->start, resource_size(res));
+	if (io_base == NULL) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		err = -EIO;
+		goto out_release_io;
+	}
+
+	this->priv = io_base;
+
 	/* Set address of NAND IO lines */
-	this->IO_ADDR_R = (OMAP1_MPUIO_BASE + OMAP_MPUIO_INPUT_LATCH);
-	this->IO_ADDR_W = (OMAP1_MPUIO_BASE + OMAP_MPUIO_OUTPUT);
+	this->IO_ADDR_R = io_base + OMAP_MPUIO_INPUT_LATCH;
+	this->IO_ADDR_W = io_base + OMAP_MPUIO_OUTPUT;
 	this->read_byte = ams_delta_read_byte;
 	this->write_buf = ams_delta_write_buf;
 	this->read_buf = ams_delta_read_buf;
@@ -195,6 +220,8 @@ static int __init ams_delta_init(void)
 	this->chip_delay = 30;
 	this->ecc.mode = NAND_ECC_SOFT;
 
+	platform_set_drvdata(pdev, io_base);
+
 	/* Set chip enabled, but  */
 	ams_delta_latch2_write(NAND_MASK, AMS_DELTA_LATCH2_NAND_NRE |
 					  AMS_DELTA_LATCH2_NAND_NWE |
@@ -214,25 +241,56 @@ static int __init ams_delta_init(void)
 	goto out;
 
  out_mtd:
+	platform_set_drvdata(pdev, NULL);
+	iounmap(io_base);
+out_release_io:
+	release_mem_region(res->start, resource_size(res));
+out_free:
 	kfree(ams_delta_mtd);
  out:
 	return err;
 }
 
-module_init(ams_delta_init);
-
 /*
  * Clean up routine
  */
-static void __exit ams_delta_cleanup(void)
+static int __devexit ams_delta_cleanup(struct platform_device *pdev)
 {
+	void __iomem *io_base = platform_get_drvdata(pdev);
+	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
 	/* Release resources, unregister device */
 	nand_release(ams_delta_mtd);
 
+	iounmap(io_base);
+	release_mem_region(res->start, resource_size(res));
+
 	/* Free the MTD device structure */
 	kfree(ams_delta_mtd);
+
+	return 0;
+}
+
+static struct platform_driver ams_delta_nand_driver = {
+	.probe		= ams_delta_init,
+	.remove		= __devexit_p(ams_delta_cleanup),
+	.driver		= {
+		.name	= "ams-delta-nand",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init ams_delta_nand_init(void)
+{
+	return platform_driver_register(&ams_delta_nand_driver);
+}
+module_init(ams_delta_nand_init);
+
+static void __exit ams_delta_nand_exit(void)
+{
+	platform_driver_unregister(&ams_delta_nand_driver);
 }
-module_exit(ams_delta_cleanup);
+module_exit(ams_delta_nand_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jonathan McDowell <noodles@earth.li>");
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index c141b07b25d1..7a13d42cbabd 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -388,6 +388,8 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 		         "page_addr: 0x%x, column: 0x%x.\n",
 		         page_addr, column);
 
+		elbc_fcm_ctrl->column = column;
+		elbc_fcm_ctrl->oob = 0;
 		elbc_fcm_ctrl->use_mdr = 1;
 
 		fcr = (NAND_CMD_STATUS   << FCR_CMD1_SHIFT) |
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 02edfba25b0c..205b10b9f9b9 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -31,6 +31,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/mtd/fsmc.h>
+#include <linux/amba/bus.h>
 #include <mtd/mtd-abi.h>
 
 static struct nand_ecclayout fsmc_ecc1_layout = {
@@ -119,21 +120,36 @@ static struct fsmc_eccplace fsmc_ecc4_sp_place = {
 	}
 };
 
-/*
- * Default partition tables to be used if the partition information not
- * provided through platform data
- */
-#define PARTITION(n, off, sz)	{.name = n, .offset = off, .size = sz}
 
+#ifdef CONFIG_MTD_PARTITIONS
 /*
+ * Default partition tables to be used if the partition information not
+ * provided through platform data.
+ *
  * Default partition layout for small page(= 512 bytes) devices
  * Size for "Root file system" is updated in driver based on actual device size
  */
 static struct mtd_partition partition_info_16KB_blk[] = {
-	PARTITION("X-loader", 0, 4 * 0x4000),
-	PARTITION("U-Boot", 0x10000, 20 * 0x4000),
-	PARTITION("Kernel", 0x60000, 256 * 0x4000),
-	PARTITION("Root File System", 0x460000, 0),
+	{
+		.name = "X-loader",
+		.offset = 0,
+		.size = 4*0x4000,
+	},
+	{
+		.name = "U-Boot",
+		.offset = 0x10000,
+		.size = 20*0x4000,
+	},
+	{
+		.name = "Kernel",
+		.offset = 0x60000,
+		.size = 256*0x4000,
+	},
+	{
+		.name = "Root File System",
+		.offset = 0x460000,
+		.size = 0,
+	},
 };
 
 /*
@@ -141,19 +157,37 @@ static struct mtd_partition partition_info_16KB_blk[] = {
  * Size for "Root file system" is updated in driver based on actual device size
  */
 static struct mtd_partition partition_info_128KB_blk[] = {
-	PARTITION("X-loader", 0, 4 * 0x20000),
-	PARTITION("U-Boot", 0x80000, 12 * 0x20000),
-	PARTITION("Kernel", 0x200000, 48 * 0x20000),
-	PARTITION("Root File System", 0x800000, 0),
+	{
+		.name = "X-loader",
+		.offset = 0,
+		.size = 4*0x20000,
+	},
+	{
+		.name = "U-Boot",
+		.offset = 0x80000,
+		.size = 12*0x20000,
+	},
+	{
+		.name = "Kernel",
+		.offset = 0x200000,
+		.size = 48*0x20000,
+	},
+	{
+		.name = "Root File System",
+		.offset = 0x800000,
+		.size = 0,
+	},
 };
 
 #ifdef CONFIG_MTD_CMDLINE_PARTS
 const char *part_probes[] = { "cmdlinepart", NULL };
 #endif
+#endif
 
 /**
- * struct fsmc_nand_data - atructure for FSMC NAND device state
+ * struct fsmc_nand_data - structure for FSMC NAND device state
  *
+ * @pid:		Part ID on the AMBA PrimeCell format
  * @mtd:		MTD info for a NAND flash.
  * @nand:		Chip related info for a NAND flash.
  * @partitions:		Partition info for a NAND Flash.
@@ -169,6 +203,7 @@ const char *part_probes[] = { "cmdlinepart", NULL };
  * @regs_va:		FSMC regs base address.
  */
 struct fsmc_nand_data {
+	u32			pid;
 	struct mtd_info		mtd;
 	struct nand_chip	nand;
 	struct mtd_partition	*partitions;
@@ -508,7 +543,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	struct nand_chip *nand;
 	struct fsmc_regs *regs;
 	struct resource *res;
-	int nr_parts, ret = 0;
+	int ret = 0;
+	u32 pid;
+	int i;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "platform data is NULL\n");
@@ -598,6 +635,18 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_probe1;
 
+	/*
+	 * This device ID is actually a common AMBA ID as used on the
+	 * AMBA PrimeCell bus. However it is not a PrimeCell.
+	 */
+	for (pid = 0, i = 0; i < 4; i++)
+		pid |= (readl(host->regs_va + resource_size(res) - 0x20 + 4 * i) & 255) << (i * 8);
+	host->pid = pid;
+	dev_info(&pdev->dev, "FSMC device partno %03x, manufacturer %02x, "
+		 "revision %02x, config %02x\n",
+		 AMBA_PART_BITS(pid), AMBA_MANF_BITS(pid),
+		 AMBA_REV_BITS(pid), AMBA_CONFIG_BITS(pid));
+
 	host->bank = pdata->bank;
 	host->select_chip = pdata->select_bank;
 	regs = host->regs_va;
@@ -625,7 +674,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 
 	fsmc_nand_setup(regs, host->bank, nand->options & NAND_BUSWIDTH_16);
 
-	if (get_fsmc_version(host->regs_va) == FSMC_VER8) {
+	if (AMBA_REV_BITS(host->pid) >= 8) {
 		nand->ecc.read_page = fsmc_read_page_hwecc;
 		nand->ecc.calculate = fsmc_read_hwecc_ecc4;
 		nand->ecc.correct = fsmc_correct_data;
@@ -645,7 +694,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 		goto err_probe;
 	}
 
-	if (get_fsmc_version(host->regs_va) == FSMC_VER8) {
+	if (AMBA_REV_BITS(host->pid) >= 8) {
 		if (host->mtd.writesize == 512) {
 			nand->ecc.layout = &fsmc_ecc4_sp_layout;
 			host->ecc_place = &fsmc_ecc4_sp_place;
@@ -676,11 +725,9 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	 * Check if partition info passed via command line
 	 */
 	host->mtd.name = "nand";
-	nr_parts = parse_mtd_partitions(&host->mtd, part_probes,
+	host->nr_partitions = parse_mtd_partitions(&host->mtd, part_probes,
 			&host->partitions, 0);
-	if (nr_parts > 0) {
-		host->nr_partitions = nr_parts;
-	} else {
+	if (host->nr_partitions <= 0) {
 #endif
 		/*
 		 * Check if partition info passed via command line
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index 67343fc31bd5..cea38a5d4ac5 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -251,58 +251,6 @@ static int jz_nand_correct_ecc_rs(struct mtd_info *mtd, uint8_t *dat,
 	return 0;
 }
 
-
-/* Copy paste of nand_read_page_hwecc_oob_first except for different eccpos
- * handling. The ecc area is for 4k chips 72 bytes long and thus does not fit
- * into the eccpos array. */
-static int jz_nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int page)
-{
-	int i, eccsize = chip->ecc.size;
-	int eccbytes = chip->ecc.bytes;
-	int eccsteps = chip->ecc.steps;
-	uint8_t *p = buf;
-	unsigned int ecc_offset = chip->page_shift;
-
-	/* Read the OOB area first */
-	chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page);
-
-	for (i = ecc_offset; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
-		int stat;
-
-		chip->ecc.hwctl(mtd, NAND_ECC_READ);
-		chip->read_buf(mtd, p, eccsize);
-
-		stat = chip->ecc.correct(mtd, p, &chip->oob_poi[i], NULL);
-		if (stat < 0)
-			mtd->ecc_stats.failed++;
-		else
-			mtd->ecc_stats.corrected += stat;
-	}
-	return 0;
-}
-
-/* Copy-and-paste of nand_write_page_hwecc with different eccpos handling. */
-static void jz_nand_write_page_hwecc(struct mtd_info *mtd,
-	struct nand_chip *chip, const uint8_t *buf)
-{
-	int i, eccsize = chip->ecc.size;
-	int eccbytes = chip->ecc.bytes;
-	int eccsteps = chip->ecc.steps;
-	const uint8_t *p = buf;
-	unsigned int ecc_offset = chip->page_shift;
-
-	for (i = ecc_offset; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
-		chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
-		chip->write_buf(mtd, p, eccsize);
-		chip->ecc.calculate(mtd, p, &chip->oob_poi[i]);
-	}
-
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
-}
-
 #ifdef CONFIG_MTD_CMDLINE_PARTS
 static const char *part_probes[] = {"cmdline", NULL};
 #endif
@@ -393,9 +341,6 @@ static int __devinit jz_nand_probe(struct platform_device *pdev)
 	chip->ecc.size		= 512;
 	chip->ecc.bytes		= 9;
 
-	chip->ecc.read_page	= jz_nand_read_page_hwecc_oob_first;
-	chip->ecc.write_page	= jz_nand_write_page_hwecc;
-
 	if (pdata)
 		chip->ecc.layout = pdata->ecc_layout;
 
@@ -489,7 +434,7 @@ static int __devexit jz_nand_remove(struct platform_device *pdev)
 	return 0;
 }
 
-struct platform_driver jz_nand_driver = {
+static struct platform_driver jz_nand_driver = {
 	.probe = jz_nand_probe,
 	.remove = __devexit_p(jz_nand_remove),
 	.driver = {
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 214b03afdd48..ef932ba55a0b 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -1009,7 +1009,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
 	struct mxc_nand_host *host;
 	struct resource *res;
-	int err = 0, nr_parts = 0;
+	int err = 0, __maybe_unused nr_parts = 0;
 	struct nand_ecclayout *oob_smallpage, *oob_largepage;
 
 	/* Allocate memory for MTD device structure and private data */
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 31bf376b82a0..a9c6ce745767 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2865,20 +2865,24 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 
 	/* check version */
 	val = le16_to_cpu(p->revision);
-	if (val == 1 || val > (1 << 4)) {
-		printk(KERN_INFO "%s: unsupported ONFI version: %d\n",
-								__func__, val);
-		return 0;
-	}
-
-	if (val & (1 << 4))
+	if (val & (1 << 5))
+		chip->onfi_version = 23;
+	else if (val & (1 << 4))
 		chip->onfi_version = 22;
 	else if (val & (1 << 3))
 		chip->onfi_version = 21;
 	else if (val & (1 << 2))
 		chip->onfi_version = 20;
-	else
+	else if (val & (1 << 1))
 		chip->onfi_version = 10;
+	else
+		chip->onfi_version = 0;
+
+	if (!chip->onfi_version) {
+		printk(KERN_INFO "%s: unsupported ONFI version: %d\n",
+								__func__, val);
+		return 0;
+	}
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
 	sanitize_string(p->model, sizeof(p->model));
@@ -2887,7 +2891,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 	mtd->writesize = le32_to_cpu(p->byte_per_page);
 	mtd->erasesize = le32_to_cpu(p->pages_per_block) * mtd->writesize;
 	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
-	chip->chipsize = le32_to_cpu(p->blocks_per_lun) * mtd->erasesize;
+	chip->chipsize = (uint64_t)le32_to_cpu(p->blocks_per_lun) * mtd->erasesize;
 	busw = 0;
 	if (le16_to_cpu(p->features) & 1)
 		busw = NAND_BUSWIDTH_16;
@@ -3157,7 +3161,7 @@ ident_done:
 	printk(KERN_INFO "NAND device: Manufacturer ID:"
 		" 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id, *dev_id,
 		nand_manuf_ids[maf_idx].name,
-	chip->onfi_version ? type->name : chip->onfi_params.model);
+		chip->onfi_version ? chip->onfi_params.model : type->name);
 
 	return type;
 }
@@ -3435,6 +3439,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 	mtd->resume = nand_resume;
 	mtd->block_isbad = nand_block_isbad;
 	mtd->block_markbad = nand_block_markbad;
+	mtd->writebufsize = mtd->writesize;
 
 	/* propagate ecc.layout to mtd_info */
 	mtd->ecclayout = chip->ecc.layout;
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 586b981f0e61..6ebd869993aa 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -1092,7 +1092,8 @@ static void mark_bbt_region(struct mtd_info *mtd, struct nand_bbt_descr *td)
 
 /**
  * verify_bbt_descr - verify the bad block description
- * @bd:			the table to verify
+ * @mtd:	MTD device structure
+ * @bd:		the table to verify
  *
  * This functions performs a few sanity checks on the bad block description
  * table.
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index a6a73aab1253..a5aa99f014ba 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -210,12 +210,12 @@ MODULE_PARM_DESC(bbt,		 "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d
 #define STATE_CMD_READ0        0x00000001 /* read data from the beginning of page */
 #define STATE_CMD_READ1        0x00000002 /* read data from the second half of page */
 #define STATE_CMD_READSTART    0x00000003 /* read data second command (large page devices) */
-#define STATE_CMD_PAGEPROG     0x00000004 /* start page programm */
+#define STATE_CMD_PAGEPROG     0x00000004 /* start page program */
 #define STATE_CMD_READOOB      0x00000005 /* read OOB area */
 #define STATE_CMD_ERASE1       0x00000006 /* sector erase first command */
 #define STATE_CMD_STATUS       0x00000007 /* read status */
 #define STATE_CMD_STATUS_M     0x00000008 /* read multi-plane status (isn't implemented) */
-#define STATE_CMD_SEQIN        0x00000009 /* sequential data imput */
+#define STATE_CMD_SEQIN        0x00000009 /* sequential data input */
 #define STATE_CMD_READID       0x0000000A /* read ID */
 #define STATE_CMD_ERASE2       0x0000000B /* sector erase second command */
 #define STATE_CMD_RESET        0x0000000C /* reset */
@@ -230,7 +230,7 @@ MODULE_PARM_DESC(bbt,		 "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d
 #define STATE_ADDR_ZERO        0x00000040 /* one byte zero address was accepted */
 #define STATE_ADDR_MASK        0x00000070 /* address states mask */
 
-/* Durind data input/output the simulator is in these states */
+/* During data input/output the simulator is in these states */
 #define STATE_DATAIN           0x00000100 /* waiting for data input */
 #define STATE_DATAIN_MASK      0x00000100 /* data input states mask */
 
@@ -248,7 +248,7 @@ MODULE_PARM_DESC(bbt,		 "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d
 
 /* Simulator's actions bit masks */
 #define ACTION_CPY       0x00100000 /* copy page/OOB to the internal buffer */
-#define ACTION_PRGPAGE   0x00200000 /* programm the internal buffer to flash */
+#define ACTION_PRGPAGE   0x00200000 /* program the internal buffer to flash */
 #define ACTION_SECERASE  0x00300000 /* erase sector */
 #define ACTION_ZEROOFF   0x00400000 /* don't add any offset to address */
 #define ACTION_HALFOFF   0x00500000 /* add to address half of page */
@@ -263,18 +263,18 @@ MODULE_PARM_DESC(bbt,		 "0 OOB, 1 BBT with marker in OOB, 2 BBT with marker in d
 #define OPT_PAGE512      0x00000002 /* 512-byte  page chips */
 #define OPT_PAGE2048     0x00000008 /* 2048-byte page chips */
 #define OPT_SMARTMEDIA   0x00000010 /* SmartMedia technology chips */
-#define OPT_AUTOINCR     0x00000020 /* page number auto inctimentation is possible */
+#define OPT_AUTOINCR     0x00000020 /* page number auto incrementation is possible */
 #define OPT_PAGE512_8BIT 0x00000040 /* 512-byte page chips with 8-bit bus width */
 #define OPT_PAGE4096     0x00000080 /* 4096-byte page chips */
 #define OPT_LARGEPAGE    (OPT_PAGE2048 | OPT_PAGE4096) /* 2048 & 4096-byte page chips */
 #define OPT_SMALLPAGE    (OPT_PAGE256  | OPT_PAGE512)  /* 256 and 512-byte page chips */
 
-/* Remove action bits ftom state */
+/* Remove action bits from state */
 #define NS_STATE(x) ((x) & ~ACTION_MASK)
 
 /*
  * Maximum previous states which need to be saved. Currently saving is
- * only needed for page programm operation with preceeded read command
+ * only needed for page program operation with preceded read command
  * (which is only valid for 512-byte pages).
  */
 #define NS_MAX_PREVSTATES 1
@@ -380,16 +380,16 @@ static struct nandsim_operations {
 	/* Read OOB */
 	{OPT_SMALLPAGE, {STATE_CMD_READOOB | ACTION_OOBOFF, STATE_ADDR_PAGE | ACTION_CPY,
 			STATE_DATAOUT, STATE_READY}},
-	/* Programm page starting from the beginning */
+	/* Program page starting from the beginning */
 	{OPT_ANY, {STATE_CMD_SEQIN, STATE_ADDR_PAGE, STATE_DATAIN,
 			STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}},
-	/* Programm page starting from the beginning */
+	/* Program page starting from the beginning */
 	{OPT_SMALLPAGE, {STATE_CMD_READ0, STATE_CMD_SEQIN | ACTION_ZEROOFF, STATE_ADDR_PAGE,
 			      STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}},
-	/* Programm page starting from the second half */
+	/* Program page starting from the second half */
 	{OPT_PAGE512, {STATE_CMD_READ1, STATE_CMD_SEQIN | ACTION_HALFOFF, STATE_ADDR_PAGE,
 			      STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}},
-	/* Programm OOB */
+	/* Program OOB */
 	{OPT_SMALLPAGE, {STATE_CMD_READOOB, STATE_CMD_SEQIN | ACTION_OOBOFF, STATE_ADDR_PAGE,
 			      STATE_DATAIN, STATE_CMD_PAGEPROG | ACTION_PRGPAGE, STATE_READY}},
 	/* Erase sector */
@@ -470,7 +470,7 @@ static int alloc_device(struct nandsim *ns)
 			err = -EINVAL;
 			goto err_close;
 		}
-		ns->pages_written = vmalloc(ns->geom.pgnum);
+		ns->pages_written = vzalloc(ns->geom.pgnum);
 		if (!ns->pages_written) {
 			NS_ERR("alloc_device: unable to allocate pages written array\n");
 			err = -ENOMEM;
@@ -483,7 +483,6 @@ static int alloc_device(struct nandsim *ns)
 			goto err_free;
 		}
 		ns->cfile = cfile;
-		memset(ns->pages_written, 0, ns->geom.pgnum);
 		return 0;
 	}
 
@@ -1171,9 +1170,9 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status)
  * of supported operations.
  *
  * Operation can be unknown because of the following.
- *   1. New command was accepted and this is the firs call to find the
+ *   1. New command was accepted and this is the first call to find the
  *      correspondent states chain. In this case ns->npstates = 0;
- *   2. There is several operations which begin with the same command(s)
+ *   2. There are several operations which begin with the same command(s)
  *      (for example program from the second half and read from the
  *      second half operations both begin with the READ1 command). In this
  *      case the ns->pstates[] array contains previous states.
@@ -1186,7 +1185,7 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status)
  * ns->ops, ns->state, ns->nxstate are initialized, ns->npstate is
  * zeroed).
  *
- * If there are several maches, the current state is pushed to the
+ * If there are several matches, the current state is pushed to the
  * ns->pstates.
  *
  * The operation can be unknown only while commands are input to the chip.
@@ -1195,10 +1194,10 @@ static inline void switch_to_ready_state(struct nandsim *ns, u_char status)
  * operation is searched using the following pattern:
  *     ns->pstates[0], ... ns->pstates[ns->npstates], <address input>
  *
- * It is supposed that this pattern must either match one operation on
+ * It is supposed that this pattern must either match one operation or
  * none. There can't be ambiguity in that case.
  *
- * If no matches found, the functions does the following:
+ * If no matches found, the function does the following:
  *   1. if there are saved states present, try to ignore them and search
  *      again only using the last command. If nothing was found, switch
  *      to the STATE_READY state.
@@ -1668,7 +1667,7 @@ static int do_state_action(struct nandsim *ns, uint32_t action)
 
 	case ACTION_PRGPAGE:
 		/*
-		 * Programm page - move internal buffer data to the page.
+		 * Program page - move internal buffer data to the page.
 		 */
 
 		if (ns->lines.wp) {
@@ -1933,7 +1932,7 @@ static u_char ns_nand_read_byte(struct mtd_info *mtd)
 		NS_DBG("read_byte: all bytes were read\n");
 
 		/*
-		 * The OPT_AUTOINCR allows to read next conseqitive pages without
+		 * The OPT_AUTOINCR allows to read next consecutive pages without
 		 * new read operation cycle.
 		 */
 		if ((ns->options & OPT_AUTOINCR) && NS_STATE(ns->state) == STATE_DATAOUT) {
diff --git a/drivers/mtd/nand/pasemi_nand.c b/drivers/mtd/nand/pasemi_nand.c
index 6ddb2461d740..bb277a54986f 100644
--- a/drivers/mtd/nand/pasemi_nand.c
+++ b/drivers/mtd/nand/pasemi_nand.c
@@ -107,7 +107,7 @@ static int __devinit pasemi_nand_probe(struct platform_device *ofdev,
 	if (pasemi_nand_mtd)
 		return -ENODEV;
 
-	pr_debug("pasemi_nand at %llx-%llx\n", res.start, res.end);
+	pr_debug("pasemi_nand at %pR\n", &res);
 
 	/* Allocate memory for MTD device structure and private data */
 	pasemi_nand_mtd = kzalloc(sizeof(struct mtd_info) +
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 17f8518cc5eb..ea2c288df3f6 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -885,6 +885,7 @@ static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info)
 	/* set info fields needed to __readid */
 	info->read_id_bytes = (info->page_size == 2048) ? 4 : 2;
 	info->reg_ndcr = ndcr;
+	info->cmdset = &default_cmdset;
 
 	if (__readid(info, &id))
 		return -ENODEV;
@@ -915,7 +916,6 @@ static int pxa3xx_nand_detect_config(struct pxa3xx_nand_info *info)
 
 	info->ndtr0cs0 = nand_readl(info, NDTR0CS0);
 	info->ndtr1cs0 = nand_readl(info, NDTR1CS0);
-	info->cmdset = &default_cmdset;
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/txx9ndfmc.c b/drivers/mtd/nand/txx9ndfmc.c
index 054a41c0ef4a..ca270a4881a4 100644
--- a/drivers/mtd/nand/txx9ndfmc.c
+++ b/drivers/mtd/nand/txx9ndfmc.c
@@ -277,8 +277,9 @@ static int txx9ndfmc_nand_scan(struct mtd_info *mtd)
 	ret = nand_scan_ident(mtd, 1, NULL);
 	if (!ret) {
 		if (mtd->writesize >= 512) {
-			chip->ecc.size = mtd->writesize;
-			chip->ecc.bytes = 3 * (mtd->writesize / 256);
+			/* Hardware ECC 6 byte ECC per 512 Byte data */
+			chip->ecc.size = 512;
+			chip->ecc.bytes = 6;
 		}
 		ret = nand_scan_tail(mtd);
 	}
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index d0894ca7798b..ac31f461cc1c 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -35,6 +35,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/regulator/consumer.h>
 
 #include <asm/mach/flash.h>
 #include <plat/gpmc.h>
@@ -63,8 +64,13 @@ struct omap2_onenand {
 	int dma_channel;
 	int freq;
 	int (*setup)(void __iomem *base, int freq);
+	struct regulator *regulator;
 };
 
+#ifdef CONFIG_MTD_PARTITIONS
+static const char *part_probes[] = { "cmdlinepart", NULL,  };
+#endif
+
 static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data)
 {
 	struct omap2_onenand *c = data;
@@ -108,8 +114,9 @@ static void wait_warn(char *msg, int state, unsigned int ctrl,
 static int omap2_onenand_wait(struct mtd_info *mtd, int state)
 {
 	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+	struct onenand_chip *this = mtd->priv;
 	unsigned int intr = 0;
-	unsigned int ctrl;
+	unsigned int ctrl, ctrl_mask;
 	unsigned long timeout;
 	u32 syscfg;
 
@@ -180,7 +187,8 @@ retry:
 			if (result == 0) {
 				/* Timeout after 20ms */
 				ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS);
-				if (ctrl & ONENAND_CTRL_ONGO) {
+				if (ctrl & ONENAND_CTRL_ONGO &&
+				    !this->ongoing) {
 					/*
 					 * The operation seems to be still going
 					 * so give it some more time.
@@ -269,7 +277,11 @@ retry:
 		return -EIO;
 	}
 
-	if (ctrl & 0xFE9F)
+	ctrl_mask = 0xFE9F;
+	if (this->ongoing)
+		ctrl_mask &= ~0x8000;
+
+	if (ctrl & ctrl_mask)
 		wait_warn("unexpected controller status", state, ctrl, intr);
 
 	return 0;
@@ -591,6 +603,30 @@ static void omap2_onenand_shutdown(struct platform_device *pdev)
 	memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE);
 }
 
+static int omap2_onenand_enable(struct mtd_info *mtd)
+{
+	int ret;
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+
+	ret = regulator_enable(c->regulator);
+	if (ret != 0)
+		dev_err(&c->pdev->dev, "cant enable regulator\n");
+
+	return ret;
+}
+
+static int omap2_onenand_disable(struct mtd_info *mtd)
+{
+	int ret;
+	struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd);
+
+	ret = regulator_disable(c->regulator);
+	if (ret != 0)
+		dev_err(&c->pdev->dev, "cant disable regulator\n");
+
+	return ret;
+}
+
 static int __devinit omap2_onenand_probe(struct platform_device *pdev)
 {
 	struct omap_onenand_platform_data *pdata;
@@ -705,8 +741,18 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (pdata->regulator_can_sleep) {
+		c->regulator = regulator_get(&pdev->dev, "vonenand");
+		if (IS_ERR(c->regulator)) {
+			dev_err(&pdev->dev,  "Failed to get regulator\n");
+			goto err_release_dma;
+		}
+		c->onenand.enable = omap2_onenand_enable;
+		c->onenand.disable = omap2_onenand_disable;
+	}
+
 	if ((r = onenand_scan(&c->mtd, 1)) < 0)
-		goto err_release_dma;
+		goto err_release_regulator;
 
 	switch ((c->onenand.version_id >> 4) & 0xf) {
 	case 0:
@@ -727,13 +773,15 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
 	}
 
 #ifdef CONFIG_MTD_PARTITIONS
-	if (pdata->parts != NULL)
-		r = add_mtd_partitions(&c->mtd, pdata->parts,
-				       pdata->nr_parts);
+	r = parse_mtd_partitions(&c->mtd, part_probes, &c->parts, 0);
+	if (r > 0)
+		r = add_mtd_partitions(&c->mtd, c->parts, r);
+	else if (pdata->parts != NULL)
+		r = add_mtd_partitions(&c->mtd, pdata->parts, pdata->nr_parts);
 	else
 #endif
 		r = add_mtd_device(&c->mtd);
-	if (r < 0)
+	if (r)
 		goto err_release_onenand;
 
 	platform_set_drvdata(pdev, c);
@@ -742,6 +790,8 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
 
 err_release_onenand:
 	onenand_release(&c->mtd);
+err_release_regulator:
+	regulator_put(c->regulator);
 err_release_dma:
 	if (c->dma_channel != -1)
 		omap_free_dma(c->dma_channel);
@@ -757,6 +807,7 @@ err_release_mem_region:
 err_free_cs:
 	gpmc_cs_free(c->gpmc_cs);
 err_kfree:
+	kfree(c->parts);
 	kfree(c);
 
 	return r;
@@ -766,18 +817,8 @@ static int __devexit omap2_onenand_remove(struct platform_device *pdev)
 {
 	struct omap2_onenand *c = dev_get_drvdata(&pdev->dev);
 
-	BUG_ON(c == NULL);
-
-#ifdef CONFIG_MTD_PARTITIONS
-	if (c->parts)
-		del_mtd_partitions(&c->mtd);
-	else
-		del_mtd_device(&c->mtd);
-#else
-	del_mtd_device(&c->mtd);
-#endif
-
 	onenand_release(&c->mtd);
+	regulator_put(c->regulator);
 	if (c->dma_channel != -1)
 		omap_free_dma(c->dma_channel);
 	omap2_onenand_shutdown(pdev);
@@ -789,6 +830,7 @@ static int __devexit omap2_onenand_remove(struct platform_device *pdev)
 	iounmap(c->onenand.base);
 	release_mem_region(c->phys_base, ONENAND_IO_SIZE);
 	gpmc_cs_free(c->gpmc_cs);
+	kfree(c->parts);
 	kfree(c);
 
 	return 0;
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 6b3a875647c9..bac41caa8df7 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -400,8 +400,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		value = onenand_bufferram_address(this, block);
 		this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2);
 
-		if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this) ||
-		    ONENAND_IS_4KB_PAGE(this))
+		if (ONENAND_IS_2PLANE(this) || ONENAND_IS_4KB_PAGE(this))
 			/* It is always BufferRAM0 */
 			ONENAND_SET_BUFFERRAM0(this);
 		else
@@ -430,7 +429,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		case FLEXONENAND_CMD_RECOVER_LSB:
 		case ONENAND_CMD_READ:
 		case ONENAND_CMD_READOOB:
-			if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
+			if (ONENAND_IS_4KB_PAGE(this))
 				/* It is always BufferRAM0 */
 				dataram = ONENAND_SET_BUFFERRAM0(this);
 			else
@@ -949,6 +948,8 @@ static int onenand_get_device(struct mtd_info *mtd, int new_state)
 		if (this->state == FL_READY) {
 			this->state = new_state;
 			spin_unlock(&this->chip_lock);
+			if (new_state != FL_PM_SUSPENDED && this->enable)
+				this->enable(mtd);
 			break;
 		}
 		if (new_state == FL_PM_SUSPENDED) {
@@ -975,6 +976,8 @@ static void onenand_release_device(struct mtd_info *mtd)
 {
 	struct onenand_chip *this = mtd->priv;
 
+	if (this->state != FL_PM_SUSPENDED && this->disable)
+		this->disable(mtd);
 	/* Release the chip */
 	spin_lock(&this->chip_lock);
 	this->state = FL_READY;
@@ -1353,7 +1356,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
 
 	stats = mtd->ecc_stats;
 
-	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
+	readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
 
 	while (read < len) {
 		cond_resched();
@@ -1429,7 +1432,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
 	int ret;
 
 	onenand_get_device(mtd, FL_READING);
-	ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
+	ret = ONENAND_IS_4KB_PAGE(this) ?
 		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
 		onenand_read_ops_nolock(mtd, from, &ops);
 	onenand_release_device(mtd);
@@ -1464,7 +1467,7 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
 
 	onenand_get_device(mtd, FL_READING);
 	if (ops->datbuf)
-		ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
+		ret = ONENAND_IS_4KB_PAGE(this) ?
 			onenand_mlc_read_ops_nolock(mtd, from, ops) :
 			onenand_read_ops_nolock(mtd, from, ops);
 	else
@@ -1485,8 +1488,7 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state)
 {
 	struct onenand_chip *this = mtd->priv;
 	unsigned long timeout;
-	unsigned int interrupt;
-	unsigned int ctrl;
+	unsigned int interrupt, ctrl, ecc, addr1, addr8;
 
 	/* The 20 msec is enough */
 	timeout = jiffies + msecs_to_jiffies(20);
@@ -1498,25 +1500,28 @@ static int onenand_bbt_wait(struct mtd_info *mtd, int state)
 	/* To get correct interrupt status in timeout case */
 	interrupt = this->read_word(this->base + ONENAND_REG_INTERRUPT);
 	ctrl = this->read_word(this->base + ONENAND_REG_CTRL_STATUS);
+	addr1 = this->read_word(this->base + ONENAND_REG_START_ADDRESS1);
+	addr8 = this->read_word(this->base + ONENAND_REG_START_ADDRESS8);
 
 	if (interrupt & ONENAND_INT_READ) {
-		int ecc = onenand_read_ecc(this);
+		ecc = onenand_read_ecc(this);
 		if (ecc & ONENAND_ECC_2BIT_ALL) {
-			printk(KERN_WARNING "%s: ecc error = 0x%04x, "
-				"controller error 0x%04x\n",
-				__func__, ecc, ctrl);
+			printk(KERN_DEBUG "%s: ecc 0x%04x ctrl 0x%04x "
+			       "intr 0x%04x addr1 %#x addr8 %#x\n",
+			       __func__, ecc, ctrl, interrupt, addr1, addr8);
 			return ONENAND_BBT_READ_ECC_ERROR;
 		}
 	} else {
-		printk(KERN_ERR "%s: read timeout! ctrl=0x%04x intr=0x%04x\n",
-			__func__, ctrl, interrupt);
+		printk(KERN_ERR "%s: read timeout! ctrl 0x%04x "
+		       "intr 0x%04x addr1 %#x addr8 %#x\n",
+		       __func__, ctrl, interrupt, addr1, addr8);
 		return ONENAND_BBT_READ_FATAL_ERROR;
 	}
 
 	/* Initial bad block case: 0x2400 or 0x0400 */
 	if (ctrl & ONENAND_CTRL_ERROR) {
-		printk(KERN_DEBUG "%s: controller error = 0x%04x\n",
-			__func__, ctrl);
+		printk(KERN_DEBUG "%s: ctrl 0x%04x intr 0x%04x addr1 %#x "
+		       "addr8 %#x\n", __func__, ctrl, interrupt, addr1, addr8);
 		return ONENAND_BBT_READ_ERROR;
 	}
 
@@ -1558,7 +1563,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
 
 	column = from & (mtd->oobsize - 1);
 
-	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
+	readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
 
 	while (read < len) {
 		cond_resched();
@@ -1612,7 +1617,7 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to
 	u_char *oob_buf = this->oob_buf;
 	int status, i, readcmd;
 
-	readcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
+	readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB;
 
 	this->command(mtd, readcmd, to, mtd->oobsize);
 	onenand_update_bufferram(mtd, to, 0);
@@ -1845,7 +1850,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 	const u_char *buf = ops->datbuf;
 	const u_char *oob = ops->oobbuf;
 	u_char *oobbuf;
-	int ret = 0;
+	int ret = 0, cmd;
 
 	DEBUG(MTD_DEBUG_LEVEL3, "%s: to = 0x%08x, len = %i\n",
 		__func__, (unsigned int) to, (int) len);
@@ -1954,7 +1959,19 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 			ONENAND_SET_NEXT_BUFFERRAM(this);
 		}
 
-		this->command(mtd, ONENAND_CMD_PROG, to, mtd->writesize);
+		this->ongoing = 0;
+		cmd = ONENAND_CMD_PROG;
+
+		/* Exclude 1st OTP and OTP blocks for cache program feature */
+		if (ONENAND_IS_CACHE_PROGRAM(this) &&
+		    likely(onenand_block(this, to) != 0) &&
+		    ONENAND_IS_4KB_PAGE(this) &&
+		    ((written + thislen) < len)) {
+			cmd = ONENAND_CMD_2X_CACHE_PROG;
+			this->ongoing = 1;
+		}
+
+		this->command(mtd, cmd, to, mtd->writesize);
 
 		/*
 		 * 2 PLANE, MLC, and Flex-OneNAND wait here
@@ -2067,7 +2084,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 
 	oobbuf = this->oob_buf;
 
-	oobcmd = ONENAND_IS_MLC(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB;
+	oobcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB;
 
 	/* Loop until all data write */
 	while (written < len) {
@@ -2086,7 +2103,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 			memcpy(oobbuf + column, buf, thislen);
 		this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize);
 
-		if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) {
+		if (ONENAND_IS_4KB_PAGE(this)) {
 			/* Set main area of DataRAM to 0xff*/
 			memset(this->page_buf, 0xff, mtd->writesize);
 			this->write_bufferram(mtd, ONENAND_DATARAM,
@@ -2481,7 +2498,8 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
 	/* Grab the lock and see if the device is available */
 	onenand_get_device(mtd, FL_ERASING);
 
-	if (region || instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) {
+	if (ONENAND_IS_4KB_PAGE(this) || region ||
+	    instr->len < MB_ERASE_MIN_BLK_COUNT * block_size) {
 		/* region is set for Flex-OneNAND (no mb erase) */
 		ret = onenand_block_by_block_erase(mtd, instr,
 						   region, block_size);
@@ -3029,7 +3047,7 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len,
 	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
 	this->wait(mtd, FL_OTPING);
 
-	ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
+	ret = ONENAND_IS_4KB_PAGE(this) ?
 		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
 		onenand_read_ops_nolock(mtd, from, &ops);
 
@@ -3377,8 +3395,10 @@ static void onenand_check_features(struct mtd_info *mtd)
 	case ONENAND_DEVICE_DENSITY_4Gb:
 		if (ONENAND_IS_DDP(this))
 			this->options |= ONENAND_HAS_2PLANE;
-		else if (numbufs == 1)
+		else if (numbufs == 1) {
 			this->options |= ONENAND_HAS_4KB_PAGE;
+			this->options |= ONENAND_HAS_CACHE_PROGRAM;
+		}
 
 	case ONENAND_DEVICE_DENSITY_2Gb:
 		/* 2Gb DDP does not have 2 plane */
@@ -3399,7 +3419,11 @@ static void onenand_check_features(struct mtd_info *mtd)
 		break;
 	}
 
-	if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
+	/* The MLC has 4KiB pagesize. */
+	if (ONENAND_IS_MLC(this))
+		this->options |= ONENAND_HAS_4KB_PAGE;
+
+	if (ONENAND_IS_4KB_PAGE(this))
 		this->options &= ~ONENAND_HAS_2PLANE;
 
 	if (FLEXONENAND(this)) {
@@ -3415,6 +3439,8 @@ static void onenand_check_features(struct mtd_info *mtd)
 		printk(KERN_DEBUG "Chip has 2 plane\n");
 	if (this->options & ONENAND_HAS_4KB_PAGE)
 		printk(KERN_DEBUG "Chip has 4KiB pagesize\n");
+	if (this->options & ONENAND_HAS_CACHE_PROGRAM)
+		printk(KERN_DEBUG "Chip has cache program feature\n");
 }
 
 /**
@@ -3831,7 +3857,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	/* The data buffer size is equal to page size */
 	mtd->writesize = this->read_word(this->base + ONENAND_REG_DATA_BUFFER_SIZE);
 	/* We use the full BufferRAM */
-	if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
+	if (ONENAND_IS_4KB_PAGE(this))
 		mtd->writesize <<= 1;
 
 	mtd->oobsize = mtd->writesize >> 5;
@@ -4054,6 +4080,7 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 	mtd->block_isbad = onenand_block_isbad;
 	mtd->block_markbad = onenand_block_markbad;
 	mtd->owner = THIS_MODULE;
+	mtd->writebufsize = mtd->writesize;
 
 	/* Unlock whole block */
 	this->unlock_all(mtd);
diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index 01ab5b3c453b..fc2c16a0fd1c 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c
@@ -91,16 +91,18 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
 		for (j = 0; j < len; j++) {
 			/* No need to read pages fully,
 			 * just read required OOB bytes */
-			ret = onenand_bbt_read_oob(mtd, from + j * mtd->writesize + bd->offs, &ops);
+			ret = onenand_bbt_read_oob(mtd,
+				from + j * this->writesize + bd->offs, &ops);
 
 			/* If it is a initial bad block, just ignore it */
 			if (ret == ONENAND_BBT_READ_FATAL_ERROR)
 				return -EIO;
 
-			if (ret || check_short_pattern(&buf[j * scanlen], scanlen, mtd->writesize, bd)) {
+			if (ret || check_short_pattern(&buf[j * scanlen],
+					       scanlen, this->writesize, bd)) {
 				bbm->bbt[i >> 3] |= 0x03 << (i & 0x6);
-				printk(KERN_WARNING "Bad eraseblock %d at 0x%08x\n",
-					i >> 1, (unsigned int) from);
+				printk(KERN_INFO "OneNAND eraseblock %d is an "
+					"initial bad block\n", i >> 1);
 				mtd->ecc_stats.badblocks++;
 				break;
 			}
diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
index 0de7a05e6de0..a4c74a9ba430 100644
--- a/drivers/mtd/onenand/samsung.c
+++ b/drivers/mtd/onenand/samsung.c
@@ -651,7 +651,7 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area,
 	void __iomem *p;
 	void *buf = (void *) buffer;
 	dma_addr_t dma_src, dma_dst;
-	int err, page_dma = 0;
+	int err, ofs, page_dma = 0;
 	struct device *dev = &onenand->pdev->dev;
 
 	p = this->base + area;
@@ -677,10 +677,13 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area,
 		if (!page)
 			goto normal;
 
+		/* Page offset */
+		ofs = ((size_t) buf & ~PAGE_MASK);
 		page_dma = 1;
+
 		/* DMA routine */
 		dma_src = onenand->phys_base + (p - this->base);
-		dma_dst = dma_map_page(dev, page, 0, count, DMA_FROM_DEVICE);
+		dma_dst = dma_map_page(dev, page, ofs, count, DMA_FROM_DEVICE);
 	} else {
 		/* DMA routine */
 		dma_src = onenand->phys_base + (p - this->base);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 5ebe280225d6..f49e49dc5928 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -672,7 +672,33 @@ static int io_init(struct ubi_device *ubi)
 		ubi->nor_flash = 1;
 	}
 
-	ubi->min_io_size = ubi->mtd->writesize;
+	/*
+	 * Set UBI min. I/O size (@ubi->min_io_size). We use @mtd->writebufsize
+	 * for these purposes, not @mtd->writesize. At the moment this does not
+	 * matter for NAND, because currently @mtd->writebufsize is equivalent to
+	 * @mtd->writesize for all NANDs. However, some CFI NOR flashes may
+	 * have @mtd->writebufsize which is multiple of @mtd->writesize.
+	 *
+	 * The reason we use @mtd->writebufsize for @ubi->min_io_size is that
+	 * UBI and UBIFS recovery algorithms rely on the fact that if there was
+	 * an unclean power cut, then we can find offset of the last corrupted
+	 * node, align the offset to @ubi->min_io_size, read the rest of the
+	 * eraseblock starting from this offset, and check whether there are
+	 * only 0xFF bytes. If yes, then we are probably dealing with a
+	 * corruption caused by a power cut, if not, then this is probably some
+	 * severe corruption.
+	 *
+	 * Thus, we have to use the maximum write unit size of the flash, which
+	 * is @mtd->writebufsize, because @mtd->writesize is the minimum write
+	 * size, not the maximum.
+	 */
+	if (ubi->mtd->type == MTD_NANDFLASH)
+		ubi_assert(ubi->mtd->writebufsize == ubi->mtd->writesize);
+	else if (ubi->mtd->type == MTD_NORFLASH)
+		ubi_assert(ubi->mtd->writebufsize % ubi->mtd->writesize == 0);
+
+	ubi->min_io_size = ubi->mtd->writebufsize;
+
 	ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
 
 	/*
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index fcdb7f65fe0b..0b8141fc5c26 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -425,12 +425,11 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
 
 	/* Read both LEB 0 and LEB 1 into memory */
 	ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
-		leb[seb->lnum] = vmalloc(ubi->vtbl_size);
+		leb[seb->lnum] = vzalloc(ubi->vtbl_size);
 		if (!leb[seb->lnum]) {
 			err = -ENOMEM;
 			goto out_free;
 		}
-		memset(leb[seb->lnum], 0, ubi->vtbl_size);
 
 		err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
 				       ubi->vtbl_size);
@@ -516,10 +515,9 @@ static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi,
 	int i;
 	struct ubi_vtbl_record *vtbl;
 
-	vtbl = vmalloc(ubi->vtbl_size);
+	vtbl = vzalloc(ubi->vtbl_size);
 	if (!vtbl)
 		return ERR_PTR(-ENOMEM);
-	memset(vtbl, 0, ubi->vtbl_size);
 
 	for (i = 0; i < ubi->vtbl_slots; i++)
 		memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE);
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 68aaa42d0ced..32f947154c33 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -113,7 +113,7 @@ static void catas_reset(struct work_struct *work)
 void mlx4_start_catas_poll(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	unsigned long addr;
+	phys_addr_t addr;
 
 	INIT_LIST_HEAD(&priv->catas_err.list);
 	init_timer(&priv->catas_err.timer);
@@ -124,8 +124,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
 
 	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
 	if (!priv->catas_err.map) {
-		mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
-			  addr);
+		mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+			  (unsigned long long) addr);
 		return;
 	}
 
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index f6e0d40cd876..1ff6ca6466ed 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c
@@ -202,7 +202,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 	if (mlx4_uar_alloc(dev, &mdev->priv_uar))
 		goto err_pd;
 
-	mdev->uar_map = ioremap(mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT,
+				PAGE_SIZE);
 	if (!mdev->uar_map)
 		goto err_uar;
 	spin_lock_init(&mdev->uar_lock);
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 782f11d8fa71..4ffdc18fcb8a 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -829,7 +829,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		goto err_uar_table_free;
 	}
 
-	priv->kar = ioremap(priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+	priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
 	if (!priv->kar) {
 		mlx4_err(dev, "Couldn't map kernel access region, "
 			 "aborting.\n");
diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c
index c4f88b7ef7b6..79cf42db2ea9 100644
--- a/drivers/net/mlx4/mcg.c
+++ b/drivers/net/mlx4/mcg.c
@@ -95,7 +95,8 @@ static int mlx4_MGID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
  * entry in hash chain and *mgm holds end of hash chain.
  */
 static int find_mgm(struct mlx4_dev *dev,
-		    u8 *gid, struct mlx4_cmd_mailbox *mgm_mailbox,
+		    u8 *gid, enum mlx4_protocol protocol,
+		    struct mlx4_cmd_mailbox *mgm_mailbox,
 		    u16 *hash, int *prev, int *index)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -134,7 +135,8 @@ static int find_mgm(struct mlx4_dev *dev,
 			return err;
 		}
 
-		if (!memcmp(mgm->gid, gid, 16))
+		if (!memcmp(mgm->gid, gid, 16) &&
+		    be32_to_cpu(mgm->members_count) >> 30 == protocol)
 			return err;
 
 		*prev = *index;
@@ -146,7 +148,7 @@ static int find_mgm(struct mlx4_dev *dev,
 }
 
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
-			  int block_mcast_loopback)
+			  int block_mcast_loopback, enum mlx4_protocol protocol)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
@@ -165,7 +167,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 
 	mutex_lock(&priv->mcg_table.mutex);
 
-	err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
+	err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
@@ -187,7 +189,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 		memcpy(mgm->gid, gid, 16);
 	}
 
-	members_count = be32_to_cpu(mgm->members_count);
+	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
 	if (members_count == MLX4_QP_PER_MGM) {
 		mlx4_err(dev, "MGM at index %x is full.\n", index);
 		err = -ENOMEM;
@@ -207,7 +209,7 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 	else
 		mgm->qp[members_count++] = cpu_to_be32(qp->qpn & MGM_QPN_MASK);
 
-	mgm->members_count       = cpu_to_be32(members_count);
+	mgm->members_count = cpu_to_be32(members_count | (u32) protocol << 30);
 
 	err = mlx4_WRITE_MCG(dev, index, mailbox);
 	if (err)
@@ -242,7 +244,8 @@ out:
 }
 EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
 
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol protocol)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
@@ -260,7 +263,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
 
 	mutex_lock(&priv->mcg_table.mutex);
 
-	err = find_mgm(dev, gid, mailbox, &hash, &prev, &index);
+	err = find_mgm(dev, gid, protocol, mailbox, &hash, &prev, &index);
 	if (err)
 		goto out;
 
@@ -270,7 +273,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
 		goto out;
 	}
 
-	members_count = be32_to_cpu(mgm->members_count);
+	members_count = be32_to_cpu(mgm->members_count) & 0xffffff;
 	for (loc = -1, i = 0; i < members_count; ++i)
 		if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn)
 			loc = i;
@@ -282,7 +285,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16])
 	}
 
 
-	mgm->members_count = cpu_to_be32(--members_count);
+	mgm->members_count = cpu_to_be32(--members_count | (u32) protocol << 30);
 	mgm->qp[loc]       = mgm->qp[i - 1];
 	mgm->qp[i - 1]     = 0;
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index c787c3d95c60..af824e7e0367 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -692,12 +692,6 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 	return 1;
 }
 
-static void *__init early_device_tree_alloc(u64 size, u64 align)
-{
-	unsigned long mem = early_init_dt_alloc_memory_arch(size, align);
-	return __va(mem);
-}
-
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
@@ -709,7 +703,7 @@ static void *__init early_device_tree_alloc(u64 size, u64 align)
 void __init unflatten_device_tree(void)
 {
 	__unflatten_device_tree(initial_boot_params, &allnodes,
-				early_device_tree_alloc);
+				early_init_dt_alloc_memory_arch);
 
 	/* Get pointer to OF "/chosen" node for use everywhere */
 	of_chosen = of_find_node_by_path("/chosen");
diff --git a/drivers/spi/spi_sh_msiof.c b/drivers/spi/spi_sh_msiof.c
index d93b66743ba7..56f60c8ea0ab 100644
--- a/drivers/spi/spi_sh_msiof.c
+++ b/drivers/spi/spi_sh_msiof.c
@@ -635,7 +635,7 @@ static int sh_msiof_spi_remove(struct platform_device *pdev)
 	ret = spi_bitbang_stop(&p->bitbang);
 	if (!ret) {
 		pm_runtime_disable(&pdev->dev);
-		free_irq(platform_get_irq(pdev, 0), sh_msiof_spi_irq);
+		free_irq(platform_get_irq(pdev, 0), p);
 		iounmap(p->mapbase);
 		clk_put(p->clk);
 		spi_master_put(p->bitbang.master);
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index 87a3a9bd5842..f204d33910ec 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -283,7 +283,7 @@ static int smb_compare_dentry(const struct dentry *,
 		unsigned int, const char *, const struct qstr *);
 static int smb_delete_dentry(const struct dentry *);
 
-static const struct dentry_operations smbfs_dentry_operations =
+const struct dentry_operations smbfs_dentry_operations =
 {
 	.d_revalidate	= smb_lookup_validate,
 	.d_hash		= smb_hash_dentry,
@@ -291,7 +291,7 @@ static const struct dentry_operations smbfs_dentry_operations =
 	.d_delete	= smb_delete_dentry,
 };
 
-static const struct dentry_operations smbfs_dentry_operations_case =
+const struct dentry_operations smbfs_dentry_operations_case =
 {
 	.d_revalidate	= smb_lookup_validate,
 	.d_delete	= smb_delete_dentry,
diff --git a/fs/Kconfig b/fs/Kconfig
index 771f457402d4..9a7921ae4763 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -30,15 +30,6 @@ config FS_MBCACHE
 source "fs/reiserfs/Kconfig"
 source "fs/jfs/Kconfig"
 
-config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
-#
-# NOTE: you can implement Posix ACLs without these helpers (XFS does).
-# 	Never use this symbol for ifdefs.
-#
-	bool
-	default n
-
 source "fs/xfs/Kconfig"
 source "fs/gfs2/Kconfig"
 source "fs/ocfs2/Kconfig"
@@ -47,6 +38,14 @@ source "fs/nilfs2/Kconfig"
 
 endif # BLOCK
 
+# Posix ACL utility routines
+#
+# Note: Posix ACLs can be implemented without these helpers.  Never use
+# this symbol for ifdefs in core code.
+#
+config FS_POSIX_ACL
+	def_bool n
+
 config EXPORTFS
 	tristate
 
diff --git a/fs/aio.c b/fs/aio.c
index 5e00f15c54aa..fc557a3be0a9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -87,7 +87,7 @@ static int __init aio_setup(void)
 
 	aio_wq = create_workqueue("aio");
 	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
-	BUG_ON(!abe_pool);
+	BUG_ON(!aio_wq || !abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e570..ecb9fd3be143 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
 	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
+	select LZO_COMPRESS
+	select LZO_DECOMPRESS
 	help
 	  Btrfs is a new filesystem with extents, writable snapshotting,
 	  support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32fd..31610ea73aec 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o acl.o free-space-cache.o zlib.o \
+	   export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ae2c8cac9d5..15b5ca2a2606 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 		if (size > 0) {
 			acl = posix_acl_from_xattr(value, size);
-			if (IS_ERR(acl))
+			if (IS_ERR(acl)) {
+				kfree(value);
 				return acl;
+			}
 			set_cached_acl(inode, type, acl);
 		}
 		kfree(value);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca0..ccc991c542df 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,7 +157,7 @@ struct btrfs_inode {
 	/*
 	 * always compress this one file
 	 */
-	unsigned force_compress:1;
+	unsigned force_compress:4;
 
 	struct inode vfs_inode;
 };
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c56..f745287fbf2e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
 	/* number of bytes on disk */
 	unsigned long compressed_len;
 
+	/* the compression algorithm for this bio */
+	int compress_type;
+
 	/* number of compressed pages in the array */
 	unsigned long nr_pages;
 
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
-					cb->start,
-					cb->orig_bio->bi_io_vec,
-					cb->orig_bio->bi_vcnt,
-					cb->compressed_len);
+	ret = btrfs_decompress_biovec(cb->compress_type,
+				      cb->compressed_pages,
+				      cb->start,
+				      cb->orig_bio->bi_io_vec,
+				      cb->orig_bio->bi_vcnt,
+				      cb->compressed_len);
 csum_failed:
 	if (ret)
 		cb->errors = 1;
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 	cb->len = uncompressed_len;
 	cb->compressed_len = compressed_len;
+	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
 	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	bio_put(comp_bio);
 	return 0;
 }
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+	&btrfs_zlib_compress,
+	&btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		INIT_LIST_HEAD(&comp_idle_workspace[i]);
+		spin_lock_init(&comp_workspace_lock[i]);
+		atomic_set(&comp_alloc_workspace[i], 0);
+		init_waitqueue_head(&comp_workspace_wait[i]);
+	}
+	return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+	struct list_head *workspace;
+	int cpus = num_online_cpus();
+	int idx = type - 1;
+
+	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
+	int *num_workspace			= &comp_num_workspace[idx];
+again:
+	spin_lock(workspace_lock);
+	if (!list_empty(idle_workspace)) {
+		workspace = idle_workspace->next;
+		list_del(workspace);
+		(*num_workspace)--;
+		spin_unlock(workspace_lock);
+		return workspace;
+
+	}
+	if (atomic_read(alloc_workspace) > cpus) {
+		DEFINE_WAIT(wait);
+
+		spin_unlock(workspace_lock);
+		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+			schedule();
+		finish_wait(workspace_wait, &wait);
+		goto again;
+	}
+	atomic_inc(alloc_workspace);
+	spin_unlock(workspace_lock);
+
+	workspace = btrfs_compress_op[idx]->alloc_workspace();
+	if (IS_ERR(workspace)) {
+		atomic_dec(alloc_workspace);
+		wake_up(workspace_wait);
+	}
+	return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+	int idx = type - 1;
+	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
+	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
+	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
+	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
+	int *num_workspace			= &comp_num_workspace[idx];
+
+	spin_lock(workspace_lock);
+	if (*num_workspace < num_online_cpus()) {
+		list_add_tail(workspace, idle_workspace);
+		(*num_workspace)++;
+		spin_unlock(workspace_lock);
+		goto wake;
+	}
+	spin_unlock(workspace_lock);
+
+	btrfs_compress_op[idx]->free_workspace(workspace);
+	atomic_dec(alloc_workspace);
+wake:
+	if (waitqueue_active(workspace_wait))
+		wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+	struct list_head *workspace;
+	int i;
+
+	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+		while (!list_empty(&comp_idle_workspace[i])) {
+			workspace = comp_idle_workspace[i].next;
+			list_del(workspace);
+			btrfs_compress_op[i]->free_workspace(workspace);
+			atomic_dec(&comp_alloc_workspace[i]);
+		}
+	}
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated.  There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read.  It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -1;
+
+	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+						      start, len, pages,
+						      nr_dest_pages, out_pages,
+						      total_in, total_out,
+						      max_out);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous.  They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+							 disk_start,
+							 bvec, vcnt, srclen);
+	free_workspace(type, workspace);
+	return ret;
+}
+
+/*
+ * a less complex decompression routine.  Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen)
+{
+	struct list_head *workspace;
+	int ret;
+
+	workspace = find_workspace(type);
+	if (IS_ERR(workspace))
+		return -ENOMEM;
+
+	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+						  dest_page, start_byte,
+						  srclen, destlen);
+
+	free_workspace(type, workspace);
+	return ret;
+}
+
+void __exit btrfs_exit_compress(void)
+{
+	free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset)
+{
+	unsigned long buf_offset;
+	unsigned long current_buf_start;
+	unsigned long start_byte;
+	unsigned long working_bytes = total_out - buf_start;
+	unsigned long bytes;
+	char *kaddr;
+	struct page *page_out = bvec[*page_index].bv_page;
+
+	/*
+	 * start byte is the first byte of the page we're currently
+	 * copying into relative to the start of the compressed data.
+	 */
+	start_byte = page_offset(page_out) - disk_start;
+
+	/* we haven't yet hit data corresponding to this page */
+	if (total_out <= start_byte)
+		return 1;
+
+	/*
+	 * the start of the data we care about is offset into
+	 * the middle of our working buffer
+	 */
+	if (total_out > start_byte && buf_start < start_byte) {
+		buf_offset = start_byte - buf_start;
+		working_bytes -= buf_offset;
+	} else {
+		buf_offset = 0;
+	}
+	current_buf_start = buf_start;
+
+	/* copy bytes from the working buffer into the pages */
+	while (working_bytes > 0) {
+		bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+			    PAGE_CACHE_SIZE - buf_offset);
+		bytes = min(bytes, working_bytes);
+		kaddr = kmap_atomic(page_out, KM_USER0);
+		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(page_out);
+
+		*pg_offset += bytes;
+		buf_offset += bytes;
+		working_bytes -= bytes;
+		current_buf_start += bytes;
+
+		/* check if we need to pick another page */
+		if (*pg_offset == PAGE_CACHE_SIZE) {
+			(*page_index)++;
+			if (*page_index >= vcnt)
+				return 0;
+
+			page_out = bvec[*page_index].bv_page;
+			*pg_offset = 0;
+			start_byte = page_offset(page_out) - disk_start;
+
+			/*
+			 * make sure our new page is covered by this
+			 * working buffer
+			 */
+			if (total_out <= start_byte)
+				return 1;
+
+			/*
+			 * the next page in the biovec might not be adjacent
+			 * to the last page, but it might still be found
+			 * inside this working buffer. bump our offset pointer
+			 */
+			if (total_out > start_byte &&
+			    current_buf_start < start_byte) {
+				buf_offset = start_byte - buf_start;
+				working_bytes = total_out - start_byte;
+				current_buf_start = buf_start + buf_offset;
+			}
+		}
+	}
+
+	return 1;
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa715..51000174b9d7 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
 #ifndef __BTRFS_COMPRESSION_
 #define __BTRFS_COMPRESSION_
 
-int btrfs_zlib_decompress(unsigned char *data_in,
-			  struct page *dest_page,
-			  unsigned long start_byte,
-			  size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
-			      u64 start, unsigned long len,
-			      struct page **pages,
-			      unsigned long nr_dest_pages,
-			      unsigned long *out_pages,
-			      unsigned long *total_in,
-			      unsigned long *total_out,
-			      unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
-			      u64 disk_start,
-			      struct bio_vec *bvec,
-			      int vcnt,
-			      size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+			 u64 start, unsigned long len,
+			 struct page **pages,
+			 unsigned long nr_dest_pages,
+			 unsigned long *out_pages,
+			 unsigned long *total_in,
+			 unsigned long *total_out,
+			 unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+			    struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+		     unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+			      unsigned long total_out, u64 disk_start,
+			      struct bio_vec *bvec, int vcnt,
+			      unsigned long *page_index,
+			      unsigned long *pg_offset);
+
 int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long len, u64 disk_start,
 				  unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long nr_pages);
 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+	struct list_head *(*alloc_workspace)(void);
+
+	void (*free_workspace)(struct list_head *workspace);
+
+	int (*compress_pages)(struct list_head *workspace,
+			      struct address_space *mapping,
+			      u64 start, unsigned long len,
+			      struct page **pages,
+			      unsigned long nr_dest_pages,
+			      unsigned long *out_pages,
+			      unsigned long *total_in,
+			      unsigned long *total_out,
+			      unsigned long max_out);
+
+	int (*decompress_biovec)(struct list_head *workspace,
+				 struct page **pages_in,
+				 u64 disk_start,
+				 struct bio_vec *bvec,
+				 int vcnt,
+				 size_t srclen);
+
+	int (*decompress)(struct list_head *workspace,
+			  unsigned char *data_in,
+			  struct page *dest_page,
+			  unsigned long start_byte,
+			  size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
 #endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9ac171599258..b5baff0dccfe 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 /* this also releases the path */
 void btrfs_free_path(struct btrfs_path *p)
 {
+	if (!p)
+		return;
 	btrfs_release_path(NULL, p);
 	kmem_cache_free(btrfs_path_cachep, p);
 }
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	right = read_node_slot(root, upper, slot + 1);
+	if (right == NULL)
+		return 1;
+
 	btrfs_tree_lock(right);
 	btrfs_set_lock_blocking(right);
 
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 	btrfs_assert_tree_locked(path->nodes[1]);
 
 	left = read_node_slot(root, path->nodes[1], slot - 1);
+	if (left == NULL)
+		return 1;
+
 	btrfs_tree_lock(left);
 	btrfs_set_lock_blocking(left);
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b875d445ea81..2c98b3af6052 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 #define BTRFS_FSID_SIZE 16
 #define BTRFS_HEADER_FLAG_WRITTEN	(1ULL << 0)
 #define BTRFS_HEADER_FLAG_RELOC		(1ULL << 1)
+
+/*
+ * File system states
+ */
+
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)
+
 #define BTRFS_SUPER_FLAG_SEEDING	(1ULL << 32)
 #define BTRFS_SUPER_FLAG_METADUMP	(1ULL << 33)
 
@@ -399,13 +407,15 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
 #define BTRFS_FEATURE_INCOMPAT_SUPP			\
 	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
 	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
-	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -552,9 +562,11 @@ struct btrfs_timespec {
 } __attribute__ ((__packed__));
 
 enum btrfs_compression_type {
-	BTRFS_COMPRESS_NONE = 0,
-	BTRFS_COMPRESS_ZLIB = 1,
-	BTRFS_COMPRESS_LAST = 2,
+	BTRFS_COMPRESS_NONE  = 0,
+	BTRFS_COMPRESS_ZLIB  = 1,
+	BTRFS_COMPRESS_LZO   = 2,
+	BTRFS_COMPRESS_TYPES = 2,
+	BTRFS_COMPRESS_LAST  = 3,
 };
 
 struct btrfs_inode_item {
@@ -598,6 +610,8 @@ struct btrfs_dir_item {
 	u8 type;
 } __attribute__ ((__packed__));
 
+#define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)
+
 struct btrfs_root_item {
 	struct btrfs_inode_item inode;
 	__le64 generation;
@@ -896,7 +910,8 @@ struct btrfs_fs_info {
 	 */
 	u64 last_trans_log_full_commit;
 	u64 open_ioctl_trans;
-	unsigned long mount_opt;
+	unsigned long mount_opt:20;
+	unsigned long compress_type:4;
 	u64 max_inline;
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
@@ -1051,6 +1066,9 @@ struct btrfs_fs_info {
 	unsigned metadata_ratio;
 
 	void *bdev_holder;
+
+	/* filesystem state */
+	u64 fs_state;
 };
 
 /*
@@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
 BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
 			 last_snapshot, 64);
 
+static inline bool btrfs_root_readonly(struct btrfs_root *root)
+{
+	return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+}
+
 /* struct btrfs_super_block */
 
 BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start);
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
@@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 int btrfs_set_block_group_rw(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
+int btrfs_error_unpin_extent_range(struct btrfs_root *root,
+				   u64 start, u64 end);
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+			       u64 num_bytes);
+
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 /* super.c */
 int btrfs_parse_options(struct btrfs_root *root, char *options);
 int btrfs_sync_fs(struct super_block *sb, int wait);
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+		     unsigned int line, int errno);
+
+#define btrfs_std_error(fs_info, errno)				\
+do {								\
+	if ((errno))						\
+		__btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
+} while (0)
 
 /* acl.c */
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51d2e4de34eb..b531c36455d8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+				    int read_only);
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+				      struct btrfs_root *root);
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages,
+					int mark);
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+				       struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
 
 /*
  * end_io_wq structs are used to do processing in task context when an IO is
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 	WARN_ON(len == 0);
 
 	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+	if (eb == NULL) {
+		WARN_ON(1);
+		goto out;
+	}
 	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
 					     btrfs_header_generation(eb));
 	BUG_ON(ret);
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	WARN_ON(len == 0);
 
 	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+	if (eb == NULL) {
+		ret = -EIO;
+		goto out;
+	}
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != start) {
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 	}
 	btrfs_free_path(path);
 	if (ret) {
+		kfree(root);
 		if (ret > 0)
 			ret = -ENOENT;
 		return ERR_PTR(ret);
@@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
-	if (!bh)
+	if (!bh) {
+		err = -EINVAL;
 		goto fail_iput;
+	}
 
 	memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
 	memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
@@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!btrfs_super_root(disk_super))
 		goto fail_iput;
 
+	/* check FS state, whether FS is broken. */
+	fs_info->fs_state |= btrfs_super_flags(disk_super);
+
+	btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+
 	ret = btrfs_parse_options(tree_root, options);
 	if (ret) {
 		err = ret;
@@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	}
 
 	features = btrfs_super_incompat_flags(disk_super);
-	if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
-		features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-		btrfs_set_super_incompat_flags(disk_super, features);
-	}
+	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+	if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+	btrfs_set_super_incompat_flags(disk_super, features);
 
 	features = btrfs_super_compat_ro_flags(disk_super) &
 		~BTRFS_FEATURE_COMPAT_RO_SUPP;
@@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 		btrfs_set_opt(fs_info->mount_opt, SSD);
 	}
 
-	if (btrfs_super_log_root(disk_super) != 0) {
+	/* do not make disk changes in broken FS */
+	if (btrfs_super_log_root(disk_super) != 0 &&
+	    !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
 		u64 bytenr = btrfs_super_log_root(disk_super);
 
 		if (fs_devices->rw_devices == 0) {
@@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root)
 	smp_mb();
 
 	btrfs_put_block_group_cache(fs_info);
+
+	/*
+	 * Here come 2 situations when btrfs is broken to flip readonly:
+	 *
+	 * 1. when btrfs flips readonly somewhere else before
+	 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
+	 * and btrfs will skip to write sb directly to keep
+	 * ERROR state on disk.
+	 *
+	 * 2. when btrfs flips readonly just in btrfs_commit_super,
+	 * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+	 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
+	 * btrfs will cleanup all FS resources first and write sb then.
+	 */
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
-		ret =  btrfs_commit_super(root);
+		ret = btrfs_commit_super(root);
+		if (ret)
+			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+	}
+
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		ret = btrfs_error_commit_super(root);
 		if (ret)
 			printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}
@@ -2619,6 +2671,352 @@ out:
 	return 0;
 }
 
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+			      int read_only)
+{
+	if (read_only)
+		return;
+
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+		printk(KERN_WARNING "warning: mount fs with errors, "
+		       "running btrfsck is recommended\n");
+}
+
+int btrfs_error_commit_super(struct btrfs_root *root)
+{
+	int ret;
+
+	mutex_lock(&root->fs_info->cleaner_mutex);
+	btrfs_run_delayed_iputs(root);
+	mutex_unlock(&root->fs_info->cleaner_mutex);
+
+	down_write(&root->fs_info->cleanup_work_sem);
+	up_write(&root->fs_info->cleanup_work_sem);
+
+	/* cleanup FS via transaction */
+	btrfs_cleanup_transaction(root);
+
+	ret = write_ctree_super(NULL, root, 0);
+
+	return ret;
+}
+
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
+{
+	struct btrfs_inode *btrfs_inode;
+	struct list_head splice;
+
+	INIT_LIST_HEAD(&splice);
+
+	mutex_lock(&root->fs_info->ordered_operations_mutex);
+	spin_lock(&root->fs_info->ordered_extent_lock);
+
+	list_splice_init(&root->fs_info->ordered_operations, &splice);
+	while (!list_empty(&splice)) {
+		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+					 ordered_operations);
+
+		list_del_init(&btrfs_inode->ordered_operations);
+
+		btrfs_invalidate_inodes(btrfs_inode->root);
+	}
+
+	spin_unlock(&root->fs_info->ordered_extent_lock);
+	mutex_unlock(&root->fs_info->ordered_operations_mutex);
+
+	return 0;
+}
+
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root)
+{
+	struct list_head splice;
+	struct btrfs_ordered_extent *ordered;
+	struct inode *inode;
+
+	INIT_LIST_HEAD(&splice);
+
+	spin_lock(&root->fs_info->ordered_extent_lock);
+
+	list_splice_init(&root->fs_info->ordered_extents, &splice);
+	while (!list_empty(&splice)) {
+		ordered = list_entry(splice.next, struct btrfs_ordered_extent,
+				     root_extent_list);
+
+		list_del_init(&ordered->root_extent_list);
+		atomic_inc(&ordered->refs);
+
+		/* the inode may be getting freed (in sys_unlink path). */
+		inode = igrab(ordered->inode);
+
+		spin_unlock(&root->fs_info->ordered_extent_lock);
+		if (inode)
+			iput(inode);
+
+		atomic_set(&ordered->refs, 1);
+		btrfs_put_ordered_extent(ordered);
+
+		spin_lock(&root->fs_info->ordered_extent_lock);
+	}
+
+	spin_unlock(&root->fs_info->ordered_extent_lock);
+
+	return 0;
+}
+
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+				      struct btrfs_root *root)
+{
+	struct rb_node *node;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_node *ref;
+	int ret = 0;
+
+	delayed_refs = &trans->delayed_refs;
+
+	spin_lock(&delayed_refs->lock);
+	if (delayed_refs->num_entries == 0) {
+		printk(KERN_INFO "delayed_refs has NO entry\n");
+		return ret;
+	}
+
+	node = rb_first(&delayed_refs->root);
+	while (node) {
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		node = rb_next(node);
+
+		ref->in_tree = 0;
+		rb_erase(&ref->rb_node, &delayed_refs->root);
+		delayed_refs->num_entries--;
+
+		atomic_set(&ref->refs, 1);
+		if (btrfs_delayed_ref_is_head(ref)) {
+			struct btrfs_delayed_ref_head *head;
+
+			head = btrfs_delayed_node_to_head(ref);
+			mutex_lock(&head->mutex);
+			kfree(head->extent_op);
+			delayed_refs->num_heads--;
+			if (list_empty(&head->cluster))
+				delayed_refs->num_heads_ready--;
+			list_del_init(&head->cluster);
+			mutex_unlock(&head->mutex);
+		}
+
+		spin_unlock(&delayed_refs->lock);
+		btrfs_put_delayed_ref(ref);
+
+		cond_resched();
+		spin_lock(&delayed_refs->lock);
+	}
+
+	spin_unlock(&delayed_refs->lock);
+
+	return ret;
+}
+
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+{
+	struct btrfs_pending_snapshot *snapshot;
+	struct list_head splice;
+
+	INIT_LIST_HEAD(&splice);
+
+	list_splice_init(&t->pending_snapshots, &splice);
+
+	while (!list_empty(&splice)) {
+		snapshot = list_entry(splice.next,
+				      struct btrfs_pending_snapshot,
+				      list);
+
+		list_del_init(&snapshot->list);
+
+		kfree(snapshot);
+	}
+
+	return 0;
+}
+
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
+{
+	struct btrfs_inode *btrfs_inode;
+	struct list_head splice;
+
+	INIT_LIST_HEAD(&splice);
+
+	list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+
+	spin_lock(&root->fs_info->delalloc_lock);
+
+	while (!list_empty(&splice)) {
+		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
+				    delalloc_inodes);
+
+		list_del_init(&btrfs_inode->delalloc_inodes);
+
+		btrfs_invalidate_inodes(btrfs_inode->root);
+	}
+
+	spin_unlock(&root->fs_info->delalloc_lock);
+
+	return 0;
+}
+
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages,
+					int mark)
+{
+	int ret;
+	struct page *page;
+	struct inode *btree_inode = root->fs_info->btree_inode;
+	struct extent_buffer *eb;
+	u64 start = 0;
+	u64 end;
+	u64 offset;
+	unsigned long index;
+
+	while (1) {
+		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+					    mark);
+		if (ret)
+			break;
+
+		clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
+		while (start <= end) {
+			index = start >> PAGE_CACHE_SHIFT;
+			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+			page = find_get_page(btree_inode->i_mapping, index);
+			if (!page)
+				continue;
+			offset = page_offset(page);
+
+			spin_lock(&dirty_pages->buffer_lock);
+			eb = radix_tree_lookup(
+			     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
+					       offset >> PAGE_CACHE_SHIFT);
+			spin_unlock(&dirty_pages->buffer_lock);
+			if (eb) {
+				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+							 &eb->bflags);
+				atomic_set(&eb->refs, 1);
+			}
+			if (PageWriteback(page))
+				end_page_writeback(page);
+
+			lock_page(page);
+			if (PageDirty(page)) {
+				clear_page_dirty_for_io(page);
+				spin_lock_irq(&page->mapping->tree_lock);
+				radix_tree_tag_clear(&page->mapping->page_tree,
+							page_index(page),
+							PAGECACHE_TAG_DIRTY);
+				spin_unlock_irq(&page->mapping->tree_lock);
+			}
+
+			page->mapping->a_ops->invalidatepage(page, 0);
+			unlock_page(page);
+		}
+	}
+
+	return ret;
+}
+
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+				       struct extent_io_tree *pinned_extents)
+{
+	struct extent_io_tree *unpin;
+	u64 start;
+	u64 end;
+	int ret;
+
+	unpin = pinned_extents;
+	while (1) {
+		ret = find_first_extent_bit(unpin, 0, &start, &end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		/* opt_discard */
+		ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+
+		clear_extent_dirty(unpin, start, end, GFP_NOFS);
+		btrfs_error_unpin_extent_range(root, start, end);
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
+{
+	struct btrfs_transaction *t;
+	LIST_HEAD(list);
+
+	WARN_ON(1);
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	mutex_lock(&root->fs_info->transaction_kthread_mutex);
+
+	list_splice_init(&root->fs_info->trans_list, &list);
+	while (!list_empty(&list)) {
+		t = list_entry(list.next, struct btrfs_transaction, list);
+		if (!t)
+			break;
+
+		btrfs_destroy_ordered_operations(root);
+
+		btrfs_destroy_ordered_extents(root);
+
+		btrfs_destroy_delayed_refs(t, root);
+
+		btrfs_block_rsv_release(root,
+					&root->fs_info->trans_block_rsv,
+					t->dirty_pages.dirty_bytes);
+
+		/* FIXME: cleanup wait for commit */
+		t->in_commit = 1;
+		t->blocked = 1;
+		if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+			wake_up(&root->fs_info->transaction_blocked_wait);
+
+		t->blocked = 0;
+		if (waitqueue_active(&root->fs_info->transaction_wait))
+			wake_up(&root->fs_info->transaction_wait);
+		mutex_unlock(&root->fs_info->trans_mutex);
+
+		mutex_lock(&root->fs_info->trans_mutex);
+		t->commit_done = 1;
+		if (waitqueue_active(&t->commit_wait))
+			wake_up(&t->commit_wait);
+		mutex_unlock(&root->fs_info->trans_mutex);
+
+		mutex_lock(&root->fs_info->trans_mutex);
+
+		btrfs_destroy_pending_snapshots(t);
+
+		btrfs_destroy_delalloc_inodes(root);
+
+		spin_lock(&root->fs_info->new_trans_lock);
+		root->fs_info->running_transaction = NULL;
+		spin_unlock(&root->fs_info->new_trans_lock);
+
+		btrfs_destroy_marked_extents(root, &t->dirty_pages,
+					     EXTENT_DIRTY);
+
+		btrfs_destroy_pinned_extent(root,
+					    root->fs_info->pinned_extents);
+
+		t->use_count = 0;
+		list_del_init(&t->list);
+		memset(t, 0, sizeof(*t));
+		kmem_cache_free(btrfs_transaction_cachep, t);
+	}
+
+	mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	return 0;
+}
+
 static struct extent_io_ops btree_extent_io_ops = {
 	.write_cache_pages_lock_hook = btree_lock_page_hook,
 	.readpage_end_io_hook = btree_readpage_end_io_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 88e825a0bf21..07b20dc2fd95 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,6 +52,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
+int btrfs_error_commit_super(struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
 					    u64 bytenr, u32 blocksize);
 struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 227e5815d838..b55269340cec 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3089,7 +3089,7 @@ static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 	return btrfs_reduce_alloc_profile(root, flags);
 }
 
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 {
 	u64 flags;
 
@@ -3161,8 +3161,12 @@ alloc:
 					     bytes + 2 * 1024 * 1024,
 					     alloc_target, 0);
 			btrfs_end_transaction(trans, root);
-			if (ret < 0)
-				return ret;
+			if (ret < 0) {
+				if (ret != -ENOSPC)
+					return ret;
+				else
+					goto commit_trans;
+			}
 
 			if (!data_sinfo) {
 				btrfs_set_inode_space_info(root, inode);
@@ -3173,6 +3177,7 @@ alloc:
 		spin_unlock(&data_sinfo->lock);
 
 		/* commit the current transaction and try again */
+commit_trans:
 		if (!committed && !root->fs_info->open_ioctl_trans) {
 			committed = 1;
 			trans = btrfs_join_transaction(root, 1);
@@ -3721,11 +3726,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 	}
 
-	WARN_ON(1);
-	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-		block_rsv->size, block_rsv->reserved,
-		block_rsv->freed[0], block_rsv->freed[1]);
-
 	return -ENOSPC;
 }
 
@@ -7970,13 +7970,14 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 	    sinfo->bytes_may_use + sinfo->bytes_readonly +
-	    cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
+	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
 		sinfo->bytes_reserved += cache->reserved_pinned;
 		cache->reserved_pinned = 0;
 		cache->ro = 1;
 		ret = 0;
 	}
+
 	spin_unlock(&cache->lock);
 	spin_unlock(&sinfo->lock);
 	return ret;
@@ -8012,6 +8013,62 @@ out:
 	return ret;
 }
 
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * list. takes mirrors into account.
+ */
+static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+{
+	struct btrfs_block_group_cache *block_group;
+	u64 free_bytes = 0;
+	int factor;
+
+	list_for_each_entry(block_group, groups_list, list) {
+		spin_lock(&block_group->lock);
+
+		if (!block_group->ro) {
+			spin_unlock(&block_group->lock);
+			continue;
+		}
+
+		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
+					  BTRFS_BLOCK_GROUP_RAID10 |
+					  BTRFS_BLOCK_GROUP_DUP))
+			factor = 2;
+		else
+			factor = 1;
+
+		free_bytes += (block_group->key.offset -
+			       btrfs_block_group_used(&block_group->item)) *
+			       factor;
+
+		spin_unlock(&block_group->lock);
+	}
+
+	return free_bytes;
+}
+
+/*
+ * helper to account the unused space of all the readonly block group in the
+ * space_info. takes mirrors into account.
+ */
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
+{
+	int i;
+	u64 free_bytes = 0;
+
+	spin_lock(&sinfo->lock);
+
+	for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+		if (!list_empty(&sinfo->block_groups[i]))
+			free_bytes += __btrfs_get_ro_block_group_free_space(
+						&sinfo->block_groups[i]);
+
+	spin_unlock(&sinfo->lock);
+
+	return free_bytes;
+}
+
 int btrfs_set_block_group_rw(struct btrfs_root *root,
 			      struct btrfs_block_group_cache *cache)
 {
@@ -8092,7 +8149,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 	mutex_lock(&root->fs_info->chunk_mutex);
 	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
 		u64 min_free = btrfs_block_group_used(&block_group->item);
-		u64 dev_offset, max_avail;
+		u64 dev_offset;
 
 		/*
 		 * check to make sure we can actually find a chunk with enough
@@ -8100,7 +8157,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 		 */
 		if (device->total_bytes > device->bytes_used + min_free) {
 			ret = find_free_dev_extent(NULL, device, min_free,
-						   &dev_offset, &max_avail);
+						   &dev_offset, NULL);
 			if (!ret)
 				break;
 			ret = -1;
@@ -8584,3 +8641,14 @@ out:
 	btrfs_free_path(path);
 	return ret;
 }
+
+int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+{
+	return unpin_extent_range(root, start, end);
+}
+
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+			       u64 num_bytes)
+{
+	return btrfs_discard_extent(root, bytenr, num_bytes);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e86b9f36507..2e993cf1766e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2028,8 +2028,11 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 		BUG_ON(extent_map_end(em) <= cur);
 		BUG_ON(end < cur);
 
-		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
 			this_bio_flag = EXTENT_BIO_COMPRESSED;
+			extent_set_compress_type(&this_bio_flag,
+						 em->compress_type);
+		}
 
 		iosize = min(extent_map_end(em) - cur, end - cur + 1);
 		cur_end = min(extent_map_end(em) - 1, end);
@@ -3072,6 +3075,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 #endif
 
 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+	if (eb == NULL)
+		return NULL;
 	eb->start = start;
 	eb->len = len;
 	spin_lock_init(&eb->lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4183c8178f01..7083cfafd061 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,8 +20,12 @@
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
-/* flags for bio submission */
+/*
+ * flags for bio submission. The high bits indicate the compression
+ * type for this bio
+ */
 #define EXTENT_BIO_COMPRESSED 1
+#define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_UPTODATE 0
@@ -135,6 +139,17 @@ struct extent_buffer {
 	wait_queue_head_t lock_wq;
 };
 
+static inline void extent_set_compress_type(unsigned long *bio_flags,
+					    int compress_type)
+{
+	*bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
+}
+
+static inline int extent_compress_type(unsigned long bio_flags)
+{
+	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
+}
+
 struct extent_map_tree;
 
 static inline struct extent_state *extent_state_next(struct extent_state *state)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 23cb8da3ff66..b0e1fce12530 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include "ctree.h"
 #include "extent_map.h"
 
 
@@ -54,6 +55,7 @@ struct extent_map *alloc_extent_map(gfp_t mask)
 		return em;
 	em->in_tree = 0;
 	em->flags = 0;
+	em->compress_type = BTRFS_COMPRESS_NONE;
 	atomic_set(&em->refs, 1);
 	return em;
 }
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index ab6d74b6e647..28b44dbd1e35 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,7 +26,8 @@ struct extent_map {
 	unsigned long flags;
 	struct block_device *bdev;
 	atomic_t refs;
-	int in_tree;
+	unsigned int in_tree:1;
+	unsigned int compress_type:4;
 };
 
 struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 66836d85763b..c800d58f3013 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
+#include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
@@ -224,6 +225,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 
 			split->bdev = em->bdev;
 			split->flags = flags;
+			split->compress_type = em->compress_type;
 			ret = add_extent_mapping(em_tree, split);
 			BUG_ON(ret);
 			free_extent_map(split);
@@ -238,6 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->len = em->start + em->len - (start + len);
 			split->bdev = em->bdev;
 			split->flags = flags;
+			split->compress_type = em->compress_type;
 
 			if (compressed) {
 				split->block_len = em->block_len;
@@ -890,6 +893,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	if (err)
 		goto out;
 
+	/*
+	 * If BTRFS flips readonly due to some impossible error
+	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+	 * although we have opened a file as writable, we have
+	 * to stop this write operation to ensure FS consistency.
+	 */
+	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		err = -EROFS;
+		goto out;
+	}
+
 	file_update_time(file);
 	BTRFS_I(inode)->sequence++;
 
@@ -1237,6 +1251,117 @@ static int btrfs_file_mmap(struct file	*filp, struct vm_area_struct *vma)
 	return 0;
 }
 
+static long btrfs_fallocate(struct file *file, int mode,
+			    loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct extent_state *cached_state = NULL;
+	u64 cur_offset;
+	u64 last_byte;
+	u64 alloc_start;
+	u64 alloc_end;
+	u64 alloc_hint = 0;
+	u64 locked_end;
+	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+	struct extent_map *em;
+	int ret;
+
+	alloc_start = offset & ~mask;
+	alloc_end =  (offset + len + mask) & ~mask;
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	/*
+	 * wait for ordered IO before we have any locks.  We'll loop again
+	 * below with the locks held.
+	 */
+	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, alloc_end);
+	if (ret)
+		goto out;
+
+	if (alloc_start > inode->i_size) {
+		ret = btrfs_cont_expand(inode, alloc_start);
+		if (ret)
+			goto out;
+	}
+
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+	if (ret)
+		goto out;
+
+	locked_end = alloc_end - 1;
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+
+		/* the extent lock is ordered inside the running
+		 * transaction
+		 */
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+				 locked_end, 0, &cached_state, GFP_NOFS);
+		ordered = btrfs_lookup_first_ordered_extent(inode,
+							    alloc_end - 1);
+		if (ordered &&
+		    ordered->file_offset + ordered->len > alloc_start &&
+		    ordered->file_offset < alloc_end) {
+			btrfs_put_ordered_extent(ordered);
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+					     alloc_start, locked_end,
+					     &cached_state, GFP_NOFS);
+			/*
+			 * we can't wait on the range with the transaction
+			 * running or with the extent lock held
+			 */
+			btrfs_wait_ordered_range(inode, alloc_start,
+						 alloc_end - alloc_start);
+		} else {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+	}
+
+	cur_offset = alloc_start;
+	while (1) {
+		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+				      alloc_end - cur_offset, 0);
+		BUG_ON(IS_ERR(em) || !em);
+		last_byte = min(extent_map_end(em), alloc_end);
+		last_byte = (last_byte + mask) & ~mask;
+		if (em->block_start == EXTENT_MAP_HOLE ||
+		    (cur_offset >= inode->i_size &&
+		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+							last_byte - cur_offset,
+							1 << inode->i_blkbits,
+							offset + len,
+							&alloc_hint);
+			if (ret < 0) {
+				free_extent_map(em);
+				break;
+			}
+		}
+		free_extent_map(em);
+
+		cur_offset = last_byte;
+		if (cur_offset >= alloc_end) {
+			ret = 0;
+			break;
+		}
+	}
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+			     &cached_state, GFP_NOFS);
+
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct file_operations btrfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -1248,6 +1373,7 @@ const struct file_operations btrfs_file_operations = {
 	.open		= generic_file_open,
 	.release	= btrfs_release_file,
 	.fsync		= btrfs_sync_file,
+	.fallocate	= btrfs_fallocate,
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_ioctl,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a3798a3aa0d2..160b55b3e132 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -122,10 +122,10 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	size_t cur_size = size;
 	size_t datasize;
 	unsigned long offset;
-	int use_compress = 0;
+	int compress_type = BTRFS_COMPRESS_NONE;
 
 	if (compressed_size && compressed_pages) {
-		use_compress = 1;
+		compress_type = root->fs_info->compress_type;
 		cur_size = compressed_size;
 	}
 
@@ -159,7 +159,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
 	ptr = btrfs_file_extent_inline_start(ei);
 
-	if (use_compress) {
+	if (compress_type != BTRFS_COMPRESS_NONE) {
 		struct page *cpage;
 		int i = 0;
 		while (compressed_size > 0) {
@@ -176,7 +176,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 			compressed_size -= cur_size;
 		}
 		btrfs_set_file_extent_compression(leaf, ei,
-						  BTRFS_COMPRESS_ZLIB);
+						  compress_type);
 	} else {
 		page = find_get_page(inode->i_mapping,
 				     start >> PAGE_CACHE_SHIFT);
@@ -263,6 +263,7 @@ struct async_extent {
 	u64 compressed_size;
 	struct page **pages;
 	unsigned long nr_pages;
+	int compress_type;
 	struct list_head list;
 };
 
@@ -280,7 +281,8 @@ static noinline int add_async_extent(struct async_cow *cow,
 				     u64 start, u64 ram_size,
 				     u64 compressed_size,
 				     struct page **pages,
-				     unsigned long nr_pages)
+				     unsigned long nr_pages,
+				     int compress_type)
 {
 	struct async_extent *async_extent;
 
@@ -290,6 +292,7 @@ static noinline int add_async_extent(struct async_cow *cow,
 	async_extent->compressed_size = compressed_size;
 	async_extent->pages = pages;
 	async_extent->nr_pages = nr_pages;
+	async_extent->compress_type = compress_type;
 	list_add_tail(&async_extent->list, &cow->extents);
 	return 0;
 }
@@ -332,6 +335,7 @@ static noinline int compress_file_range(struct inode *inode,
 	unsigned long max_uncompressed = 128 * 1024;
 	int i;
 	int will_compress;
+	int compress_type = root->fs_info->compress_type;
 
 	actual_end = min_t(u64, isize, end + 1);
 again:
@@ -381,12 +385,16 @@ again:
 		WARN_ON(pages);
 		pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
 
-		ret = btrfs_zlib_compress_pages(inode->i_mapping, start,
-						total_compressed, pages,
-						nr_pages, &nr_pages_ret,
-						&total_in,
-						&total_compressed,
-						max_compressed);
+		if (BTRFS_I(inode)->force_compress)
+			compress_type = BTRFS_I(inode)->force_compress;
+
+		ret = btrfs_compress_pages(compress_type,
+					   inode->i_mapping, start,
+					   total_compressed, pages,
+					   nr_pages, &nr_pages_ret,
+					   &total_in,
+					   &total_compressed,
+					   max_compressed);
 
 		if (!ret) {
 			unsigned long offset = total_compressed &
@@ -493,7 +501,8 @@ again:
 		 * and will submit them to the elevator.
 		 */
 		add_async_extent(async_cow, start, num_bytes,
-				 total_compressed, pages, nr_pages_ret);
+				 total_compressed, pages, nr_pages_ret,
+				 compress_type);
 
 		if (start + num_bytes < end) {
 			start += num_bytes;
@@ -515,7 +524,8 @@ cleanup_and_bail_uncompressed:
 			__set_page_dirty_nobuffers(locked_page);
 			/* unlocked later on in the async handlers */
 		}
-		add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0);
+		add_async_extent(async_cow, start, end - start + 1,
+				 0, NULL, 0, BTRFS_COMPRESS_NONE);
 		*num_added += 1;
 	}
 
@@ -640,6 +650,7 @@ retry:
 		em->block_start = ins.objectid;
 		em->block_len = ins.offset;
 		em->bdev = root->fs_info->fs_devices->latest_bdev;
+		em->compress_type = async_extent->compress_type;
 		set_bit(EXTENT_FLAG_PINNED, &em->flags);
 		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 
@@ -656,11 +667,13 @@ retry:
 						async_extent->ram_size - 1, 0);
 		}
 
-		ret = btrfs_add_ordered_extent(inode, async_extent->start,
-					       ins.objectid,
-					       async_extent->ram_size,
-					       ins.offset,
-					       BTRFS_ORDERED_COMPRESSED);
+		ret = btrfs_add_ordered_extent_compress(inode,
+						async_extent->start,
+						ins.objectid,
+						async_extent->ram_size,
+						ins.offset,
+						BTRFS_ORDERED_COMPRESSED,
+						async_extent->compress_type);
 		BUG_ON(ret);
 
 		/*
@@ -1670,7 +1683,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
-	int compressed = 0;
+	int compress_type = 0;
 	int ret;
 	bool nolock = false;
 
@@ -1711,9 +1724,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
-		compressed = 1;
+		compress_type = ordered_extent->compress_type;
 	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-		BUG_ON(compressed);
+		BUG_ON(compress_type);
 		ret = btrfs_mark_extent_written(trans, inode,
 						ordered_extent->file_offset,
 						ordered_extent->file_offset +
@@ -1727,7 +1740,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 						ordered_extent->disk_len,
 						ordered_extent->len,
 						ordered_extent->len,
-						compressed, 0, 0,
+						compress_type, 0, 0,
 						BTRFS_FILE_EXTENT_REG);
 		unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
 				   ordered_extent->file_offset,
@@ -1829,6 +1842,8 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
 			logical = em->block_start;
 			failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+			extent_set_compress_type(&failrec->bio_flags,
+						 em->compress_type);
 		}
 		failrec->logical = logical;
 		free_extent_map(em);
@@ -3671,8 +3686,12 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int err;
 
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	err = inode_change_ok(inode, attr);
 	if (err)
 		return err;
@@ -4928,8 +4947,10 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	size_t max_size;
 	unsigned long inline_size;
 	unsigned long ptr;
+	int compress_type;
 
 	WARN_ON(pg_offset != 0);
+	compress_type = btrfs_file_extent_compression(leaf, item);
 	max_size = btrfs_file_extent_ram_bytes(leaf, item);
 	inline_size = btrfs_file_extent_inline_item_len(leaf,
 					btrfs_item_nr(leaf, path->slots[0]));
@@ -4939,8 +4960,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	read_extent_buffer(leaf, tmp, ptr, inline_size);
 
 	max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
-	ret = btrfs_zlib_decompress(tmp, page, extent_offset,
-				    inline_size, max_size);
+	ret = btrfs_decompress(compress_type, tmp, page,
+			       extent_offset, inline_size, max_size);
 	if (ret) {
 		char *kaddr = kmap_atomic(page, KM_USER0);
 		unsigned long copy_size = min_t(u64,
@@ -4982,7 +5003,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_trans_handle *trans = NULL;
-	int compressed;
+	int compress_type;
 
 again:
 	read_lock(&em_tree->lock);
@@ -5041,7 +5062,7 @@ again:
 
 	found_type = btrfs_file_extent_type(leaf, item);
 	extent_start = found_key.offset;
-	compressed = btrfs_file_extent_compression(leaf, item);
+	compress_type = btrfs_file_extent_compression(leaf, item);
 	if (found_type == BTRFS_FILE_EXTENT_REG ||
 	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
 		extent_end = extent_start +
@@ -5087,8 +5108,9 @@ again:
 			em->block_start = EXTENT_MAP_HOLE;
 			goto insert;
 		}
-		if (compressed) {
+		if (compress_type != BTRFS_COMPRESS_NONE) {
 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+			em->compress_type = compress_type;
 			em->block_start = bytenr;
 			em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
 									 item);
@@ -5122,12 +5144,14 @@ again:
 		em->len = (copy_size + root->sectorsize - 1) &
 			~((u64)root->sectorsize - 1);
 		em->orig_start = EXTENT_MAP_INLINE;
-		if (compressed)
+		if (compress_type) {
 			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+			em->compress_type = compress_type;
+		}
 		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
 		if (create == 0 && !PageUptodate(page)) {
-			if (btrfs_file_extent_compression(leaf, item) ==
-			    BTRFS_COMPRESS_ZLIB) {
+			if (btrfs_file_extent_compression(leaf, item) !=
+			    BTRFS_COMPRESS_NONE) {
 				ret = uncompress_inline(path, inode, page,
 							pg_offset,
 							extent_offset, item);
@@ -6477,7 +6501,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->ordered_data_close = 0;
 	ei->orphan_meta_reserved = 0;
 	ei->dummy_inode = 0;
-	ei->force_compress = 0;
+	ei->force_compress = BTRFS_COMPRESS_NONE;
 
 	inode = &ei->vfs_inode;
 	extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
@@ -7098,116 +7122,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
 					   min_size, actual_len, alloc_hint, trans);
 }
 
-static long btrfs_fallocate(struct inode *inode, int mode,
-			    loff_t offset, loff_t len)
-{
-	struct extent_state *cached_state = NULL;
-	u64 cur_offset;
-	u64 last_byte;
-	u64 alloc_start;
-	u64 alloc_end;
-	u64 alloc_hint = 0;
-	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	struct extent_map *em;
-	int ret;
-
-	alloc_start = offset & ~mask;
-	alloc_end =  (offset + len + mask) & ~mask;
-
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode && (mode != FALLOC_FL_KEEP_SIZE))
-		return -EOPNOTSUPP;
-
-	/*
-	 * wait for ordered IO before we have any locks.  We'll loop again
-	 * below with the locks held.
-	 */
-	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
-	mutex_lock(&inode->i_mutex);
-	ret = inode_newsize_ok(inode, alloc_end);
-	if (ret)
-		goto out;
-
-	if (alloc_start > inode->i_size) {
-		ret = btrfs_cont_expand(inode, alloc_start);
-		if (ret)
-			goto out;
-	}
-
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
-	if (ret)
-		goto out;
-
-	locked_end = alloc_end - 1;
-	while (1) {
-		struct btrfs_ordered_extent *ordered;
-
-		/* the extent lock is ordered inside the running
-		 * transaction
-		 */
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
-				 locked_end, 0, &cached_state, GFP_NOFS);
-		ordered = btrfs_lookup_first_ordered_extent(inode,
-							    alloc_end - 1);
-		if (ordered &&
-		    ordered->file_offset + ordered->len > alloc_start &&
-		    ordered->file_offset < alloc_end) {
-			btrfs_put_ordered_extent(ordered);
-			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-					     alloc_start, locked_end,
-					     &cached_state, GFP_NOFS);
-			/*
-			 * we can't wait on the range with the transaction
-			 * running or with the extent lock held
-			 */
-			btrfs_wait_ordered_range(inode, alloc_start,
-						 alloc_end - alloc_start);
-		} else {
-			if (ordered)
-				btrfs_put_ordered_extent(ordered);
-			break;
-		}
-	}
-
-	cur_offset = alloc_start;
-	while (1) {
-		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
-				      alloc_end - cur_offset, 0);
-		BUG_ON(IS_ERR(em) || !em);
-		last_byte = min(extent_map_end(em), alloc_end);
-		last_byte = (last_byte + mask) & ~mask;
-		if (em->block_start == EXTENT_MAP_HOLE ||
-		    (cur_offset >= inode->i_size &&
-		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
-							last_byte - cur_offset,
-							1 << inode->i_blkbits,
-							offset + len,
-							&alloc_hint);
-			if (ret < 0) {
-				free_extent_map(em);
-				break;
-			}
-		}
-		free_extent_map(em);
-
-		cur_offset = last_byte;
-		if (cur_offset >= alloc_end) {
-			ret = 0;
-			break;
-		}
-	}
-	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
-			     &cached_state, GFP_NOFS);
-
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
-out:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
 static int btrfs_set_page_dirty(struct page *page)
 {
 	return __set_page_dirty_nobuffers(page);
@@ -7215,6 +7129,10 @@ static int btrfs_set_page_dirty(struct page *page)
 
 static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
+		return -EROFS;
 	if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
 		return -EACCES;
 	return generic_permission(inode, mask, flags, btrfs_check_acl);
@@ -7310,7 +7228,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
 	.listxattr      = btrfs_listxattr,
 	.removexattr	= btrfs_removexattr,
 	.permission	= btrfs_permission,
-	.fallocate	= btrfs_fallocate,
 	.fiemap		= btrfs_fiemap,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f87552a1d7ea..a506a22b522a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -147,6 +147,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 	unsigned int flags, oldflags;
 	int ret;
 
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	if (copy_from_user(&flags, arg, sizeof(flags)))
 		return -EFAULT;
 
@@ -360,7 +363,8 @@ fail:
 }
 
 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
-			   char *name, int namelen, u64 *async_transid)
+			   char *name, int namelen, u64 *async_transid,
+			   bool readonly)
 {
 	struct inode *inode;
 	struct dentry *parent;
@@ -378,6 +382,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv);
 	pending_snapshot->dentry = dentry;
 	pending_snapshot->root = root;
+	pending_snapshot->readonly = readonly;
 
 	trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
 	if (IS_ERR(trans)) {
@@ -509,7 +514,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 static noinline int btrfs_mksubvol(struct path *parent,
 				   char *name, int namelen,
 				   struct btrfs_root *snap_src,
-				   u64 *async_transid)
+				   u64 *async_transid, bool readonly)
 {
 	struct inode *dir  = parent->dentry->d_inode;
 	struct dentry *dentry;
@@ -541,7 +546,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
 
 	if (snap_src) {
 		error = create_snapshot(snap_src, dentry,
-					name, namelen, async_transid);
+					name, namelen, async_transid, readonly);
 	} else {
 		error = create_subvol(BTRFS_I(dir)->root, dentry,
 				      name, namelen, async_transid);
@@ -638,9 +643,11 @@ static int btrfs_defrag_file(struct file *file,
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_ordered_extent *ordered;
 	struct page *page;
+	struct btrfs_super_block *disk_super;
 	unsigned long last_index;
 	unsigned long ra_pages = root->fs_info->bdi.ra_pages;
 	unsigned long total_read = 0;
+	u64 features;
 	u64 page_start;
 	u64 page_end;
 	u64 last_len = 0;
@@ -648,6 +655,14 @@ static int btrfs_defrag_file(struct file *file,
 	u64 defrag_end = 0;
 	unsigned long i;
 	int ret;
+	int compress_type = BTRFS_COMPRESS_ZLIB;
+
+	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+		if (range->compress_type > BTRFS_COMPRESS_TYPES)
+			return -EINVAL;
+		if (range->compress_type)
+			compress_type = range->compress_type;
+	}
 
 	if (inode->i_size == 0)
 		return 0;
@@ -683,7 +698,7 @@ static int btrfs_defrag_file(struct file *file,
 		total_read++;
 		mutex_lock(&inode->i_mutex);
 		if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
-			BTRFS_I(inode)->force_compress = 1;
+			BTRFS_I(inode)->force_compress = compress_type;
 
 		ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
 		if (ret)
@@ -781,10 +796,17 @@ loop_unlock:
 		atomic_dec(&root->fs_info->async_submit_draining);
 
 		mutex_lock(&inode->i_mutex);
-		BTRFS_I(inode)->force_compress = 0;
+		BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
 		mutex_unlock(&inode->i_mutex);
 	}
 
+	disk_super = &root->fs_info->super_copy;
+	features = btrfs_super_incompat_flags(disk_super);
+	if (range->compress_type == BTRFS_COMPRESS_LZO) {
+		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+		btrfs_set_super_incompat_flags(disk_super, features);
+	}
+
 	return 0;
 
 err_reservations:
@@ -901,7 +923,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 						    char *name,
 						    unsigned long fd,
 						    int subvol,
-						    u64 *transid)
+						    u64 *transid,
+						    bool readonly)
 {
 	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
 	struct file *src_file;
@@ -919,7 +942,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 
 	if (subvol) {
 		ret = btrfs_mksubvol(&file->f_path, name, namelen,
-				     NULL, transid);
+				     NULL, transid, readonly);
 	} else {
 		struct inode *src_inode;
 		src_file = fget(fd);
@@ -938,7 +961,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 		}
 		ret = btrfs_mksubvol(&file->f_path, name, namelen,
 				     BTRFS_I(src_inode)->root,
-				     transid);
+				     transid, readonly);
 		fput(src_file);
 	}
 out:
@@ -946,58 +969,139 @@ out:
 }
 
 static noinline int btrfs_ioctl_snap_create(struct file *file,
-					    void __user *arg, int subvol,
-					    int v2)
+					    void __user *arg, int subvol)
 {
-	struct btrfs_ioctl_vol_args *vol_args = NULL;
-	struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
-	char *name;
-	u64 fd;
+	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
 
-	if (v2) {
-		u64 transid = 0;
-		u64 *ptr = NULL;
+	vol_args = memdup_user(arg, sizeof(*vol_args));
+	if (IS_ERR(vol_args))
+		return PTR_ERR(vol_args);
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
 
-		vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
-		if (IS_ERR(vol_args_v2))
-			return PTR_ERR(vol_args_v2);
+	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+					      vol_args->fd, subvol,
+					      NULL, false);
 
-		if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		name = vol_args_v2->name;
-		fd = vol_args_v2->fd;
-		vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+	kfree(vol_args);
+	return ret;
+}
 
-		if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
-			ptr = &transid;
+static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
+					       void __user *arg, int subvol)
+{
+	struct btrfs_ioctl_vol_args_v2 *vol_args;
+	int ret;
+	u64 transid = 0;
+	u64 *ptr = NULL;
+	bool readonly = false;
 
-		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
-						      subvol, ptr);
+	vol_args = memdup_user(arg, sizeof(*vol_args));
+	if (IS_ERR(vol_args))
+		return PTR_ERR(vol_args);
+	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
 
-		if (ret == 0 && ptr &&
-		    copy_to_user(arg +
-				 offsetof(struct btrfs_ioctl_vol_args_v2,
-					  transid), ptr, sizeof(*ptr)))
-			ret = -EFAULT;
-	} else {
-		vol_args = memdup_user(arg, sizeof(*vol_args));
-		if (IS_ERR(vol_args))
-			return PTR_ERR(vol_args);
-		name = vol_args->name;
-		fd = vol_args->fd;
-		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
-		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
-						      subvol, NULL);
+	if (vol_args->flags &
+	    ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
+
+	if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+		ptr = &transid;
+	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
+		readonly = true;
+
+	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
+					      vol_args->fd, subvol,
+					      ptr, readonly);
+
+	if (ret == 0 && ptr &&
+	    copy_to_user(arg +
+			 offsetof(struct btrfs_ioctl_vol_args_v2,
+				  transid), ptr, sizeof(*ptr)))
+		ret = -EFAULT;
 out:
 	kfree(vol_args);
-	kfree(vol_args_v2);
+	return ret;
+}
 
+static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+						void __user *arg)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret = 0;
+	u64 flags = 0;
+
+	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+		return -EINVAL;
+
+	down_read(&root->fs_info->subvol_sem);
+	if (btrfs_root_readonly(root))
+		flags |= BTRFS_SUBVOL_RDONLY;
+	up_read(&root->fs_info->subvol_sem);
+
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
+					      void __user *arg)
+{
+	struct inode *inode = fdentry(file)->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	u64 root_flags;
+	u64 flags;
+	int ret = 0;
+
+	if (root->fs_info->sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID)
+		return -EINVAL;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+		return -EINVAL;
+
+	if (flags & ~BTRFS_SUBVOL_RDONLY)
+		return -EOPNOTSUPP;
+
+	down_write(&root->fs_info->subvol_sem);
+
+	/* nothing to do */
+	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
+		goto out;
+
+	root_flags = btrfs_root_flags(&root->root_item);
+	if (flags & BTRFS_SUBVOL_RDONLY)
+		btrfs_set_root_flags(&root->root_item,
+				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
+	else
+		btrfs_set_root_flags(&root->root_item,
+				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out_reset;
+	}
+
+	ret = btrfs_update_root(trans, root,
+				&root->root_key, &root->root_item);
+
+	btrfs_commit_transaction(trans, root);
+out_reset:
+	if (ret)
+		btrfs_set_root_flags(&root->root_item, root_flags);
+out:
+	up_write(&root->fs_info->subvol_sem);
 	return ret;
 }
 
@@ -1509,6 +1613,9 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 	struct btrfs_ioctl_defrag_range_args *range;
 	int ret;
 
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	ret = mnt_want_write(file->f_path.mnt);
 	if (ret)
 		return ret;
@@ -1637,6 +1744,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
 		return -EINVAL;
 
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	ret = mnt_want_write(file->f_path.mnt);
 	if (ret)
 		return ret;
@@ -1958,6 +2068,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
 	if (file->private_data)
 		goto out;
 
+	ret = -EROFS;
+	if (btrfs_root_readonly(root))
+		goto out;
+
 	ret = mnt_want_write(file->f_path.mnt);
 	if (ret)
 		goto out;
@@ -2257,13 +2371,17 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case FS_IOC_GETVERSION:
 		return btrfs_ioctl_getversion(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
-		return btrfs_ioctl_snap_create(file, argp, 0, 0);
+		return btrfs_ioctl_snap_create(file, argp, 0);
 	case BTRFS_IOC_SNAP_CREATE_V2:
-		return btrfs_ioctl_snap_create(file, argp, 0, 1);
+		return btrfs_ioctl_snap_create_v2(file, argp, 0);
 	case BTRFS_IOC_SUBVOL_CREATE:
-		return btrfs_ioctl_snap_create(file, argp, 1, 0);
+		return btrfs_ioctl_snap_create(file, argp, 1);
 	case BTRFS_IOC_SNAP_DESTROY:
 		return btrfs_ioctl_snap_destroy(file, argp);
+	case BTRFS_IOC_SUBVOL_GETFLAGS:
+		return btrfs_ioctl_subvol_getflags(file, argp);
+	case BTRFS_IOC_SUBVOL_SETFLAGS:
+		return btrfs_ioctl_subvol_setflags(file, argp);
 	case BTRFS_IOC_DEFAULT_SUBVOL:
 		return btrfs_ioctl_default_subvol(file, argp);
 	case BTRFS_IOC_DEFRAG:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index c344d12c646b..8fb382167b13 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -31,6 +31,7 @@ struct btrfs_ioctl_vol_args {
 };
 
 #define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
+#define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
 
 #define BTRFS_SUBVOL_NAME_MAX 4039
 struct btrfs_ioctl_vol_args_v2 {
@@ -133,8 +134,15 @@ struct btrfs_ioctl_defrag_range_args {
 	 */
 	__u32 extent_thresh;
 
+	/*
+	 * which compression method to use if turning on compression
+	 * for this defrag operation.  If unspecified, zlib will
+	 * be used
+	 */
+	__u32 compress_type;
+
 	/* spare for later */
-	__u32 unused[5];
+	__u32 unused[4];
 };
 
 struct btrfs_ioctl_space_info {
@@ -193,4 +201,6 @@ struct btrfs_ioctl_space_args {
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
 				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
 #endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
new file mode 100644
index 000000000000..cc9b450399df
--- /dev/null
+++ b/fs/btrfs/lzo.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2008 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/lzo.h>
+#include "compression.h"
+
+#define LZO_LEN	4
+
+struct workspace {
+	void *mem;
+	void *buf;	/* where compressed data goes */
+	void *cbuf;	/* where decompressed data goes */
+	struct list_head list;
+};
+
+static void lzo_free_workspace(struct list_head *ws)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+
+	vfree(workspace->buf);
+	vfree(workspace->cbuf);
+	vfree(workspace->mem);
+	kfree(workspace);
+}
+
+static struct list_head *lzo_alloc_workspace(void)
+{
+	struct workspace *workspace;
+
+	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+	if (!workspace)
+		return ERR_PTR(-ENOMEM);
+
+	workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
+	workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+	workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+	if (!workspace->mem || !workspace->buf || !workspace->cbuf)
+		goto fail;
+
+	INIT_LIST_HEAD(&workspace->list);
+
+	return &workspace->list;
+fail:
+	lzo_free_workspace(&workspace->list);
+	return ERR_PTR(-ENOMEM);
+}
+
+static inline void write_compress_length(char *buf, size_t len)
+{
+	__le32 dlen;
+
+	dlen = cpu_to_le32(len);
+	memcpy(buf, &dlen, LZO_LEN);
+}
+
+static inline size_t read_compress_length(char *buf)
+{
+	__le32 dlen;
+
+	memcpy(&dlen, buf, LZO_LEN);
+	return le32_to_cpu(dlen);
+}
+
+static int lzo_compress_pages(struct list_head *ws,
+			      struct address_space *mapping,
+			      u64 start, unsigned long len,
+			      struct page **pages,
+			      unsigned long nr_dest_pages,
+			      unsigned long *out_pages,
+			      unsigned long *total_in,
+			      unsigned long *total_out,
+			      unsigned long max_out)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	int ret = 0;
+	char *data_in;
+	char *cpage_out;
+	int nr_pages = 0;
+	struct page *in_page = NULL;
+	struct page *out_page = NULL;
+	unsigned long bytes_left;
+
+	size_t in_len;
+	size_t out_len;
+	char *buf;
+	unsigned long tot_in = 0;
+	unsigned long tot_out = 0;
+	unsigned long pg_bytes_left;
+	unsigned long out_offset;
+	unsigned long bytes;
+
+	*out_pages = 0;
+	*total_out = 0;
+	*total_in = 0;
+
+	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+	data_in = kmap(in_page);
+
+	/*
+	 * store the size of all chunks of compressed data in
+	 * the first 4 bytes
+	 */
+	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (out_page == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	cpage_out = kmap(out_page);
+	out_offset = LZO_LEN;
+	tot_out = LZO_LEN;
+	pages[0] = out_page;
+	nr_pages = 1;
+	pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+	/* compress at most one page of data each time */
+	in_len = min(len, PAGE_CACHE_SIZE);
+	while (tot_in < len) {
+		ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
+				       &out_len, workspace->mem);
+		if (ret != LZO_E_OK) {
+			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
+			       ret);
+			ret = -1;
+			goto out;
+		}
+
+		/* store the size of this chunk of compressed data */
+		write_compress_length(cpage_out + out_offset, out_len);
+		tot_out += LZO_LEN;
+		out_offset += LZO_LEN;
+		pg_bytes_left -= LZO_LEN;
+
+		tot_in += in_len;
+		tot_out += out_len;
+
+		/* copy bytes from the working buffer into the pages */
+		buf = workspace->cbuf;
+		while (out_len) {
+			bytes = min_t(unsigned long, pg_bytes_left, out_len);
+
+			memcpy(cpage_out + out_offset, buf, bytes);
+
+			out_len -= bytes;
+			pg_bytes_left -= bytes;
+			buf += bytes;
+			out_offset += bytes;
+
+			/*
+			 * we need another page for writing out.
+			 *
+			 * Note if there's less than 4 bytes left, we just
+			 * skip to a new page.
+			 */
+			if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
+			    pg_bytes_left == 0) {
+				if (pg_bytes_left) {
+					memset(cpage_out + out_offset, 0,
+					       pg_bytes_left);
+					tot_out += pg_bytes_left;
+				}
+
+				/* we're done, don't allocate new page */
+				if (out_len == 0 && tot_in >= len)
+					break;
+
+				kunmap(out_page);
+				if (nr_pages == nr_dest_pages) {
+					out_page = NULL;
+					ret = -1;
+					goto out;
+				}
+
+				out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+				if (out_page == NULL) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				cpage_out = kmap(out_page);
+				pages[nr_pages++] = out_page;
+
+				pg_bytes_left = PAGE_CACHE_SIZE;
+				out_offset = 0;
+			}
+		}
+
+		/* we're making it bigger, give up */
+		if (tot_in > 8192 && tot_in < tot_out)
+			goto out;
+
+		/* we're all done */
+		if (tot_in >= len)
+			break;
+
+		if (tot_out > max_out)
+			break;
+
+		bytes_left = len - tot_in;
+		kunmap(in_page);
+		page_cache_release(in_page);
+
+		start += PAGE_CACHE_SIZE;
+		in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+		data_in = kmap(in_page);
+		in_len = min(bytes_left, PAGE_CACHE_SIZE);
+	}
+
+	if (tot_out > tot_in)
+		goto out;
+
+	/* store the size of all chunks of compressed data */
+	cpage_out = kmap(pages[0]);
+	write_compress_length(cpage_out, tot_out);
+
+	kunmap(pages[0]);
+
+	ret = 0;
+	*total_out = tot_out;
+	*total_in = tot_in;
+out:
+	*out_pages = nr_pages;
+	if (out_page)
+		kunmap(out_page);
+
+	if (in_page) {
+		kunmap(in_page);
+		page_cache_release(in_page);
+	}
+
+	return ret;
+}
+
+static int lzo_decompress_biovec(struct list_head *ws,
+				 struct page **pages_in,
+				 u64 disk_start,
+				 struct bio_vec *bvec,
+				 int vcnt,
+				 size_t srclen)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	int ret = 0, ret2;
+	char *data_in;
+	unsigned long page_in_index = 0;
+	unsigned long page_out_index = 0;
+	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
+					PAGE_CACHE_SIZE;
+	unsigned long buf_start;
+	unsigned long buf_offset = 0;
+	unsigned long bytes;
+	unsigned long working_bytes;
+	unsigned long pg_offset;
+
+	size_t in_len;
+	size_t out_len;
+	unsigned long in_offset;
+	unsigned long in_page_bytes_left;
+	unsigned long tot_in;
+	unsigned long tot_out;
+	unsigned long tot_len;
+	char *buf;
+
+	data_in = kmap(pages_in[0]);
+	tot_len = read_compress_length(data_in);
+
+	tot_in = LZO_LEN;
+	in_offset = LZO_LEN;
+	tot_len = min_t(size_t, srclen, tot_len);
+	in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+
+	tot_out = 0;
+	pg_offset = 0;
+
+	while (tot_in < tot_len) {
+		in_len = read_compress_length(data_in + in_offset);
+		in_page_bytes_left -= LZO_LEN;
+		in_offset += LZO_LEN;
+		tot_in += LZO_LEN;
+
+		tot_in += in_len;
+		working_bytes = in_len;
+
+		/* fast path: avoid using the working buffer */
+		if (in_page_bytes_left >= in_len) {
+			buf = data_in + in_offset;
+			bytes = in_len;
+			goto cont;
+		}
+
+		/* copy bytes from the pages into the working buffer */
+		buf = workspace->cbuf;
+		buf_offset = 0;
+		while (working_bytes) {
+			bytes = min(working_bytes, in_page_bytes_left);
+
+			memcpy(buf + buf_offset, data_in + in_offset, bytes);
+			buf_offset += bytes;
+cont:
+			working_bytes -= bytes;
+			in_page_bytes_left -= bytes;
+			in_offset += bytes;
+
+			/* check if we need to pick another page */
+			if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
+			    || in_page_bytes_left == 0) {
+				tot_in += in_page_bytes_left;
+
+				if (working_bytes == 0 && tot_in >= tot_len)
+					break;
+
+				kunmap(pages_in[page_in_index]);
+				page_in_index++;
+				if (page_in_index >= total_pages_in) {
+					ret = -1;
+					data_in = NULL;
+					goto done;
+				}
+				data_in = kmap(pages_in[page_in_index]);
+
+				in_page_bytes_left = PAGE_CACHE_SIZE;
+				in_offset = 0;
+			}
+		}
+
+		out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+		ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
+					    &out_len);
+		if (ret != LZO_E_OK) {
+			printk(KERN_WARNING "btrfs decompress failed\n");
+			ret = -1;
+			break;
+		}
+
+		buf_start = tot_out;
+		tot_out += out_len;
+
+		ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+						 tot_out, disk_start,
+						 bvec, vcnt,
+						 &page_out_index, &pg_offset);
+		if (ret2 == 0)
+			break;
+	}
+done:
+	if (data_in)
+		kunmap(pages_in[page_in_index]);
+	return ret;
+}
+
+static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+			  struct page *dest_page,
+			  unsigned long start_byte,
+			  size_t srclen, size_t destlen)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	size_t in_len;
+	size_t out_len;
+	size_t tot_len;
+	int ret = 0;
+	char *kaddr;
+	unsigned long bytes;
+
+	BUG_ON(srclen < LZO_LEN);
+
+	tot_len = read_compress_length(data_in);
+	data_in += LZO_LEN;
+
+	in_len = read_compress_length(data_in);
+	data_in += LZO_LEN;
+
+	out_len = PAGE_CACHE_SIZE;
+	ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
+	if (ret != LZO_E_OK) {
+		printk(KERN_WARNING "btrfs decompress failed!\n");
+		ret = -1;
+		goto out;
+	}
+
+	if (out_len < start_byte) {
+		ret = -1;
+		goto out;
+	}
+
+	bytes = min_t(unsigned long, destlen, out_len - start_byte);
+
+	kaddr = kmap_atomic(dest_page, KM_USER0);
+	memcpy(kaddr, workspace->buf + start_byte, bytes);
+	kunmap_atomic(kaddr, KM_USER0);
+out:
+	return ret;
+}
+
+struct btrfs_compress_op btrfs_lzo_compress = {
+	.alloc_workspace	= lzo_alloc_workspace,
+	.free_workspace		= lzo_free_workspace,
+	.compress_pages		= lzo_compress_pages,
+	.decompress_biovec	= lzo_decompress_biovec,
+	.decompress		= lzo_decompress,
+};
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index ae7737e352c9..2b61e1ddcd99 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -172,7 +172,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  */
 static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 				      u64 start, u64 len, u64 disk_len,
-				      int type, int dio)
+				      int type, int dio, int compress_type)
 {
 	struct btrfs_ordered_inode_tree *tree;
 	struct rb_node *node;
@@ -189,6 +189,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 	entry->disk_len = disk_len;
 	entry->bytes_left = len;
 	entry->inode = inode;
+	entry->compress_type = compress_type;
 	if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
 		set_bit(type, &entry->flags);
 
@@ -220,14 +221,25 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 			     u64 start, u64 len, u64 disk_len, int type)
 {
 	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-					  disk_len, type, 0);
+					  disk_len, type, 0,
+					  BTRFS_COMPRESS_NONE);
 }
 
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
 				 u64 start, u64 len, u64 disk_len, int type)
 {
 	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-					  disk_len, type, 1);
+					  disk_len, type, 1,
+					  BTRFS_COMPRESS_NONE);
+}
+
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+				      u64 start, u64 len, u64 disk_len,
+				      int type, int compress_type)
+{
+	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+					  disk_len, type, 0,
+					  compress_type);
 }
 
 /*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 61dca83119dd..ff1f69aa1883 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -68,7 +68,7 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
 
-#define BTRFS_ORDERED_COMPRESSED 3 /* writing a compressed extent */
+#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
 
 #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
 
@@ -93,6 +93,9 @@ struct btrfs_ordered_extent {
 	/* flags (described above) */
 	unsigned long flags;
 
+	/* compression algorithm */
+	int compress_type;
+
 	/* reference count */
 	atomic_t refs;
 
@@ -148,6 +151,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 			     u64 start, u64 len, u64 disk_len, int type);
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
 				 u64 start, u64 len, u64 disk_len, int type);
+int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
+				      u64 start, u64 len, u64 disk_len,
+				      int type, int compress_type);
 int btrfs_add_ordered_sum(struct inode *inode,
 			  struct btrfs_ordered_extent *entry,
 			  struct btrfs_ordered_sum *sum);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 22acdaa78ce1..b2130c46fdb5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,90 @@
 
 static const struct super_operations btrfs_super_ops;
 
+static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
+				      char nbuf[16])
+{
+	char *errstr = NULL;
+
+	switch (errno) {
+	case -EIO:
+		errstr = "IO failure";
+		break;
+	case -ENOMEM:
+		errstr = "Out of memory";
+		break;
+	case -EROFS:
+		errstr = "Readonly filesystem";
+		break;
+	default:
+		if (nbuf) {
+			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+				errstr = nbuf;
+		}
+		break;
+	}
+
+	return errstr;
+}
+
+static void __save_error_info(struct btrfs_fs_info *fs_info)
+{
+	/*
+	 * today we only save the error info into ram.  Long term we'll
+	 * also send it down to the disk
+	 */
+	fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
+}
+
+/* NOTE:
+ *	We move write_super stuff at umount in order to avoid deadlock
+ *	for umount hold all lock.
+ */
+static void save_error_info(struct btrfs_fs_info *fs_info)
+{
+	__save_error_info(fs_info);
+}
+
+/* btrfs handle error by forcing the filesystem readonly */
+static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
+{
+	struct super_block *sb = fs_info->sb;
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		sb->s_flags |= MS_RDONLY;
+		printk(KERN_INFO "btrfs is forced readonly\n");
+	}
+}
+
+/*
+ * __btrfs_std_error decodes expected errors from the caller and
+ * invokes the approciate error response.
+ */
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+		     unsigned int line, int errno)
+{
+	struct super_block *sb = fs_info->sb;
+	char nbuf[16];
+	const char *errstr;
+
+	/*
+	 * Special case: if the error is EROFS, and we're already
+	 * under MS_RDONLY, then it is safe here.
+	 */
+	if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+		return;
+
+	errstr = btrfs_decode_error(fs_info, errno, nbuf);
+	printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
+		sb->s_id, function, line, errstr);
+	save_error_info(fs_info);
+
+	btrfs_handle_error(fs_info);
+}
+
 static void btrfs_put_super(struct super_block *sb)
 {
 	struct btrfs_root *root = btrfs_sb(sb);
@@ -69,9 +153,9 @@ enum {
 	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
 	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
 	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
-	Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-	Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
-	Opt_user_subvol_rm_allowed,
+	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
+	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
+	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -86,7 +170,9 @@ static match_table_t tokens = {
 	{Opt_alloc_start, "alloc_start=%s"},
 	{Opt_thread_pool, "thread_pool=%d"},
 	{Opt_compress, "compress"},
+	{Opt_compress_type, "compress=%s"},
 	{Opt_compress_force, "compress-force"},
+	{Opt_compress_force_type, "compress-force=%s"},
 	{Opt_ssd, "ssd"},
 	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd, "nossd"},
@@ -112,6 +198,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 	char *p, *num, *orig;
 	int intarg;
 	int ret = 0;
+	char *compress_type;
+	bool compress_force = false;
 
 	if (!options)
 		return 0;
@@ -154,14 +242,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			btrfs_set_opt(info->mount_opt, NODATACOW);
 			btrfs_set_opt(info->mount_opt, NODATASUM);
 			break;
-		case Opt_compress:
-			printk(KERN_INFO "btrfs: use compression\n");
-			btrfs_set_opt(info->mount_opt, COMPRESS);
-			break;
 		case Opt_compress_force:
-			printk(KERN_INFO "btrfs: forcing compression\n");
-			btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+		case Opt_compress_force_type:
+			compress_force = true;
+		case Opt_compress:
+		case Opt_compress_type:
+			if (token == Opt_compress ||
+			    token == Opt_compress_force ||
+			    strcmp(args[0].from, "zlib") == 0) {
+				compress_type = "zlib";
+				info->compress_type = BTRFS_COMPRESS_ZLIB;
+			} else if (strcmp(args[0].from, "lzo") == 0) {
+				compress_type = "lzo";
+				info->compress_type = BTRFS_COMPRESS_LZO;
+			} else {
+				ret = -EINVAL;
+				goto out;
+			}
+
 			btrfs_set_opt(info->mount_opt, COMPRESS);
+			if (compress_force) {
+				btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+				pr_info("btrfs: force %s compression\n",
+					compress_type);
+			} else
+				pr_info("btrfs: use %s compression\n",
+					compress_type);
 			break;
 		case Opt_ssd:
 			printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -753,6 +859,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
+/*
+ * The helper to calc the free space on the devices that can be used to store
+ * file data.
+ */
+static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_device_info *devices_info;
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+	struct btrfs_device *device;
+	u64 skip_space;
+	u64 type;
+	u64 avail_space;
+	u64 used_space;
+	u64 min_stripe_size;
+	int min_stripes = 1;
+	int i = 0, nr_devices;
+	int ret;
+
+	nr_devices = fs_info->fs_devices->rw_devices;
+	BUG_ON(!nr_devices);
+
+	devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+			       GFP_NOFS);
+	if (!devices_info)
+		return -ENOMEM;
+
+	/* calc min stripe number for data space alloction */
+	type = btrfs_get_alloc_profile(root, 1);
+	if (type & BTRFS_BLOCK_GROUP_RAID0)
+		min_stripes = 2;
+	else if (type & BTRFS_BLOCK_GROUP_RAID1)
+		min_stripes = 2;
+	else if (type & BTRFS_BLOCK_GROUP_RAID10)
+		min_stripes = 4;
+
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		min_stripe_size = 2 * BTRFS_STRIPE_LEN;
+	else
+		min_stripe_size = BTRFS_STRIPE_LEN;
+
+	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+		if (!device->in_fs_metadata)
+			continue;
+
+		avail_space = device->total_bytes - device->bytes_used;
+
+		/* align with stripe_len */
+		do_div(avail_space, BTRFS_STRIPE_LEN);
+		avail_space *= BTRFS_STRIPE_LEN;
+
+		/*
+		 * In order to avoid overwritting the superblock on the drive,
+		 * btrfs starts at an offset of at least 1MB when doing chunk
+		 * allocation.
+		 */
+		skip_space = 1024 * 1024;
+
+		/* user can set the offset in fs_info->alloc_start. */
+		if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
+		    device->total_bytes)
+			skip_space = max(fs_info->alloc_start, skip_space);
+
+		/*
+		 * btrfs can not use the free space in [0, skip_space - 1],
+		 * we must subtract it from the total. In order to implement
+		 * it, we account the used space in this range first.
+		 */
+		ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
+						     &used_space);
+		if (ret) {
+			kfree(devices_info);
+			return ret;
+		}
+
+		/* calc the free space in [0, skip_space - 1] */
+		skip_space -= used_space;
+
+		/*
+		 * we can use the free space in [0, skip_space - 1], subtract
+		 * it from the total.
+		 */
+		if (avail_space && avail_space >= skip_space)
+			avail_space -= skip_space;
+		else
+			avail_space = 0;
+
+		if (avail_space < min_stripe_size)
+			continue;
+
+		devices_info[i].dev = device;
+		devices_info[i].max_avail = avail_space;
+
+		i++;
+	}
+
+	nr_devices = i;
+
+	btrfs_descending_sort_devices(devices_info, nr_devices);
+
+	i = nr_devices - 1;
+	avail_space = 0;
+	while (nr_devices >= min_stripes) {
+		if (devices_info[i].max_avail >= min_stripe_size) {
+			int j;
+			u64 alloc_size;
+
+			avail_space += devices_info[i].max_avail * min_stripes;
+			alloc_size = devices_info[i].max_avail;
+			for (j = i + 1 - min_stripes; j <= i; j++)
+				devices_info[j].max_avail -= alloc_size;
+		}
+		i--;
+		nr_devices--;
+	}
+
+	kfree(devices_info);
+	*free_bytes = avail_space;
+	return 0;
+}
+
 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -760,17 +987,21 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct list_head *head = &root->fs_info->space_info;
 	struct btrfs_space_info *found;
 	u64 total_used = 0;
-	u64 total_used_data = 0;
+	u64 total_free_data = 0;
 	int bits = dentry->d_sb->s_blocksize_bits;
 	__be32 *fsid = (__be32 *)root->fs_info->fsid;
+	int ret;
 
+	/* holding chunk_muext to avoid allocating new chunks */
+	mutex_lock(&root->fs_info->chunk_mutex);
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
-				    BTRFS_BLOCK_GROUP_SYSTEM))
-			total_used_data += found->disk_total;
-		else
-			total_used_data += found->disk_used;
+		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+			total_free_data += found->disk_total - found->disk_used;
+			total_free_data -=
+				btrfs_account_ro_block_groups_free_space(found);
+		}
+
 		total_used += found->disk_used;
 	}
 	rcu_read_unlock();
@@ -778,9 +1009,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_namelen = BTRFS_NAME_LEN;
 	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
 	buf->f_bfree = buf->f_blocks - (total_used >> bits);
-	buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
 	buf->f_bsize = dentry->d_sb->s_blocksize;
 	buf->f_type = BTRFS_SUPER_MAGIC;
+	buf->f_bavail = total_free_data;
+	ret = btrfs_calc_avail_data_space(root, &total_free_data);
+	if (ret) {
+		mutex_unlock(&root->fs_info->chunk_mutex);
+		return ret;
+	}
+	buf->f_bavail += total_free_data;
+	buf->f_bavail = buf->f_bavail >> bits;
+	mutex_unlock(&root->fs_info->chunk_mutex);
 
 	/* We treat it as constant endianness (it doesn't matter _which_)
 	   because we want the fsid to come out the same whether mounted
@@ -897,10 +1136,14 @@ static int __init init_btrfs_fs(void)
 	if (err)
 		return err;
 
-	err = btrfs_init_cachep();
+	err = btrfs_init_compress();
 	if (err)
 		goto free_sysfs;
 
+	err = btrfs_init_cachep();
+	if (err)
+		goto free_compress;
+
 	err = extent_io_init();
 	if (err)
 		goto free_cachep;
@@ -928,6 +1171,8 @@ free_extent_io:
 	extent_io_exit();
 free_cachep:
 	btrfs_destroy_cachep();
+free_compress:
+	btrfs_exit_compress();
 free_sysfs:
 	btrfs_exit_sysfs();
 	return err;
@@ -942,7 +1187,7 @@ static void __exit exit_btrfs_fs(void)
 	unregister_filesystem(&btrfs_fs_type);
 	btrfs_exit_sysfs();
 	btrfs_cleanup_fs_uuids();
-	btrfs_zlib_exit();
+	btrfs_exit_compress();
 }
 
 module_init(init_btrfs_fs)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f50e931fc217..bae5c7b8bbe2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -181,6 +181,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
 	int ret;
+
+	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+		return ERR_PTR(-EROFS);
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	if (!h)
@@ -910,6 +913,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	u64 to_reserve = 0;
 	u64 index = 0;
 	u64 objectid;
+	u64 root_flags;
 
 	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
 	if (!new_root_item) {
@@ -967,6 +971,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
 	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 
+	root_flags = btrfs_root_flags(new_root_item);
+	if (pending->readonly)
+		root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
+	else
+		root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
+	btrfs_set_root_flags(new_root_item, root_flags);
+
 	old = btrfs_lock_root_node(root);
 	btrfs_cow_block(trans, root, old, NULL, 0, &old);
 	btrfs_set_lock_blocking(old);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index f104b57ad4ef..229a594cacd5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,6 +62,7 @@ struct btrfs_pending_snapshot {
 	struct btrfs_block_rsv block_rsv;
 	/* extra metadata reseration for relocation */
 	int error;
+	bool readonly;
 	struct list_head list;
 };
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1718e1a5c320..d158530233b7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -22,6 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/random.h>
 #include <linux/iocontext.h>
+#include <linux/capability.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -600,8 +601,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		set_blocksize(bdev, 4096);
 
 		bh = btrfs_read_dev_super(bdev);
-		if (!bh)
+		if (!bh) {
+			ret = -EINVAL;
 			goto error_close;
+		}
 
 		disk_super = (struct btrfs_super_block *)bh->b_data;
 		devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -703,7 +706,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 		goto error_close;
 	bh = btrfs_read_dev_super(bdev);
 	if (!bh) {
-		ret = -EIO;
+		ret = -EINVAL;
 		goto error_close;
 	}
 	disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -729,59 +732,167 @@ error:
 	return ret;
 }
 
+/* helper to account the used device space in the range */
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+				   u64 end, u64 *length)
+{
+	struct btrfs_key key;
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *dev_extent;
+	struct btrfs_path *path;
+	u64 extent_end;
+	int ret;
+	int slot;
+	struct extent_buffer *l;
+
+	*length = 0;
+
+	if (start >= device->total_bytes)
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->reada = 2;
+
+	key.objectid = device->devid;
+	key.offset = start;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = btrfs_previous_item(root, path, key.objectid, key.type);
+		if (ret < 0)
+			goto out;
+	}
+
+	while (1) {
+		l = path->nodes[0];
+		slot = path->slots[0];
+		if (slot >= btrfs_header_nritems(l)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret == 0)
+				continue;
+			if (ret < 0)
+				goto out;
+
+			break;
+		}
+		btrfs_item_key_to_cpu(l, &key, slot);
+
+		if (key.objectid < device->devid)
+			goto next;
+
+		if (key.objectid > device->devid)
+			break;
+
+		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+			goto next;
+
+		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+		extent_end = key.offset + btrfs_dev_extent_length(l,
+								  dev_extent);
+		if (key.offset <= start && extent_end > end) {
+			*length = end - start + 1;
+			break;
+		} else if (key.offset <= start && extent_end > start)
+			*length += extent_end - start;
+		else if (key.offset > start && extent_end <= end)
+			*length += extent_end - key.offset;
+		else if (key.offset > start && key.offset <= end) {
+			*length += end - key.offset + 1;
+			break;
+		} else if (key.offset > end)
+			break;
+
+next:
+		path->slots[0]++;
+	}
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 /*
+ * find_free_dev_extent - find free space in the specified device
+ * @trans:	transaction handler
+ * @device:	the device which we search the free space in
+ * @num_bytes:	the size of the free space that we need
+ * @start:	store the start of the free space.
+ * @len:	the size of the free space. that we find, or the size of the max
+ * 		free space if we don't find suitable free space
+ *
  * this uses a pretty simple search, the expectation is that it is
  * called very infrequently and that a given device has a small number
  * of extents
+ *
+ * @start is used to store the start of the free space if we find. But if we
+ * don't find suitable free space, it will be used to store the start position
+ * of the max free space.
+ *
+ * @len is used to store the size of the free space that we find.
+ * But if we don't find suitable free space, it is used to store the size of
+ * the max free space.
  */
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
 			 struct btrfs_device *device, u64 num_bytes,
-			 u64 *start, u64 *max_avail)
+			 u64 *start, u64 *len)
 {
 	struct btrfs_key key;
 	struct btrfs_root *root = device->dev_root;
-	struct btrfs_dev_extent *dev_extent = NULL;
+	struct btrfs_dev_extent *dev_extent;
 	struct btrfs_path *path;
-	u64 hole_size = 0;
-	u64 last_byte = 0;
-	u64 search_start = 0;
+	u64 hole_size;
+	u64 max_hole_start;
+	u64 max_hole_size;
+	u64 extent_end;
+	u64 search_start;
 	u64 search_end = device->total_bytes;
 	int ret;
-	int slot = 0;
-	int start_found;
+	int slot;
 	struct extent_buffer *l;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-	path->reada = 2;
-	start_found = 0;
-
 	/* FIXME use last free of some kind */
 
 	/* we don't want to overwrite the superblock on the drive,
 	 * so we make sure to start at an offset of at least 1MB
 	 */
-	search_start = max((u64)1024 * 1024, search_start);
+	search_start = 1024 * 1024;
 
-	if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+	if (root->fs_info->alloc_start + num_bytes <= search_end)
 		search_start = max(root->fs_info->alloc_start, search_start);
 
+	max_hole_start = search_start;
+	max_hole_size = 0;
+
+	if (search_start >= search_end) {
+		ret = -ENOSPC;
+		goto error;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	path->reada = 2;
+
 	key.objectid = device->devid;
 	key.offset = search_start;
 	key.type = BTRFS_DEV_EXTENT_KEY;
+
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
 	if (ret < 0)
-		goto error;
+		goto out;
 	if (ret > 0) {
 		ret = btrfs_previous_item(root, path, key.objectid, key.type);
 		if (ret < 0)
-			goto error;
-		if (ret > 0)
-			start_found = 1;
+			goto out;
 	}
-	l = path->nodes[0];
-	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+
 	while (1) {
 		l = path->nodes[0];
 		slot = path->slots[0];
@@ -790,24 +901,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
 			if (ret == 0)
 				continue;
 			if (ret < 0)
-				goto error;
-no_more_items:
-			if (!start_found) {
-				if (search_start >= search_end) {
-					ret = -ENOSPC;
-					goto error;
-				}
-				*start = search_start;
-				start_found = 1;
-				goto check_pending;
-			}
-			*start = last_byte > search_start ?
-				last_byte : search_start;
-			if (search_end <= *start) {
-				ret = -ENOSPC;
-				goto error;
-			}
-			goto check_pending;
+				goto out;
+
+			break;
 		}
 		btrfs_item_key_to_cpu(l, &key, slot);
 
@@ -815,48 +911,62 @@ no_more_items:
 			goto next;
 
 		if (key.objectid > device->devid)
-			goto no_more_items;
+			break;
 
-		if (key.offset >= search_start && key.offset > last_byte &&
-		    start_found) {
-			if (last_byte < search_start)
-				last_byte = search_start;
-			hole_size = key.offset - last_byte;
+		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+			goto next;
 
-			if (hole_size > *max_avail)
-				*max_avail = hole_size;
+		if (key.offset > search_start) {
+			hole_size = key.offset - search_start;
 
-			if (key.offset > last_byte &&
-			    hole_size >= num_bytes) {
-				*start = last_byte;
-				goto check_pending;
+			if (hole_size > max_hole_size) {
+				max_hole_start = search_start;
+				max_hole_size = hole_size;
+			}
+
+			/*
+			 * If this free space is greater than which we need,
+			 * it must be the max free space that we have found
+			 * until now, so max_hole_start must point to the start
+			 * of this free space and the length of this free space
+			 * is stored in max_hole_size. Thus, we return
+			 * max_hole_start and max_hole_size and go back to the
+			 * caller.
+			 */
+			if (hole_size >= num_bytes) {
+				ret = 0;
+				goto out;
 			}
 		}
-		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
-			goto next;
 
-		start_found = 1;
 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
-		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+		extent_end = key.offset + btrfs_dev_extent_length(l,
+								  dev_extent);
+		if (extent_end > search_start)
+			search_start = extent_end;
 next:
 		path->slots[0]++;
 		cond_resched();
 	}
-check_pending:
-	/* we have to make sure we didn't find an extent that has already
-	 * been allocated by the map tree or the original allocation
-	 */
-	BUG_ON(*start < search_start);
 
-	if (*start + num_bytes > search_end) {
-		ret = -ENOSPC;
-		goto error;
+	hole_size = search_end- search_start;
+	if (hole_size > max_hole_size) {
+		max_hole_start = search_start;
+		max_hole_size = hole_size;
 	}
-	/* check for pending inserts here */
-	ret = 0;
 
-error:
+	/* See above. */
+	if (hole_size < num_bytes)
+		ret = -ENOSPC;
+	else
+		ret = 0;
+
+out:
 	btrfs_free_path(path);
+error:
+	*start = max_hole_start;
+	if (len)
+		*len = max_hole_size;
 	return ret;
 }
 
@@ -1196,7 +1306,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		set_blocksize(bdev, 4096);
 		bh = btrfs_read_dev_super(bdev);
 		if (!bh) {
-			ret = -EIO;
+			ret = -EINVAL;
 			goto error_close;
 		}
 		disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1916,6 +2026,9 @@ int btrfs_balance(struct btrfs_root *dev_root)
 	if (dev_root->fs_info->sb->s_flags & MS_RDONLY)
 		return -EROFS;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	mutex_lock(&dev_root->fs_info->volume_mutex);
 	dev_root = dev_root->fs_info->dev_root;
 
@@ -2154,66 +2267,67 @@ static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
 		return calc_size * num_stripes;
 }
 
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *extent_root,
-			       struct map_lookup **map_ret,
-			       u64 *num_bytes, u64 *stripe_size,
-			       u64 start, u64 type)
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
 {
-	struct btrfs_fs_info *info = extent_root->fs_info;
-	struct btrfs_device *device = NULL;
-	struct btrfs_fs_devices *fs_devices = info->fs_devices;
-	struct list_head *cur;
-	struct map_lookup *map = NULL;
-	struct extent_map_tree *em_tree;
-	struct extent_map *em;
-	struct list_head private_devs;
-	int min_stripe_size = 1 * 1024 * 1024;
-	u64 calc_size = 1024 * 1024 * 1024;
-	u64 max_chunk_size = calc_size;
-	u64 min_free;
-	u64 avail;
-	u64 max_avail = 0;
-	u64 dev_offset;
-	int num_stripes = 1;
-	int min_stripes = 1;
-	int sub_stripes = 0;
-	int looped = 0;
-	int ret;
-	int index;
-	int stripe_len = 64 * 1024;
+	if (((struct btrfs_device_info *)dev_info1)->max_avail >
+	    ((struct btrfs_device_info *)dev_info2)->max_avail)
+		return -1;
+	else if (((struct btrfs_device_info *)dev_info1)->max_avail <
+		 ((struct btrfs_device_info *)dev_info2)->max_avail)
+		return 1;
+	else
+		return 0;
+}
 
-	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
-	    (type & BTRFS_BLOCK_GROUP_DUP)) {
-		WARN_ON(1);
-		type &= ~BTRFS_BLOCK_GROUP_DUP;
-	}
-	if (list_empty(&fs_devices->alloc_list))
-		return -ENOSPC;
+static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
+				 int *num_stripes, int *min_stripes,
+				 int *sub_stripes)
+{
+	*num_stripes = 1;
+	*min_stripes = 1;
+	*sub_stripes = 0;
 
 	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-		num_stripes = fs_devices->rw_devices;
-		min_stripes = 2;
+		*num_stripes = fs_devices->rw_devices;
+		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
-		num_stripes = 2;
-		min_stripes = 2;
+		*num_stripes = 2;
+		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
 		if (fs_devices->rw_devices < 2)
 			return -ENOSPC;
-		num_stripes = 2;
-		min_stripes = 2;
+		*num_stripes = 2;
+		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-		num_stripes = fs_devices->rw_devices;
-		if (num_stripes < 4)
+		*num_stripes = fs_devices->rw_devices;
+		if (*num_stripes < 4)
 			return -ENOSPC;
-		num_stripes &= ~(u32)1;
-		sub_stripes = 2;
-		min_stripes = 4;
+		*num_stripes &= ~(u32)1;
+		*sub_stripes = 2;
+		*min_stripes = 4;
 	}
 
+	return 0;
+}
+
+static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
+				    u64 proposed_size, u64 type,
+				    int num_stripes, int small_stripe)
+{
+	int min_stripe_size = 1 * 1024 * 1024;
+	u64 calc_size = proposed_size;
+	u64 max_chunk_size = calc_size;
+	int ncopies = 1;
+
+	if (type & (BTRFS_BLOCK_GROUP_RAID1 |
+		    BTRFS_BLOCK_GROUP_DUP |
+		    BTRFS_BLOCK_GROUP_RAID10))
+		ncopies = 2;
+
 	if (type & BTRFS_BLOCK_GROUP_DATA) {
 		max_chunk_size = 10 * calc_size;
 		min_stripe_size = 64 * 1024 * 1024;
@@ -2230,51 +2344,209 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
 			     max_chunk_size);
 
-again:
-	max_avail = 0;
-	if (!map || map->num_stripes != num_stripes) {
-		kfree(map);
-		map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
-		if (!map)
-			return -ENOMEM;
-		map->num_stripes = num_stripes;
-	}
-
-	if (calc_size * num_stripes > max_chunk_size) {
-		calc_size = max_chunk_size;
+	if (calc_size * num_stripes > max_chunk_size * ncopies) {
+		calc_size = max_chunk_size * ncopies;
 		do_div(calc_size, num_stripes);
-		do_div(calc_size, stripe_len);
-		calc_size *= stripe_len;
+		do_div(calc_size, BTRFS_STRIPE_LEN);
+		calc_size *= BTRFS_STRIPE_LEN;
 	}
 
 	/* we don't want tiny stripes */
-	if (!looped)
+	if (!small_stripe)
 		calc_size = max_t(u64, min_stripe_size, calc_size);
 
 	/*
-	 * we're about to do_div by the stripe_len so lets make sure
+	 * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
 	 * we end up with something bigger than a stripe
 	 */
-	calc_size = max_t(u64, calc_size, stripe_len * 4);
+	calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
+
+	do_div(calc_size, BTRFS_STRIPE_LEN);
+	calc_size *= BTRFS_STRIPE_LEN;
+
+	return calc_size;
+}
+
+static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
+						      int num_stripes)
+{
+	struct map_lookup *new;
+	size_t len = map_lookup_size(num_stripes);
+
+	BUG_ON(map->num_stripes < num_stripes);
+
+	if (map->num_stripes == num_stripes)
+		return map;
+
+	new = kmalloc(len, GFP_NOFS);
+	if (!new) {
+		/* just change map->num_stripes */
+		map->num_stripes = num_stripes;
+		return map;
+	}
+
+	memcpy(new, map, len);
+	new->num_stripes = num_stripes;
+	kfree(map);
+	return new;
+}
+
+/*
+ * helper to allocate device space from btrfs_device_info, in which we stored
+ * max free space information of every device. It is used when we can not
+ * allocate chunks by default size.
+ *
+ * By this helper, we can allocate a new chunk as larger as possible.
+ */
+static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
+				    struct btrfs_fs_devices *fs_devices,
+				    struct btrfs_device_info *devices,
+				    int nr_device, u64 type,
+				    struct map_lookup **map_lookup,
+				    int min_stripes, u64 *stripe_size)
+{
+	int i, index, sort_again = 0;
+	int min_devices = min_stripes;
+	u64 max_avail, min_free;
+	struct map_lookup *map = *map_lookup;
+	int ret;
+
+	if (nr_device < min_stripes)
+		return -ENOSPC;
+
+	btrfs_descending_sort_devices(devices, nr_device);
+
+	max_avail = devices[0].max_avail;
+	if (!max_avail)
+		return -ENOSPC;
+
+	for (i = 0; i < nr_device; i++) {
+		/*
+		 * if dev_offset = 0, it means the free space of this device
+		 * is less than what we need, and we didn't search max avail
+		 * extent on this device, so do it now.
+		 */
+		if (!devices[i].dev_offset) {
+			ret = find_free_dev_extent(trans, devices[i].dev,
+						   max_avail,
+						   &devices[i].dev_offset,
+						   &devices[i].max_avail);
+			if (ret != 0 && ret != -ENOSPC)
+				return ret;
+			sort_again = 1;
+		}
+	}
+
+	/* we update the max avail free extent of each devices, sort again */
+	if (sort_again)
+		btrfs_descending_sort_devices(devices, nr_device);
+
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		min_devices = 1;
+
+	if (!devices[min_devices - 1].max_avail)
+		return -ENOSPC;
+
+	max_avail = devices[min_devices - 1].max_avail;
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		do_div(max_avail, 2);
+
+	max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
+					     min_stripes, 1);
+	if (type & BTRFS_BLOCK_GROUP_DUP)
+		min_free = max_avail * 2;
+	else
+		min_free = max_avail;
+
+	if (min_free > devices[min_devices - 1].max_avail)
+		return -ENOSPC;
+
+	map = __shrink_map_lookup_stripes(map, min_stripes);
+	*stripe_size = max_avail;
+
+	index = 0;
+	for (i = 0; i < min_stripes; i++) {
+		map->stripes[i].dev = devices[index].dev;
+		map->stripes[i].physical = devices[index].dev_offset;
+		if (type & BTRFS_BLOCK_GROUP_DUP) {
+			i++;
+			map->stripes[i].dev = devices[index].dev;
+			map->stripes[i].physical = devices[index].dev_offset +
+						   max_avail;
+		}
+		index++;
+	}
+	*map_lookup = map;
+
+	return 0;
+}
 
-	do_div(calc_size, stripe_len);
-	calc_size *= stripe_len;
+static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *extent_root,
+			       struct map_lookup **map_ret,
+			       u64 *num_bytes, u64 *stripe_size,
+			       u64 start, u64 type)
+{
+	struct btrfs_fs_info *info = extent_root->fs_info;
+	struct btrfs_device *device = NULL;
+	struct btrfs_fs_devices *fs_devices = info->fs_devices;
+	struct list_head *cur;
+	struct map_lookup *map;
+	struct extent_map_tree *em_tree;
+	struct extent_map *em;
+	struct btrfs_device_info *devices_info;
+	struct list_head private_devs;
+	u64 calc_size = 1024 * 1024 * 1024;
+	u64 min_free;
+	u64 avail;
+	u64 dev_offset;
+	int num_stripes;
+	int min_stripes;
+	int sub_stripes;
+	int min_devices;	/* the min number of devices we need */
+	int i;
+	int ret;
+	int index;
+
+	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+	    (type & BTRFS_BLOCK_GROUP_DUP)) {
+		WARN_ON(1);
+		type &= ~BTRFS_BLOCK_GROUP_DUP;
+	}
+	if (list_empty(&fs_devices->alloc_list))
+		return -ENOSPC;
+
+	ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
+				    &min_stripes, &sub_stripes);
+	if (ret)
+		return ret;
+
+	devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+			       GFP_NOFS);
+	if (!devices_info)
+		return -ENOMEM;
+
+	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
+	if (!map) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	map->num_stripes = num_stripes;
 
 	cur = fs_devices->alloc_list.next;
 	index = 0;
+	i = 0;
 
-	if (type & BTRFS_BLOCK_GROUP_DUP)
+	calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
+					     num_stripes, 0);
+
+	if (type & BTRFS_BLOCK_GROUP_DUP) {
 		min_free = calc_size * 2;
-	else
+		min_devices = 1;
+	} else {
 		min_free = calc_size;
-
-	/*
-	 * we add 1MB because we never use the first 1MB of the device, unless
-	 * we've looped, then we are likely allocating the maximum amount of
-	 * space left already
-	 */
-	if (!looped)
-		min_free += 1024 * 1024;
+		min_devices = min_stripes;
+	}
 
 	INIT_LIST_HEAD(&private_devs);
 	while (index < num_stripes) {
@@ -2287,27 +2559,39 @@ again:
 		cur = cur->next;
 
 		if (device->in_fs_metadata && avail >= min_free) {
-			ret = find_free_dev_extent(trans, device,
-						   min_free, &dev_offset,
-						   &max_avail);
+			ret = find_free_dev_extent(trans, device, min_free,
+						   &devices_info[i].dev_offset,
+						   &devices_info[i].max_avail);
 			if (ret == 0) {
 				list_move_tail(&device->dev_alloc_list,
 					       &private_devs);
 				map->stripes[index].dev = device;
-				map->stripes[index].physical = dev_offset;
+				map->stripes[index].physical =
+						devices_info[i].dev_offset;
 				index++;
 				if (type & BTRFS_BLOCK_GROUP_DUP) {
 					map->stripes[index].dev = device;
 					map->stripes[index].physical =
-						dev_offset + calc_size;
+						devices_info[i].dev_offset +
+						calc_size;
 					index++;
 				}
-			}
-		} else if (device->in_fs_metadata && avail > max_avail)
-			max_avail = avail;
+			} else if (ret != -ENOSPC)
+				goto error;
+
+			devices_info[i].dev = device;
+			i++;
+		} else if (device->in_fs_metadata &&
+			   avail >= BTRFS_STRIPE_LEN) {
+			devices_info[i].dev = device;
+			devices_info[i].max_avail = avail;
+			i++;
+		}
+
 		if (cur == &fs_devices->alloc_list)
 			break;
 	}
+
 	list_splice(&private_devs, &fs_devices->alloc_list);
 	if (index < num_stripes) {
 		if (index >= min_stripes) {
@@ -2316,34 +2600,36 @@ again:
 				num_stripes /= sub_stripes;
 				num_stripes *= sub_stripes;
 			}
-			looped = 1;
-			goto again;
-		}
-		if (!looped && max_avail > 0) {
-			looped = 1;
-			calc_size = max_avail;
-			goto again;
+
+			map = __shrink_map_lookup_stripes(map, num_stripes);
+		} else if (i >= min_devices) {
+			ret = __btrfs_alloc_tiny_space(trans, fs_devices,
+						       devices_info, i, type,
+						       &map, min_stripes,
+						       &calc_size);
+			if (ret)
+				goto error;
+		} else {
+			ret = -ENOSPC;
+			goto error;
 		}
-		kfree(map);
-		return -ENOSPC;
 	}
 	map->sector_size = extent_root->sectorsize;
-	map->stripe_len = stripe_len;
-	map->io_align = stripe_len;
-	map->io_width = stripe_len;
+	map->stripe_len = BTRFS_STRIPE_LEN;
+	map->io_align = BTRFS_STRIPE_LEN;
+	map->io_width = BTRFS_STRIPE_LEN;
 	map->type = type;
-	map->num_stripes = num_stripes;
 	map->sub_stripes = sub_stripes;
 
 	*map_ret = map;
 	*stripe_size = calc_size;
 	*num_bytes = chunk_bytes_by_type(type, calc_size,
-					 num_stripes, sub_stripes);
+					 map->num_stripes, sub_stripes);
 
 	em = alloc_extent_map(GFP_NOFS);
 	if (!em) {
-		kfree(map);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto error;
 	}
 	em->bdev = (struct block_device *)map;
 	em->start = start;
@@ -2376,7 +2662,13 @@ again:
 		index++;
 	}
 
+	kfree(devices_info);
 	return 0;
+
+error:
+	kfree(map);
+	kfree(devices_info);
+	return ret;
 }
 
 static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 1be781079450..7fb59d45fe8c 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -20,8 +20,11 @@
 #define __BTRFS_VOLUMES_
 
 #include <linux/bio.h>
+#include <linux/sort.h>
 #include "async-thread.h"
 
+#define BTRFS_STRIPE_LEN	(64 * 1024)
+
 struct buffer_head;
 struct btrfs_pending_bios {
 	struct bio *head;
@@ -136,6 +139,30 @@ struct btrfs_multi_bio {
 	struct btrfs_bio_stripe stripes[];
 };
 
+struct btrfs_device_info {
+	struct btrfs_device *dev;
+	u64 dev_offset;
+	u64 max_avail;
+};
+
+/* Used to sort the devices by max_avail(descending sort) */
+int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
+
+/*
+ * sort the devices by max_avail, in which max free extent size of each device
+ * is stored.(Descending Sort)
+ */
+static inline void btrfs_descending_sort_devices(
+					struct btrfs_device_info *devices,
+					size_t nr_devices)
+{
+	sort(devices, nr_devices, sizeof(struct btrfs_device_info),
+	     btrfs_cmp_device_free_bytes, NULL);
+}
+
+int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
+				   u64 end, u64 *length);
+
 #define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
 			    (sizeof(struct btrfs_bio_stripe) * (n)))
 
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 698fdd2c739c..a5776531dc2b 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -316,6 +316,15 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
 int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		   size_t size, int flags)
 {
+	struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+	/*
+	 * The permission on security.* and system.* is not checked
+	 * in permission().
+	 */
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	/*
 	 * If this is a request for a synthetic attribute in the system.*
 	 * namespace use the generic infrastructure to resolve a handler
@@ -336,6 +345,15 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
 int btrfs_removexattr(struct dentry *dentry, const char *name)
 {
+	struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+
+	/*
+	 * The permission on security.* and system.* is not checked
+	 * in permission().
+	 */
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
 	/*
 	 * If this is a request for a synthetic attribute in the system.*
 	 * namespace use the generic infrastructure to resolve a handler
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index b9cd5445f71c..f5ec2d44150d 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -32,15 +32,6 @@
 #include <linux/bio.h>
 #include "compression.h"
 
-/* Plan: call deflate() with avail_in == *sourcelen,
-	avail_out = *dstlen - 12 and flush == Z_FINISH.
-	If it doesn't manage to finish,	call it again with
-	avail_in == 0 and avail_out set to the remaining 12
-	bytes for it to clean up.
-   Q: Is 12 bytes sufficient?
-*/
-#define STREAM_END_SPACE 12
-
 struct workspace {
 	z_stream inf_strm;
 	z_stream def_strm;
@@ -48,152 +39,51 @@ struct workspace {
 	struct list_head list;
 };
 
-static LIST_HEAD(idle_workspace);
-static DEFINE_SPINLOCK(workspace_lock);
-static unsigned long num_workspace;
-static atomic_t alloc_workspace = ATOMIC_INIT(0);
-static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
+static void zlib_free_workspace(struct list_head *ws)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
 
-/*
- * this finds an available zlib workspace or allocates a new one
- * NULL or an ERR_PTR is returned if things go bad.
- */
-static struct workspace *find_zlib_workspace(void)
+	vfree(workspace->def_strm.workspace);
+	vfree(workspace->inf_strm.workspace);
+	kfree(workspace->buf);
+	kfree(workspace);
+}
+
+static struct list_head *zlib_alloc_workspace(void)
 {
 	struct workspace *workspace;
-	int ret;
-	int cpus = num_online_cpus();
-
-again:
-	spin_lock(&workspace_lock);
-	if (!list_empty(&idle_workspace)) {
-		workspace = list_entry(idle_workspace.next, struct workspace,
-				       list);
-		list_del(&workspace->list);
-		num_workspace--;
-		spin_unlock(&workspace_lock);
-		return workspace;
 
-	}
-	spin_unlock(&workspace_lock);
-	if (atomic_read(&alloc_workspace) > cpus) {
-		DEFINE_WAIT(wait);
-		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&alloc_workspace) > cpus)
-			schedule();
-		finish_wait(&workspace_wait, &wait);
-		goto again;
-	}
-	atomic_inc(&alloc_workspace);
 	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
-	if (!workspace) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!workspace)
+		return ERR_PTR(-ENOMEM);
 
 	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
-	if (!workspace->def_strm.workspace) {
-		ret = -ENOMEM;
-		goto fail;
-	}
 	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
-	if (!workspace->inf_strm.workspace) {
-		ret = -ENOMEM;
-		goto fail_inflate;
-	}
 	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
-	if (!workspace->buf) {
-		ret = -ENOMEM;
-		goto fail_kmalloc;
-	}
-	return workspace;
-
-fail_kmalloc:
-	vfree(workspace->inf_strm.workspace);
-fail_inflate:
-	vfree(workspace->def_strm.workspace);
-fail:
-	kfree(workspace);
-	atomic_dec(&alloc_workspace);
-	wake_up(&workspace_wait);
-	return ERR_PTR(ret);
-}
-
-/*
- * put a workspace struct back on the list or free it if we have enough
- * idle ones sitting around
- */
-static int free_workspace(struct workspace *workspace)
-{
-	spin_lock(&workspace_lock);
-	if (num_workspace < num_online_cpus()) {
-		list_add_tail(&workspace->list, &idle_workspace);
-		num_workspace++;
-		spin_unlock(&workspace_lock);
-		if (waitqueue_active(&workspace_wait))
-			wake_up(&workspace_wait);
-		return 0;
-	}
-	spin_unlock(&workspace_lock);
-	vfree(workspace->def_strm.workspace);
-	vfree(workspace->inf_strm.workspace);
-	kfree(workspace->buf);
-	kfree(workspace);
+	if (!workspace->def_strm.workspace ||
+	    !workspace->inf_strm.workspace || !workspace->buf)
+		goto fail;
 
-	atomic_dec(&alloc_workspace);
-	if (waitqueue_active(&workspace_wait))
-		wake_up(&workspace_wait);
-	return 0;
-}
+	INIT_LIST_HEAD(&workspace->list);
 
-/*
- * cleanup function for module exit
- */
-static void free_workspaces(void)
-{
-	struct workspace *workspace;
-	while (!list_empty(&idle_workspace)) {
-		workspace = list_entry(idle_workspace.next, struct workspace,
-				       list);
-		list_del(&workspace->list);
-		vfree(workspace->def_strm.workspace);
-		vfree(workspace->inf_strm.workspace);
-		kfree(workspace->buf);
-		kfree(workspace);
-		atomic_dec(&alloc_workspace);
-	}
+	return &workspace->list;
+fail:
+	zlib_free_workspace(&workspace->list);
+	return ERR_PTR(-ENOMEM);
 }
 
-/*
- * given an address space and start/len, compress the bytes.
- *
- * pages are allocated to hold the compressed result and stored
- * in 'pages'
- *
- * out_pages is used to return the number of pages allocated.  There
- * may be pages allocated even if we return an error
- *
- * total_in is used to return the number of bytes actually read.  It
- * may be smaller then len if we had to exit early because we
- * ran out of room in the pages array or because we cross the
- * max_out threshold.
- *
- * total_out is used to return the total number of compressed bytes
- *
- * max_out tells us the max number of bytes that we're allowed to
- * stuff into pages
- */
-int btrfs_zlib_compress_pages(struct address_space *mapping,
-			      u64 start, unsigned long len,
-			      struct page **pages,
-			      unsigned long nr_dest_pages,
-			      unsigned long *out_pages,
-			      unsigned long *total_in,
-			      unsigned long *total_out,
-			      unsigned long max_out)
+static int zlib_compress_pages(struct list_head *ws,
+			       struct address_space *mapping,
+			       u64 start, unsigned long len,
+			       struct page **pages,
+			       unsigned long nr_dest_pages,
+			       unsigned long *out_pages,
+			       unsigned long *total_in,
+			       unsigned long *total_out,
+			       unsigned long max_out)
 {
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	int ret;
-	struct workspace *workspace;
 	char *data_in;
 	char *cpage_out;
 	int nr_pages = 0;
@@ -205,10 +95,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 	*total_out = 0;
 	*total_in = 0;
 
-	workspace = find_zlib_workspace();
-	if (IS_ERR(workspace))
-		return -1;
-
 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
 		printk(KERN_WARNING "deflateInit failed\n");
 		ret = -1;
@@ -222,6 +108,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 	data_in = kmap(in_page);
 
 	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (out_page == NULL) {
+		ret = -1;
+		goto out;
+	}
 	cpage_out = kmap(out_page);
 	pages[0] = out_page;
 	nr_pages = 1;
@@ -260,6 +150,10 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 				goto out;
 			}
 			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+			if (out_page == NULL) {
+				ret = -1;
+				goto out;
+			}
 			cpage_out = kmap(out_page);
 			pages[nr_pages] = out_page;
 			nr_pages++;
@@ -314,55 +208,26 @@ out:
 		kunmap(in_page);
 		page_cache_release(in_page);
 	}
-	free_workspace(workspace);
 	return ret;
 }
 
-/*
- * pages_in is an array of pages with compressed data.
- *
- * disk_start is the starting logical offset of this array in the file
- *
- * bvec is a bio_vec of pages from the file that we want to decompress into
- *
- * vcnt is the count of pages in the biovec
- *
- * srclen is the number of bytes in pages_in
- *
- * The basic idea is that we have a bio that was created by readpages.
- * The pages in the bio are for the uncompressed data, and they may not
- * be contiguous.  They all correspond to the range of bytes covered by
- * the compressed extent.
- */
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
-			      u64 disk_start,
-			      struct bio_vec *bvec,
-			      int vcnt,
-			      size_t srclen)
+static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
+				  u64 disk_start,
+				  struct bio_vec *bvec,
+				  int vcnt,
+				  size_t srclen)
 {
-	int ret = 0;
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	int ret = 0, ret2;
 	int wbits = MAX_WBITS;
-	struct workspace *workspace;
 	char *data_in;
 	size_t total_out = 0;
-	unsigned long page_bytes_left;
 	unsigned long page_in_index = 0;
 	unsigned long page_out_index = 0;
-	struct page *page_out;
 	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
 					PAGE_CACHE_SIZE;
 	unsigned long buf_start;
-	unsigned long buf_offset;
-	unsigned long bytes;
-	unsigned long working_bytes;
 	unsigned long pg_offset;
-	unsigned long start_byte;
-	unsigned long current_buf_start;
-	char *kaddr;
-
-	workspace = find_zlib_workspace();
-	if (IS_ERR(workspace))
-		return -ENOMEM;
 
 	data_in = kmap(pages_in[page_in_index]);
 	workspace->inf_strm.next_in = data_in;
@@ -372,8 +237,6 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
 	workspace->inf_strm.total_out = 0;
 	workspace->inf_strm.next_out = workspace->buf;
 	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
-	page_out = bvec[page_out_index].bv_page;
-	page_bytes_left = PAGE_CACHE_SIZE;
 	pg_offset = 0;
 
 	/* If it's deflate, and it's got no preset dictionary, then
@@ -389,107 +252,29 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "inflateInit failed\n");
-		ret = -1;
-		goto out;
+		return -1;
 	}
 	while (workspace->inf_strm.total_in < srclen) {
 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
 		if (ret != Z_OK && ret != Z_STREAM_END)
 			break;
-		/*
-		 * buf start is the byte offset we're of the start of
-		 * our workspace buffer
-		 */
-		buf_start = total_out;
 
-		/* total_out is the last byte of the workspace buffer */
+		buf_start = total_out;
 		total_out = workspace->inf_strm.total_out;
 
-		working_bytes = total_out - buf_start;
-
-		/*
-		 * start byte is the first byte of the page we're currently
-		 * copying into relative to the start of the compressed data.
-		 */
-		start_byte = page_offset(page_out) - disk_start;
-
-		if (working_bytes == 0) {
-			/* we didn't make progress in this inflate
-			 * call, we're done
-			 */
-			if (ret != Z_STREAM_END)
-				ret = -1;
+		/* we didn't make progress in this inflate call, we're done */
+		if (buf_start == total_out)
 			break;
-		}
 
-		/* we haven't yet hit data corresponding to this page */
-		if (total_out <= start_byte)
-			goto next;
-
-		/*
-		 * the start of the data we care about is offset into
-		 * the middle of our working buffer
-		 */
-		if (total_out > start_byte && buf_start < start_byte) {
-			buf_offset = start_byte - buf_start;
-			working_bytes -= buf_offset;
-		} else {
-			buf_offset = 0;
-		}
-		current_buf_start = buf_start;
-
-		/* copy bytes from the working buffer into the pages */
-		while (working_bytes > 0) {
-			bytes = min(PAGE_CACHE_SIZE - pg_offset,
-				    PAGE_CACHE_SIZE - buf_offset);
-			bytes = min(bytes, working_bytes);
-			kaddr = kmap_atomic(page_out, KM_USER0);
-			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
-			       bytes);
-			kunmap_atomic(kaddr, KM_USER0);
-			flush_dcache_page(page_out);
-
-			pg_offset += bytes;
-			page_bytes_left -= bytes;
-			buf_offset += bytes;
-			working_bytes -= bytes;
-			current_buf_start += bytes;
-
-			/* check if we need to pick another page */
-			if (page_bytes_left == 0) {
-				page_out_index++;
-				if (page_out_index >= vcnt) {
-					ret = 0;
-					goto done;
-				}
-
-				page_out = bvec[page_out_index].bv_page;
-				pg_offset = 0;
-				page_bytes_left = PAGE_CACHE_SIZE;
-				start_byte = page_offset(page_out) - disk_start;
-
-				/*
-				 * make sure our new page is covered by this
-				 * working buffer
-				 */
-				if (total_out <= start_byte)
-					goto next;
-
-				/* the next page in the biovec might not
-				 * be adjacent to the last page, but it
-				 * might still be found inside this working
-				 * buffer.  bump our offset pointer
-				 */
-				if (total_out > start_byte &&
-				    current_buf_start < start_byte) {
-					buf_offset = start_byte - buf_start;
-					working_bytes = total_out - start_byte;
-					current_buf_start = buf_start +
-						buf_offset;
-				}
-			}
+		ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
+						 total_out, disk_start,
+						 bvec, vcnt,
+						 &page_out_index, &pg_offset);
+		if (ret2 == 0) {
+			ret = 0;
+			goto done;
 		}
-next:
+
 		workspace->inf_strm.next_out = workspace->buf;
 		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 
@@ -516,35 +301,21 @@ done:
 	zlib_inflateEnd(&workspace->inf_strm);
 	if (data_in)
 		kunmap(pages_in[page_in_index]);
-out:
-	free_workspace(workspace);
 	return ret;
 }
 
-/*
- * a less complex decompression routine.  Our compressed data fits in a
- * single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
- */
-int btrfs_zlib_decompress(unsigned char *data_in,
-			  struct page *dest_page,
-			  unsigned long start_byte,
-			  size_t srclen, size_t destlen)
+static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
+			   struct page *dest_page,
+			   unsigned long start_byte,
+			   size_t srclen, size_t destlen)
 {
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	int ret = 0;
 	int wbits = MAX_WBITS;
-	struct workspace *workspace;
 	unsigned long bytes_left = destlen;
 	unsigned long total_out = 0;
 	char *kaddr;
 
-	if (destlen > PAGE_CACHE_SIZE)
-		return -ENOMEM;
-
-	workspace = find_zlib_workspace();
-	if (IS_ERR(workspace))
-		return -ENOMEM;
-
 	workspace->inf_strm.next_in = data_in;
 	workspace->inf_strm.avail_in = srclen;
 	workspace->inf_strm.total_in = 0;
@@ -565,8 +336,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "inflateInit failed\n");
-		ret = -1;
-		goto out;
+		return -1;
 	}
 
 	while (bytes_left > 0) {
@@ -616,12 +386,13 @@ next:
 		ret = 0;
 
 	zlib_inflateEnd(&workspace->inf_strm);
-out:
-	free_workspace(workspace);
 	return ret;
 }
 
-void btrfs_zlib_exit(void)
-{
-    free_workspaces();
-}
+struct btrfs_compress_op btrfs_zlib_compress = {
+	.alloc_workspace	= zlib_alloc_workspace,
+	.free_workspace		= zlib_free_workspace,
+	.compress_pages		= zlib_compress_pages,
+	.decompress_biovec	= zlib_decompress_biovec,
+	.decompress		= zlib_decompress,
+};
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a65d311d163a..9f59887badd2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,6 +1113,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 		} else if (!strnicmp(data, "uid", 3) && value && *value) {
 			vol->linux_uid = simple_strtoul(value, &value, 0);
 			uid_specified = true;
+		} else if (!strnicmp(data, "cruid", 5) && value && *value) {
+			vol->cred_uid = simple_strtoul(value, &value, 0);
 		} else if (!strnicmp(data, "forceuid", 8)) {
 			override_uid = 1;
 		} else if (!strnicmp(data, "noforceuid", 10)) {
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 9aad47a2d62f..6783ce6cdc89 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -899,8 +899,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 	}
 	/* else ERRHRD class errors or junk  - return EIO */
 
-	cFYI(1, "Mapping smb error code %d to POSIX err %d",
-		 smberrcode, rc);
+	cFYI(1, "Mapping smb error code 0x%x to POSIX err %d",
+		 le32_to_cpu(smb->Status.CifsError), rc);
 
 	/* generic corrective action e.g. reconnect SMB session on
 	 * ERRbaduid could be added */
diff --git a/fs/compat.c b/fs/compat.c
index eb1740ac8c0a..f6fd0a00e6cc 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -257,7 +257,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
 }
 
 /*
- * The following statfs calls are copies of code from fs/open.c and
+ * The following statfs calls are copies of code from fs/statfs.c and
  * should be checked against those from time to time
  */
 asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
@@ -320,7 +320,9 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	    __put_user(kbuf->f_namelen, &ubuf->f_namelen) ||
 	    __put_user(kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
 	    __put_user(kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]) ||
-	    __put_user(kbuf->f_frsize, &ubuf->f_frsize))
+	    __put_user(kbuf->f_frsize, &ubuf->f_frsize) ||
+	    __put_user(kbuf->f_flags, &ubuf->f_flags) ||
+	    __clear_user(ubuf->f_spare, sizeof(ubuf->f_spare)))
 		return -EFAULT;
 	return 0;
 }
@@ -597,10 +599,8 @@ ssize_t compat_rw_copy_check_uvector(int type,
 	if (nr_segs > fast_segs) {
 		ret = -ENOMEM;
 		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			*ret_pointer = fast_pointer;
+		if (iov == NULL)
 			goto out;
-		}
 	}
 	*ret_pointer = iov;
 
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index cbadc1bee6e7..bfd8b680e648 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -348,7 +348,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	BUG_ON(!crypt_stat || !crypt_stat->tfm
 	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
+		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
 				crypt_stat->key_size);
 		ecryptfs_dump_hex(crypt_stat->key,
 				  crypt_stat->key_size);
@@ -413,10 +413,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
 	rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
 				(extent_base + extent_offset));
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error attempting to "
-				"derive IV for extent [0x%.16x]; "
-				"rc = [%d]\n", (extent_base + extent_offset),
-				rc);
+		ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+			"extent [0x%.16llx]; rc = [%d]\n",
+			(unsigned long long)(extent_base + extent_offset), rc);
 		goto out;
 	}
 	if (unlikely(ecryptfs_verbosity > 0)) {
@@ -443,9 +442,9 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
 	}
 	rc = 0;
 	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
-				"rc = [%d]\n", (extent_base + extent_offset),
-				rc);
+		ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
+			"rc = [%d]\n",
+			(unsigned long long)(extent_base + extent_offset), rc);
 		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
 				"encryption:\n");
 		ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
@@ -540,10 +539,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
 	rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
 				(extent_base + extent_offset));
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error attempting to "
-				"derive IV for extent [0x%.16x]; "
-				"rc = [%d]\n", (extent_base + extent_offset),
-				rc);
+		ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for "
+			"extent [0x%.16llx]; rc = [%d]\n",
+			(unsigned long long)(extent_base + extent_offset), rc);
 		goto out;
 	}
 	if (unlikely(ecryptfs_verbosity > 0)) {
@@ -571,9 +569,9 @@ static int ecryptfs_decrypt_extent(struct page *page,
 	}
 	rc = 0;
 	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16x]; "
-				"rc = [%d]\n", (extent_base + extent_offset),
-				rc);
+		ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
+			"rc = [%d]\n",
+			(unsigned long long)(extent_base + extent_offset), rc);
 		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
 				"decryption:\n");
 		ecryptfs_dump_hex((char *)(page_address(page)
@@ -780,7 +778,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 	}
 	ecryptfs_printk(KERN_DEBUG,
 			"Initializing cipher [%s]; strlen = [%d]; "
-			"key_size_bits = [%d]\n",
+			"key_size_bits = [%zd]\n",
 			crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
 			crypt_stat->key_size << 3);
 	if (crypt_stat->tfm) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 413a3c48f0bb..dbc84ed96336 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -192,7 +192,6 @@ ecryptfs_get_key_payload_data(struct key *key)
 		(((struct user_key_payload*)key->payload.data)->data);
 }
 
-#define ECRYPTFS_SUPER_MAGIC 0xf15f
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
 #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64
@@ -584,6 +583,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
 
 #define ecryptfs_printk(type, fmt, arg...) \
         __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
+__attribute__ ((format(printf, 1, 2)))
 void __ecryptfs_printk(const char *fmt, ...);
 
 extern const struct file_operations ecryptfs_main_fops;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 91da02987bff..81e10e6a9443 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -47,7 +47,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 				const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
-	int rc;
+	ssize_t rc;
 	struct dentry *lower_dentry;
 	struct vfsmount *lower_vfsmount;
 	struct file *file = iocb->ki_filp;
@@ -191,18 +191,16 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	if (!ecryptfs_inode_to_private(inode)->lower_file) {
-		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to initialize "
-			       "the persistent file for the dentry with name "
-			       "[%s]; rc = [%d]\n", __func__,
-			       ecryptfs_dentry->d_name.name, rc);
-			goto out_free;
-		}
+	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to initialize "
+			"the persistent file for the dentry with name "
+			"[%s]; rc = [%d]\n", __func__,
+			ecryptfs_dentry->d_name.name, rc);
+		goto out_free;
 	}
-	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
-	    && !(file->f_flags & O_RDONLY)) {
+	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
+	    == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
 		rc = -EPERM;
 		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
 		       "file must hence be opened RO\n", __func__);
@@ -243,9 +241,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		}
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
-			"size: [0x%.16x]\n", inode, inode->i_ino,
-			i_size_read(inode));
+	ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
+			"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
+			(unsigned long long)i_size_read(inode));
 	goto out;
 out_free:
 	kmem_cache_free(ecryptfs_file_info_cache,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 64ff02330752..bd33f87a1907 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -185,15 +185,13 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 				"context; rc = [%d]\n", rc);
 		goto out;
 	}
-	if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
-		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to initialize "
-			       "the persistent file for the dentry with name "
-			       "[%s]; rc = [%d]\n", __func__,
-			       ecryptfs_dentry->d_name.name, rc);
-			goto out;
-		}
+	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to initialize "
+			"the persistent file for the dentry with name "
+			"[%s]; rc = [%d]\n", __func__,
+			ecryptfs_dentry->d_name.name, rc);
+		goto out;
 	}
 	rc = ecryptfs_write_metadata(ecryptfs_dentry);
 	if (rc) {
@@ -302,15 +300,13 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		rc = -ENOMEM;
 		goto out;
 	}
-	if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
-		rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
-		if (rc) {
-			printk(KERN_ERR "%s: Error attempting to initialize "
-			       "the persistent file for the dentry with name "
-			       "[%s]; rc = [%d]\n", __func__,
-			       ecryptfs_dentry->d_name.name, rc);
-			goto out_free_kmem;
-		}
+	rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to initialize "
+			"the persistent file for the dentry with name "
+			"[%s]; rc = [%d]\n", __func__,
+			ecryptfs_dentry->d_name.name, rc);
+		goto out_free_kmem;
 	}
 	crypt_stat = &ecryptfs_inode_to_private(
 					ecryptfs_dentry->d_inode)->crypt_stat;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index b1f6858a5223..c1436cff6f2d 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -59,7 +59,7 @@ static int process_request_key_err(long err_code)
 		break;
 	default:
 		ecryptfs_printk(KERN_WARNING, "Unknown error code: "
-				"[0x%.16x]\n", err_code);
+				"[0x%.16lx]\n", err_code);
 		rc = -EINVAL;
 	}
 	return rc;
@@ -130,7 +130,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size,
 	} else {
 		rc = -EINVAL;
 		ecryptfs_printk(KERN_WARNING,
-				"Unsupported packet size: [%d]\n", size);
+				"Unsupported packet size: [%zd]\n", size);
 	}
 	return rc;
 }
@@ -1672,7 +1672,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
 	       auth_tok->session_key.decrypted_key_size);
 	crypt_stat->flags |= ECRYPTFS_KEY_VALID;
 	if (unlikely(ecryptfs_verbosity > 0)) {
-		ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n",
+		ecryptfs_printk(KERN_DEBUG, "FEK of size [%zd]:\n",
 				crypt_stat->key_size);
 		ecryptfs_dump_hex(crypt_stat->key,
 				  crypt_stat->key_size);
@@ -1754,7 +1754,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			if (ECRYPTFS_SIG_SIZE != tag_11_contents_size) {
 				ecryptfs_printk(KERN_ERR, "Expected "
 						"signature of size [%d]; "
-						"read size [%d]\n",
+						"read size [%zd]\n",
 						ECRYPTFS_SIG_SIZE,
 						tag_11_contents_size);
 				rc = -EIO;
@@ -1787,8 +1787,8 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			goto out_wipe_list;
 			break;
 		default:
-			ecryptfs_printk(KERN_DEBUG, "No packet at offset "
-					"[%d] of the file header; hex value of "
+			ecryptfs_printk(KERN_DEBUG, "No packet at offset [%zd] "
+					"of the file header; hex value of "
 					"character is [0x%.2x]\n", i, src[i]);
 			next_packet_is_auth_tok_packet = 0;
 		}
@@ -1864,8 +1864,8 @@ found_matching_auth_tok:
 				"session key for authentication token with sig "
 				"[%.*s]; rc = [%d]. Removing auth tok "
 				"candidate from the list and searching for "
-				"the next match.\n", candidate_auth_tok_sig,
-				ECRYPTFS_SIG_SIZE_HEX, rc);
+				"the next match.\n", ECRYPTFS_SIG_SIZE_HEX,
+				candidate_auth_tok_sig,	rc);
 		list_for_each_entry_safe(auth_tok_list_item,
 					 auth_tok_list_item_tmp,
 					 &auth_tok_list, list) {
@@ -2168,7 +2168,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
 	if (encrypted_session_key_valid) {
 		ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; "
 				"using auth_tok->session_key.encrypted_key, "
-				"where key_rec->enc_key_size = [%d]\n",
+				"where key_rec->enc_key_size = [%zd]\n",
 				key_rec->enc_key_size);
 		memcpy(key_rec->enc_key,
 		       auth_tok->session_key.encrypted_key,
@@ -2198,7 +2198,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
 	if (rc < 1 || rc > 2) {
 		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
 				"for crypt_stat session key; expected rc = 1; "
-				"got rc = [%d]. key_rec->enc_key_size = [%d]\n",
+				"got rc = [%d]. key_rec->enc_key_size = [%zd]\n",
 				rc, key_rec->enc_key_size);
 		rc = -ENOMEM;
 		goto out;
@@ -2209,7 +2209,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
 		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
 				"for crypt_stat encrypted session key; "
 				"expected rc = 1; got rc = [%d]. "
-				"key_rec->enc_key_size = [%d]\n", rc,
+				"key_rec->enc_key_size = [%zd]\n", rc,
 				key_rec->enc_key_size);
 		rc = -ENOMEM;
 		goto out;
@@ -2224,7 +2224,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
 		goto out;
 	}
 	rc = 0;
-	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
+	ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
 			crypt_stat->key_size);
 	rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
 				      (*key_rec).enc_key_size);
@@ -2235,7 +2235,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
 	}
 	ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
 	if (ecryptfs_verbosity > 0) {
-		ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n",
+		ecryptfs_printk(KERN_DEBUG, "EFEK of size [%zd]:\n",
 				key_rec->enc_key_size);
 		ecryptfs_dump_hex(key_rec->enc_key,
 				  key_rec->enc_key_size);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d3b28abdd6aa..758323a0f09a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -36,6 +36,7 @@
 #include <linux/parser.h>
 #include <linux/fs_stack.h>
 #include <linux/slab.h>
+#include <linux/magic.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -564,6 +565,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
+	s->s_magic = ECRYPTFS_SUPER_MAGIC;
 
 	inode = ecryptfs_get_inode(path.dentry->d_inode, s);
 	rc = PTR_ERR(inode);
@@ -808,9 +810,10 @@ static int __init ecryptfs_init(void)
 		ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
 				"larger than the host's page size, and so "
 				"eCryptfs cannot run on this system. The "
-				"default eCryptfs extent size is [%d] bytes; "
-				"the page size is [%d] bytes.\n",
-				ECRYPTFS_DEFAULT_EXTENT_SIZE, PAGE_CACHE_SIZE);
+				"default eCryptfs extent size is [%u] bytes; "
+				"the page size is [%lu] bytes.\n",
+				ECRYPTFS_DEFAULT_EXTENT_SIZE,
+				(unsigned long)PAGE_CACHE_SIZE);
 		goto out;
 	}
 	rc = ecryptfs_init_kmem_caches();
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index b1d82756544b..cc64fca89f8d 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -65,7 +65,7 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
 	rc = ecryptfs_encrypt_page(page);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error encrypting "
-				"page (upper index [0x%.16x])\n", page->index);
+				"page (upper index [0x%.16lx])\n", page->index);
 		ClearPageUptodate(page);
 		goto out;
 	}
@@ -237,7 +237,7 @@ out:
 		ClearPageUptodate(page);
 	else
 		SetPageUptodate(page);
-	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
+	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16lx]\n",
 			page->index);
 	unlock_page(page);
 	return rc;
@@ -290,6 +290,7 @@ static int ecryptfs_write_begin(struct file *file,
 		return -ENOMEM;
 	*pagep = page;
 
+	prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
 	if (!PageUptodate(page)) {
 		struct ecryptfs_crypt_stat *crypt_stat =
 			&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
@@ -335,18 +336,23 @@ static int ecryptfs_write_begin(struct file *file,
 				SetPageUptodate(page);
 			}
 		} else {
-			rc = ecryptfs_decrypt_page(page);
-			if (rc) {
-				printk(KERN_ERR "%s: Error decrypting page "
-				       "at index [%ld]; rc = [%d]\n",
-				       __func__, page->index, rc);
-				ClearPageUptodate(page);
-				goto out;
+			if (prev_page_end_size
+			    >= i_size_read(page->mapping->host)) {
+				zero_user(page, 0, PAGE_CACHE_SIZE);
+			} else {
+				rc = ecryptfs_decrypt_page(page);
+				if (rc) {
+					printk(KERN_ERR "%s: Error decrypting "
+					       "page at index [%ld]; "
+					       "rc = [%d]\n",
+					       __func__, page->index, rc);
+					ClearPageUptodate(page);
+					goto out;
+				}
 			}
 			SetPageUptodate(page);
 		}
 	}
-	prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
 	/* If creating a page or more of holes, zero them out via truncate.
 	 * Note, this will increase i_size. */
 	if (index != 0) {
@@ -488,7 +494,7 @@ static int ecryptfs_write_end(struct file *file,
 	} else
 		ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
 	ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
-			"(page w/ index = [0x%.16x], to = [%d])\n", index, to);
+			"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
 	if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 		rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
 						       to);
@@ -503,19 +509,20 @@ static int ecryptfs_write_end(struct file *file,
 	rc = fill_zeros_to_end_of_page(page, to);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
-			"zeros in page with index = [0x%.16x]\n", index);
+			"zeros in page with index = [0x%.16lx]\n", index);
 		goto out;
 	}
 	rc = ecryptfs_encrypt_page(page);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
-				"index [0x%.16x])\n", index);
+				"index [0x%.16lx])\n", index);
 		goto out;
 	}
 	if (pos + copied > i_size_read(ecryptfs_inode)) {
 		i_size_write(ecryptfs_inode, pos + copied);
 		ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
-				"[0x%.16x]\n", i_size_read(ecryptfs_inode));
+			"[0x%.16llx]\n",
+			(unsigned long long)i_size_read(ecryptfs_inode));
 	}
 	rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
 	if (rc)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1de65f572033..0c8d97b56f34 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
 			  loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 			  ssize_t len);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c4068f6abf03..63a75810b7c3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
 }
 
 /*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
  * operation, which gets called from sys_fallocate system call.
  * For block-mapped files, posix_fallocate should fall back to the method
  * of writing zeroes to the required new blocks (the same behavior which is
  * expected for file systems which do not support fallocate() system call).
  */
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	handle_t *handle;
 	loff_t new_size;
 	unsigned int max_blocks;
@@ -3645,7 +3646,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	unsigned int credits, blkbits = inode->i_blkbits;
 
 	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode && (mode != FALLOC_FL_KEEP_SIZE))
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
 		return -EOPNOTSUPP;
 
 	/*
@@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
-	/* preallocation to directories is currently not supported */
-	if (S_ISDIR(inode->i_mode))
-		return -ENODEV;
-
 	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bb003dc9ffff..2e8322c8aa88 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = {
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
+	.fallocate	= ext4_fallocate,
 };
 
 const struct inode_operations ext4_file_inode_operations = {
@@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = {
 	.removexattr	= generic_removexattr,
 #endif
 	.check_acl	= ext4_check_acl,
-	.fallocate	= ext4_fallocate,
 	.fiemap		= ext4_fiemap,
 };
 
diff --git a/fs/file_table.c b/fs/file_table.c
index c3dee381f1b4..c3e89adf53c0 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -311,7 +311,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 	struct files_struct *files = current->files;
 
 	*fput_needed = 0;
-	if (likely((atomic_read(&files->count) == 1))) {
+	if (atomic_read(&files->count) == 1) {
 		file = fcheck_files(files, fd);
 	} else {
 		rcu_read_lock();
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index fca6689e12e6..7cfdcb913363 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -19,6 +19,8 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/ext2_fs.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
 #include <linux/crc32.h>
 #include <linux/writeback.h>
 #include <asm/uaccess.h>
@@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
+static void empty_write_end(struct page *page, unsigned from,
+			   unsigned to)
+{
+	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+
+	page_zero_new_buffers(page, from, to);
+	flush_dcache_page(page);
+	mark_page_accessed(page);
+
+	if (!gfs2_is_writeback(ip))
+		gfs2_page_add_databufs(ip, page, from, to);
+
+	block_commit_write(page, from, to);
+}
+
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+	unsigned start, end, next;
+	struct buffer_head *bh, *head;
+	int error;
+
+	if (!page_has_buffers(page)) {
+		error = __block_write_begin(page, from, to - from, gfs2_block_map);
+		if (unlikely(error))
+			return error;
+
+		empty_write_end(page, from, to);
+		return 0;
+	}
+
+	bh = head = page_buffers(page);
+	next = end = 0;
+	while (next < from) {
+		next += bh->b_size;
+		bh = bh->b_this_page;
+	}
+	start = next;
+	do {
+		next += bh->b_size;
+		if (buffer_mapped(bh)) {
+			if (end) {
+				error = __block_write_begin(page, start, end - start,
+							    gfs2_block_map);
+				if (unlikely(error))
+					return error;
+				empty_write_end(page, start, end);
+				end = 0;
+			}
+			start = next;
+		}
+		else
+			end = next;
+		bh = bh->b_this_page;
+	} while (next < to);
+
+	if (end) {
+		error = __block_write_begin(page, start, end - start, gfs2_block_map);
+		if (unlikely(error))
+			return error;
+		empty_write_end(page, start, end);
+	}
+
+	return 0;
+}
+
+static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
+			   int mode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct buffer_head *dibh;
+	int error;
+	u64 start = offset >> PAGE_CACHE_SHIFT;
+	unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
+	u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+	pgoff_t curr;
+	struct page *page;
+	unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
+	unsigned int from, to;
+
+	if (!end_offset)
+		end_offset = PAGE_CACHE_SIZE;
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (unlikely(error))
+		goto out;
+
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
+	if (gfs2_is_stuffed(ip)) {
+		error = gfs2_unstuff_dinode(ip, NULL);
+		if (unlikely(error))
+			goto out;
+	}
+
+	curr = start;
+	offset = start << PAGE_CACHE_SHIFT;
+	from = start_offset;
+	to = PAGE_CACHE_SIZE;
+	while (curr <= end) {
+		page = grab_cache_page_write_begin(inode->i_mapping, curr,
+						   AOP_FLAG_NOFS);
+		if (unlikely(!page)) {
+			error = -ENOMEM;
+			goto out;
+		}
+
+		if (curr == end)
+			to = end_offset;
+		error = write_empty_blocks(page, from, to);
+		if (!error && offset + to > inode->i_size &&
+		    !(mode & FALLOC_FL_KEEP_SIZE)) {
+			i_size_write(inode, offset + to);
+		}
+		unlock_page(page);
+		page_cache_release(page);
+		if (error)
+			goto out;
+		curr++;
+		offset += PAGE_CACHE_SIZE;
+		from = 0;
+	}
+
+	gfs2_dinode_out(ip, dibh->b_data);
+	mark_inode_dirty(inode);
+
+	brelse(dibh);
+
+out:
+	return error;
+}
+
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
+			    unsigned int *data_blocks, unsigned int *ind_blocks)
+{
+	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
+
+	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
+		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+		max_data -= tmp;
+	}
+	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
+	   so it might end up with fewer data blocks */
+	if (max_data <= *data_blocks)
+		return;
+	*data_blocks = max_data;
+	*ind_blocks = max_blocks - max_data;
+	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
+	if (*len > max) {
+		*len = max;
+		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
+	}
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
+			   loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+	loff_t bytes, max_bytes;
+	struct gfs2_alloc *al;
+	int error;
+	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
+		 sdp->sd_sb.sb_bsize_shift;
+
+	len = next - offset;
+	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
+	if (!bytes)
+		bytes = UINT_MAX;
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+	error = gfs2_glock_nq(&ip->i_gh);
+	if (unlikely(error))
+		goto out_uninit;
+
+	if (!gfs2_write_alloc_required(ip, offset, len))
+		goto out_unlock;
+
+	while (len > 0) {
+		if (len < bytes)
+			bytes = len;
+		al = gfs2_alloc_get(ip);
+		if (!al) {
+			error = -ENOMEM;
+			goto out_unlock;
+		}
+
+		error = gfs2_quota_lock_check(ip);
+		if (error)
+			goto out_alloc_put;
+
+retry:
+		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+
+		al->al_requested = data_blocks + ind_blocks;
+		error = gfs2_inplace_reserve(ip);
+		if (error) {
+			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
+				bytes >>= 1;
+				goto retry;
+			}
+			goto out_qunlock;
+		}
+		max_bytes = bytes;
+		calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+		al->al_requested = data_blocks + ind_blocks;
+
+		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
+			  RES_RG_HDR + gfs2_rg_blocks(al);
+		if (gfs2_is_jdata(ip))
+			rblocks += data_blocks ? data_blocks : 1;
+
+		error = gfs2_trans_begin(sdp, rblocks,
+					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+		if (error)
+			goto out_trans_fail;
+
+		error = fallocate_chunk(inode, offset, max_bytes, mode);
+		gfs2_trans_end(sdp);
+
+		if (error)
+			goto out_trans_fail;
+
+		len -= max_bytes;
+		offset += max_bytes;
+		gfs2_inplace_release(ip);
+		gfs2_quota_unlock(ip);
+		gfs2_alloc_put(ip);
+	}
+	goto out_unlock;
+
+out_trans_fail:
+	gfs2_inplace_release(ip);
+out_qunlock:
+	gfs2_quota_unlock(ip);
+out_alloc_put:
+	gfs2_alloc_put(ip);
+out_unlock:
+	gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+	gfs2_holder_uninit(&ip->i_gh);
+	return error;
+}
+
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 
 /**
@@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 	.setlease	= gfs2_setlease,
+	.fallocate	= gfs2_fallocate,
 };
 
 const struct file_operations gfs2_dir_fops = {
@@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
 	.setlease	= generic_setlease,
+	.fallocate	= gfs2_fallocate,
 };
 
 const struct file_operations gfs2_dir_fops_nolock = {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 040b5a2e6556..d8b26ac2e20b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -18,8 +18,6 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/fiemap.h>
-#include <linux/swap.h>
-#include <linux/falloc.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	return ret;
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-			   unsigned to)
-{
-	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-
-	page_zero_new_buffers(page, from, to);
-	flush_dcache_page(page);
-	mark_page_accessed(page);
-
-	if (!gfs2_is_writeback(ip))
-		gfs2_page_add_databufs(ip, page, from, to);
-
-	block_commit_write(page, from, to);
-}
-
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
-{
-	unsigned start, end, next;
-	struct buffer_head *bh, *head;
-	int error;
-
-	if (!page_has_buffers(page)) {
-		error = __block_write_begin(page, from, to - from, gfs2_block_map);
-		if (unlikely(error))
-			return error;
-
-		empty_write_end(page, from, to);
-		return 0;
-	}
-
-	bh = head = page_buffers(page);
-	next = end = 0;
-	while (next < from) {
-		next += bh->b_size;
-		bh = bh->b_this_page;
-	}
-	start = next;
-	do {
-		next += bh->b_size;
-		if (buffer_mapped(bh)) {
-			if (end) {
-				error = __block_write_begin(page, start, end - start,
-							    gfs2_block_map);
-				if (unlikely(error))
-					return error;
-				empty_write_end(page, start, end);
-				end = 0;
-			}
-			start = next;
-		}
-		else
-			end = next;
-		bh = bh->b_this_page;
-	} while (next < to);
-
-	if (end) {
-		error = __block_write_begin(page, start, end - start, gfs2_block_map);
-		if (unlikely(error))
-			return error;
-		empty_write_end(page, start, end);
-	}
-
-	return 0;
-}
-
-static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
-			   int mode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct buffer_head *dibh;
-	int error;
-	u64 start = offset >> PAGE_CACHE_SHIFT;
-	unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
-	u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
-	pgoff_t curr;
-	struct page *page;
-	unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
-	unsigned int from, to;
-
-	if (!end_offset)
-		end_offset = PAGE_CACHE_SIZE;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(error))
-		goto out;
-
-	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-	if (gfs2_is_stuffed(ip)) {
-		error = gfs2_unstuff_dinode(ip, NULL);
-		if (unlikely(error))
-			goto out;
-	}
-
-	curr = start;
-	offset = start << PAGE_CACHE_SHIFT;
-	from = start_offset;
-	to = PAGE_CACHE_SIZE;
-	while (curr <= end) {
-		page = grab_cache_page_write_begin(inode->i_mapping, curr,
-						   AOP_FLAG_NOFS);
-		if (unlikely(!page)) {
-			error = -ENOMEM;
-			goto out;
-		}
-
-		if (curr == end)
-			to = end_offset;
-		error = write_empty_blocks(page, from, to);
-		if (!error && offset + to > inode->i_size &&
-		    !(mode & FALLOC_FL_KEEP_SIZE)) {
-			i_size_write(inode, offset + to);
-		}
-		unlock_page(page);
-		page_cache_release(page);
-		if (error)
-			goto out;
-		curr++;
-		offset += PAGE_CACHE_SIZE;
-		from = 0;
-	}
-
-	gfs2_dinode_out(ip, dibh->b_data);
-	mark_inode_dirty(inode);
-
-	brelse(dibh);
-
-out:
-	return error;
-}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
-			    unsigned int *data_blocks, unsigned int *ind_blocks)
-{
-	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
-	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
-
-	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
-		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
-		max_data -= tmp;
-	}
-	/* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
-	   so it might end up with fewer data blocks */
-	if (max_data <= *data_blocks)
-		return;
-	*data_blocks = max_data;
-	*ind_blocks = max_blocks - max_data;
-	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
-	if (*len > max) {
-		*len = max;
-		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
-	}
-}
-
-static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
-			   loff_t len)
-{
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_inode *ip = GFS2_I(inode);
-	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes;
-	struct gfs2_alloc *al;
-	int error;
-	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
-	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
-
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode && (mode != FALLOC_FL_KEEP_SIZE))
-		return -EOPNOTSUPP;
-
-	offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
-		 sdp->sd_sb.sb_bsize_shift;
-
-	len = next - offset;
-	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
-	if (!bytes)
-		bytes = UINT_MAX;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
-	error = gfs2_glock_nq(&ip->i_gh);
-	if (unlikely(error))
-		goto out_uninit;
-
-	if (!gfs2_write_alloc_required(ip, offset, len))
-		goto out_unlock;
-
-	while (len > 0) {
-		if (len < bytes)
-			bytes = len;
-		al = gfs2_alloc_get(ip);
-		if (!al) {
-			error = -ENOMEM;
-			goto out_unlock;
-		}
-
-		error = gfs2_quota_lock_check(ip);
-		if (error)
-			goto out_alloc_put;
-
-retry:
-		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-
-		al->al_requested = data_blocks + ind_blocks;
-		error = gfs2_inplace_reserve(ip);
-		if (error) {
-			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
-				bytes >>= 1;
-				goto retry;
-			}
-			goto out_qunlock;
-		}
-		max_bytes = bytes;
-		calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
-		al->al_requested = data_blocks + ind_blocks;
-
-		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
-			  RES_RG_HDR + gfs2_rg_blocks(al);
-		if (gfs2_is_jdata(ip))
-			rblocks += data_blocks ? data_blocks : 1;
-
-		error = gfs2_trans_begin(sdp, rblocks,
-					 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
-		if (error)
-			goto out_trans_fail;
-
-		error = fallocate_chunk(inode, offset, max_bytes, mode);
-		gfs2_trans_end(sdp);
-
-		if (error)
-			goto out_trans_fail;
-
-		len -= max_bytes;
-		offset += max_bytes;
-		gfs2_inplace_release(ip);
-		gfs2_quota_unlock(ip);
-		gfs2_alloc_put(ip);
-	}
-	goto out_unlock;
-
-out_trans_fail:
-	gfs2_inplace_release(ip);
-out_qunlock:
-	gfs2_quota_unlock(ip);
-out_alloc_put:
-	gfs2_alloc_put(ip);
-out_unlock:
-	gfs2_glock_dq(&ip->i_gh);
-out_uninit:
-	gfs2_holder_uninit(&ip->i_gh);
-	return error;
-}
-
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
-	.fallocate = gfs2_fallocate,
 	.fiemap = gfs2_fiemap,
 };
 
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 56f0da1cfd10..1ae35baa539e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -281,7 +281,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	    attr->ia_size != i_size_read(inode)) {
 		error = vmtruncate(inode, attr->ia_size);
 		if (error)
-			return error;
+			goto out_unlock;
 	}
 
 	setattr_copy(inode, attr);
diff --git a/fs/internal.h b/fs/internal.h
index 12ccb86edef7..0663568b1247 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,8 +70,7 @@ extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
 extern void release_mounts(struct list_head *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
 extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
-extern int do_add_mount(struct vfsmount *, struct path *, int);
-extern void mnt_clear_expiry(struct vfsmount *);
+extern int finish_automount(struct vfsmount *, struct path *);
 
 extern void mnt_make_longterm(struct vfsmount *);
 extern void mnt_make_shortterm(struct vfsmount *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d6cc16476620..a59635e295fa 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -86,7 +86,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
 			    u64 phys, u64 len, u32 flags)
 {
 	struct fiemap_extent extent;
-	struct fiemap_extent *dest = fieinfo->fi_extents_start;
+	struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
 
 	/* only count the extents */
 	if (fieinfo->fi_extents_max == 0) {
@@ -173,6 +173,7 @@ static int fiemap_check_ranges(struct super_block *sb,
 static int ioctl_fiemap(struct file *filp, unsigned long arg)
 {
 	struct fiemap fiemap;
+	struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
 	struct fiemap_extent_info fieinfo = { 0, };
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
@@ -182,8 +183,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	if (!inode->i_op->fiemap)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&fiemap, (struct fiemap __user *)arg,
-			   sizeof(struct fiemap)))
+	if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
 		return -EFAULT;
 
 	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
@@ -196,7 +196,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 
 	fieinfo.fi_flags = fiemap.fm_flags;
 	fieinfo.fi_extents_max = fiemap.fm_extent_count;
-	fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
+	fieinfo.fi_extents_start = ufiemap->fm_extents;
 
 	if (fiemap.fm_extent_count != 0 &&
 	    !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start,
@@ -209,7 +209,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len);
 	fiemap.fm_flags = fieinfo.fi_flags;
 	fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
-	if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
+	if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
 		error = -EFAULT;
 
 	return error;
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 85c6be2db02f..3005ec4520ad 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -336,14 +336,13 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
 	size = sizeof(struct jffs2_eraseblock) * c->nr_blocks;
 #ifndef __ECOS
 	if (jffs2_blocks_use_vmalloc(c))
-		c->blocks = vmalloc(size);
+		c->blocks = vzalloc(size);
 	else
 #endif
-		c->blocks = kmalloc(size, GFP_KERNEL);
+		c->blocks = kzalloc(size, GFP_KERNEL);
 	if (!c->blocks)
 		return -ENOMEM;
 
-	memset(c->blocks, 0, size);
 	for (i=0; i<c->nr_blocks; i++) {
 		INIT_LIST_HEAD(&c->blocks[i].list);
 		c->blocks[i].offset = i * c->sector_size;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index f864005de64c..0bc6a6c80a56 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -144,4 +144,4 @@ struct jffs2_sb_info {
 	void *os_priv;
 };
 
-#endif /* _JFFS2_FB_SB */
+#endif /* _JFFS2_FS_SB */
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 9b572ca40a49..4f9cc0482949 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -151,7 +151,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
 		JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
 			    offset, je32_to_cpu(rx.hdr_crc), crc);
 		xd->flags |= JFFS2_XFLAGS_INVALID;
-		return EIO;
+		return -EIO;
 	}
 	totlen = PAD(sizeof(rx) + rx.name_len + 1 + je16_to_cpu(rx.value_len));
 	if (je16_to_cpu(rx.magic) != JFFS2_MAGIC_BITMASK
@@ -167,7 +167,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
 			    je32_to_cpu(rx.xid), xd->xid,
 			    je32_to_cpu(rx.version), xd->version);
 		xd->flags |= JFFS2_XFLAGS_INVALID;
-		return EIO;
+		return -EIO;
 	}
 	xd->xprefix = rx.xprefix;
 	xd->name_len = rx.name_len;
@@ -230,7 +230,7 @@ static int do_load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum
 			      ref_offset(xd->node), xd->data_crc, crc);
 		kfree(data);
 		xd->flags |= JFFS2_XFLAGS_INVALID;
-		return EIO;
+		return -EIO;
 	}
 
 	xd->flags |= JFFS2_XFLAGS_HOT;
@@ -268,7 +268,7 @@ static int load_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *x
 	if (xd->xname)
 		return 0;
 	if (xd->flags & JFFS2_XFLAGS_INVALID)
-		return EIO;
+		return -EIO;
 	if (unlikely(is_xattr_datum_unchecked(c, xd)))
 		rc = do_verify_xattr_datum(c, xd);
 	if (!rc)
@@ -460,7 +460,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
 	if (crc != je32_to_cpu(rr.node_crc)) {
 		JFFS2_ERROR("node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
 			    offset, je32_to_cpu(rr.node_crc), crc);
-		return EIO;
+		return -EIO;
 	}
 	if (je16_to_cpu(rr.magic) != JFFS2_MAGIC_BITMASK
 	    || je16_to_cpu(rr.nodetype) != JFFS2_NODETYPE_XREF
@@ -470,7 +470,7 @@ static int verify_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *ref
 			    offset, je16_to_cpu(rr.magic), JFFS2_MAGIC_BITMASK,
 			    je16_to_cpu(rr.nodetype), JFFS2_NODETYPE_XREF,
 			    je32_to_cpu(rr.totlen), PAD(sizeof(rr)));
-		return EIO;
+		return -EIO;
 	}
 	ref->ino = je32_to_cpu(rr.ino);
 	ref->xid = je32_to_cpu(rr.xid);
diff --git a/fs/namei.c b/fs/namei.c
index 8f7b41a14882..b753192d8c3f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -923,37 +923,13 @@ static int follow_automount(struct path *path, unsigned flags,
 	if (!mnt) /* mount collision */
 		return 0;
 
-	/* The new mount record should have at least 2 refs to prevent it being
-	 * expired before we get a chance to add it
-	 */
-	BUG_ON(mnt_get_count(mnt) < 2);
-
-	if (mnt->mnt_sb == path->mnt->mnt_sb &&
-	    mnt->mnt_root == path->dentry) {
-		mnt_clear_expiry(mnt);
-		mntput(mnt);
-		mntput(mnt);
-		return -ELOOP;
-	}
+	err = finish_automount(mnt, path);
 
-	/* We need to add the mountpoint to the parent.  The filesystem may
-	 * have placed it on an expiry list, and so we need to make sure it
-	 * won't be expired under us if do_add_mount() fails (do_add_mount()
-	 * will eat a reference unconditionally).
-	 */
-	mntget(mnt);
-	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
 	switch (err) {
 	case -EBUSY:
 		/* Someone else made a mount here whilst we were busy */
-		err = 0;
-	default:
-		mnt_clear_expiry(mnt);
-		mntput(mnt);
-		mntput(mnt);
-		return err;
+		return 0;
 	case 0:
-		mntput(mnt);
 		dput(path->dentry);
 		if (*need_mntput)
 			mntput(path->mnt);
@@ -961,7 +937,10 @@ static int follow_automount(struct path *path, unsigned flags,
 		path->dentry = dget(mnt->mnt_root);
 		*need_mntput = true;
 		return 0;
+	default:
+		return err;
 	}
+
 }
 
 /*
diff --git a/fs/namespace.c b/fs/namespace.c
index 9f544f35ed34..7b0b95371696 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1872,6 +1872,8 @@ out:
 	return err;
 }
 
+static int do_add_mount(struct vfsmount *, struct path *, int);
+
 /*
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
@@ -1880,6 +1882,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
 			int mnt_flags, char *name, void *data)
 {
 	struct vfsmount *mnt;
+	int err;
 
 	if (!type)
 		return -EINVAL;
@@ -1892,14 +1895,47 @@ static int do_new_mount(struct path *path, char *type, int flags,
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
-	return do_add_mount(mnt, path, mnt_flags);
+	err = do_add_mount(mnt, path, mnt_flags);
+	if (err)
+		mntput(mnt);
+	return err;
+}
+
+int finish_automount(struct vfsmount *m, struct path *path)
+{
+	int err;
+	/* The new mount record should have at least 2 refs to prevent it being
+	 * expired before we get a chance to add it
+	 */
+	BUG_ON(mnt_get_count(m) < 2);
+
+	if (m->mnt_sb == path->mnt->mnt_sb &&
+	    m->mnt_root == path->dentry) {
+		err = -ELOOP;
+		goto fail;
+	}
+
+	err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+	if (!err)
+		return 0;
+fail:
+	/* remove m from any expiration list it may be on */
+	if (!list_empty(&m->mnt_expire)) {
+		down_write(&namespace_sem);
+		br_write_lock(vfsmount_lock);
+		list_del_init(&m->mnt_expire);
+		br_write_unlock(vfsmount_lock);
+		up_write(&namespace_sem);
+	}
+	mntput(m);
+	mntput(m);
+	return err;
 }
 
 /*
  * add a mount into a namespace's mount tree
- * - this unconditionally eats one of the caller's references to newmnt.
  */
-int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
+static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
 {
 	int err;
 
@@ -1926,15 +1962,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
-	if ((err = graft_tree(newmnt, path)))
-		goto unlock;
-
-	up_write(&namespace_sem);
-	return 0;
+	err = graft_tree(newmnt, path);
 
 unlock:
 	up_write(&namespace_sem);
-	mntput(newmnt);
 	return err;
 }
 
@@ -1956,20 +1987,6 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
 EXPORT_SYMBOL(mnt_set_expiry);
 
 /*
- * Remove a vfsmount from any expiration list it may be on
- */
-void mnt_clear_expiry(struct vfsmount *mnt)
-{
-	if (!list_empty(&mnt->mnt_expire)) {
-		down_write(&namespace_sem);
-		br_write_lock(vfsmount_lock);
-		list_del_init(&mnt->mnt_expire);
-		br_write_unlock(vfsmount_lock);
-		up_write(&namespace_sem);
-	}
-}
-
-/*
  * process a list of expirable mountpoints with the intent of discarding any
  * mountpoints that aren't in use and haven't been touched since last we came
  * here
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 63e3fca266e0..a6651956482e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1989,20 +1989,20 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
 }
 
-static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
+static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
 			    loff_t len)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_space_resv sr;
 	int change_size = 1;
 	int cmd = OCFS2_IOC_RESVSP64;
 
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
 	if (!ocfs2_writes_unwritten_extents(osb))
 		return -EOPNOTSUPP;
 
-	if (S_ISDIR(inode->i_mode))
-		return -ENODEV;
-
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		change_size = 0;
 
@@ -2610,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = {
 	.getxattr	= generic_getxattr,
 	.listxattr	= ocfs2_listxattr,
 	.removexattr	= generic_removexattr,
-	.fallocate	= ocfs2_fallocate,
 	.fiemap		= ocfs2_fiemap,
 };
 
@@ -2642,6 +2641,7 @@ const struct file_operations ocfs2_fops = {
 	.flock		= ocfs2_flock,
 	.splice_read	= ocfs2_file_splice_read,
 	.splice_write	= ocfs2_file_splice_write,
+	.fallocate	= ocfs2_fallocate,
 };
 
 const struct file_operations ocfs2_dops = {
diff --git a/fs/open.c b/fs/open.c
index 5b6ef7e2859e..e52389e1f05b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
 		return -EFBIG;
 
-	if (!inode->i_op->fallocate)
+	if (!file->f_op->fallocate)
 		return -EOPNOTSUPP;
 
-	return inode->i_op->fallocate(inode, mode, offset, len);
+	return file->f_op->fallocate(file, mode, offset, len);
 }
 
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ef51eb43e137..a55c1b46b219 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_trace.h"
 
 #include <linux/dcache.h>
+#include <linux/falloc.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -882,6 +883,60 @@ out_unlock:
 	return ret;
 }
 
+STATIC long
+xfs_file_fallocate(
+	struct file	*file,
+	int		mode,
+	loff_t		offset,
+	loff_t		len)
+{
+	struct inode	*inode = file->f_path.dentry->d_inode;
+	long		error;
+	loff_t		new_size = 0;
+	xfs_flock64_t	bf;
+	xfs_inode_t	*ip = XFS_I(inode);
+	int		cmd = XFS_IOC_RESVSP;
+
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		return -EOPNOTSUPP;
+
+	bf.l_whence = 0;
+	bf.l_start = offset;
+	bf.l_len = len;
+
+	xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		cmd = XFS_IOC_UNRESVSP;
+
+	/* check the new inode size is valid before allocating */
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    offset + len > i_size_read(inode)) {
+		new_size = offset + len;
+		error = inode_newsize_ok(inode, new_size);
+		if (error)
+			goto out_unlock;
+	}
+
+	error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+	if (error)
+		goto out_unlock;
+
+	/* Change file size if needed */
+	if (new_size) {
+		struct iattr iattr;
+
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = new_size;
+		error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+	}
+
+out_unlock:
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+}
+
+
 STATIC int
 xfs_file_open(
 	struct inode	*inode,
@@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = {
 	.open		= xfs_file_open,
 	.release	= xfs_file_release,
 	.fsync		= xfs_file_fsync,
+	.fallocate	= xfs_file_fallocate,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index da54403633b6..bd5727852fd6 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -46,7 +46,6 @@
 #include <linux/namei.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/falloc.h>
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
@@ -505,61 +504,6 @@ xfs_vn_setattr(
 	return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 
-STATIC long
-xfs_vn_fallocate(
-	struct inode	*inode,
-	int		mode,
-	loff_t		offset,
-	loff_t		len)
-{
-	long		error;
-	loff_t		new_size = 0;
-	xfs_flock64_t	bf;
-	xfs_inode_t	*ip = XFS_I(inode);
-	int		cmd = XFS_IOC_RESVSP;
-
-	/* preallocation on directories not yet supported */
-	error = -ENODEV;
-	if (S_ISDIR(inode->i_mode))
-		goto out_error;
-
-	bf.l_whence = 0;
-	bf.l_start = offset;
-	bf.l_len = len;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	if (mode & FALLOC_FL_PUNCH_HOLE)
-		cmd = XFS_IOC_UNRESVSP;
-
-	/* check the new inode size is valid before allocating */
-	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-	    offset + len > i_size_read(inode)) {
-		new_size = offset + len;
-		error = inode_newsize_ok(inode, new_size);
-		if (error)
-			goto out_unlock;
-	}
-
-	error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
-	if (error)
-		goto out_unlock;
-
-	/* Change file size if needed */
-	if (new_size) {
-		struct iattr iattr;
-
-		iattr.ia_valid = ATTR_SIZE;
-		iattr.ia_size = new_size;
-		error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
-	}
-
-out_unlock:
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-out_error:
-	return error;
-}
-
 #define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
 /*
@@ -653,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = {
 	.getxattr		= generic_getxattr,
 	.removexattr		= generic_removexattr,
 	.listxattr		= xfs_vn_listxattr,
-	.fallocate		= xfs_vn_fallocate,
 	.fiemap			= xfs_vn_fiemap,
 };
 
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index e6cf955ec0fc..0df88897ef84 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -75,11 +75,11 @@ xfs_cmn_err(
 {
 	struct va_format	vaf;
 	va_list			args;
-	int			panic = 0;
+	int			do_panic = 0;
 
 	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
 		printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
-		panic = 1;
+		do_panic = 1;
 	}
 
 	va_start(args, fmt);
@@ -89,7 +89,7 @@ xfs_cmn_err(
 	printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
 	va_end(args);
 
-	BUG_ON(panic);
+	BUG_ON(do_panic);
 }
 
 void
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 521a0f8974ac..3111385b8ca7 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -12,7 +12,6 @@
  *
  * Please credit ARM.com
  * Documentation: ARM DDI 0196D
- *
  */
 
 #ifndef AMBA_PL08X_H
@@ -22,6 +21,15 @@
 #include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 
+struct pl08x_lli;
+struct pl08x_driver_data;
+
+/* Bitmasks for selecting AHB ports for DMA transfers */
+enum {
+	PL08X_AHB1 = (1 << 0),
+	PL08X_AHB2 = (1 << 1)
+};
+
 /**
  * struct pl08x_channel_data - data structure to pass info between
  * platform and PL08x driver regarding channel configuration
@@ -46,8 +54,10 @@
  * @circular_buffer: whether the buffer passed in is circular and
  * shall simply be looped round round (like a record baby round
  * round round round)
- * @single: the device connected to this channel will request single
- * DMA transfers, not bursts. (Bursts are default.)
+ * @single: the device connected to this channel will request single DMA
+ * transfers, not bursts. (Bursts are default.)
+ * @periph_buses: the device connected to this channel is accessible via
+ * these buses (use PL08X_AHB1 | PL08X_AHB2).
  */
 struct pl08x_channel_data {
 	char *bus_id;
@@ -55,10 +65,10 @@ struct pl08x_channel_data {
 	int max_signal;
 	u32 muxval;
 	u32 cctl;
-	u32 ccfg;
 	dma_addr_t addr;
 	bool circular_buffer;
 	bool single;
+	u8 periph_buses;
 };
 
 /**
@@ -67,24 +77,23 @@ struct pl08x_channel_data {
  * @addr: current address
  * @maxwidth: the maximum width of a transfer on this bus
  * @buswidth: the width of this bus in bytes: 1, 2 or 4
- * @fill_bytes: bytes required to fill to the next bus memory
- * boundary
+ * @fill_bytes: bytes required to fill to the next bus memory boundary
  */
 struct pl08x_bus_data {
 	dma_addr_t addr;
 	u8 maxwidth;
 	u8 buswidth;
-	u32 fill_bytes;
+	size_t fill_bytes;
 };
 
 /**
  * struct pl08x_phy_chan - holder for the physical channels
  * @id: physical index to this channel
  * @lock: a lock to use when altering an instance of this struct
- * @signal: the physical signal (aka channel) serving this
- * physical channel right now
- * @serving: the virtual channel currently being served by this
- * physical channel
+ * @signal: the physical signal (aka channel) serving this physical channel
+ * right now
+ * @serving: the virtual channel currently being served by this physical
+ * channel
  */
 struct pl08x_phy_chan {
 	unsigned int id;
@@ -92,11 +101,6 @@ struct pl08x_phy_chan {
 	spinlock_t lock;
 	int signal;
 	struct pl08x_dma_chan *serving;
-	u32 csrc;
-	u32 cdst;
-	u32 clli;
-	u32 cctl;
-	u32 ccfg;
 };
 
 /**
@@ -108,26 +112,23 @@ struct pl08x_txd {
 	struct dma_async_tx_descriptor tx;
 	struct list_head node;
 	enum dma_data_direction	direction;
-	struct pl08x_bus_data srcbus;
-	struct pl08x_bus_data dstbus;
-	int len;
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
 	dma_addr_t llis_bus;
-	void *llis_va;
-	struct pl08x_channel_data *cd;
-	bool active;
+	struct pl08x_lli *llis_va;
+	/* Default cctl value for LLIs */
+	u32 cctl;
 	/*
 	 * Settings to be put into the physical channel when we
-	 * trigger this txd
+	 * trigger this txd.  Other registers are in llis_va[0].
 	 */
-	u32 csrc;
-	u32 cdst;
-	u32 clli;
-	u32 cctl;
+	u32 ccfg;
 };
 
 /**
- * struct pl08x_dma_chan_state - holds the PL08x specific virtual
- * channel states
+ * struct pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * states
  * @PL08X_CHAN_IDLE: the channel is idle
  * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
  * channel and is running a transfer on it
@@ -147,6 +148,8 @@ enum pl08x_dma_chan_state {
  * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
  * @chan: wrappped abstract channel
  * @phychan: the physical channel utilized by this channel, if there is one
+ * @phychan_hold: if non-zero, hold on to the physical channel even if we
+ * have no pending entries
  * @tasklet: tasklet scheduled by the IRQ to handle actual work etc
  * @name: name of channel
  * @cd: channel platform data
@@ -154,53 +157,49 @@ enum pl08x_dma_chan_state {
  * @runtime_direction: current direction of this channel according to
  * runtime config
  * @lc: last completed transaction on this channel
- * @desc_list: queued transactions pending on this channel
+ * @pend_list: queued transactions pending on this channel
  * @at: active transaction on this channel
- * @lockflags: sometimes we let a lock last between two function calls,
- * especially prep/submit, and then we need to store the IRQ flags
- * in the channel state, here
  * @lock: a lock for this channel data
  * @host: a pointer to the host (internal use)
  * @state: whether the channel is idle, paused, running etc
  * @slave: whether this channel is a device (slave) or for memcpy
- * @waiting: a TX descriptor on this channel which is waiting for
- * a physical channel to become available
+ * @waiting: a TX descriptor on this channel which is waiting for a physical
+ * channel to become available
  */
 struct pl08x_dma_chan {
 	struct dma_chan chan;
 	struct pl08x_phy_chan *phychan;
+	int phychan_hold;
 	struct tasklet_struct tasklet;
 	char *name;
 	struct pl08x_channel_data *cd;
 	dma_addr_t runtime_addr;
 	enum dma_data_direction	runtime_direction;
-	atomic_t last_issued;
 	dma_cookie_t lc;
-	struct list_head desc_list;
+	struct list_head pend_list;
 	struct pl08x_txd *at;
-	unsigned long lockflags;
 	spinlock_t lock;
-	void *host;
+	struct pl08x_driver_data *host;
 	enum pl08x_dma_chan_state state;
 	bool slave;
 	struct pl08x_txd *waiting;
 };
 
 /**
- * struct pl08x_platform_data - the platform configuration for the
- * PL08x PrimeCells.
+ * struct pl08x_platform_data - the platform configuration for the PL08x
+ * PrimeCells.
  * @slave_channels: the channels defined for the different devices on the
  * platform, all inclusive, including multiplexed channels. The available
- * physical channels will be multiplexed around these signals as they
- * are requested, just enumerate all possible channels.
- * @get_signal: request a physical signal to be used for a DMA
- * transfer immediately: if there is some multiplexing or similar blocking
- * the use of the channel the transfer can be denied by returning
- * less than zero, else it returns the allocated signal number
+ * physical channels will be multiplexed around these signals as they are
+ * requested, just enumerate all possible channels.
+ * @get_signal: request a physical signal to be used for a DMA transfer
+ * immediately: if there is some multiplexing or similar blocking the use
+ * of the channel the transfer can be denied by returning less than zero,
+ * else it returns the allocated signal number
  * @put_signal: indicate to the platform that this physical signal is not
  * running any DMA transfer and multiplexing can be recycled
- * @bus_bit_lli: Bit[0] of the address indicated which AHB bus master the
- * LLI addresses are on 0/1 Master 1/2.
+ * @lli_buses: buses which LLIs can be fetched from: PL08X_AHB1 | PL08X_AHB2
+ * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
  */
 struct pl08x_platform_data {
 	struct pl08x_channel_data *slave_channels;
@@ -208,6 +207,8 @@ struct pl08x_platform_data {
 	struct pl08x_channel_data memcpy_channel;
 	int (*get_signal)(struct pl08x_dma_chan *);
 	void (*put_signal)(struct pl08x_dma_chan *);
+	u8 lli_buses;
+	u8 mem_buses;
 };
 
 #ifdef CONFIG_AMBA_PL08X
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8cd00ad98d37..9bebd7f16ef1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -532,7 +532,7 @@ static inline int dmaengine_resume(struct dma_chan *chan)
 	return dmaengine_device_control(chan, DMA_RESUME, 0);
 }
 
-static inline int dmaengine_submit(struct dma_async_tx_descriptor *desc)
+static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
 {
 	return desc->tx_submit(desc);
 }
diff --git a/include/linux/file.h b/include/linux/file.h
index b1e12970f617..e85baebf6279 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -23,7 +23,7 @@ extern struct file *alloc_file(struct path *, fmode_t mode,
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
-	if (unlikely(fput_needed))
+	if (fput_needed)
 		fput(file);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 177b4ddea418..32b38cd829d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1483,8 +1483,8 @@ struct fiemap_extent_info {
 	unsigned int fi_flags;		/* Flags as passed from user */
 	unsigned int fi_extents_mapped;	/* Number of mapped extents */
 	unsigned int fi_extents_max;	/* Size of fiemap_extent array */
-	struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent
-						 * array */
+	struct fiemap_extent __user *fi_extents_start; /* Start of
+							fiemap_extent array */
 };
 int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
 			    u64 phys, u64 len, u32 flags);
@@ -1552,6 +1552,8 @@ struct file_operations {
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
 	int (*setlease)(struct file *, long, struct file_lock **);
+	long (*fallocate)(struct file *file, int mode, loff_t offset,
+			  loff_t len);
 };
 
 #define IPERM_FLAG_RCU	0x0001
@@ -1582,8 +1584,6 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
-	long (*fallocate)(struct inode *inode, int mode, loff_t offset,
-			  loff_t len);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
 } ____cacheline_aligned;
diff --git a/include/linux/magic.h b/include/linux/magic.h
index ff690d05f129..62730ea2b56e 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -16,6 +16,7 @@
 #define TMPFS_MAGIC		0x01021994
 #define HUGETLBFS_MAGIC 	0x958458f6	/* some random number */
 #define SQUASHFS_MAGIC		0x73717368
+#define ECRYPTFS_SUPER_MAGIC	0xf15f
 #define EFS_SUPER_MAGIC		0x414A53
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index a7b15bc7648e..049214642036 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -144,6 +144,11 @@ enum {
 	MLX4_STAT_RATE_OFFSET	= 5
 };
 
+enum mlx4_protocol {
+	MLX4_PROTOCOL_IB,
+	MLX4_PROTOCOL_EN,
+};
+
 enum {
 	MLX4_MTT_FLAG_PRESENT		= 1
 };
@@ -500,8 +505,9 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
-			  int block_mcast_loopback);
-int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
+			  int block_mcast_loopback, enum mlx4_protocol protocol);
+int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			  enum mlx4_protocol protocol);
 
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index f407cd4bfb34..e1eebf78caba 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -34,6 +34,7 @@
 #define MLX4_DRIVER_H
 
 #include <linux/device.h>
+#include <linux/mlx4/device.h>
 
 struct mlx4_dev;
 
@@ -44,11 +45,6 @@ enum mlx4_dev_event {
 	MLX4_DEV_EVENT_PORT_REINIT,
 };
 
-enum mlx4_protocol {
-	MLX4_PROTOCOL_IB,
-	MLX4_PROTOCOL_EN,
-};
-
 struct mlx4_interface {
 	void *			(*add)	 (struct mlx4_dev *dev);
 	void			(*remove)(struct mlx4_dev *dev, void *context);
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 4dd0c2cd7659..a9baee6864af 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -527,8 +527,7 @@ struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t s
 struct cfi_fixup {
 	uint16_t mfr;
 	uint16_t id;
-	void (*fixup)(struct mtd_info *mtd, void* param);
-	void* param;
+	void (*fixup)(struct mtd_info *mtd);
 };
 
 #define CFI_MFR_ANY		0xFFFF
diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index 5d2556700ec2..6987995ad3cf 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -16,6 +16,7 @@
 #ifndef __MTD_FSMC_H
 #define __MTD_FSMC_H
 
+#include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/physmap.h>
 #include <linux/types.h>
@@ -27,7 +28,7 @@
 
 /*
  * The placement of the Command Latch Enable (CLE) and
- * Address Latch Enable (ALE) is twised around in the
+ * Address Latch Enable (ALE) is twisted around in the
  * SPEAR310 implementation.
  */
 #if defined(CONFIG_MACH_SPEAR310)
@@ -62,7 +63,7 @@ struct fsmc_nor_bank_regs {
 
 /* ctrl_tim register definitions */
 
-struct fsms_nand_bank_regs {
+struct fsmc_nand_bank_regs {
 	uint32_t pc;
 	uint32_t sts;
 	uint32_t comm;
@@ -78,7 +79,7 @@ struct fsms_nand_bank_regs {
 struct fsmc_regs {
 	struct fsmc_nor_bank_regs nor_bank_regs[FSMC_MAX_NOR_BANKS];
 	uint8_t reserved_1[0x40 - 0x20];
-	struct fsms_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS];
+	struct fsmc_nand_bank_regs bank_regs[FSMC_MAX_NAND_BANKS];
 	uint8_t reserved_2[0xfe0 - 0xc0];
 	uint32_t peripid0;			/* 0xfe0 */
 	uint32_t peripid1;			/* 0xfe4 */
@@ -114,25 +115,6 @@ struct fsmc_regs {
 #define FSMC_THOLD_4		(4 << 16)
 #define FSMC_THIZ_1		(1 << 24)
 
-/* peripid2 register definitions */
-#define FSMC_REVISION_MSK	(0xf)
-#define FSMC_REVISION_SHFT	(0x4)
-
-#define FSMC_VER1		1
-#define FSMC_VER2		2
-#define FSMC_VER3		3
-#define FSMC_VER4		4
-#define FSMC_VER5		5
-#define FSMC_VER6		6
-#define FSMC_VER7		7
-#define FSMC_VER8		8
-
-static inline uint32_t get_fsmc_version(struct fsmc_regs *regs)
-{
-	return (readl(&regs->peripid2) >> FSMC_REVISION_SHFT) &
-				FSMC_REVISION_MSK;
-}
-
 /*
  * There are 13 bytes of ecc for every 512 byte block in FSMC version 8
  * and it has to be read consecutively and immediately after the 512
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index fe8d77ebec13..9d5306bad117 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -144,6 +144,17 @@ struct mtd_info {
 	 */
 	uint32_t writesize;
 
+	/*
+	 * Size of the write buffer used by the MTD. MTD devices having a write
+	 * buffer can write multiple writesize chunks at a time. E.g. while
+	 * writing 4 * writesize bytes to a device with 2 * writesize bytes
+	 * buffer the MTD driver can (but doesn't have to) do 2 writesize
+	 * operations, but not 4. Currently, all NANDs have writebufsize
+	 * equivalent to writesize (NAND page size). Some NOR flashes do have
+	 * writebufsize greater than writesize.
+	 */
+	uint32_t writebufsize;
+
 	uint32_t oobsize;   // Amount of OOB data per block (e.g. 16)
 	uint32_t oobavail;  // Available OOB bytes per block
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 63e17d01fde9..1f489b247a29 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -448,6 +448,8 @@ struct nand_buffers {
  *			See the defines for further explanation.
  * @badblockpos:	[INTERN] position of the bad block marker in the oob
  *			area.
+ * @badblockbits:	[INTERN] number of bits to left-shift the bad block
+ *			number
  * @cellinfo:		[INTERN] MLC/multichip data from chip ident
  * @numchips:		[INTERN] number of physical chips
  * @chipsize:		[INTERN] the size of one chip for multichip arrays
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 0c8815bfae1c..ae418e41d8f5 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -118,6 +118,8 @@ struct onenand_chip {
 	int (*chip_probe)(struct mtd_info *mtd);
 	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
 	int (*scan_bbt)(struct mtd_info *mtd);
+	int (*enable)(struct mtd_info *mtd);
+	int (*disable)(struct mtd_info *mtd);
 
 	struct completion	complete;
 	int			irq;
@@ -137,6 +139,14 @@ struct onenand_chip {
 	void			*bbm;
 
 	void			*priv;
+
+	/*
+	 * Shows that the current operation is composed
+	 * of sequence of commands. For example, cache program.
+	 * Such command status OnGo bit is checked at the end of
+	 * sequence.
+	 */
+	unsigned int		ongoing;
 };
 
 /*
@@ -171,6 +181,9 @@ struct onenand_chip {
 #define ONENAND_IS_2PLANE(this)			(0)
 #endif
 
+#define ONENAND_IS_CACHE_PROGRAM(this)					\
+	(this->options & ONENAND_HAS_CACHE_PROGRAM)
+
 /* Check byte access in OneNAND */
 #define ONENAND_CHECK_BYTE_ACCESS(addr)		(addr & 0x1)
 
@@ -181,6 +194,7 @@ struct onenand_chip {
 #define ONENAND_HAS_UNLOCK_ALL		(0x0002)
 #define ONENAND_HAS_2PLANE		(0x0004)
 #define ONENAND_HAS_4KB_PAGE		(0x0008)
+#define ONENAND_HAS_CACHE_PROGRAM	(0x0010)
 #define ONENAND_SKIP_UNLOCK_CHECK	(0x0100)
 #define ONENAND_PAGEBUF_ALLOC		(0x1000)
 #define ONENAND_OOBBUF_ALLOC		(0x2000)
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 2b54316591d2..4a0a8ba90a72 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -89,7 +89,7 @@ static inline int mtd_has_cmdlinepart(void) { return 1; }
 static inline int mtd_has_cmdlinepart(void) { return 0; }
 #endif
 
-int mtd_is_master(struct mtd_info *mtd);
+int mtd_is_partition(struct mtd_info *mtd);
 int mtd_add_partition(struct mtd_info *master, char *name,
 		      long long offset, long long length);
 int mtd_del_partition(struct mtd_info *master, int partno);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 0ef22a1f129e..c84d900fbbb3 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -97,7 +97,7 @@ extern void early_init_dt_check_for_initrd(unsigned long node);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
-extern u64 early_init_dt_alloc_memory_arch(u64 size, u64 align);
+extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
 extern u64 dt_mem_next_cell(int s, __be32 **cellp);
 
 /*
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e04c4888d1fd..55cd0a0bc977 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -47,10 +47,13 @@
 #include <linux/list.h>
 #include <linux/rwsem.h>
 #include <linux/scatterlist.h>
+#include <linux/workqueue.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
+extern struct workqueue_struct *ib_wq;
+
 union ib_gid {
 	u8	raw[16];
 	struct {
diff --git a/mm/internal.h b/mm/internal.h
index 4c98630f0f77..69488205723d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,15 +39,6 @@ static inline void __put_page(struct page *page)
 
 extern unsigned long highest_memmap_pfn;
 
-#ifdef CONFIG_SMP
-extern int putback_active_lru_page(struct zone *zone, struct page *page);
-#else
-static inline int putback_active_lru_page(struct zone *zone, struct page *page)
-{
-	return 0;
-}
-#endif
-
 /*
  * in mm/vmscan.c:
  */
diff --git a/mm/swap.c b/mm/swap.c
index bbc1ce9f9460..c02f93611a84 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -178,13 +178,15 @@ void put_pages_list(struct list_head *pages)
 }
 EXPORT_SYMBOL(put_pages_list);
 
-static void pagevec_lru_move_fn(struct pagevec *pvec,
-				void (*move_fn)(struct page *page, void *arg),
-				void *arg)
+/*
+ * pagevec_move_tail() must be called with IRQ disabled.
+ * Otherwise this may cause nasty races.
+ */
+static void pagevec_move_tail(struct pagevec *pvec)
 {
 	int i;
+	int pgmoved = 0;
 	struct zone *zone = NULL;
-	unsigned long flags = 0;
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -192,41 +194,21 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 
 		if (pagezone != zone) {
 			if (zone)
-				spin_unlock_irqrestore(&zone->lru_lock, flags);
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irqsave(&zone->lru_lock, flags);
+			spin_lock(&zone->lru_lock);
+		}
+		if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+			int lru = page_lru_base_type(page);
+			list_move_tail(&page->lru, &zone->lru[lru].list);
+			pgmoved++;
 		}
-
-		(*move_fn)(page, arg);
 	}
 	if (zone)
-		spin_unlock_irqrestore(&zone->lru_lock, flags);
-	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
-	pagevec_reinit(pvec);
-}
-
-static void pagevec_move_tail_fn(struct page *page, void *arg)
-{
-	int *pgmoved = arg;
-	struct zone *zone = page_zone(page);
-
-	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		int lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lru[lru].list);
-		(*pgmoved)++;
-	}
-}
-
-/*
- * pagevec_move_tail() must be called with IRQ disabled.
- * Otherwise this may cause nasty races.
- */
-static void pagevec_move_tail(struct pagevec *pvec)
-{
-	int pgmoved = 0;
-
-	pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
+		spin_unlock(&zone->lru_lock);
 	__count_vm_events(PGROTATED, pgmoved);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
 }
 
 /*
@@ -234,7 +216,7 @@ static void pagevec_move_tail(struct pagevec *pvec)
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  * inactive list.
  */
-void rotate_reclaimable_page(struct page *page)
+void  rotate_reclaimable_page(struct page *page)
 {
 	if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
 	    !PageUnevictable(page) && PageLRU(page)) {
@@ -271,94 +253,27 @@ static void update_page_reclaim_stat(struct zone *zone, struct page *page,
 }
 
 /*
- * A page will go to active list either by activate_page or putback_lru_page.
- * In the activate_page case, the page hasn't active bit set. The page might
- * not in LRU list because it's isolated before it gets a chance to be moved to
- * active list. The window is small because pagevec just stores several pages.
- * For such case, we do nothing for such page.
- * In the putback_lru_page case, the page isn't in lru list but has active
- * bit set
+ * FIXME: speed this up?
  */
-static void __activate_page(struct page *page, void *arg)
+void activate_page(struct page *page)
 {
 	struct zone *zone = page_zone(page);
-	int file = page_is_file_cache(page);
-	int lru = page_lru_base_type(page);
-	bool putback = !PageLRU(page);
-
-	/* The page is isolated before it's moved to active list */
-	if (!PageLRU(page) && !PageActive(page))
-		return;
-	if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page))
-		return;
-
-	if (!putback)
-		del_page_from_lru_list(zone, page, lru);
-	else
-		SetPageLRU(page);
-
-	SetPageActive(page);
-	lru += LRU_ACTIVE;
-	add_page_to_lru_list(zone, page, lru);
-
-	if (putback)
-		return;
-	__count_vm_event(PGACTIVATE);
-	update_page_reclaim_stat(zone, page, file, 1);
-}
 
-#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
-
-static void activate_page_drain(int cpu)
-{
-	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
-
-	if (pagevec_count(pvec))
-		pagevec_lru_move_fn(pvec, __activate_page, NULL);
-}
-
-void activate_page(struct page *page)
-{
+	spin_lock_irq(&zone->lru_lock);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
-
-		page_cache_get(page);
-		if (!pagevec_add(pvec, page))
-			pagevec_lru_move_fn(pvec, __activate_page, NULL);
-		put_cpu_var(activate_page_pvecs);
-	}
-}
+		int file = page_is_file_cache(page);
+		int lru = page_lru_base_type(page);
+		del_page_from_lru_list(zone, page, lru);
 
-/* Caller should hold zone->lru_lock */
-int putback_active_lru_page(struct zone *zone, struct page *page)
-{
-	struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+		SetPageActive(page);
+		lru += LRU_ACTIVE;
+		add_page_to_lru_list(zone, page, lru);
+		__count_vm_event(PGACTIVATE);
 
-	if (!pagevec_add(pvec, page)) {
-		spin_unlock_irq(&zone->lru_lock);
-		pagevec_lru_move_fn(pvec, __activate_page, NULL);
-		spin_lock_irq(&zone->lru_lock);
+		update_page_reclaim_stat(zone, page, file, 1);
 	}
-	put_cpu_var(activate_page_pvecs);
-	return 1;
-}
-
-#else
-static inline void activate_page_drain(int cpu)
-{
-}
-
-void activate_page(struct page *page)
-{
-	struct zone *zone = page_zone(page);
-
-	spin_lock_irq(&zone->lru_lock);
-	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page))
-		__activate_page(page, NULL);
 	spin_unlock_irq(&zone->lru_lock);
 }
-#endif
 
 /*
  * Mark a page as having seen activity.
@@ -457,7 +372,6 @@ static void drain_cpu_pagevecs(int cpu)
 		pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
-	activate_page_drain(cpu);
 }
 
 void lru_add_drain(void)
@@ -602,33 +516,44 @@ void lru_add_page_tail(struct zone* zone,
 	}
 }
 
-static void ____pagevec_lru_add_fn(struct page *page, void *arg)
-{
-	enum lru_list lru = (enum lru_list)arg;
-	struct zone *zone = page_zone(page);
-	int file = is_file_lru(lru);
-	int active = is_active_lru(lru);
-
-	VM_BUG_ON(PageActive(page));
-	VM_BUG_ON(PageUnevictable(page));
-	VM_BUG_ON(PageLRU(page));
-
-	SetPageLRU(page);
-	if (active)
-		SetPageActive(page);
-	update_page_reclaim_stat(zone, page, file, active);
-	add_page_to_lru_list(zone, page, lru);
-}
-
 /*
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
+	int i;
+	struct zone *zone = NULL;
+
 	VM_BUG_ON(is_unevictable_lru(lru));
 
-	pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+		int file;
+		int active;
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(PageUnevictable(page));
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+		active = is_active_lru(lru);
+		file = is_file_lru(lru);
+		if (active)
+			SetPageActive(page);
+		update_page_reclaim_stat(zone, page, file, active);
+		add_page_to_lru_list(zone, page, lru);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
 }
 
 EXPORT_SYMBOL(____pagevec_lru_add);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 99999a9b2b0b..47a50962ce81 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1271,16 +1271,14 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
 			spin_lock_irq(&zone->lru_lock);
 			continue;
 		}
+		SetPageLRU(page);
 		lru = page_lru(page);
+		add_page_to_lru_list(zone, page, lru);
 		if (is_active_lru(lru)) {
 			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
-			if (putback_active_lru_page(zone, page))
-				continue;
 		}
-		SetPageLRU(page);
-		add_page_to_lru_list(zone, page, lru);
 		if (!pagevec_add(&pvec, page)) {
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);